1 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8-*- */
2 
3 /* libcroco - Library for parsing and applying CSS
4  * Copyright (C) 2006-2019 Free Software Foundation, Inc.
5  *
6  * This file is not part of the GNU gettext program, but is used with
7  * GNU gettext.
8  *
9  * The original copyright notice is as follows:
10  */
11 
12 /*
13  * This file is part of The Croco Library
14  *
15  * Copyright (C) 2003-2004 Dodji Seketeli.  All Rights Reserved.
16  *
17  * This program is free software; you can redistribute it and/or
18  * modify it under the terms of version 2.1 of the GNU Lesser General Public
19  * License as published by the Free Software Foundation.
20  *
21  * This program is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with this program; if not, write to the Free Software
28  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
29  * USA
30  *
31  * Author: Dodji Seketeli
32  */
33 
34 #include <config.h>
35 #include "stdio.h"
36 #include <string.h>
37 #include "cr-input.h"
38 #include "cr-enc-handler.h"
39 
40 /**
41  *@CRInput:
42  *
43  *The definition of the #CRInput class.
44  */
45 
46 /*******************
47  *Private type defs
48  *******************/
49 
50 /**
51  *The private attributes of
52  *the #CRInputPriv class.
53  */
54 struct _CRInputPriv {
55         /*
56          *The input buffer
57          */
58         guchar *in_buf;
59         gulong in_buf_size;
60 
61         gulong nb_bytes;
62 
63         /*
64          *The index of the next byte
65          *to be read.
66          */
67         gulong next_byte_index;
68 
69         /*
70          *The current line number
71          */
72         gulong line;
73 
74         /*
75          *The current col number
76          */
77         gulong col;
78 
79         gboolean end_of_line;
80         gboolean end_of_input;
81 
82         /*
83          *the reference count of this
84          *instance.
85          */
86         guint ref_count;
87         gboolean free_in_buf;
88 };
89 
/*Gives access to the private part of a #CRInput instance.*/
#define PRIVATE(object) ((object)->priv)

/***************************
 *private constants
 **************************/

/*
 *Size, in bytes, of the chunks used to load a file.
 *The expansion is parenthesised so that the macro can be used
 *safely inside a larger expression (e.g. as a divisor).
 */
#define CR_INPUT_MEM_CHUNK_SIZE (1024 * 4)
96 
97 static CRInput *cr_input_new_real (void);
98 
99 static CRInput *
cr_input_new_real(void)100 cr_input_new_real (void)
101 {
102         CRInput *result = NULL;
103 
104         result = g_try_malloc (sizeof (CRInput));
105         if (!result) {
106                 cr_utils_trace_info ("Out of memory");
107                 return NULL;
108         }
109         memset (result, 0, sizeof (CRInput));
110 
111         PRIVATE (result) = g_try_malloc (sizeof (CRInputPriv));
112         if (!PRIVATE (result)) {
113                 cr_utils_trace_info ("Out of memory");
114                 g_free (result);
115                 return NULL;
116         }
117         memset (PRIVATE (result), 0, sizeof (CRInputPriv));
118         PRIVATE (result)->free_in_buf = TRUE;
119         return result;
120 }
121 
122 /****************
123  *Public methods
124  ***************/
125 
126 /**
127  * cr_input_new_from_buf:
128  *@a_buf: the memory buffer to create the input stream from.
129  *The #CRInput keeps this pointer so user should not free it !.
130  *@a_len: the size of the input buffer.
131  *@a_enc: the buffer's encoding.
132  *@a_free_buf: if set to TRUE, this a_buf will be freed
133  *at the destruction of this instance. If set to false, it is up
134  *to the caller to free it.
135  *
136  *Creates a new input stream from a memory buffer.
137  *Returns the newly built instance of #CRInput.
138  */
139 CRInput *
cr_input_new_from_buf(guchar * a_buf,gulong a_len,enum CREncoding a_enc,gboolean a_free_buf)140 cr_input_new_from_buf (guchar * a_buf,
141                        gulong a_len,
142                        enum CREncoding a_enc,
143                        gboolean a_free_buf)
144 {
145         CRInput *result = NULL;
146         enum CRStatus status = CR_OK;
147         CREncHandler *enc_handler = NULL;
148         gulong len = a_len;
149 
150         g_return_val_if_fail (a_buf, NULL);
151 
152         result = cr_input_new_real ();
153         g_return_val_if_fail (result, NULL);
154 
155         /*transform the encoding in utf8 */
156         if (a_enc != CR_UTF_8) {
157                 enc_handler = cr_enc_handler_get_instance (a_enc);
158                 if (!enc_handler) {
159                         goto error;
160                 }
161 
162                 status = cr_enc_handler_convert_input
163                         (enc_handler, a_buf, &len,
164                          &PRIVATE (result)->in_buf,
165                          &PRIVATE (result)->in_buf_size);
166                 if (status != CR_OK)
167                         goto error;
168                 PRIVATE (result)->free_in_buf = TRUE;
169                 if (a_free_buf == TRUE && a_buf) {
170                         g_free (a_buf) ;
171                         a_buf = NULL ;
172                 }
173                 PRIVATE (result)->nb_bytes = PRIVATE (result)->in_buf_size;
174         } else {
175                 PRIVATE (result)->in_buf = (guchar *) a_buf;
176                 PRIVATE (result)->in_buf_size = a_len;
177                 PRIVATE (result)->nb_bytes = a_len;
178                 PRIVATE (result)->free_in_buf = a_free_buf;
179         }
180         PRIVATE (result)->line = 1;
181         PRIVATE (result)->col =  0;
182         return result;
183 
184  error:
185         if (result) {
186                 cr_input_destroy (result);
187                 result = NULL;
188         }
189 
190         return NULL;
191 }
192 
193 /**
194  * cr_input_new_from_uri:
195  *@a_file_uri: the file to create *the input stream from.
196  *@a_enc: the encoding of the file *to create the input from.
197  *
198  *Creates a new input stream from
199  *a file.
200  *
201  *Returns the newly created input stream if
202  *this method could read the file and create it,
203  *NULL otherwise.
204  */
205 
206 CRInput *
cr_input_new_from_uri(const gchar * a_file_uri,enum CREncoding a_enc)207 cr_input_new_from_uri (const gchar * a_file_uri, enum CREncoding a_enc)
208 {
209         CRInput *result = NULL;
210         enum CRStatus status = CR_OK;
211         FILE *file_ptr = NULL;
212         guchar tmp_buf[CR_INPUT_MEM_CHUNK_SIZE] = { 0 };
213         gulong nb_read = 0,
214                 len = 0,
215                 buf_size = 0;
216         gboolean loop = TRUE;
217         guchar *buf = NULL;
218 
219         g_return_val_if_fail (a_file_uri, NULL);
220 
221         file_ptr = fopen (a_file_uri, "r");
222 
223         if (file_ptr == NULL) {
224 
225 #ifdef CR_DEBUG
226                 cr_utils_trace_debug ("could not open file");
227 #endif
228                 g_warning ("Could not open file %s\n", a_file_uri);
229 
230                 return NULL;
231         }
232 
233         /*load the file */
234         while (loop) {
235                 nb_read = fread (tmp_buf, 1 /*read bytes */ ,
236                                  CR_INPUT_MEM_CHUNK_SIZE /*nb of bytes */ ,
237                                  file_ptr);
238 
239                 if (nb_read != CR_INPUT_MEM_CHUNK_SIZE) {
240                         /*we read less chars than we wanted */
241                         if (feof (file_ptr)) {
242                                 /*we reached eof */
243                                 loop = FALSE;
244                         } else {
245                                 /*a pb occurred !! */
246                                 cr_utils_trace_debug ("an io error occurred");
247                                 status = CR_ERROR;
248                                 goto cleanup;
249                         }
250                 }
251 
252                 if (status == CR_OK) {
253                         /*read went well */
254                         buf = g_realloc (buf, len + CR_INPUT_MEM_CHUNK_SIZE);
255                         memcpy (buf + len, tmp_buf, nb_read);
256                         len += nb_read;
257                         buf_size += CR_INPUT_MEM_CHUNK_SIZE;
258                 }
259         }
260 
261         if (status == CR_OK) {
262                 result = cr_input_new_from_buf (buf, len, a_enc, TRUE);
263                 if (!result) {
264                         goto cleanup;
265                 }
266                 /*
267                  *we should  free buf here because it's own by CRInput.
268                  *(see the last parameter of cr_input_new_from_buf().
269                  */
270                 buf = NULL;
271         }
272 
273  cleanup:
274         if (file_ptr) {
275                 fclose (file_ptr);
276                 file_ptr = NULL;
277         }
278 
279         if (buf) {
280                 g_free (buf);
281                 buf = NULL;
282         }
283 
284         return result;
285 }
286 
287 /**
288  * cr_input_destroy:
289  *@a_this: the current instance of #CRInput.
290  *
291  *The destructor of the #CRInput class.
292  */
293 void
cr_input_destroy(CRInput * a_this)294 cr_input_destroy (CRInput * a_this)
295 {
296         if (a_this == NULL)
297                 return;
298 
299         if (PRIVATE (a_this)) {
300                 if (PRIVATE (a_this)->in_buf && PRIVATE (a_this)->free_in_buf) {
301                         g_free (PRIVATE (a_this)->in_buf);
302                         PRIVATE (a_this)->in_buf = NULL;
303                 }
304 
305                 g_free (PRIVATE (a_this));
306                 PRIVATE (a_this) = NULL;
307         }
308 
309         g_free (a_this);
310 }
311 
312 /**
313  * cr_input_ref:
314  *@a_this: the current instance of #CRInput.
315  *
316  *Increments the reference count of the current
317  *instance of #CRInput.
318  */
319 void
cr_input_ref(CRInput * a_this)320 cr_input_ref (CRInput * a_this)
321 {
322         g_return_if_fail (a_this && PRIVATE (a_this));
323 
324         PRIVATE (a_this)->ref_count++;
325 }
326 
327 /**
328  * cr_input_unref:
329  *@a_this: the current instance of #CRInput.
330  *
331  *Decrements the reference count of this instance
332  *of #CRInput. If the reference count goes down to
333  *zero, this instance is destroyed.
334  *
335  * Returns TRUE if the instance of #CRInput got destroyed, false otherwise.
336  */
337 gboolean
cr_input_unref(CRInput * a_this)338 cr_input_unref (CRInput * a_this)
339 {
340         g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE);
341 
342         if (PRIVATE (a_this)->ref_count) {
343                 PRIVATE (a_this)->ref_count--;
344         }
345 
346         if (PRIVATE (a_this)->ref_count == 0) {
347                 cr_input_destroy (a_this);
348                 return TRUE;
349         }
350         return FALSE;
351 }
352 
353 /**
354  * cr_input_end_of_input:
355  *@a_this: the current instance of #CRInput.
356  *@a_end_of_input: out parameter. Is set to TRUE if
357  *the current instance has reached the end of its input buffer,
358  *FALSE otherwise.
359  *
360  *Tests wether the current instance of
361  *#CRInput has reached its input buffer.
362  *
363  * Returns CR_OK upon successful completion, an error code otherwise.
364  * Note that all the out parameters of this method are valid if
365  * and only if this method returns CR_OK.
366  */
367 enum CRStatus
cr_input_end_of_input(CRInput const * a_this,gboolean * a_end_of_input)368 cr_input_end_of_input (CRInput const * a_this, gboolean * a_end_of_input)
369 {
370         g_return_val_if_fail (a_this && PRIVATE (a_this)
371                               && a_end_of_input, CR_BAD_PARAM_ERROR);
372 
373         *a_end_of_input = (PRIVATE (a_this)->next_byte_index
374                            >= PRIVATE (a_this)->in_buf_size) ? TRUE : FALSE;
375 
376         return CR_OK;
377 }
378 
379 /**
380  * cr_input_get_nb_bytes_left:
381  *@a_this: the current instance of #CRInput.
382  *
383  *Returns the number of bytes left in the input stream
384  *before the end, -1 in case of error.
385  */
386 glong
cr_input_get_nb_bytes_left(CRInput const * a_this)387 cr_input_get_nb_bytes_left (CRInput const * a_this)
388 {
389         g_return_val_if_fail (a_this && PRIVATE (a_this), -1);
390         g_return_val_if_fail (PRIVATE (a_this)->nb_bytes
391                               <= PRIVATE (a_this)->in_buf_size, -1);
392         g_return_val_if_fail (PRIVATE (a_this)->next_byte_index
393                               <= PRIVATE (a_this)->nb_bytes, -1);
394 
395         if (PRIVATE (a_this)->end_of_input)
396                 return 0;
397 
398         return PRIVATE (a_this)->nb_bytes - PRIVATE (a_this)->next_byte_index;
399 }
400 
401 /**
402  * cr_input_read_byte:
403  *@a_this: the current instance of #CRInput.
404  *@a_byte: out parameter the returned byte.
405  *
406  *Gets the next byte of the input.
407  *Updates the state of the input so that
408  *the next invocation of this method  returns
409  *the next coming byte.
410  *
411  *Returns CR_OK upon successful completion, an error code
412  *otherwise. All the out parameters of this method are valid if
413  *and only if this method returns CR_OK.
414  */
415 enum CRStatus
cr_input_read_byte(CRInput * a_this,guchar * a_byte)416 cr_input_read_byte (CRInput * a_this, guchar * a_byte)
417 {
418         gulong nb_bytes_left = 0;
419 
420         g_return_val_if_fail (a_this && PRIVATE (a_this)
421                               && a_byte, CR_BAD_PARAM_ERROR);
422 
423         g_return_val_if_fail (PRIVATE (a_this)->next_byte_index <=
424                               PRIVATE (a_this)->nb_bytes, CR_BAD_PARAM_ERROR);
425 
426         if (PRIVATE (a_this)->end_of_input == TRUE)
427                 return CR_END_OF_INPUT_ERROR;
428 
429         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
430 
431         if (nb_bytes_left < 1) {
432                 return CR_END_OF_INPUT_ERROR;
433         }
434 
435         *a_byte = PRIVATE (a_this)->in_buf[PRIVATE (a_this)->next_byte_index];
436 
437         if (PRIVATE (a_this)->nb_bytes -
438             PRIVATE (a_this)->next_byte_index < 2) {
439                 PRIVATE (a_this)->end_of_input = TRUE;
440         } else {
441                 PRIVATE (a_this)->next_byte_index++;
442         }
443 
444         return CR_OK;
445 }
446 
447 /**
448  * cr_input_read_char:
449  *@a_this: the current instance of CRInput.
450  *@a_char: out parameter. The read character.
451  *
452  *Reads an unicode character from the current instance of
453  *#CRInput.
454  *
455  *Returns CR_OK upon successful completion, an error code
456  *otherwise.
457  */
458 enum CRStatus
cr_input_read_char(CRInput * a_this,guint32 * a_char)459 cr_input_read_char (CRInput * a_this, guint32 * a_char)
460 {
461         enum CRStatus status = CR_OK;
462         gulong consumed = 0,
463                 nb_bytes_left = 0;
464 
465         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char,
466                               CR_BAD_PARAM_ERROR);
467 
468         if (PRIVATE (a_this)->end_of_input == TRUE)
469                 return CR_END_OF_INPUT_ERROR;
470 
471         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
472 
473         if (nb_bytes_left < 1) {
474                 return CR_END_OF_INPUT_ERROR;
475         }
476 
477         status = cr_utils_read_char_from_utf8_buf
478                 (PRIVATE (a_this)->in_buf
479                  +
480                  PRIVATE (a_this)->next_byte_index,
481                  nb_bytes_left, a_char, &consumed);
482 
483         if (status == CR_OK) {
484                 /*update next byte index */
485                 PRIVATE (a_this)->next_byte_index += consumed;
486 
487                 /*update line and column number */
488                 if (PRIVATE (a_this)->end_of_line == TRUE) {
489                         PRIVATE (a_this)->col = 1;
490                         PRIVATE (a_this)->line++;
491                         PRIVATE (a_this)->end_of_line = FALSE;
492                 } else if (*a_char != '\n') {
493                         PRIVATE (a_this)->col++;
494                 }
495 
496                 if (*a_char == '\n') {
497                         PRIVATE (a_this)->end_of_line = TRUE;
498                 }
499         }
500 
501         return status;
502 }
503 
504 /**
505  * cr_input_set_line_num:
506  *@a_this: the "this pointer" of the current instance of #CRInput.
507  *@a_line_num: the new line number.
508  *
509  *Setter of the current line number.
510  *
511  *Return CR_OK upon successful completion, an error code otherwise.
512  */
513 enum CRStatus
cr_input_set_line_num(CRInput * a_this,glong a_line_num)514 cr_input_set_line_num (CRInput * a_this, glong a_line_num)
515 {
516         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
517 
518         PRIVATE (a_this)->line = a_line_num;
519 
520         return CR_OK;
521 }
522 
523 /**
524  * cr_input_get_line_num:
525  *@a_this: the "this pointer" of the current instance of #CRInput.
526  *@a_line_num: the returned line number.
527  *
528  *Getter of the current line number.
529  *
530  *Returns CR_OK upon successful completion, an error code otherwise.
531  */
532 enum CRStatus
cr_input_get_line_num(CRInput const * a_this,glong * a_line_num)533 cr_input_get_line_num (CRInput const * a_this, glong * a_line_num)
534 {
535         g_return_val_if_fail (a_this && PRIVATE (a_this)
536                               && a_line_num, CR_BAD_PARAM_ERROR);
537 
538         *a_line_num = PRIVATE (a_this)->line;
539 
540         return CR_OK;
541 }
542 
543 /**
544  * cr_input_set_column_num:
545  *@a_this: the "this pointer" of the current instance of #CRInput.
546  *@a_col: the new column number.
547  *
548  *Setter of the current column number.
549  *
550  *Returns CR_OK upon successful completion, an error code otherwise.
551  */
552 enum CRStatus
cr_input_set_column_num(CRInput * a_this,glong a_col)553 cr_input_set_column_num (CRInput * a_this, glong a_col)
554 {
555         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
556 
557         PRIVATE (a_this)->col = a_col;
558 
559         return CR_OK;
560 }
561 
562 /**
563  * cr_input_get_column_num:
564  *@a_this: the "this pointer" of the current instance of #CRInput.
565  *@a_col: out parameter
566  *
567  *Getter of the current column number.
568  *
569  *Returns CR_OK upon successful completion, an error code otherwise.
570  */
571 enum CRStatus
cr_input_get_column_num(CRInput const * a_this,glong * a_col)572 cr_input_get_column_num (CRInput const * a_this, glong * a_col)
573 {
574         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_col,
575                               CR_BAD_PARAM_ERROR);
576 
577         *a_col = PRIVATE (a_this)->col;
578 
579         return CR_OK;
580 }
581 
582 /**
583  * cr_input_increment_line_num:
584  *@a_this: the "this pointer" of the current instance of #CRInput.
585  *@a_increment: the increment to add to the line number.
586  *
587  *Increments the current line number.
588  *
589  *Returns CR_OK upon successful completion, an error code otherwise.
590  */
591 enum CRStatus
cr_input_increment_line_num(CRInput * a_this,glong a_increment)592 cr_input_increment_line_num (CRInput * a_this, glong a_increment)
593 {
594         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
595 
596         PRIVATE (a_this)->line += a_increment;
597 
598         return CR_OK;
599 }
600 
601 /**
602  * cr_input_increment_col_num:
603  *@a_this: the "this pointer" of the current instance of #CRInput.
604  *@a_increment: the increment to add to the column number.
605  *
606  *Increments the current column number.
607  *
608  *Returns CR_OK upon successful completion, an error code otherwise.
609  */
610 enum CRStatus
cr_input_increment_col_num(CRInput * a_this,glong a_increment)611 cr_input_increment_col_num (CRInput * a_this, glong a_increment)
612 {
613         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
614 
615         PRIVATE (a_this)->col += a_increment;
616 
617         return CR_OK;
618 }
619 
620 /**
621  * cr_input_consume_char:
622  *@a_this: the this pointer.
623  *@a_char: the character to consume. If set to zero,
624  *consumes any character.
625  *
626  *Consumes the next character of the input stream if
627  *and only if that character equals a_char.
628  *
629  *Returns CR_OK upon successful completion, CR_PARSING_ERROR if
630  *next char is different from a_char, an other error code otherwise
631  */
632 enum CRStatus
cr_input_consume_char(CRInput * a_this,guint32 a_char)633 cr_input_consume_char (CRInput * a_this, guint32 a_char)
634 {
635         guint32 c;
636         enum CRStatus status;
637 
638         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
639 
640         if ((status = cr_input_peek_char (a_this, &c)) != CR_OK) {
641                 return status;
642         }
643 
644         if (c == a_char || a_char == 0) {
645                 status = cr_input_read_char (a_this, &c);
646         } else {
647                 return CR_PARSING_ERROR;
648         }
649 
650         return status;
651 }
652 
653 /**
654  * cr_input_consume_chars:
655  *@a_this: the this pointer of the current instance of #CRInput.
656  *@a_char: the character to consume.
657  *@a_nb_char: in/out parameter. The number of characters to consume.
658  *If set to a negative value, the function will consume all the occurences
659  *of a_char found.
660  *After return, if the return value equals CR_OK, this variable contains
661  *the number of characters actually consumed.
662  *
663  *Consumes up to a_nb_char occurences of the next contiguous characters
664  *which equal a_char. Note that the next character of the input stream
665  **MUST* equal a_char to trigger the consumption, or else, the error
666  *code CR_PARSING_ERROR is returned.
667  *If the number of contiguous characters that equals a_char is less than
668  *a_nb_char, then this function consumes all the characters it can consume.
669  *
670  *Returns CR_OK if at least one character has been consumed, an error code
671  *otherwise.
672  */
673 enum CRStatus
cr_input_consume_chars(CRInput * a_this,guint32 a_char,gulong * a_nb_char)674 cr_input_consume_chars (CRInput * a_this, guint32 a_char, gulong * a_nb_char)
675 {
676         enum CRStatus status = CR_OK;
677         gulong nb_consumed = 0;
678 
679         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_char,
680                               CR_BAD_PARAM_ERROR);
681 
682         g_return_val_if_fail (a_char != 0 || a_nb_char != NULL,
683                               CR_BAD_PARAM_ERROR);
684 
685         for (nb_consumed = 0; ((status == CR_OK)
686                                && (*a_nb_char > 0
687                                    && nb_consumed < *a_nb_char));
688              nb_consumed++) {
689                 status = cr_input_consume_char (a_this, a_char);
690         }
691 
692         *a_nb_char = nb_consumed;
693 
694         if ((nb_consumed > 0)
695             && ((status == CR_PARSING_ERROR)
696                 || (status == CR_END_OF_INPUT_ERROR))) {
697                 status = CR_OK;
698         }
699 
700         return status;
701 }
702 
703 /**
704  * cr_input_consume_white_spaces:
705  *@a_this: the "this pointer" of the current instance of #CRInput.
706  *@a_nb_chars: in/out parameter. The number of white spaces to
707  *consume. After return, holds the number of white spaces actually consumed.
708  *
709  *Same as cr_input_consume_chars() but this one consumes white
710  *spaces.
711  *
712  *Returns CR_OK upon successful completion, an error code otherwise.
713  */
714 enum CRStatus
cr_input_consume_white_spaces(CRInput * a_this,gulong * a_nb_chars)715 cr_input_consume_white_spaces (CRInput * a_this, gulong * a_nb_chars)
716 {
717         enum CRStatus status = CR_OK;
718         guint32 cur_char = 0,
719                 nb_consumed = 0;
720 
721         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_chars,
722                               CR_BAD_PARAM_ERROR);
723 
724         for (nb_consumed = 0;
725              ((*a_nb_chars > 0) && (nb_consumed < *a_nb_chars));
726              nb_consumed++) {
727                 status = cr_input_peek_char (a_this, &cur_char);
728                 if (status != CR_OK)
729                         break;
730 
731                 /*if the next char is a white space, consume it ! */
732                 if (cr_utils_is_white_space (cur_char) == TRUE) {
733                         status = cr_input_read_char (a_this, &cur_char);
734                         if (status != CR_OK)
735                                 break;
736                         continue;
737                 }
738 
739                 break;
740 
741         }
742 
743 	*a_nb_chars = (gulong) nb_consumed;
744 
745         if (nb_consumed && status == CR_END_OF_INPUT_ERROR) {
746                 status = CR_OK;
747         }
748 
749         return status;
750 }
751 
752 /**
753  * cr_input_peek_char:
754  *@a_this: the current instance of #CRInput.
755  *@a_char: out parameter. The returned character.
756  *
757  *Same as cr_input_read_char() but does not update the
758  *internal state of the input stream. The next call
759  *to cr_input_peek_char() or cr_input_read_char() will thus
760  *return the same character as the current one.
761  *
762  *Returns CR_OK upon successful completion, an error code
763  *otherwise.
764  */
765 enum CRStatus
cr_input_peek_char(CRInput const * a_this,guint32 * a_char)766 cr_input_peek_char (CRInput const * a_this, guint32 * a_char)
767 {
768         enum CRStatus status = CR_OK;
769         gulong consumed = 0,
770                 nb_bytes_left = 0;
771 
772         g_return_val_if_fail (a_this && PRIVATE (a_this)
773                               && a_char, CR_BAD_PARAM_ERROR);
774 
775         if (PRIVATE (a_this)->next_byte_index >=
776             PRIVATE (a_this)->in_buf_size) {
777                 return CR_END_OF_INPUT_ERROR;
778         }
779 
780         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
781 
782         if (nb_bytes_left < 1) {
783                 return CR_END_OF_INPUT_ERROR;
784         }
785 
786         status = cr_utils_read_char_from_utf8_buf
787                 (PRIVATE (a_this)->in_buf +
788                  PRIVATE (a_this)->next_byte_index,
789                  nb_bytes_left, a_char, &consumed);
790 
791         return status;
792 }
793 
794 /**
795  * cr_input_peek_byte:
796  *@a_this: the current instance of #CRInput.
797  *@a_origin: the origin to consider in the calculation
798  *of the position of the byte to peek.
799  *@a_offset: the offset of the byte to peek, starting from
800  *the origin specified by a_origin.
801  *@a_byte: out parameter the peeked byte.
802  *
803  *Gets a byte from the input stream,
804  *starting from the current position in the input stream.
805  *Unlike cr_input_peek_next_byte() this method
806  *does not update the state of the current input stream.
807  *Subsequent calls to cr_input_peek_byte with the same arguments
808  *will return the same byte.
809  *
810  *Returns CR_OK upon successful completion or,
811  *CR_BAD_PARAM_ERROR if at least one of the parameters is invalid;
812  *CR_OUT_OF_BOUNDS_ERROR if the indexed byte is out of bounds.
813  */
814 enum CRStatus
cr_input_peek_byte(CRInput const * a_this,enum CRSeekPos a_origin,gulong a_offset,guchar * a_byte)815 cr_input_peek_byte (CRInput const * a_this, enum CRSeekPos a_origin,
816                     gulong a_offset, guchar * a_byte)
817 {
818         gulong abs_offset = 0;
819 
820         g_return_val_if_fail (a_this && PRIVATE (a_this)
821                               && a_byte, CR_BAD_PARAM_ERROR);
822 
823         switch (a_origin) {
824 
825         case CR_SEEK_CUR:
826                 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_offset;
827                 break;
828 
829         case CR_SEEK_BEGIN:
830                 abs_offset = a_offset;
831                 break;
832 
833         case CR_SEEK_END:
834                 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_offset;
835                 break;
836 
837         default:
838                 return CR_BAD_PARAM_ERROR;
839         }
840 
841         if (abs_offset < PRIVATE (a_this)->in_buf_size) {
842 
843                 *a_byte = PRIVATE (a_this)->in_buf[abs_offset];
844 
845                 return CR_OK;
846 
847         } else {
848                 return CR_END_OF_INPUT_ERROR;
849         }
850 }
851 
852 /**
853  * cr_input_peek_byte2:
854  *@a_this: the current byte input stream.
855  *@a_offset: the offset of the byte to peek, starting
856  *from the current input position pointer.
857  *@a_eof: out parameter. Is set to true is we reach end of
858  *stream. If set to NULL by the caller, this parameter is not taken
859  *in account.
860  *
861  *Same as cr_input_peek_byte() but with a simplified
862  *interface.
863  *
864  *Returns the read byte or 0 if something bad happened.
865  */
866 guchar
cr_input_peek_byte2(CRInput const * a_this,gulong a_offset,gboolean * a_eof)867 cr_input_peek_byte2 (CRInput const * a_this, gulong a_offset, gboolean * a_eof)
868 {
869         guchar result = 0;
870         enum CRStatus status = CR_ERROR;
871 
872         g_return_val_if_fail (a_this && PRIVATE (a_this), 0);
873 
874         if (a_eof)
875                 *a_eof = FALSE;
876 
877         status = cr_input_peek_byte (a_this, CR_SEEK_CUR, a_offset, &result);
878 
879         if ((status == CR_END_OF_INPUT_ERROR)
880             && a_eof)
881                 *a_eof = TRUE;
882 
883         return result;
884 }
885 
886 /**
887  * cr_input_get_byte_addr:
888  *@a_this: the current instance of #CRInput.
889  *@a_offset: the offset of the byte in the input stream starting
890  *from the beginning of the stream.
891  *
892  *Gets the memory address of the byte located at a given offset
893  *in the input stream.
894  *
895  *Returns the address, otherwise NULL if an error occurred.
896  */
897 guchar *
cr_input_get_byte_addr(CRInput * a_this,gulong a_offset)898 cr_input_get_byte_addr (CRInput * a_this, gulong a_offset)
899 {
900         g_return_val_if_fail (a_this && PRIVATE (a_this), NULL);
901 
902         if (a_offset >= PRIVATE (a_this)->nb_bytes) {
903                 return NULL;
904         }
905 
906         return &PRIVATE (a_this)->in_buf[a_offset];
907 }
908 
909 /**
910  * cr_input_get_cur_byte_addr:
911  *@a_this: the current input stream
912  *@a_offset: out parameter. The returned address.
913  *
914  *Gets the address of the current character pointer.
915  *
916  *Returns CR_OK upon successful completion, an error code otherwise.
917  */
918 enum CRStatus
cr_input_get_cur_byte_addr(CRInput * a_this,guchar ** a_offset)919 cr_input_get_cur_byte_addr (CRInput * a_this, guchar ** a_offset)
920 {
921         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_offset,
922                               CR_BAD_PARAM_ERROR);
923 
924         if (!PRIVATE (a_this)->next_byte_index) {
925                 return CR_START_OF_INPUT_ERROR;
926         }
927 
928         *a_offset = cr_input_get_byte_addr
929                 (a_this, PRIVATE (a_this)->next_byte_index - 1);
930 
931         return CR_OK;
932 }
933 
934 /**
935  * cr_input_seek_index:
936  *@a_this: the current instance of #CRInput.
937  *@a_origin: the origin to consider during the calculation
938  *of the absolute position of the new "current byte index".
939  *@a_pos: the relative offset of the new "current byte index."
940  *This offset is relative to the origin a_origin.
941  *
942  *Sets the "current byte index" of the current instance
943  *of #CRInput. Next call to cr_input_get_byte() will return
944  *the byte next after the new "current byte index".
945  *
946  *Returns CR_OK upon successful completion otherwise returns
947  *CR_BAD_PARAM_ERROR if at least one of the parameters is not valid
948  *or CR_OUT_BOUNDS_ERROR in case of error.
949  */
950 enum CRStatus
cr_input_seek_index(CRInput * a_this,enum CRSeekPos a_origin,gint a_pos)951 cr_input_seek_index (CRInput * a_this, enum CRSeekPos a_origin, gint a_pos)
952 {
953 
954         glong abs_offset = 0;
955 
956         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
957 
958         switch (a_origin) {
959 
960         case CR_SEEK_CUR:
961                 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_pos;
962                 break;
963 
964         case CR_SEEK_BEGIN:
965                 abs_offset = a_pos;
966                 break;
967 
968         case CR_SEEK_END:
969                 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_pos;
970                 break;
971 
972         default:
973                 return CR_BAD_PARAM_ERROR;
974         }
975 
976         if ((abs_offset > 0)
977             && (gulong) abs_offset < PRIVATE (a_this)->nb_bytes) {
978 
979                 /*update the input stream's internal state */
980                 PRIVATE (a_this)->next_byte_index = abs_offset + 1;
981 
982                 return CR_OK;
983         }
984 
985         return CR_OUT_OF_BOUNDS_ERROR;
986 }
987 
988 /**
989  * cr_input_get_cur_pos:
990  *@a_this: the current instance of #CRInput.
991  *@a_pos: out parameter. The returned position.
992  *
993  *Gets the position of the "current byte index" which
994  *is basically the position of the last returned byte in the
995  *input stream.
996  *
997  *Returns CR_OK upon successful completion. Otherwise,
998  *CR_BAD_PARAMETER_ERROR if at least one of the arguments is invalid.
999  *CR_START_OF_INPUT if no call to either cr_input_read_byte()
1000  *or cr_input_seek_index() have been issued before calling
1001  *cr_input_get_cur_pos()
1002  *Note that the out parameters of this function are valid if and only if this
1003  *function returns CR_OK.
1004  */
1005 enum CRStatus
cr_input_get_cur_pos(CRInput const * a_this,CRInputPos * a_pos)1006 cr_input_get_cur_pos (CRInput const * a_this, CRInputPos * a_pos)
1007 {
1008         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
1009                               CR_BAD_PARAM_ERROR);
1010 
1011         a_pos->next_byte_index = PRIVATE (a_this)->next_byte_index;
1012         a_pos->line = PRIVATE (a_this)->line;
1013         a_pos->col = PRIVATE (a_this)->col;
1014         a_pos->end_of_line = PRIVATE (a_this)->end_of_line;
1015         a_pos->end_of_file = PRIVATE (a_this)->end_of_input;
1016 
1017         return CR_OK;
1018 }
1019 
1020 /**
1021  * cr_input_get_parsing_location:
1022  *@a_this: the current instance of #CRInput
1023  *@a_loc: the set parsing location.
1024  *
1025  *Gets the current parsing location.
1026  *The Parsing location is a public datastructure that
1027  *represents the current line/column/byte offset/ in the input
1028  *stream.
1029  *
1030  *Returns CR_OK upon successful completion, an error
1031  *code otherwise.
1032  */
1033 enum CRStatus
cr_input_get_parsing_location(CRInput const * a_this,CRParsingLocation * a_loc)1034 cr_input_get_parsing_location (CRInput const *a_this,
1035                                CRParsingLocation *a_loc)
1036 {
1037         g_return_val_if_fail (a_this
1038                               && PRIVATE (a_this)
1039                               && a_loc,
1040                               CR_BAD_PARAM_ERROR) ;
1041 
1042         a_loc->line = PRIVATE (a_this)->line ;
1043         a_loc->column = PRIVATE (a_this)->col ;
1044         if (PRIVATE (a_this)->next_byte_index) {
1045                 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index - 1 ;
1046         } else {
1047                 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index  ;
1048         }
1049         return CR_OK ;
1050 }
1051 
1052 /**
1053  * cr_input_get_cur_index:
1054  *@a_this: the "this pointer" of the current instance of
1055  *#CRInput
1056  *@a_index: out parameter. The returned index.
1057  *
1058  *Getter of the next byte index.
1059  *It actually returns the index of the
1060  *next byte to be read.
1061  *
1062  *Returns CR_OK upon successful completion, an error code
1063  *otherwise.
1064  */
1065 enum CRStatus
cr_input_get_cur_index(CRInput const * a_this,glong * a_index)1066 cr_input_get_cur_index (CRInput const * a_this, glong * a_index)
1067 {
1068         g_return_val_if_fail (a_this && PRIVATE (a_this)
1069                               && a_index, CR_BAD_PARAM_ERROR);
1070 
1071         *a_index = PRIVATE (a_this)->next_byte_index;
1072 
1073         return CR_OK;
1074 }
1075 
1076 /**
1077  * cr_input_set_cur_index:
1078  *@a_this: the "this pointer" of the current instance
1079  *of #CRInput .
1080  *@a_index: the new index to set.
1081  *
1082  *Setter of the next byte index.
1083  *It sets the index of the next byte to be read.
1084  *
1085  *Returns CR_OK upon successful completion, an error code otherwise.
1086  */
1087 enum CRStatus
cr_input_set_cur_index(CRInput * a_this,glong a_index)1088 cr_input_set_cur_index (CRInput * a_this, glong a_index)
1089 {
1090         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1091 
1092         PRIVATE (a_this)->next_byte_index = a_index;
1093 
1094         return CR_OK;
1095 }
1096 
1097 /**
1098  * cr_input_set_end_of_file:
1099  *@a_this: the current instance of #CRInput.
1100  *@a_eof: the new end of file flag.
1101  *
1102  *Sets the end of file flag.
1103  *
1104  *Returns CR_OK upon successful completion, an error code otherwise.
1105  */
1106 enum CRStatus
cr_input_set_end_of_file(CRInput * a_this,gboolean a_eof)1107 cr_input_set_end_of_file (CRInput * a_this, gboolean a_eof)
1108 {
1109         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1110 
1111         PRIVATE (a_this)->end_of_input = a_eof;
1112 
1113         return CR_OK;
1114 }
1115 
1116 /**
1117  * cr_input_get_end_of_file:
1118  *@a_this: the current instance of #CRInput.
1119  *@a_eof: out parameter the place to put the end of
1120  *file flag.
1121  *
1122  *Gets the end of file flag.
1123  *
1124  *Returns CR_OK upon successful completion, an error code otherwise.
1125  */
1126 enum CRStatus
cr_input_get_end_of_file(CRInput const * a_this,gboolean * a_eof)1127 cr_input_get_end_of_file (CRInput const * a_this, gboolean * a_eof)
1128 {
1129         g_return_val_if_fail (a_this && PRIVATE (a_this)
1130                               && a_eof, CR_BAD_PARAM_ERROR);
1131 
1132         *a_eof = PRIVATE (a_this)->end_of_input;
1133 
1134         return CR_OK;
1135 }
1136 
1137 /**
1138  * cr_input_set_end_of_line:
1139  *@a_this: the current instance of #CRInput.
1140  *@a_eol: the new end of line flag.
1141  *
1142  *Sets the end of line flag.
1143  *
1144  *Returns CR_OK upon successful completion, an error code
1145  *otherwise.
1146  */
1147 enum CRStatus
cr_input_set_end_of_line(CRInput * a_this,gboolean a_eol)1148 cr_input_set_end_of_line (CRInput * a_this, gboolean a_eol)
1149 {
1150         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1151 
1152         PRIVATE (a_this)->end_of_line = a_eol;
1153 
1154         return CR_OK;
1155 }
1156 
1157 /**
1158  * cr_input_get_end_of_line:
1159  *@a_this: the current instance of #CRInput
1160  *@a_eol: out parameter. The place to put
1161  *the returned flag
1162  *
1163  *Gets the end of line flag of the current input.
1164  *
1165  *Returns CR_OK upon successful completion, an error code
1166  *otherwise.
1167  */
1168 enum CRStatus
cr_input_get_end_of_line(CRInput const * a_this,gboolean * a_eol)1169 cr_input_get_end_of_line (CRInput const * a_this, gboolean * a_eol)
1170 {
1171         g_return_val_if_fail (a_this && PRIVATE (a_this)
1172                               && a_eol, CR_BAD_PARAM_ERROR);
1173 
1174         *a_eol = PRIVATE (a_this)->end_of_line;
1175 
1176         return CR_OK;
1177 }
1178 
1179 /**
1180  * cr_input_set_cur_pos:
1181  *@a_this: the "this pointer" of the current instance of
1182  *#CRInput.
1183  *@a_pos: the new position.
1184  *
1185  *Sets the current position in the input stream.
1186  *
1187  * Returns CR_OK upon successful completion, an error code otherwise.
1188  */
1189 enum CRStatus
cr_input_set_cur_pos(CRInput * a_this,CRInputPos const * a_pos)1190 cr_input_set_cur_pos (CRInput * a_this, CRInputPos const * a_pos)
1191 {
1192         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
1193                               CR_BAD_PARAM_ERROR);
1194 
1195         cr_input_set_column_num (a_this, a_pos->col);
1196         cr_input_set_line_num (a_this, a_pos->line);
1197         cr_input_set_cur_index (a_this, a_pos->next_byte_index);
1198         cr_input_set_end_of_line (a_this, a_pos->end_of_line);
1199         cr_input_set_end_of_file (a_this, a_pos->end_of_file);
1200 
1201         return CR_OK;
1202 }
1203