1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <assert.h>
4
5 #ifdef FOR_LT
6
7 #include "lt-memory.h"
8 #include "nsllib.h"
9
10 #define ERR(m) LT_ERROR(NECHAR,m)
11 #define ERR1(m,x) LT_ERROR1(NECHAR,m,x)
12 #define ERR2(m,x,y) LT_ERROR2(NECHAR,m,x,y)
13 #define ERR3(m,x,y,z) LT_ERROR3(NECHAR,m,x,y,z)
14
15 #define Malloc salloc
16 #define Realloc srealloc
17 #define Free sfree
18
19 #else
20
21 #include "system.h"
22 #define ERR(m) fprintf(stderr,m)
23 #define ERR1(m,x) fprintf(stderr,m,x)
24 #define ERR2(m,x,y) fprintf(stderr,m,x,y)
25 #define ERR3(m,x,y,z) fprintf(stderr,m,x,y,z)
26
27 #endif
28
29 #include "charset.h"
30 #include "string16.h"
31 #include "dtd.h"
32 #include "input.h"
33 #include "url.h"
34 #include "ctype16.h"
35
36 static void internal_reader(InputSource s);
37 static void external_reader(InputSource s);
38
SourceFromFILE16(const char8 * description,FILE16 * file16)39 InputSource SourceFromFILE16(const char8 *description, FILE16 *file16)
40 {
41 Entity e;
42
43 e = NewExternalEntity(0, 0, description, 0, 0);
44 if(!strchr8(description, '/'))
45 {
46 char8 *base = default_base_url();
47 EntitySetBaseURL(e, base);
48 Free(base);
49 }
50
51 return NewInputSource(e, file16);
52 }
53
SourceFromStream(const char8 * description,FILE * file)54 InputSource SourceFromStream(const char8 *description, FILE *file)
55 {
56 FILE16 *file16;
57
58 if(!(file16 = MakeFILE16FromFILE(file, "r")))
59 return 0;
60
61 return SourceFromFILE16(description, file16);
62 }
63
EntityOpen(Entity e)64 InputSource EntityOpen(Entity e)
65 {
66 FILE16 *f16;
67 char8 *r_url;
68
69 if(e->type == ET_external)
70 {
71 const char8 *url = EntityURL(e);
72
73 if(!url || !(f16 = url_open(url, 0, "r", &r_url)))
74 return 0;
75 if(r_url && !e->base_url)
76 EntitySetBaseURL(e, r_url);
77 Free(r_url);
78 }
79 else
80 {
81 f16 = MakeFILE16FromString(e->text, -1, "r");
82 }
83
84 return NewInputSource(e, f16);
85 }
86
87
NewInputSource(Entity e,FILE16 * f16)88 InputSource NewInputSource(Entity e, FILE16 *f16)
89 {
90 InputSource source;
91
92 if(!(source = Malloc(sizeof(*source))))
93 return 0;
94
95 source->line = 0;
96 source->line_alloc = 0;
97 source->line_length = 0;
98 source->expecting_low_surrogate = 0;
99 source->complicated_utf8_line = 0;
100 source->line_is_incomplete = 0;
101 source->next = 0;
102 source->seen_eoe = 0;
103
104 source->entity = e;
105
106 source->reader =
107 (e->type == ET_external) ? external_reader :internal_reader;
108 source->map = xml_char_map; /* 1.0 map unless changed by parser */
109
110 source->file16 = f16;
111
112 source->bytes_consumed = 0;
113 source->bytes_before_current_line = 0;
114 source->line_end_was_cr = 0;
115 source->line_number = 0;
116 source->not_read_yet = 1;
117 source->read_carefully = 0;
118
119 source->nextin = source->insize = 0;
120
121 source->parent = 0;
122
123 source->seen_error = 0;
124 strcpy(source->error_msg, "no error (you should never see this)");
125
126 return source;
127 }
128
/*
 * Close an input source: close its FILE16 and free the source itself.
 * The line buffer is freed only for external entities; internal_reader
 * points s->line into the FILE16's data instead of allocating it.
 * The entity is not freed.
 */
void SourceClose(InputSource source)
{
    Fclose(source->file16);

    if(source->entity->type == ET_external)
    {
        Free(source->line);
    }

    Free(source);
}
137
/*
 * Report a line and character position for source s.
 *
 * Returns 1 when the position is taken from the source's own line
 * counters: for an external entity, or for an entity whose parent is
 * external and whose matches_parent_text flag is set.
 * Returns 0 when only the entity's recorded offsets (line_offset,
 * line1_char_offset) are reported, combined with the parent's offsets
 * when the parent is not external.
 * Returns -1, leaving *linenum and *charnum untouched, when neither
 * applies.
 */
int SourceLineAndChar(InputSource s, int *linenum, int *charnum)
{
    Entity ent = s->entity;
    Entity outer = ent->parent;
    int first_line_shift;

    if(ent->type == ET_external)
    {
        *linenum = s->line_number;
        *charnum = s->next;
        return 1;
    }

    if(!outer)
        return -1;

    if(outer->type == ET_external)
    {
        if(!ent->matches_parent_text)
        {
            *linenum = ent->line_offset;
            *charnum = ent->line1_char_offset;
            return 0;
        }

        /* The column offset only applies on the entity's first line. */
        first_line_shift = (s->line_number == 0) ? ent->line1_char_offset : 0;
        *linenum = ent->line_offset + s->line_number;
        *charnum = first_line_shift + s->next;
        return 1;
    }

    if(outer->matches_parent_text)
    {
        first_line_shift = (ent->line_offset == 0) ? outer->line1_char_offset : 0;
        *linenum = outer->line_offset + ent->line_offset;
        *charnum = first_line_shift + ent->line1_char_offset;
        return 0;
    }

    return -1;
}
176
/*
 * Store the source's entity in *entity and its current byte offset,
 * as computed by SourceTell, in *byte_offset.
 */
void SourcePosition(InputSource s, Entity *entity, int *byte_offset)
{
    *entity = s->entity;

    *byte_offset = SourceTell(s);
}
182
/*
 * Return the byte offset in the underlying file that corresponds to
 * the current read position (s->next) in the current line.
 *
 * With 8-bit Chars every character is one byte.  Otherwise the offset
 * depends on the entity's encoding: two bytes per Char for the
 * UCS-2/UTF-16 family, one for the single-byte encodings, and for
 * UTF-8 either one byte per Char (plain lines) or a per-character
 * count when the line contained a multi-byte sequence.
 *
 * Returns -1 for an encoding not listed here.
 */
int SourceTell(InputSource s)
{
#if CHAR_SIZE == 8
    return s->bytes_before_current_line + s->next;
#else
    switch(s->entity->encoding)
    {
    case CE_ISO_10646_UCS_2B:
    case CE_UTF_16B:
    case CE_ISO_10646_UCS_2L:
    case CE_UTF_16L:
        return s->bytes_before_current_line + 2 * s->next;
    case CE_ISO_646:
    case CE_ISO_8859_1:
    case CE_ISO_8859_2:
    case CE_ISO_8859_3:
    case CE_ISO_8859_4:
    case CE_ISO_8859_5:
    case CE_ISO_8859_6:
    case CE_ISO_8859_7:
    case CE_ISO_8859_8:
    case CE_ISO_8859_9:
    case CE_ISO_8859_10:
    case CE_ISO_8859_11:
    case CE_ISO_8859_13:
    case CE_ISO_8859_14:
    case CE_ISO_8859_15:
    case CE_unspecified_ascii_superset:
        return s->bytes_before_current_line + s->next;
    case CE_UTF_8:
        if(s->complicated_utf8_line)
        {
            /* examine earlier chars in line to see how many bytes they used */
            int i, c, n;

            /* We cache the last result to avoid N^2 slowness on very
               long lines.  Thanks to Gait Boxman for suggesting this. */

            if(s->next < s->cached_line_char)
            {
                /* Moved backwards in line; doesn't happen, I think */
                s->cached_line_char = 0;
                s->cached_line_byte = 0;
            }

            n = s->cached_line_byte;
            for(i = s->cached_line_char; i < s->next; i++)
            {
                c = s->line[i];
                if(c <= 0x7f)
                    n += 1;
                else if(c <= 0x7ff)
                    n += 2;
                else if(c >= 0xd800 && c <= 0xdfff)
                    /* One of a surrogate pair; the pair came from a
                       4-byte sequence, so count 2 for each half */
                    n += 2;
                else if(c <= 0xffff)
                    n += 3;
                else if(c <= 0x1fffff)
                    /* 4-byte form carries 21 bits (was 0x1ffff) */
                    n += 4;
                else if(c <= 0x3ffffff)
                    n += 5;
                else
                    n += 6;
            }

            s->cached_line_char = s->next;
            s->cached_line_byte = n;

            return s->bytes_before_current_line + n;
        }
        else
            return s->bytes_before_current_line + s->next;
    default:
        return -1;
    }
#endif
}
262
/*
 * Reposition the source at byte_offset in its underlying file.
 *
 * The current line and the raw input buffer are discarded and the byte
 * counters are set to byte_offset.  Returns the result of Fseek.
 */
int SourceSeek(InputSource s, int byte_offset)
{
    /* Forget the current line */
    s->line_length = 0;
    s->next = 0;
    s->seen_eoe = 0;

    /* Both byte counters restart from the seek target */
    s->bytes_before_current_line = byte_offset;
    s->bytes_consumed = s->bytes_before_current_line;

    /* Drop any buffered raw input */
    s->insize = 0;
    s->nextin = s->insize;

    /* XXX line number will be wrong!  Use a conspicuous marker value. */
    s->line_number = -999999;

    return Fseek(s->file16, byte_offset, SEEK_SET);
}
274
275 /* reader for internal entities, doesn't need to do any encoding translation */
276
/*
 * Line reader for internal entities.
 *
 * No copying or translation is done: s->line is pointed directly at
 * the text held by the string FILE16, and the line runs up to and
 * including the next '\n', or to the terminating zero Char.
 */
static void internal_reader(InputSource s)
{
    /* XXX reconsider use of FILE16 for internal entities */

    /* Local view of the leading fields of the FILE16 structure.  As
       used here, handle is the start of the text and handle2 the byte
       offset of the read position within it.  This has to stay in step
       with the real FILE16 layout. */
    struct f16_head {
        void *handle;
        int handle2, handle3;
        /* we don't need the rest here */
    };

    struct f16_head *str = (struct f16_head *)s->file16;
    Char *end;

    s->line = (void *)((char *)str->handle + str->handle2);

    /* Scan to the newline or the terminator ... */
    end = s->line;
    while(*end && *end != '\n')
        end++;
    /* ... and keep the newline as part of the line. */
    if(*end)
        end++;

    str->handle2 = (char *)end - (char *)str->handle;
    s->line_length = end - s->line;

    /* NOTE(review): this records the offset *after* the line just
       read, not the offset where it starts -- confirm that is what
       SourceTell expects for internal entities. */
    s->bytes_before_current_line = str->handle2;
    s->next = 0;

    /* The first line read is line 0; later lines advance the count. */
    if(!s->not_read_yet)
        s->line_number++;
    else
        s->not_read_yet = 0;
}
307
/*
 * Translate bytes starting at s->inbuf[s->nextin] until end of line
 * or until s->nextin == s->insize.
 * The output is placed starting at s->line[s->line_length], which must
 * have enough space.
 * Returns zero at end of line or error, one if more input is needed.
 * In the case of an error (encoding error or illegal XML character) we
 * set s->seen_error and put a BADCHAR in the output as a marker.
 */
317
318
/*
 * Building blocks shared by the translate_* functions.
 *
 * These deliberately expand to bare statements rather than
 * do { } while(0) wrappers: they declare locals, define the labels
 * more_bytes / end_of_line, and use `continue` and `goto`, all of
 * which must act on the enclosing function's loop and labels.
 */

/* Declare the character variable and local copies of the fields of s. */
#define SETUP \
    int c; /* int, not Char: values may exceed 0xffff, -1 means error */ \
    \
    /* copies of fields of s that are only read */ \
    unsigned char * const inbuf = s->inbuf; \
    const int insize = s->insize; \
    const int startin = s->nextin; \
    Char * const outbuf = s->line; \
    unsigned char *map = s->map; \
    \
    /* copies of fields of s that are changed and written back on exit */ \
    int nextin = s->nextin; \
    int nextout = s->line_length; \
    int ignore_linefeed = s->ignore_linefeed;

/* If decoding failed (c == -1), emit a BADCHAR marker (see input.h),
   flag the error on s and end the line. */
#define ERROR_CHECK \
    if(c == -1) \
    { \
        outbuf[nextout++] = BADCHAR; \
        s->seen_error = 1; \
        goto end_of_line; \
    }

/* Line-end normalisation.  A linefeed (or 0x85 under the XML 1.1 map)
   that directly follows a line ended by CR is skipped; CR becomes LF;
   under the XML 1.1 map 0x85 and 0x2028 also become LF. */
#define LINEFEED \
    if(ignore_linefeed) \
    { \
        ignore_linefeed = 0; \
        if(c == '\n' || (c == 0x85 && map == xml_char_map_11)) \
        { \
            /* Ignore lf at start of line if last line ended with cr */ \
            s->bytes_before_current_line += (nextin - startin); \
            continue; \
        } \
    } \
    \
    if(c == '\r') \
    { \
        s->line_end_was_cr = 1; \
        c = '\n'; \
    } \
    else if((c == 0x85 || c == 0x2028) && map == xml_char_map_11) \
        c = '\n';

/* Store c as a single Char; a newline ends the line. */
#define OUTPUT \
    outbuf[nextout++] = c; \
    \
    if(c == '\n') \
        goto end_of_line

/* Store c, splitting values of 0x10000 and above into a surrogate
   pair; a newline ends the line. */
#define OUTPUT_WITH_SURROGATES \
    if(c < 0x10000) \
        outbuf[nextout++] = c; \
    else \
    { \
        /* Use surrogates */ \
        outbuf[nextout++] = ((c - 0x10000) >> 10) + 0xd800; \
        outbuf[nextout++] = ((c - 0x10000) & 0x3ff) + 0xdc00; \
    } \
    \
    if(c == '\n') \
        goto end_of_line

/* Exit path when the input buffer ran out: write the local state back
   to s and return 1 so the caller refills the buffer. */
#define MORE_BYTES \
more_bytes: \
    s->nextin = nextin; \
    s->line_length = nextout; \
    s->ignore_linefeed = ignore_linefeed; \
    return 1

/* Exit path at end of line or on error: write the local state back to
   s and return 0. */
#define END_OF_LINE \
end_of_line: \
    s->nextin = nextin; \
    s->line_length = nextout; \
    s->ignore_linefeed = ignore_linefeed; \
    return 0
398
399 #if CHAR_SIZE == 8
400
/*
 * Translator used when Char is 8 bits wide: each input byte is one
 * character.  Bytes that are not legal XML characters under the
 * source's character map are reported as errors.
 * Returns 0 at end of line or on error, 1 if more input is needed.
 */
static int translate_8bit(InputSource s)
{
    SETUP;

    for(;;)
    {
        if(nextin >= insize)
            goto more_bytes;

        c = inbuf[nextin++];

        if(!is_xml_legal(c, map))
        {
            sprintf(s->error_msg,
                    "Illegal character <0x%x> at file offset %d",
                    c, s->bytes_consumed + nextin - startin - 1);
            c = -1;
        }

        ERROR_CHECK;

        LINEFEED;

        OUTPUT;
    }

    MORE_BYTES;

    END_OF_LINE;
}
428
429 #else
430
/*
 * Translator for the ISO-8859-2 .. ISO-8859-15 encodings: each input
 * byte is mapped to Unicode through the iso_to_unicode table for the
 * entity's encoding.  A table entry of -1 marks a byte that is invalid
 * in that encoding.
 *
 * Returns 0 at end of line or on error, 1 if the input buffer was
 * exhausted before the line ended and more input is needed.
 */
static int translate_latin(InputSource s)
{
    CharacterEncoding enc = s->entity->encoding;
    int *to_unicode = iso_to_unicode[enc - CE_ISO_8859_2];
    SETUP;

    while(nextin < insize)
    {
        c = to_unicode[inbuf[nextin++]];
        if(c == -1)
        {
            sprintf(s->error_msg,
                    "Illegal byte <0x%x> for encoding %s at file offset %d",
                    inbuf[nextin-1], CharacterEncodingName[enc],
                    s->bytes_consumed + nextin - 1 - startin);
        }
        else if(!is_xml_legal(c, map))
        {
            sprintf(s->error_msg,
                    "Illegal character <0x%x> "
                    "immediately before file offset %d",
                    c, s->bytes_consumed + nextin - startin);
            c = -1;
        }

        ERROR_CHECK;

        LINEFEED;

        OUTPUT;
    }

    /* Buffer exhausted without reaching a newline: ask for a refill.
       Without this exit the function fell into end_of_line and
       returned 0, so lines were cut at buffer boundaries and an empty
       buffer looked like an empty line (i.e. end of entity). */
    MORE_BYTES;

    END_OF_LINE;
}
465
/*
 * Translator for encodings whose bytes map directly to the same
 * Unicode code points (selected by external_reader for ISO-646,
 * ISO-8859-1 and the unspecified ASCII superset).
 *
 * Returns 0 at end of line or on error, 1 if the input buffer was
 * exhausted before the line ended and more input is needed.
 */
static int translate_latin1(InputSource s)
{
    SETUP;

    while(nextin < insize)
    {
        c = inbuf[nextin++];
        if(!is_xml_legal(c, map))
        {
            sprintf(s->error_msg,
                    "Illegal character <0x%x> "
                    "immediately before file offset %d",
                    c, s->bytes_consumed + nextin - startin);
            c = -1;
        }

        ERROR_CHECK;

        LINEFEED;

        OUTPUT;
    }

    /* Buffer exhausted without reaching a newline: ask for a refill.
       Without this exit the function fell into end_of_line and
       returned 0, so lines were cut at buffer boundaries and an empty
       buffer looked like an empty line (i.e. end of entity). */
    MORE_BYTES;

    END_OF_LINE;
}
491
/*
 * Translator for UTF-8 input.
 *
 * Decodes one character at a time.  Invalid lead bytes, invalid
 * continuation bytes, non-shortest encodings and characters that are
 * not legal XML are reported as errors.  Characters of 0x10000 and
 * above are stored as surrogate pairs.  Any multi-byte character marks
 * the line as "complicated" and resets the byte-offset cache used by
 * SourceTell.
 *
 * When s->read_carefully is set, the line is also ended (and flagged
 * incomplete) after each '>'.
 *
 * Returns 0 at end of line or on error, 1 if more input is needed.
 */
static int translate_utf8(InputSource s)
{
    int trail, k, lowest;
    SETUP;

    while(nextin < insize)
    {
        c = inbuf[nextin++];

        if(c > 0x7f)
        {
            if(c <= 0xc0 || c >= 0xfe)
            {
                /* Continuation byte, 0xc0, or 0xfe/0xff as lead byte */
                sprintf(s->error_msg,
                        "Illegal UTF-8 start byte <0x%x> at file offset %d",
                        c, s->bytes_consumed + nextin - 1 - startin);
                c = -1;
            }
            else
            {
                /* Lead byte: extract its payload bits, and note how
                   many continuation bytes follow and the smallest
                   value that legitimately needs this length. */
                if(c <= 0xdf)
                {
                    c &= 0x1f;
                    trail = 1;
                    lowest = 0x80;
                }
                else if(c <= 0xef)
                {
                    c &= 0x0f;
                    trail = 2;
                    lowest = 0x800;
                }
                else if(c <= 0xf7)
                {
                    c &= 0x07;
                    trail = 3;
                    lowest = 0x10000;
                }
                else if(c <= 0xfb)
                {
                    c &= 0x03;
                    trail = 4;
                    lowest = 0x200000;
                }
                else
                {
                    c &= 0x01;
                    trail = 5;
                    lowest = 0x4000000;
                }

                if(nextin + trail > insize)
                {
                    /* Sequence is split across buffers: un-read the
                       lead byte and ask for more input. */
                    nextin--;
                    goto more_bytes;
                }

                s->complicated_utf8_line = 1;
                s->cached_line_char = 0;
                s->cached_line_byte = 0;

                for(k = 0; k < trail; k++)
                {
                    int t = inbuf[nextin++];

                    if((t & 0xc0) != 0x80)
                    {
                        c = -1;
                        sprintf(s->error_msg,
                                "Illegal UTF-8 byte %d <0x%x> at file offset %d",
                                k+2, t,
                                s->bytes_consumed + nextin - 1 - startin);
                        break;
                    }
                    c = (c << 6) + (t & 0x3f);
                }

                if(c != -1 && c < lowest)
                {
                    sprintf(s->error_msg,
                            "Illegal (non-shortest) UTF-8 sequence for "
                            "character <0x%x> "
                            "immediately before file offset %d",
                            c, s->bytes_consumed + nextin - startin);
                    c = -1;
                }
            }
        }

        if(c >= 0 && !is_xml_legal(c, map))
        {
            sprintf(s->error_msg,
                    "Illegal character <0x%x> "
                    "immediately before file offset %d",
                    c, s->bytes_consumed + nextin - startin);
            c = -1;
        }

        ERROR_CHECK;

        LINEFEED;

        OUTPUT_WITH_SURROGATES;

        if(s->read_carefully && c == '>')
        {
            s->line_is_incomplete = 1;
            goto end_of_line;
        }
    }

    MORE_BYTES;

    END_OF_LINE;
}
601
/*
 * Translator for UTF-16 / UCS-2 input, big- or little-endian according
 * to the entity's encoding.
 *
 * Each 16-bit unit is stored as one Char.  Surrogate pairing is
 * checked through s->expecting_low_surrogate, which persists across
 * calls: a low surrogate must follow a high one, and nothing else may.
 *
 * Returns 0 at end of line or on error, 1 if more input is needed.
 */
static int translate_utf16(InputSource s)
{
    int little_endian = (s->entity->encoding == CE_ISO_10646_UCS_2L ||
                         s->entity->encoding == CE_UTF_16L);
    SETUP;

    while(nextin < insize)
    {
        /* Need both bytes of the unit */
        if(nextin+2 > insize)
            goto more_bytes;

        c = little_endian ? (inbuf[nextin+1] << 8) + inbuf[nextin]
                          : (inbuf[nextin] << 8) + inbuf[nextin+1];
        nextin += 2;

        if(c >= 0xdc00 && c <= 0xdfff)	/* low (2nd) surrogate */
        {
            if(!s->expecting_low_surrogate)
            {
                sprintf(s->error_msg,
                        "Unexpected low surrogate <0x%x> "
                        "at file offset %d",
                        c, s->bytes_consumed + nextin - startin - 2);
                c = -1;
            }
            else
                s->expecting_low_surrogate = 0;
        }
        else if(s->expecting_low_surrogate)
        {
            sprintf(s->error_msg,
                    "Expected low surrogate but got <0x%x> "
                    "at file offset %d",
                    c, s->bytes_consumed + nextin - startin - 2);
            c = -1;
        }
        else if(c >= 0xd800 && c <= 0xdbff)	/* high (1st) surrogate */
            s->expecting_low_surrogate = 1;

        if(c >= 0 && !is_xml_legal(c, map) &&
           /* surrogates are legal in utf-16 */
           !(c >= 0xd800 && c <= 0xdfff))
        {
            sprintf(s->error_msg,
                    "Illegal character <0x%x> "
                    "immediately before file offset %d",
                    c, s->bytes_consumed + nextin - startin);
            c = -1;
        }

        ERROR_CHECK;

        LINEFEED;

        OUTPUT;
    }

    MORE_BYTES;

    END_OF_LINE;
}
665
666 #endif
667
/*
 * Line reader for external entities.
 *
 * Picks the translate_* function for the entity's encoding and calls
 * it repeatedly, refilling s->inbuf from the FILE16 whenever it asks
 * for more input, until a line has been produced or input ends.
 *
 * Does nothing once an error has been seen on the source.  If the
 * previous call left the line flagged incomplete, this call continues
 * the same line: line state is not reset and the line number is not
 * advanced.
 */
static void external_reader(InputSource s)
{
    int (*trans)(InputSource);
    int startin = s->nextin;
    int continuing_incomplete_line = s->line_is_incomplete;

    if(s->seen_error)
        return;

    s->line_is_incomplete = 0;
    if(!continuing_incomplete_line)
    {
        /* Start a fresh line */
        s->ignore_linefeed = s->line_end_was_cr;
        s->line_end_was_cr = 0;
        s->complicated_utf8_line = 0;
        s->line_length = 0;
        s->bytes_before_current_line = s->bytes_consumed;
        s->next = 0;
    }

#if CHAR_SIZE == 8
    trans = translate_8bit;
#else
    switch(s->entity->encoding)
    {
    case CE_ISO_646:	/* should really check for >127 in this case */
    case CE_ISO_8859_1:
    case CE_unspecified_ascii_superset:
        trans = translate_latin1;
        break;
    case CE_ISO_8859_2:
    case CE_ISO_8859_3:
    case CE_ISO_8859_4:
    case CE_ISO_8859_5:
    case CE_ISO_8859_6:
    case CE_ISO_8859_7:
    case CE_ISO_8859_8:
    case CE_ISO_8859_9:
    case CE_ISO_8859_10:
    case CE_ISO_8859_11:
    case CE_ISO_8859_13:
    case CE_ISO_8859_14:
    case CE_ISO_8859_15:
        trans = translate_latin;
        break;
    case CE_UTF_8:
        trans = translate_utf8;
        break;
    case CE_ISO_10646_UCS_2B:
    case CE_UTF_16B:
    case CE_ISO_10646_UCS_2L:
    case CE_UTF_16L:
        trans = translate_utf16;
        break;
    default:
        /* NOTE(review): with NDEBUG defined, trans is left unset here */
        assert(1==0);
        break;
    }
#endif

    for(;;)
    {
        int i, bytes_read, remaining = 0;

        /* There are never more characters than bytes in the input, so
           room for every unread byte is always enough. */
        if(s->line_alloc < s->line_length + (s->insize - s->nextin))
        {
            s->line_alloc = s->line_length + (s->insize - s->nextin);
            /* NOTE(review): the Realloc result is not checked */
            s->line = Realloc(s->line, s->line_alloc * sizeof(Char));
        }

        if(trans(s) == 0)
        {
            /* Line finished (or error) */
            s->bytes_consumed += (s->nextin - startin);
            if(s->not_read_yet)
                s->not_read_yet = 0;
            else if(!continuing_incomplete_line)
                s->line_number++;
            return;
        }

        /* More input needed.  Copy down any partial character ... */
        remaining = s->insize - s->nextin;
        for(i = 0; i < remaining; i++)
            s->inbuf[i] = s->inbuf[s->nextin + i];

        s->bytes_consumed += (s->nextin - startin);

        /* ... and get another block after it */
        bytes_read = Readu(s->file16,
                           s->inbuf+remaining, sizeof(s->inbuf)-remaining);
        s->nextin = startin = 0;

        if(bytes_read <= 0)
        {
            if(remaining > 0)
            {
                /* EOF or error in the middle of a character */
                sprintf(s->error_msg, "EOF or error inside character at "
                        "file offset %d",
                        s->bytes_consumed + remaining);
                /* There must be space because there is unconsumed input */
                s->line[s->line_length++] = BADCHAR;
                s->seen_error = 1;
            }

            s->insize = 0;

            if(s->not_read_yet)
                s->not_read_yet = 0;
            else if(!continuing_incomplete_line)
                s->line_number++;

            return;
        }

        s->insize = bytes_read + remaining;
    }
}
793
/*
 * Guess the entity's character encoding from the first bytes of input.
 *
 * Up to four bytes are read into s->inbuf (they stay there for the
 * reader to translate).  A UTF-8 or UTF-16 byte order mark selects the
 * encoding and is skipped by advancing s->nextin and
 * s->bytes_consumed.  Failing that, "<?" encoded as 16-bit units
 * selects UTF-16 of the matching byte order.  Otherwise the default is
 * the unspecified ASCII superset for 8-bit Chars, or UTF-8 with
 * read_carefully set for wider Chars.
 *
 * If the read fails (Readu returns -1) the function returns without
 * setting the encoding.  UCS-4 is not detected.
 */
void determine_character_encoding(InputSource s)
{
    Entity e = s->entity;
    unsigned char *b = (unsigned char *)s->inbuf;
    int got;

    /* Clear first so that short inputs compare against zeros */
    b[0] = b[1] = b[2] = b[3] = 0;

    while(s->insize < 4)
    {
        got = Readu(s->file16, s->inbuf + s->insize, 4 - s->insize);
        if(got == -1)
            return;
        if(got == 0)
            break;
        s->insize += got;
    }

    /* UTF-8 byte order mark */
    if(b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf)
    {
        e->encoding = CE_UTF_8;
        s->nextin = 3;
        s->bytes_consumed = 3;
        return;
    }

    /* Big-endian UTF-16: byte order mark, or "<?" without one */
    if(b[0] == 0xfe && b[1] == 0xff)
    {
        e->encoding = CE_UTF_16B;
        s->nextin = 2;
        s->bytes_consumed = 2;
        return;
    }
    if(b[0] == 0 && b[1] == '<' && b[2] == 0 && b[3] == '?')
    {
        e->encoding = CE_UTF_16B;
        return;
    }

    /* Little-endian UTF-16: byte order mark, or "<?" without one */
    if(b[0] == 0xff && b[1] == 0xfe)
    {
        e->encoding = CE_UTF_16L;
        s->nextin = 2;
        s->bytes_consumed = 2;
        return;
    }
    if(b[0] == '<' && b[1] == 0 && b[2] == '?' && b[3] == 0)
    {
        e->encoding = CE_UTF_16L;
        return;
    }

    /* Nothing recognised: fall back to the build's default */
#if CHAR_SIZE == 8
    e->encoding = CE_unspecified_ascii_superset;
#else
    e->encoding = CE_UTF_8;
    s->read_carefully = 1;
#endif
}
852
/*
 * Read the next line through the source's reader and return its first
 * unread character, or XEOE at end of entity.
 *
 * Must not be called again once end of entity has been reported.
 * If an error was already seen on the source, XEOE is returned without
 * reading.  If the reader produces an empty line, the saved position
 * fields of the previous line are restored before XEOE is returned.
 */
int get_with_fill(InputSource s)
{
    int saved_next = s->next;
    int saved_complicated = s->complicated_utf8_line;
    int saved_line_start = s->bytes_before_current_line;
    int saved_line_number = s->line_number;

    assert(!s->seen_eoe);

    if(s->seen_error)
    {
        s->seen_eoe = 1;
        return XEOE;
    }

    s->reader(s);

    if(s->line_length == 0)
    {
        /* Nothing was read: restore old line */
        s->next = saved_next;
        s->line_length = s->next;
        s->complicated_utf8_line = saved_complicated;
        s->bytes_before_current_line = saved_line_start;
        s->line_number = saved_line_number;
        s->seen_eoe = 1;
        return XEOE;
    }

    if(s->next == s->line_length)
    {
        /* "incomplete" line turned out to be at EOF */
        s->seen_eoe = 1;
        return XEOE;
    }

    return s->line[s->next++];
}
901