1 /* Part of SWI-Prolog
2
3 Author: Jan Wielemaker and Anjo Anjewierden
4 E-mail: J.Wielemaker@vu.nl
5 WWW: http://www.swi-prolog.org
6 Copyright (c) 2011-2020, University of Amsterdam
7 VU University Amsterdam
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13
14 1. Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in
19 the documentation and/or other materials provided with the
20 distribution.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include "pl-incl.h"
37 #include "pl-arith.h"
38 #include "pl-ctype.h"
39 #include "pl-utf8.h"
40 #include "pl-codelist.h"
41 #include <errno.h>
42 #include <stdio.h>
43 #if HAVE_LIMITS_H
44 #include <limits.h> /* solaris compatibility */
45 #endif
46
47 #undef LD
48 #define LD LOCAL_LD
49
50
51 /*******************************
52 * UNIFIED TEXT STUFF *
53 *******************************/
54
55 static inline size_t
bufsize_text(PL_chars_t * text,size_t len)56 bufsize_text(PL_chars_t *text, size_t len)
57 { size_t unit;
58
59 switch(text->encoding)
60 { case ENC_ISO_LATIN_1:
61 case ENC_ASCII:
62 case ENC_UTF8:
63 case ENC_ANSI:
64 unit = sizeof(char);
65 break;
66 case ENC_WCHAR:
67 unit = sizeof(pl_wchar_t);
68 break;
69 default:
70 assert(0);
71 unit = sizeof(char); /*NOTREACHED*/
72 }
73
74 return len*unit;
75 }
76
77
78 int
PL_save_text(PL_chars_t * text,int flags)79 PL_save_text(PL_chars_t *text, int flags)
80 { if ( (flags & BUF_MALLOC) && text->storage != PL_CHARS_MALLOC )
81 { size_t bl = bufsize_text(text, text->length+1);
82 void *new = PL_malloc(bl);
83
84 if ( new )
85 { memcpy(new, text->text.t, bl);
86 text->text.t = new;
87 text->storage = PL_CHARS_MALLOC;
88 } else
89 { return FALSE;
90 }
91 } else if ( text->storage == PL_CHARS_LOCAL )
92 { Buffer b = findBuffer(BUF_STACK);
93 size_t bl = bufsize_text(text, text->length+1);
94
95 addMultipleBuffer(b, text->text.t, bl, char);
96 text->text.t = baseBuffer(b, char);
97
98 text->storage = PL_CHARS_RING;
99 } else if ( text->storage == PL_CHARS_MALLOC )
100 { Buffer b = findBuffer(BUF_STACK);
101 size_t bl = bufsize_text(text, text->length+1);
102
103 addMultipleBuffer(b, text->text.t, bl, char);
104 PL_free_text(text);
105 text->text.t = baseBuffer(b, char);
106
107 text->storage = PL_CHARS_RING;
108 }
109
110 return TRUE;
111 }
112
113
114 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
115 PL_from_stack_text() moves a string from the stack, so it won't get
116 corrupted if GC/shift comes along.
117 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
118
119 static int
PL_from_stack_text(PL_chars_t * text,int flags)120 PL_from_stack_text(PL_chars_t *text, int flags)
121 { if ( !(flags&BUF_ALLOW_STACK) )
122 { if ( text->storage == PL_CHARS_STACK )
123 { size_t bl = bufsize_text(text, text->length+1);
124
125 if ( bl < sizeof(text->buf) )
126 { memcpy(text->buf, text->text.t, bl);
127 text->text.t = text->buf;
128 text->storage = PL_CHARS_LOCAL;
129 } else
130 { Buffer b = findBuffer(BUF_STACK);
131
132 addMultipleBuffer(b, text->text.t, bl, char);
133 text->text.t = baseBuffer(b, char);
134 text->storage = PL_CHARS_RING;
135 }
136 }
137 }
138
139 return TRUE;
140 }
141
142
143 #define INT64_DIGITS 20
144
145 static char *
ui64toa(uint64_t val,char * out)146 ui64toa(uint64_t val, char *out)
147 { char tmpBuf[INT64_DIGITS + 1];
148 char *ptrOrg = tmpBuf + INT64_DIGITS;
149 char *ptr = ptrOrg;
150 size_t nbDigs;
151
152 do
153 { int rem = val % 10;
154
155 *--ptr = rem + '0';
156 val /= 10;
157 } while ( val );
158
159 nbDigs = ptrOrg - ptr;
160 memcpy(out, ptr, nbDigs);
161 out += nbDigs;
162 *out = '\0';
163
164 return out; /* points to the END */
165 };
166
167
168 static char *
i64toa(int64_t val,char * out)169 i64toa(int64_t val, char *out)
170 { if ( val < 0 )
171 { *out++ = '-';
172 val = -(uint64_t)val;
173 }
174
175 return ui64toa((uint64_t)val, out);
176 }
177
178
179 int
PL_get_text__LD(term_t l,PL_chars_t * text,int flags ARG_LD)180 PL_get_text__LD(term_t l, PL_chars_t *text, int flags ARG_LD)
181 { word w = valHandle(l);
182
183 if ( (flags & CVT_ATOM) && isAtom(w) )
184 { if ( isNil(w) && (flags&CVT_LIST) )
185 goto case_list;
186 if ( !get_atom_text(w, text) )
187 goto maybe_write;
188 } else if ( (flags & CVT_STRING) && isString(w) )
189 { if ( !get_string_text(w, text PASS_LD) )
190 goto maybe_write;
191 if ( !PL_from_stack_text(text, flags) )
192 return FALSE; /* no memory */
193 } else if ( ((flags&CVT_RATIONAL) && isRational(w)) ||
194 ((flags&CVT_INTEGER) && isInteger(w)) )
195 { number n;
196
197 PL_get_number(l, &n);
198 switch(n.type)
199 { case V_INTEGER:
200 { char *ep = i64toa(n.value.i, text->buf);
201
202 text->text.t = text->buf;
203 text->length = ep-text->text.t;
204 text->storage = PL_CHARS_LOCAL;
205 break;
206 }
207 #ifdef O_GMP
208 case V_MPZ:
209 { size_t sz = mpz_sizeinbase(n.value.mpz, 10) + 2;
210 Buffer b = findBuffer(BUF_STACK);
211
212 if ( !growBuffer(b, sz) )
213 outOfCore();
214 mpz_get_str(b->base, 10, n.value.mpz);
215 b->top = b->base + strlen(b->base);
216 text->text.t = baseBuffer(b, char);
217 text->length = entriesBuffer(b, char);
218 text->storage = PL_CHARS_RING;
219
220 break;
221 }
222 case V_MPQ:
223 { size_t sz = ( mpz_sizeinbase(mpq_numref(n.value.mpq), 10) +
224 mpz_sizeinbase(mpq_denref(n.value.mpq), 10) + 4 );
225 Buffer b = findBuffer(BUF_STACK);
226
227 if ( !growBuffer(b, sz) )
228 outOfCore();
229 mpz_get_str(b->base, 10, mpq_numref(n.value.mpq));
230 b->top = b->base + strlen(b->base);
231 *b->top++ = 'r'; /* '/' under some condition? */
232 mpz_get_str(b->top, 10, mpq_denref(n.value.mpq));
233 b->top += strlen(b->top);
234 text->text.t = baseBuffer(b, char);
235 text->length = entriesBuffer(b, char);
236 text->storage = PL_CHARS_RING;
237
238 break;
239 }
240 #endif
241 default:
242 assert(0);
243 }
244 text->encoding = ENC_ISO_LATIN_1;
245 text->canonical = TRUE;
246 } else if ( (flags & CVT_FLOAT) && isFloat(w) )
247 { format_float(valFloat(w), text->buf);
248 text->text.t = text->buf;
249 text->length = strlen(text->text.t);
250 text->encoding = ENC_ISO_LATIN_1;
251 text->storage = PL_CHARS_LOCAL;
252 text->canonical = TRUE;
253 } else if ( (flags & CVT_LIST) )
254 { Buffer b;
255 CVT_result result;
256
257 case_list:
258 if ( (b = codes_or_chars_to_buffer(l, BUF_STACK, FALSE, &result)) )
259 { text->length = entriesBuffer(b, char);
260 addBuffer(b, EOS, char);
261 text->text.t = baseBuffer(b, char);
262 text->encoding = ENC_ISO_LATIN_1;
263 } else if ( result.status == CVT_wide &&
264 (b = codes_or_chars_to_buffer(l, BUF_STACK, TRUE, &result)) )
265 { text->length = entriesBuffer(b, pl_wchar_t);
266 addBuffer(b, EOS, pl_wchar_t);
267 text->text.w = baseBuffer(b, pl_wchar_t);
268 text->encoding = ENC_WCHAR;
269 } else if ( (flags & (CVT_WRITE|CVT_WRITE_CANONICAL)) )
270 { goto case_write;
271 } else
272 { if ( (flags & CVT_VARNOFAIL) && result.status == CVT_partial )
273 return 2;
274
275 if ( (flags & CVT_EXCEPTION) )
276 { switch(result.status)
277 { case CVT_partial:
278 return PL_error(NULL, 0, NULL, ERR_INSTANTIATION);
279 case CVT_nolist:
280 goto error;
281 case CVT_nocode:
282 case CVT_nochar:
283 { term_t culprit = PL_new_term_ref();
284 atom_t type;
285
286 *valTermRef(culprit) = result.culprit;
287 if ( result.status == CVT_nocode )
288 type = ATOM_character_code;
289 else
290 type = ATOM_character;
291
292 return PL_error(NULL, 0, NULL, ERR_TYPE, type, culprit);
293 }
294 case CVT_representation:
295 return PL_representation_error("character_code");
296 default:
297 break;
298 }
299 }
300 goto error;
301 }
302
303 text->storage = PL_CHARS_RING;
304 text->canonical = TRUE;
305 } else if ( (flags & CVT_VARIABLE) && isVar(w) )
306 { text->text.t = varName(l, text->buf);
307 text->length = strlen(text->text.t);
308 text->encoding = ENC_ISO_LATIN_1;
309 text->storage = PL_CHARS_LOCAL;
310 text->canonical = TRUE;
311 } else if ( (flags & (CVT_WRITE|CVT_WRITE_CANONICAL)) )
312 { IOENC encodings[3];
313 IOENC *enc;
314 char *r;
315 int wflags;
316
317 case_write:
318 if ( (flags&REP_UTF8) )
319 { encodings[0] = ENC_UTF8;
320 encodings[1] = ENC_UNKNOWN;
321 } else
322 { encodings[0] = ENC_ISO_LATIN_1;
323 encodings[1] = ENC_WCHAR;
324 encodings[2] = ENC_UNKNOWN;
325 }
326
327 if ( (flags&CVT_WRITEQ) == CVT_WRITEQ )
328 wflags = PL_WRT_QUOTED|PL_WRT_NUMBERVARS;
329 else if ( (flags&CVT_WRITE_CANONICAL) )
330 wflags = PL_WRT_QUOTED|PL_WRT_IGNOREOPS|PL_WRT_NUMBERVARS;
331 else
332 wflags = PL_WRT_NUMBERVARS;
333
334 for(enc = encodings; *enc != ENC_UNKNOWN; enc++)
335 { size_t size;
336 IOSTREAM *fd;
337
338 r = text->buf;
339 size = sizeof(text->buf);
340 fd = Sopenmem(&r, &size, "w");
341 fd->encoding = *enc;
342 if ( PL_write_term(fd, l, 1200, wflags) &&
343 Sputcode(EOS, fd) >= 0 &&
344 Sflush(fd) >= 0 )
345 { text->encoding = *enc;
346 text->storage = (r == text->buf ? PL_CHARS_LOCAL : PL_CHARS_MALLOC);
347 text->canonical = TRUE;
348
349 if ( *enc == ENC_WCHAR )
350 { text->length = (size/sizeof(pl_wchar_t))-1;
351 text->text.w = (pl_wchar_t *)r;
352 } else
353 { text->length = size-1;
354 text->text.t = r;
355 }
356
357 Sclose(fd);
358
359 goto out;
360 } else
361 { Sclose(fd);
362 if ( *enc == ENC_ISO_LATIN_1 && enc[1] != ENC_UNKNOWN )
363 PL_clear_exception();
364
365 if ( r != text->buf )
366 Sfree(r);
367 }
368 }
369
370 goto error;
371 } else
372 { goto error;
373 }
374
375 out:
376 return TRUE;
377
378 maybe_write:
379 if ( (flags & (CVT_WRITE|CVT_WRITE_CANONICAL)) )
380 goto case_write;
381
382 error:
383 if ( canBind(w) && (flags & CVT_VARNOFAIL) )
384 return 2;
385
386 if ( (flags & CVT_EXCEPTION) )
387 { atom_t expected;
388
389 if ( (flags & CVT_LIST) && !(flags&(CVT_ATOM|CVT_NUMBER)) )
390 expected = ATOM_list; /* List and/or string object */
391 else if ( flags & CVT_LIST )
392 expected = ATOM_text;
393 else if ( (flags & CVT_ATOM) && w == ATOM_nil )
394 expected = ATOM_atom; /* [] \== '[]' */
395 else if ( flags & CVT_NUMBER )
396 expected = ATOM_atomic;
397 else
398 expected = ATOM_atom;
399
400 return PL_error(NULL, 0, NULL, ERR_TYPE, expected, l);
401 }
402
403 fail;
404 }
405
406
407 atom_t
textToAtom(PL_chars_t * text)408 textToAtom(PL_chars_t *text)
409 { GET_LD
410 atom_t a;
411
412 PL_STRINGS_MARK();
413 if ( PL_canonicalise_text(text) )
414 { if ( text->encoding == ENC_ISO_LATIN_1 )
415 a = lookupAtom(text->text.t, text->length);
416 else
417 a = lookupUCSAtom(text->text.w, text->length);
418 } else
419 { a = 0;
420 }
421 PL_STRINGS_RELEASE();
422
423 return a;
424 }
425
426
427 word
textToString(PL_chars_t * text)428 textToString(PL_chars_t *text)
429 { GET_LD
430 atom_t a;
431
432 PL_STRINGS_MARK();
433 if ( PL_canonicalise_text(text) )
434 { if ( text->encoding == ENC_ISO_LATIN_1 )
435 a = globalString(text->length, text->text.t);
436 else
437 a = globalWString(text->length, text->text.w);
438 } else
439 { a = 0;
440 }
441 PL_STRINGS_RELEASE();
442
443 return a;
444 }
445
446
447 static size_t
globalSpaceRequirement(PL_chars_t * text)448 globalSpaceRequirement(PL_chars_t *text)
449 { size_t len;
450
451 if ( text->encoding == ENC_ISO_LATIN_1 )
452 { len = text->length+1;
453 } else
454 { len = (text->length+1)*sizeof(pl_wchar_t);
455 }
456
457 return 2 + (len+sizeof(word))/sizeof(word);
458 }
459
460
461
462 static int
unify_text(term_t term,term_t tail,PL_chars_t * text,int type ARG_LD)463 unify_text(term_t term, term_t tail, PL_chars_t *text, int type ARG_LD)
464 { switch(type)
465 { case PL_ATOM:
466 { atom_t a = textToAtom(text);
467
468 if ( a )
469 { int rval = _PL_unify_atomic(term, a);
470
471 PL_unregister_atom(a);
472 return rval;
473 }
474 return FALSE;
475 }
476 case PL_STRING:
477 { word w;
478
479 if ( PL_canonicalise_text(text) )
480 { if ( hasGlobalSpace(globalSpaceRequirement(text)) ||
481 PL_from_stack_text(text, 0) )
482 { if ( (w = textToString(text)) )
483 return _PL_unify_atomic(term, w);
484 }
485 }
486
487 return FALSE;
488 }
489 case PL_CODE_LIST:
490 case PL_CHAR_LIST:
491 { if ( !hasGlobalSpace(text->length*3+1) &&
492 !PL_from_stack_text(text, 0) )
493 return FALSE;
494
495 if ( text->length == 0 )
496 { if ( tail )
497 { PL_put_term(tail, term);
498 return TRUE;
499 } else
500 { return PL_unify_nil(term);
501 }
502 } else
503 { term_t l = PL_new_term_ref();
504 Word p0, p;
505
506 switch(text->encoding)
507 { case ENC_ISO_LATIN_1:
508 { const unsigned char *s = (const unsigned char *)text->text.t;
509 const unsigned char *e = &s[text->length];
510
511 if ( !(p0 = p = INIT_SEQ_STRING(text->length)) )
512 return FALSE;
513
514 if ( type == PL_CODE_LIST ) {
515 for( ; s < e; s++)
516 p = EXTEND_SEQ_CODES(p, *s);
517 } else {
518 for( ; s < e; s++)
519 p = EXTEND_SEQ_CHARS(p, *s);
520 }
521 break;
522 }
523 case ENC_WCHAR:
524 { const pl_wchar_t *s = (const pl_wchar_t *)text->text.t;
525 const pl_wchar_t *e = &s[text->length];
526
527 if ( !(p0 = p = INIT_SEQ_STRING(text->length)) )
528 return FALSE;
529
530 if ( type == PL_CODE_LIST ) {
531 for( ; s < e; s++)
532 p = EXTEND_SEQ_CODES(p, *s);
533 } else {
534 for( ; s < e; s++)
535 p = EXTEND_SEQ_CHARS(p, *s);
536 }
537 break;
538 }
539 case ENC_UTF8:
540 { const char *s = text->text.t;
541 const char *e = &s[text->length];
542 size_t len = utf8_strlen(s, text->length);
543
544 if ( !(p0 = p = INIT_SEQ_STRING(len)) )
545 return FALSE;
546
547 if ( type == PL_CODE_LIST ) {
548 while (s < e) {
549 int chr;
550
551 s = utf8_get_char(s, &chr);
552 p = EXTEND_SEQ_CODES(p, chr);
553 }
554 } else {
555 while (s < e) {
556 int chr;
557
558 s = utf8_get_char(s, &chr);
559 p = EXTEND_SEQ_CHARS(p, chr);
560 }
561 }
562 break;
563 }
564 case ENC_ANSI:
565 { const char *s = text->text.t;
566 size_t rc, n = text->length;
567 size_t len = 0;
568 mbstate_t mbs;
569 wchar_t wc;
570
571 memset(&mbs, 0, sizeof(mbs));
572 while( n > 0 )
573 { if ( (rc=mbrtowc(&wc, s, n, &mbs)) == (size_t)-1 || rc == 0 )
574 return PL_error(NULL, 0, "cannot represent text in current locale",
575 ERR_REPRESENTATION, ATOM_encoding);
576
577 len++;
578 n -= rc;
579 s += rc;
580 }
581
582 if ( !(p0 = p = INIT_SEQ_STRING(len)) )
583 return FALSE;
584
585 n = text->length;
586 s = text->text.t;
587 memset(&mbs, 0, sizeof(mbs));
588 while(n > 0)
589 { rc = mbrtowc(&wc, s, n, &mbs);
590
591 if ( type == PL_CODE_LIST )
592 p = EXTEND_SEQ_CODES(p, wc);
593 else
594 p = EXTEND_SEQ_CHARS(p, wc);
595
596 s += rc;
597 n -= rc;
598 }
599 break;
600 }
601 default:
602 { assert(0);
603
604 return FALSE;
605 }
606 }
607
608 return CLOSE_SEQ_STRING(p, p0, tail, term, l );
609 }
610 }
611 default:
612 { assert(0);
613
614 return FALSE;
615 }
616 }
617 }
618
619
620 int
PL_unify_text(term_t term,term_t tail,PL_chars_t * text,int type)621 PL_unify_text(term_t term, term_t tail, PL_chars_t *text, int type)
622 { GET_LD
623 int rc;
624
625 PL_STRINGS_MARK();
626 rc = unify_text(term, tail, text, type PASS_LD);
627 PL_STRINGS_RELEASE();
628 return rc;
629 }
630
631
632 int
PL_unify_text_range(term_t term,PL_chars_t * text,size_t offset,size_t len,int type)633 PL_unify_text_range(term_t term, PL_chars_t *text,
634 size_t offset, size_t len, int type)
635 { if ( offset == 0 && len == text->length )
636 { return PL_unify_text(term, 0, text, type);
637 } else
638 { PL_chars_t sub;
639 int rc;
640
641 if ( offset > text->length || offset + len > text->length )
642 return FALSE;
643
644 if ( len == 1 && type == PL_ATOM )
645 { GET_LD
646 int c;
647
648 if ( text->encoding == ENC_ISO_LATIN_1 )
649 c = text->text.t[offset]&0xff;
650 else
651 c = text->text.w[offset];
652
653 return PL_unify_atom(term, codeToAtom(c));
654 }
655
656 sub.length = len;
657 sub.storage = text->storage == PL_CHARS_STACK ? PL_CHARS_STACK : PL_CHARS_HEAP;
658 if ( text->encoding == ENC_ISO_LATIN_1 )
659 { sub.text.t = text->text.t+offset;
660 sub.encoding = ENC_ISO_LATIN_1;
661 sub.canonical = TRUE;
662 } else
663 { sub.text.w = text->text.w+offset;
664 sub.encoding = ENC_WCHAR;
665 sub.canonical = FALSE;
666 }
667
668 rc = PL_unify_text(term, 0, &sub, type);
669
670 PL_free_text(&sub);
671
672 return rc;
673 }
674 }
675
676
677 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
678 int PL_promote_text(PL_chars_t *text)
679
680 Promote a text to USC if it is currently 8-bit text.
681 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
682
683 int
PL_promote_text(PL_chars_t * text)684 PL_promote_text(PL_chars_t *text)
685 { if ( text->encoding != ENC_WCHAR )
686 { if ( text->storage == PL_CHARS_MALLOC )
687 { pl_wchar_t *new = PL_malloc(sizeof(pl_wchar_t)*(text->length+1));
688 pl_wchar_t *t = new;
689 const unsigned char *s = (const unsigned char *)text->text.t;
690 const unsigned char *e = &s[text->length];
691
692 while(s<e)
693 { *t++ = *s++;
694 }
695 *t = EOS;
696
697 PL_free(text->text.t);
698 text->text.w = new;
699
700 text->encoding = ENC_WCHAR;
701 } else if ( text->storage == PL_CHARS_LOCAL &&
702 (text->length+1)*sizeof(pl_wchar_t) < sizeof(text->buf) )
703 { unsigned char buf[sizeof(text->buf)];
704 unsigned char *f = buf;
705 unsigned char *e = &buf[text->length];
706 pl_wchar_t *t = (pl_wchar_t*)text->buf;
707
708 memcpy(buf, text->buf, text->length*sizeof(char));
709 while(f<e)
710 { *t++ = *f++;
711 }
712 *t = EOS;
713 text->encoding = ENC_WCHAR;
714 } else
715 { Buffer b = findBuffer(BUF_STACK);
716 const unsigned char *s = (const unsigned char *)text->text.t;
717 const unsigned char *e = &s[text->length];
718
719 for( ; s<e; s++)
720 addBuffer(b, *s, pl_wchar_t);
721 addBuffer(b, EOS, pl_wchar_t);
722
723 text->text.w = baseBuffer(b, pl_wchar_t);
724 text->encoding = ENC_WCHAR;
725 text->storage = PL_CHARS_RING;
726 }
727 }
728
729 succeed;
730 }
731
732
733 static int
PL_demote_text(PL_chars_t * text,int flags)734 PL_demote_text(PL_chars_t *text, int flags)
735 { if ( text->encoding != ENC_ISO_LATIN_1 )
736 { if ( text->storage == PL_CHARS_MALLOC )
737 { char *new = PL_malloc(sizeof(char)*(text->length+1));
738 char *t = new;
739 const pl_wchar_t *s = (const pl_wchar_t *)text->text.t;
740 const pl_wchar_t *e = &s[text->length];
741
742 while(s<e)
743 { if ( *s > 0xff )
744 { PL_free(new);
745 reperr:
746 if ( (flags&CVT_EXCEPTION) )
747 PL_error(NULL, 0, "cannot represent text as ISO latin 1",
748 ERR_REPRESENTATION, ATOM_encoding);
749 return FALSE;
750 }
751 *t++ = *s++ & 0xff;
752 }
753 *t = EOS;
754
755 PL_free(text->text.t);
756 text->text.t = new;
757
758 text->encoding = ENC_ISO_LATIN_1;
759 } else if ( text->storage == PL_CHARS_LOCAL )
760 { pl_wchar_t buf[sizeof(text->buf)/sizeof(pl_wchar_t)];
761 pl_wchar_t *f = buf;
762 pl_wchar_t *e = &buf[text->length];
763 char *t = text->buf;
764
765 memcpy(buf, text->buf, text->length*sizeof(pl_wchar_t));
766 while(f<e)
767 { if ( *f > 0xff )
768 goto reperr;
769 *t++ = *f++ & 0xff;
770 }
771 *t = EOS;
772 text->encoding = ENC_ISO_LATIN_1;
773 } else
774 { Buffer b = findBuffer(BUF_STACK);
775 const pl_wchar_t *s = (const pl_wchar_t*)text->text.w;
776 const pl_wchar_t *e = &s[text->length];
777
778 for( ; s<e; s++)
779 { if ( *s > 0xff )
780 { unfindBuffer(b, BUF_STACK);
781 goto reperr;
782 }
783 addBuffer(b, *s&0xff, char);
784 }
785 addBuffer(b, EOS, char);
786
787 text->text.t = baseBuffer(b, char);
788 text->storage = PL_CHARS_RING;
789 text->encoding = ENC_ISO_LATIN_1;
790 }
791 }
792
793 succeed;
794 }
795
796
797 static int
can_demote(PL_chars_t * text)798 can_demote(PL_chars_t *text)
799 { if ( text->encoding != ENC_ISO_LATIN_1 )
800 { const pl_wchar_t *w = (const pl_wchar_t*)text->text.w;
801 const pl_wchar_t *e = &w[text->length];
802
803 for(; w<e; w++)
804 { if ( *w > 0xff )
805 return FALSE;
806 }
807 }
808
809 return TRUE;
810 }
811
812
813 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
814 Convert text to 8-bit according to flags. May hold REP_UTF8 to convert
815 to UTF-8, REP_MB to convert to locale 8-bit representation or nothing to
816 convert to ISO Latin-1. This predicate can fail of the text cannot be
817 represented.
818 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
819
820 static int
wctobuffer(wchar_t c,mbstate_t * mbs,Buffer buf)821 wctobuffer(wchar_t c, mbstate_t *mbs, Buffer buf)
822 { char b[PL_MB_LEN_MAX];
823 size_t n;
824
825 if ( (n=wcrtomb(b, c, mbs)) != (size_t)-1 )
826 { size_t i;
827
828 for(i=0; i<n; i++)
829 addBuffer(buf, b[i], char);
830
831 return TRUE;
832 }
833
834 return FALSE; /* cannot represent */
835 }
836
837
838 static void
utf8tobuffer(wchar_t c,Buffer buf)839 utf8tobuffer(wchar_t c, Buffer buf)
840 { if ( c <= 0x7f )
841 { addBuffer(buf, (char)c, char);
842 } else
843 { char b[6];
844 char *e = b;
845 const char *s;
846
847 e = utf8_put_char(e, c);
848 for(s=b; s<e; s++)
849 addBuffer(buf, *s, char);
850 }
851 }
852
853
854 int
PL_mb_text(PL_chars_t * text,int flags)855 PL_mb_text(PL_chars_t *text, int flags)
856 { int norep = -1;
857 IOENC target = ((flags&REP_UTF8) ? ENC_UTF8 :
858 (flags&REP_MB) ? ENC_ANSI : ENC_ISO_LATIN_1);
859
860 if ( text->encoding != target )
861 { Buffer b = findBuffer(BUF_STACK);
862
863 switch(text->encoding)
864 { case ENC_ISO_LATIN_1:
865 { const unsigned char *s = (const unsigned char*)text->text.t;
866 const unsigned char *e = &s[text->length];
867
868 if ( target == ENC_UTF8 )
869 { for( ; s<e; s++)
870 { utf8tobuffer(*s, b);
871 }
872 addBuffer(b, 0, char);
873 } else /* if ( target == ENC_MB ) */
874 { mbstate_t mbs;
875
876 memset(&mbs, 0, sizeof(mbs));
877 for( ; s<e; s++)
878 { if ( !wctobuffer(*s, &mbs, b) )
879 { unfindBuffer(b, BUF_STACK);
880 norep = *s;
881 goto rep_error;
882 }
883 }
884 wctobuffer(0, &mbs, b);
885 }
886
887 break;
888 }
889 case ENC_WCHAR:
890 { if ( target == ENC_ISO_LATIN_1 )
891 { return PL_demote_text(text, flags);
892 } else
893 { const pl_wchar_t *w = (const pl_wchar_t*)text->text.w;
894 const pl_wchar_t *e = &w[text->length];
895
896 if ( target == ENC_UTF8 )
897 { for( ; w<e; w++)
898 { utf8tobuffer(*w, b);
899 }
900 addBuffer(b, 0, char);
901 } else /* if ( target == ENC_MB ) */
902 { mbstate_t mbs;
903
904 memset(&mbs, 0, sizeof(mbs));
905 for( ; w<e; w++)
906 { if ( !wctobuffer(*w, &mbs, b) )
907 { unfindBuffer(b, BUF_STACK);
908 norep = *w;
909 goto rep_error;
910 }
911 }
912 wctobuffer(0, &mbs, b);
913 }
914 }
915 break;
916 }
917 default:
918 { assert(0);
919 fail;
920 }
921 }
922
923 PL_free_text(text);
924
925 text->length = sizeOfBuffer(b)-1;
926 text->text.t = baseBuffer(b, char);
927 text->encoding = target;
928 text->storage = PL_CHARS_RING;
929 }
930
931 succeed;
932
933 rep_error:
934 if ( (flags & CVT_EXCEPTION) )
935 { char msg[128];
936
937 sprintf(msg,
938 "Cannot represent char U%04x using %s encoding",
939 norep,
940 target == ENC_ISO_LATIN_1 ? "ISO Latin-1" : "current locale");
941
942 return PL_error(NULL, 0, msg, ERR_REPRESENTATION, ATOM_encoding);
943 }
944
945 fail;
946 }
947
948
949 static void
flip_shorts(unsigned char * s,size_t len)950 flip_shorts(unsigned char *s, size_t len)
951 { unsigned char *e = s+len;
952
953 for(; s<e; s+=2)
954 { unsigned char t = s[0];
955 s[0] = s[1];
956 s[1] = t;
957 }
958 }
959
960
961 static int
native_byte_order(IOENC enc)962 native_byte_order(IOENC enc)
963 {
964 #ifdef WORDS_BIGENDIAN
965 return enc == ENC_UNICODE_BE;
966 #else
967 return enc == ENC_UNICODE_LE;
968 #endif
969 }
970
971
972 int
PL_canonicalise_text(PL_chars_t * text)973 PL_canonicalise_text(PL_chars_t *text)
974 { if ( !text->canonical )
975 { switch(text->encoding )
976 { case ENC_OCTET:
977 text->encoding = ENC_ISO_LATIN_1;
978 case ENC_ISO_LATIN_1:
979 text->canonical = TRUE;
980 break; /* nothing to do */
981 case ENC_WCHAR:
982 { const pl_wchar_t *w = (const pl_wchar_t*)text->text.w;
983 const pl_wchar_t *e = &w[text->length];
984
985 for(; w<e; w++)
986 { if ( *w > 0xff )
987 return TRUE;
988 }
989
990 return PL_demote_text(text, 0);
991 }
992 case ENC_UNICODE_LE: /* assume text->length is in bytes */
993 case ENC_UNICODE_BE:
994 {
995 #if SIZEOF_WCHAR_T == 2
996 assert(text->length%2 == 0);
997 if ( !native_byte_order(text->encoding) )
998 { if ( text->storage == PL_CHARS_HEAP )
999 PL_save_text(text, BUF_MALLOC);
1000 flip_shorts((unsigned char*)text->text.t, text->length);
1001 }
1002 text->encoding = ENC_WCHAR;
1003 return TRUE;
1004 #else /*SIZEOF_WCHAR_T!=2*/
1005 size_t len = text->length/sizeof(short);
1006 const unsigned short *w = (const unsigned short *)text->text.t;
1007 const unsigned short *e = &w[len];
1008 int wide = FALSE;
1009
1010 assert(text->length%2 == 0);
1011 if ( !native_byte_order(text->encoding) )
1012 { if ( text->storage == PL_CHARS_HEAP )
1013 PL_save_text(text, BUF_MALLOC);
1014 flip_shorts((unsigned char*)text->text.t, text->length);
1015 }
1016
1017 for(; w<e; w++)
1018 { if ( *w > 0xff )
1019 { wide = TRUE;
1020 break;
1021 }
1022 }
1023 w = (const unsigned short*)text->text.t;
1024
1025 if ( wide )
1026 { pl_wchar_t *t, *to = PL_malloc(sizeof(pl_wchar_t)*(len+1));
1027
1028 for(t=to; w<e; )
1029 { *t++ = *w++;
1030 }
1031 *t = EOS;
1032
1033 text->length = len;
1034 text->encoding = ENC_WCHAR;
1035 if ( text->storage == PL_CHARS_MALLOC )
1036 PL_free(text->text.t);
1037 else
1038 text->storage = PL_CHARS_MALLOC;
1039
1040 text->text.w = to;
1041 } else
1042 { unsigned char *t, *to = PL_malloc(len+1);
1043
1044 for(t=to; w<e; )
1045 *t++ = (unsigned char)*w++;
1046 *t = EOS;
1047
1048 text->length = len;
1049 text->encoding = ENC_ISO_LATIN_1;
1050 if ( text->storage == PL_CHARS_MALLOC )
1051 PL_free(text->text.t);
1052 else
1053 text->storage = PL_CHARS_MALLOC;
1054
1055 text->text.t = (char*)to;
1056 }
1057
1058 succeed;
1059 #endif /*SIZEOF_WCHAR_T==2*/
1060 }
1061 case ENC_UTF8:
1062 { const char *s = text->text.t;
1063 const char *e = &s[text->length];
1064
1065 while(s<e && !(*s & 0x80))
1066 s++;
1067 if ( s == e )
1068 { text->encoding = ENC_ISO_LATIN_1;
1069 text->canonical = TRUE;
1070 } else
1071 { int chr;
1072 int wide = FALSE;
1073 size_t len = s - text->text.t;
1074
1075 while(s<e)
1076 { s = utf8_get_char(s, &chr);
1077 if ( chr > 0xff ) /* requires wide characters */
1078 wide = TRUE;
1079 len++;
1080 }
1081
1082 s = (const char *)text->text.t;
1083 text->length = len;
1084
1085 if ( wide )
1086 { pl_wchar_t *t, *to = PL_malloc(sizeof(pl_wchar_t)*(len+1));
1087
1088 for(t=to; s<e; )
1089 { s = utf8_get_char(s, &chr);
1090 *t++ = chr;
1091 }
1092 *t = EOS;
1093
1094 text->encoding = ENC_WCHAR;
1095 if ( text->storage == PL_CHARS_MALLOC )
1096 PL_free(text->text.t);
1097 text->text.w = to;
1098 text->storage = PL_CHARS_MALLOC;
1099 } else
1100 { char *t, *to = PL_malloc(len+1);
1101
1102 for(t=to; s<e;)
1103 { s = utf8_get_char(s, &chr);
1104 *t++ = chr;
1105 }
1106 *t = EOS;
1107
1108 text->encoding = ENC_ISO_LATIN_1;
1109 if ( text->storage == PL_CHARS_MALLOC )
1110 PL_free(text->text.t);
1111 text->text.t = to;
1112 text->storage = PL_CHARS_MALLOC;
1113 }
1114
1115 text->canonical = TRUE;
1116 }
1117
1118 succeed;
1119 }
1120 case ENC_ANSI:
1121 { mbstate_t mbs;
1122 size_t len = 0;
1123 int iso = TRUE;
1124 char *s = text->text.t;
1125 size_t rc, n = text->length;
1126 wchar_t wc;
1127
1128 memset(&mbs, 0, sizeof(mbs));
1129 while( n > 0 )
1130 { if ( (rc=mbrtowc(&wc, s, n, &mbs)) == (size_t)-1 || rc == 0)
1131 return FALSE; /* encoding error */
1132
1133 if ( wc > 0xff )
1134 iso = FALSE;
1135 len++;
1136 n -= rc;
1137 s += rc;
1138 }
1139
1140 if ( n == 0 )
1141 { const char *from = text->text.t;
1142 void *do_free;
1143
1144 n = text->length;
1145 memset(&mbs, 0, sizeof(mbs));
1146
1147 if ( text->storage == PL_CHARS_MALLOC )
1148 do_free = text->text.t;
1149 else
1150 do_free = NULL;
1151
1152 if ( iso )
1153 { char *to;
1154
1155 text->encoding = ENC_ISO_LATIN_1;
1156 if ( len+1 < sizeof(text->buf) )
1157 { text->text.t = text->buf;
1158 text->storage = PL_CHARS_LOCAL;
1159 } else
1160 { text->text.t = PL_malloc(len+1);
1161 text->storage = PL_CHARS_MALLOC;
1162 }
1163
1164 to = text->text.t;
1165 while( n > 0 )
1166 { rc = mbrtowc(&wc, from, n, &mbs);
1167
1168 *to++ = (char)wc;
1169 n -= rc;
1170 from += rc;
1171 }
1172 *to = EOS;
1173 } else
1174 { wchar_t *to;
1175 char b2[sizeof(text->buf)];
1176
1177 text->encoding = ENC_WCHAR;
1178 if ( len+1 < sizeof(text->buf)/sizeof(wchar_t) )
1179 { if ( text->text.t == text->buf )
1180 { memcpy(b2, text->buf, sizeof(text->buf));
1181 from = b2;
1182 }
1183 text->text.w = (wchar_t*)text->buf;
1184 } else
1185 { text->text.w = PL_malloc((len+1)*sizeof(wchar_t));
1186 text->storage = PL_CHARS_MALLOC;
1187 }
1188
1189 to = text->text.w;
1190 while( n > 0 )
1191 { rc = mbrtowc(&wc, from, n, &mbs);
1192
1193 *to++ = wc;
1194 n -= rc;
1195 from += rc;
1196 }
1197 *to = EOS;
1198 }
1199
1200 text->length = len;
1201 text->canonical = TRUE;
1202 if ( do_free )
1203 PL_free(do_free);
1204
1205 succeed;
1206 }
1207
1208 fail;
1209 }
1210 default:
1211 assert(0);
1212 }
1213 }
1214
1215 succeed;
1216 }
1217
1218
1219 void
PL_free_text(PL_chars_t * text)1220 PL_free_text(PL_chars_t *text)
1221 { if ( text->storage == PL_CHARS_MALLOC && text->text.t )
1222 PL_free(text->text.t);
1223 }
1224
1225
1226 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1227 Recode a text to the given encoding. Currrenly only supports re-coding
1228 to UTF-8 for ENC_ASCII, ENC_ISO_LATIN_1, ENC_WCHAR and ENC_ANSI.
1229 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1230
1231 int
PL_text_recode(PL_chars_t * text,IOENC encoding)1232 PL_text_recode(PL_chars_t *text, IOENC encoding)
1233 { if ( text->encoding != encoding )
1234 { switch(encoding)
1235 { case ENC_UTF8:
1236 { Buffer b;
1237
1238 switch(text->encoding)
1239 { case ENC_ASCII:
1240 text->encoding = ENC_UTF8;
1241 break;
1242 case ENC_ISO_LATIN_1:
1243 { const unsigned char *s = (const unsigned char *)text->text.t;
1244 const unsigned char *e = &s[text->length];
1245
1246 for( ; s<e; s++)
1247 { if ( *s&0x80 )
1248 { s = (const unsigned char *)text->text.t;
1249 goto convert_utf8;
1250 }
1251 }
1252 /* ASCII; nothing to do */
1253 text->encoding = ENC_UTF8;
1254 break;
1255
1256 convert_utf8:
1257 b = findBuffer(BUF_STACK);
1258 for( ; s<e; s++)
1259 utf8tobuffer(*s, b);
1260 swap_to_utf8:
1261 PL_free_text(text);
1262 text->length = entriesBuffer(b, char);
1263 addBuffer(b, EOS, char);
1264 text->text.t = baseBuffer(b, char);
1265 text->encoding = ENC_UTF8;
1266 text->storage = PL_CHARS_RING;
1267
1268 break;
1269 }
1270 case ENC_WCHAR:
1271 { const pl_wchar_t *s = text->text.w;
1272 const pl_wchar_t *e = &s[text->length];
1273
1274 b = findBuffer(BUF_STACK);
1275 for( ; s<e; s++)
1276 utf8tobuffer(*s, b);
1277 goto swap_to_utf8;
1278 }
1279 case ENC_ANSI:
1280 { mbstate_t mbs;
1281 size_t rc, n = text->length;
1282 wchar_t wc;
1283 const char *s = (const char *)text->text.t;
1284
1285 b = findBuffer(BUF_STACK);
1286 memset(&mbs, 0, sizeof(mbs));
1287 while( n > 0 )
1288 { if ( (rc=mbrtowc(&wc, s, n, &mbs)) == (size_t)-1 || rc == 0)
1289 return FALSE; /* encoding error */
1290
1291 utf8tobuffer(wc, b);
1292 n -= rc;
1293 s += rc;
1294 }
1295 if ( n == 0 )
1296 goto swap_to_utf8;
1297
1298 return FALSE;
1299 }
1300 default:
1301 assert(0);
1302 return FALSE;
1303 }
1304 return TRUE;
1305 default:
1306 assert(0);
1307 return FALSE;
1308 }
1309 }
1310 } else
1311 return TRUE;
1312 }
1313
1314
1315 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1316 PL_cmp_text(PL_chars_t *t1, size_t o1,
1317 PL_chars_t *t2, size_t o2,
1318 size_t len)
1319
1320 Compares two substrings of two text representations.
1321 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1322
1323 int
PL_cmp_text(PL_chars_t * t1,size_t o1,PL_chars_t * t2,size_t o2,size_t len)1324 PL_cmp_text(PL_chars_t *t1, size_t o1, PL_chars_t *t2, size_t o2,
1325 size_t len)
1326 { ssize_t l = len;
1327 int ifeq = 0;
1328
1329 if ( l > (ssize_t)(t1->length - o1) )
1330 { l = t1->length - o1;
1331 ifeq = CMP_LESS; /* first is short */
1332 }
1333 if ( l > (ssize_t)(t2->length - o2) )
1334 { l = t2->length - o2;
1335 if ( ifeq == 0 )
1336 ifeq = CMP_GREATER;
1337 }
1338
1339 if ( l == 0 ) /* too long offsets */
1340 return ifeq;
1341
1342 if ( t1->encoding == ENC_ISO_LATIN_1 && t2->encoding == ENC_ISO_LATIN_1 )
1343 { const unsigned char *s = (const unsigned char *)t1->text.t+o1;
1344 const unsigned char *q = (const unsigned char *)t2->text.t+o2;
1345
1346 for(; l-- > 0 && *s == *q; s++, q++ )
1347 ;
1348 if ( l < 0 )
1349 return ifeq;
1350 else
1351 return *s > *q ? CMP_GREATER : CMP_LESS;
1352 } else if ( t1->encoding == ENC_WCHAR && t2->encoding == ENC_WCHAR )
1353 { const pl_wchar_t *s = t1->text.w+o1;
1354 const pl_wchar_t *q = t2->text.w+o2;
1355
1356 for(; l-- > 0 && *s == *q; s++, q++ )
1357 ;
1358 if ( l < 0 )
1359 return ifeq;
1360 else
1361 return *s > *q ? CMP_GREATER : CMP_LESS;
1362 } else if ( t1->encoding == ENC_ISO_LATIN_1 && t2->encoding == ENC_WCHAR )
1363 { const unsigned char *s = (const unsigned char *)t1->text.t+o1;
1364 const pl_wchar_t *q = t2->text.w+o2;
1365
1366 for(; l-- > 0 && *s == *q; s++, q++ )
1367 ;
1368 if ( l < 0 )
1369 return ifeq;
1370 else
1371 return *s > *q ? CMP_GREATER : CMP_LESS;
1372 } else
1373 { const pl_wchar_t *s = t1->text.w+o1;
1374 const unsigned char *q = (const unsigned char *)t2->text.t+o2;
1375
1376 for(; l-- > 0 && *s == *q; s++, q++ )
1377 ;
1378 if ( l < 0 )
1379 return ifeq;
1380 else
1381 return *s > *q ? CMP_GREATER : CMP_LESS;
1382 }
1383 }
1384
1385
1386 int
PL_concat_text(int n,PL_chars_t ** text,PL_chars_t * result)1387 PL_concat_text(int n, PL_chars_t **text, PL_chars_t *result)
1388 { size_t total_length = 0;
1389 int latin = TRUE;
1390 int i;
1391
1392 for(i=0; i<n; i++)
1393 { if ( latin && !can_demote(text[i]) )
1394 latin = FALSE;
1395 total_length += text[i]->length;
1396 }
1397
1398 result->canonical = TRUE;
1399 result->length = total_length;
1400
1401 if ( latin )
1402 { char *to;
1403
1404 result->encoding = ENC_ISO_LATIN_1;
1405 if ( total_length+1 < sizeof(result->buf) )
1406 { result->text.t = result->buf;
1407 result->storage = PL_CHARS_LOCAL;
1408 } else
1409 { result->text.t = PL_malloc(total_length+1);
1410 result->storage = PL_CHARS_MALLOC;
1411 }
1412
1413 for(to=result->text.t, i=0; i<n; i++)
1414 { memcpy(to, text[i]->text.t, text[i]->length);
1415 to += text[i]->length;
1416 }
1417 *to = EOS;
1418 } else
1419 { pl_wchar_t *to;
1420
1421 result->encoding = ENC_WCHAR;
1422 if ( total_length+1 < sizeof(result->buf)/sizeof(pl_wchar_t) )
1423 { result->text.w = (pl_wchar_t*)result->buf;
1424 result->storage = PL_CHARS_LOCAL;
1425 } else
1426 { result->text.w = PL_malloc((total_length+1)*sizeof(pl_wchar_t));
1427 result->storage = PL_CHARS_MALLOC;
1428 }
1429
1430 for(to=result->text.w, i=0; i<n; i++)
1431 { if ( text[i]->encoding == ENC_WCHAR )
1432 { memcpy(to, text[i]->text.w, text[i]->length*sizeof(pl_wchar_t));
1433 to += text[i]->length;
1434 } else
1435 { const unsigned char *f = (const unsigned char *)text[i]->text.t;
1436 const unsigned char *e = &f[text[i]->length];
1437
1438 while(f<e)
1439 *to++ = *f++;
1440 }
1441 }
1442 assert((size_t)(to-result->text.w) == total_length);
1443 *to = EOS;
1444 }
1445
1446 return TRUE;
1447 }
1448
1449
1450 IOSTREAM *
Sopen_text(PL_chars_t * txt,const char * mode)1451 Sopen_text(PL_chars_t *txt, const char *mode)
1452 { IOSTREAM *stream;
1453
1454 if ( !streq(mode, "r") )
1455 { errno = EINVAL;
1456 return NULL;
1457 }
1458
1459 stream = Sopen_string(NULL,
1460 txt->text.t,
1461 bufsize_text(txt, txt->length),
1462 mode);
1463 stream->encoding = txt->encoding;
1464
1465 return stream;
1466 }
1467