1 /* $Id$
2
3 Part of SWI-Prolog
4
5 Author: Jan Wielemaker
6 E-mail: wielemak@science.uva.nl
7 WWW: http://www.swi-prolog.org
8 Copyright (C): 1985-2007, University of Amsterdam
9
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25 #include "pl-incl.h"
26 #include <ctype.h>
27 #include "pl-ctype.h"
28
29 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
30 This module defines:
31
32 char_type(?Char, ?Type)
33 code_type(?Char, ?Type)
34
35 See manual for details.
36 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
37
38 #define CHAR_MODE 0
39 #define CODE_MODE 1
40
41 #define CTX_CHAR 0 /* Class(Char) */
42 #define CTX_CODE 1 /* Class(Int) */
43
44 typedef struct
45 { atom_t name; /* name of the class */
46 int (*test)(wint_t chr); /* boolean */
47 int (*reverse)(wint_t chr); /* reverse mapping */
48 short arity; /* arity of class (i.e. lower('A')) */
49 short ctx_type; /* CTX_* */
50 } char_type;
51
52 #define ENUM_NONE 0x00
53 #define ENUM_CHAR 0x01
54 #define ENUM_CLASS 0x02
55 #define ENUM_BOTH 0x03
56
57 typedef struct
58 { int current; /* current character */
59 const char_type *class; /* current class */
60 int do_enum; /* what to enumerate */
61 } generator;
62
63
64 static int unicode_separator(wint_t c);
65
66 static int
iswhite(wint_t chr)67 iswhite(wint_t chr)
68 { return chr == ' ' || chr == '\t';
69 }
70
71
72 #ifdef __YAP_PROLOG__
73 #include "pl-umap.c" /* Unicode map */
74
75 #define CharTypeW(c, t, w) \
76 ((unsigned)(c) <= 0xff ? (_PL_char_types[(unsigned)(c)] t) \
77 : (uflagsW(c) & w))
78
79 #define PlBlankW(c) CharTypeW(c, <= SP, U_SEPARATOR)
80
81
82 inline int
unicode_separator(wint_t c)83 unicode_separator(wint_t c)
84 { return PlBlankW(c);
85 }
86 #endif
87
88 static int
fiscsym(wint_t chr)89 fiscsym(wint_t chr)
90 { return iswalnum(chr) || chr == '_';
91 }
92
93
94 static int
fiscsymf(wint_t chr)95 fiscsymf(wint_t chr)
96 { return iswalpha(chr) || chr == '_';
97 }
98
99 static int
iseof(wint_t chr)100 iseof(wint_t chr)
101 { return chr == (wint_t)-1;
102 }
103
104 static int
iseol(wint_t chr)105 iseol(wint_t chr)
106 { return chr >= 10 && chr <= 13;
107 }
108
109 static int
isnl(wint_t chr)110 isnl(wint_t chr)
111 { return chr == '\n';
112 }
113
114 static int
isperiod(wint_t chr)115 isperiod(wint_t chr)
116 { return chr && strchr(".?!", chr) != NULL;
117 }
118
119 static int
isquote(wint_t chr)120 isquote(wint_t chr)
121 { return chr && strchr("'`\"", chr) != NULL;
122 }
123
124 static int
fupper(wint_t chr)125 fupper(wint_t chr)
126 { return iswlower(chr) ? (int)towupper(chr) : -1;
127 }
128
129 static int
flower(wint_t chr)130 flower(wint_t chr)
131 { return iswupper(chr) ? (int)towlower(chr) : -1;
132 }
133
134 static int
ftoupper(wint_t chr)135 ftoupper(wint_t chr)
136 { return towupper(chr);
137 }
138
139 static int
ftolower(wint_t chr)140 ftolower(wint_t chr)
141 { return towlower(chr);
142 }
143
144 static int
fparen(wint_t chr)145 fparen(wint_t chr)
146 { switch(chr)
147 { case '(':
148 return ')';
149 case '{':
150 return '}';
151 case '[':
152 return ']';
153 default:
154 return -1;
155 }
156 }
157
158
159 static int
rparen(wint_t chr)160 rparen(wint_t chr)
161 { switch(chr)
162 { case ')':
163 return '(';
164 case '}':
165 return '{';
166 case ']':
167 return '[';
168 default:
169 return -1;
170 }
171 }
172
173
174 static int
fdigit(wint_t chr)175 fdigit(wint_t chr)
176 { if ( chr <= 0xff && isdigit(chr) )
177 return chr - '0';
178 return -1;
179 }
180
181
182 static int
rdigit(wint_t d)183 rdigit(wint_t d)
184 { if ( (int)d >= 0 && d <= 9 )
185 return d+'0';
186 return -1;
187 }
188
189
190 static int
fxdigit(wint_t chr)191 fxdigit(wint_t chr)
192 { if ( chr > 0xff )
193 return -1;
194 if ( isdigit(chr) )
195 return chr - '0';
196 if ( chr >= 'a' && chr <= 'f' )
197 return chr - 'a' + 10;
198 if ( chr >= 'A' && chr <= 'F' )
199 return chr - 'A' + 10;
200 return -1;
201 }
202
203
204 static int
rxdigit(wint_t d)205 rxdigit(wint_t d)
206 { if ( (int)d >= 0 && d <= 9 )
207 return d+'0';
208 if ( d >= 10 && d <= 15 )
209 return d-10+'a';
210 return -1;
211 }
212
213
214
215 #define mkfunction(name) \
216 static int f ## name(wint_t chr) { return name(chr); }
217
218 mkfunction(iswalnum)
219 mkfunction(iswalpha)
220 mkfunction(isascii)
221 mkfunction(iswcntrl)
222 mkfunction(iswdigit)
223 mkfunction(iswgraph)
224 mkfunction(iswlower)
225 mkfunction(iswupper)
226 mkfunction(iswpunct)
227 mkfunction(iswspace)
228
229 static const char_type char_types[] =
230 { { ATOM_alnum, fiswalnum },
231 { ATOM_alpha, fiswalpha },
232 { ATOM_csym, fiscsym },
233 { ATOM_csymf, fiscsymf },
234 { ATOM_ascii, fisascii },
235 { ATOM_white, iswhite },
236 { ATOM_cntrl, fiswcntrl },
237 { ATOM_digit, fiswdigit },
238 { ATOM_graph, fiswgraph },
239 { ATOM_lower, fiswlower },
240 { ATOM_upper, fiswupper },
241 { ATOM_punct, fiswpunct },
242 { ATOM_space, fiswspace },
243 { ATOM_end_of_file, iseof },
244 { ATOM_end_of_line, iseol },
245 { ATOM_newline, isnl },
246 { ATOM_period, isperiod },
247 { ATOM_quote, isquote },
248 { ATOM_lower, fupper, flower, 1, CTX_CHAR },
249 { ATOM_upper, flower, fupper, 1, CTX_CHAR },
250 { ATOM_to_lower, ftoupper, ftolower, 1, CTX_CHAR },
251 { ATOM_to_upper, ftolower, ftoupper, 1, CTX_CHAR },
252 { ATOM_paren, fparen, rparen, 1, CTX_CHAR },
253 { ATOM_digit, fdigit, rdigit, 1, CTX_CODE },
254 { ATOM_xdigit, fxdigit, rxdigit, 1, CTX_CODE },
255 { NULL_ATOM, NULL }
256 };
257
258 static const char_type *
char_type_by_name(atom_t name,int arity)259 char_type_by_name(atom_t name, int arity)
260 { const char_type *cc;
261
262 for(cc = char_types; cc->name; cc++)
263 { if ( cc->name == name && cc->arity == arity )
264 return cc;
265 }
266
267 return NULL;
268 }
269
270
271 static int
advanceGen(generator * gen)272 advanceGen(generator *gen)
273 { if ( gen->do_enum & ENUM_CHAR )
274 { if ( ++gen->current == 256 )
275 fail;
276 } else
277 { gen->class++;
278 if ( !gen->class->name )
279 fail;
280 }
281
282 succeed;
283 }
284
285
286 static int
unify_char_type(term_t type,const char_type * ct,int context,int how)287 unify_char_type(term_t type, const char_type *ct, int context, int how)
288 { if ( ct->arity == 0 )
289 return PL_unify_atom(type, ct->name);
290 else /*if ( ct->arity == 1 )*/
291 { if ( PL_unify_functor(type, PL_new_functor(ct->name, 1)) )
292 { term_t a = PL_new_term_ref();
293
294 _PL_get_arg(1, type, a);
295
296 if ( ct->ctx_type == CTX_CHAR )
297 return PL_unify_char(a, context, how);
298 else
299 return PL_unify_integer(a, context);
300 }
301 }
302
303 fail;
304 }
305
306
307 static foreign_t
do_char_type(term_t chr,term_t class,control_t h,int how)308 do_char_type(term_t chr, term_t class, control_t h, int how)
309 { GET_LD
310 generator *gen;
311 fid_t fid;
312
313 switch( ForeignControl(h) )
314 { case FRG_FIRST_CALL:
315 { const char_type *cc = NULL;
316 int c;
317 int do_enum = ENUM_NONE;
318 atom_t cn;
319 int arity;
320
321 if ( PL_is_variable(chr) )
322 do_enum |= ENUM_CHAR;
323 if ( PL_is_variable(class) )
324 do_enum |= ENUM_CLASS;
325
326 if ( do_enum == ENUM_BOTH )
327 return PL_error("char_type", 2, NULL, ERR_INSTANTIATION);
328
329 if ( !(do_enum & ENUM_CHAR) )
330 { if ( !PL_get_char(chr, &c, TRUE) )
331 fail;
332 if ( c == -1 )
333 return PL_unify_atom(class, ATOM_end_of_file);
334 }
335
336 if ( !(do_enum & ENUM_CLASS) )
337 { if ( !PL_get_name_arity(class, &cn, &arity) ||
338 !(cc = char_type_by_name(cn, arity)) )
339 return PL_error("char_type", 2, NULL,
340 ERR_TYPE, ATOM_char_type, class);
341 }
342
343 if ( do_enum == ENUM_NONE )
344 { if ( arity == 0 )
345 return (*cc->test)((wint_t)c) ? TRUE : FALSE;
346 else
347 { int rval = (*cc->test)((wint_t)c);
348
349 if ( rval >= 0 )
350 { term_t a = PL_new_term_ref();
351 int ok;
352
353 _PL_get_arg(1, class, a);
354
355 if ( cc->ctx_type == CTX_CHAR )
356 ok = PL_unify_char(a, rval, how);
357 else
358 ok = PL_unify_integer(a, rval);
359
360 if ( ok )
361 return TRUE;
362 else
363 do_enum = ENUM_CHAR; /* try the other way around */
364 } else
365 fail;
366 }
367 }
368
369 if ( do_enum == ENUM_CHAR && arity == 1 )
370 { term_t a = PL_new_term_ref(); /* char_type(X, lower('A')) */
371 int ca;
372
373 _PL_get_arg(1, class, a);
374 if ( !PL_is_variable(a) )
375 { if ( PL_get_char(a, &ca, FALSE) )
376 { int c = (*cc->reverse)((wint_t)ca);
377
378 if ( c < 0 )
379 fail;
380
381 return PL_unify_char(chr, c, how);
382 }
383 fail; /* error */
384 }
385 }
386
387 gen = allocHeap(sizeof(*gen));
388 gen->do_enum = do_enum;
389
390 if ( do_enum & ENUM_CHAR )
391 { gen->class = cc;
392 gen->current = -1;
393 } else if ( do_enum & ENUM_CLASS )
394 { gen->class = char_types;
395 gen->current = c;
396 }
397
398 break;
399 }
400 case FRG_REDO:
401 gen = ForeignContextPtr(h);
402 break;
403 case FRG_CUTTED:
404 gen = ForeignContextPtr(h);
405 if ( gen )
406 freeHeap(gen, sizeof(*gen));
407 default:
408 succeed;
409 }
410
411 if ( !(fid = PL_open_foreign_frame()) )
412 goto error;
413
414 for(;;)
415 { int rval;
416
417 if ( (rval = (*gen->class->test)((wint_t)gen->current)) )
418 { if ( gen->do_enum & ENUM_CHAR )
419 { if ( !PL_unify_char(chr, gen->current, how) )
420 goto next;
421 }
422 if ( gen->class->arity > 0 )
423 { if ( rval < 0 ||
424 !unify_char_type(class, gen->class, rval, how) )
425 goto next;
426
427 } else if ( gen->do_enum & ENUM_CLASS )
428 { if ( !unify_char_type(class, gen->class, rval, how) )
429 goto next;
430 }
431
432 if ( advanceGen(gen) ) /* ok, found one */
433 ForeignRedoPtr(gen);
434 else
435 { freeHeap(gen, sizeof(*gen)); /* the only one */
436 succeed;
437 }
438 }
439 next:
440 PL_rewind_foreign_frame(fid);
441
442 if ( !advanceGen(gen) )
443 break;
444 }
445
446 error:
447 freeHeap(gen, sizeof(*gen));
448 fail;
449 }
450
451
452
453 static
454 PRED_IMPL("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
455 { return do_char_type(A1, A2, PL__ctx, PL_CHAR);
456 }
457
458
459 static
460 PRED_IMPL("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
461 { return do_char_type(A1, A2, PL__ctx, PL_CODE);
462 }
463
464
465 #if 0
466 static
467 PRED_IMPL("iswctype", 2, iswctype, 0)
468 { char *s;
469 int chr;
470 wctype_t t;
471
472 if ( !PL_get_char_ex(A1, &chr, FALSE) ||
473 !PL_get_chars_ex(A2, &s, CVT_ATOM) )
474 return FALSE;
475
476 if ( !(t=wctype(s)) )
477 return PL_error(NULL, 0, NULL, ERR_EXISTENCE, ATOM_type, A2);
478
479 return iswctype(chr, t) ? TRUE : FALSE;
480 }
481 #endif
482
483
484 static int
init_tout(PL_chars_t * t,size_t len)485 init_tout(PL_chars_t *t, size_t len)
486 { switch(t->encoding)
487 { case ENC_ISO_LATIN_1:
488 if ( len < sizeof(t->buf) )
489 { t->text.t = t->buf;
490 t->storage = PL_CHARS_LOCAL;
491 } else
492 { t->text.t = PL_malloc(len);
493 t->storage = PL_CHARS_MALLOC;
494 }
495 succeed;
496 case ENC_WCHAR:
497 if ( len*sizeof(pl_wchar_t) < sizeof(t->buf) )
498 { t->text.w = (pl_wchar_t*)t->buf;
499 t->storage = PL_CHARS_LOCAL;
500 } else
501 { t->text.w = PL_malloc(len*sizeof(pl_wchar_t));
502 t->storage = PL_CHARS_MALLOC;
503 }
504 succeed;
505 default:
506 assert(0);
507 fail;
508 }
509 }
510
511
512 static inline wint_t
get_chr_from_text(const PL_chars_t * t,size_t index)513 get_chr_from_text(const PL_chars_t *t, size_t index)
514 { switch(t->encoding)
515 { case ENC_ISO_LATIN_1:
516 return t->text.t[index]&0xff;
517 case ENC_WCHAR:
518 return t->text.w[index];
519 default:
520 assert(0);
521 return 0;
522 }
523 }
524
525
526 static foreign_t
modify_case_atom(term_t in,term_t out,int down)527 modify_case_atom(term_t in, term_t out, int down)
528 { GET_LD
529 PL_chars_t tin, tout;
530
531 if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
532 return FALSE;
533
534 if ( PL_get_text(out, &tout, CVT_ATOMIC) )
535 { unsigned int i;
536
537 if ( tin.length != tout.length )
538 fail;
539
540 for(i=0; i<tin.length; i++)
541 { wint_t ci = get_chr_from_text(&tin, i);
542 wint_t co = get_chr_from_text(&tout, i);
543
544 if ( down )
545 { if ( co != towlower(ci) )
546 fail;
547 } else
548 { if ( co != towupper(ci) )
549 fail;
550 }
551 }
552
553 succeed;
554 } else if ( PL_is_variable(out) )
555 { unsigned int i;
556
557 tout.encoding = tin.encoding;
558 tout.length = tin.length;
559 tout.canonical = FALSE; /* or TRUE? Can WCHAR map to ISO? */
560
561 init_tout(&tout, tin.length);
562
563 if ( tin.encoding == ENC_ISO_LATIN_1 )
564 { const unsigned char *in = (const unsigned char*)tin.text.t;
565
566 if ( down )
567 { for(i=0; i<tin.length; i++)
568 { wint_t c = towlower(in[i]);
569
570 if ( c > 255 )
571 { PL_promote_text(&tout);
572 for( ; i<tin.length; i++)
573 { tout.text.w[i] = towlower(in[i]);
574 }
575 break;
576 } else
577 { tout.text.t[i] = (char)c;
578 }
579 }
580 } else /* upcase */
581 { for(i=0; i<tin.length; i++)
582 { wint_t c = towupper(in[i]);
583
584 if ( c > 255 )
585 { PL_promote_text(&tout);
586 for( ; i<tin.length; i++)
587 { tout.text.w[i] = towupper(in[i]);
588 }
589 break;
590 } else
591 { tout.text.t[i] = (char)c;
592 }
593 }
594 }
595 } else
596 { if ( down )
597 { for(i=0; i<tin.length; i++)
598 { tout.text.w[i] = towlower(tin.text.w[i]);
599 }
600 } else
601 { for(i=0; i<tin.length; i++)
602 { tout.text.w[i] = towupper(tin.text.w[i]);
603 }
604 }
605 }
606
607 PL_unify_text(out, 0, &tout, PL_ATOM);
608 PL_free_text(&tout);
609
610 succeed;
611 } else
612 { return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_atom, out);
613 }
614 }
615
616
617 static
618 PRED_IMPL("downcase_atom", 2, downcase_atom, 0)
619 { return modify_case_atom(A1, A2, TRUE);
620 }
621
622
623 static
624 PRED_IMPL("upcase_atom", 2, upcase_atom, 0)
625 { return modify_case_atom(A1, A2, FALSE);
626 }
627
628
629 /*******************************
630 * WHITE SPACE *
631 *******************************/
632
633 static int
write_normalize_space(IOSTREAM * out,term_t in)634 write_normalize_space(IOSTREAM *out, term_t in)
635 { GET_LD
636 PL_chars_t tin;
637 size_t i, end;
638
639 if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
640 return FALSE;
641
642 end = tin.length;
643 i = 0;
644
645 while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
646 i++;
647 while( i<end )
648 { wint_t c;
649
650 while(i<end && !unicode_separator((c=get_chr_from_text(&tin, i))))
651 { if ( Sputcode(c, out) < 0 )
652 fail;
653 i++;
654 }
655 while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
656 i++;
657 if ( i < end )
658 { if ( Sputcode(' ', out) < 0 )
659 fail;
660 }
661 }
662
663 succeed;
664 }
665
666
667 static
668 PRED_IMPL("normalize_space", 2, normalize_space, 0)
669 { redir_context ctx;
670 word rc;
671
672 if ( (rc = setupOutputRedirect(A1, &ctx, FALSE)) )
673 { if ( (rc = write_normalize_space(ctx.stream, A2)) )
674 rc = closeOutputRedirect(&ctx);
675 else
676 discardOutputRedirect(&ctx);
677 }
678
679 return rc;
680 }
681
682
683
684 /*******************************
685 * LOCALE *
686 *******************************/
687
688 #if defined(HAVE_LOCALE_H) && defined(HAVE_SETLOCALE)
689 #include <locale.h>
690
691 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
692 Note: on some installations, locale doesn't work correctly. Printing a
693 message isn't really cute. It would be better to use printMessage(), but
694 the system isn't yet initialised far enough. Maybe we should store the
695 failure and print a message at the end of the initialisation?
696
697 We only return FALSE if LC_CTYPE fails. This is a serious indication
698 that locale support is broken. We don't depend too much on the others,
699 so we ignore possible problems.
700 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
701
702 static int
initLocale()703 initLocale()
704 { int rc = TRUE;
705
706 if ( !setlocale(LC_CTYPE, "") )
707 { rc = FALSE;
708 DEBUG(0, Sdprintf("Failed to set LC_CTYPE locale\n"));
709 }
710 if ( !setlocale(LC_TIME, "") )
711 { DEBUG(0, Sdprintf("Failed to set LC_TIME locale\n"));
712 }
713 if ( !setlocale(LC_COLLATE, "") )
714 { DEBUG(0, Sdprintf("Failed to set LC_COLLATE locale\n"));
715 }
716
717 return rc;
718 }
719
720 typedef struct
721 { int category;
722 const char *name;
723 } lccat;
724
725 static lccat lccats[] =
726 { { LC_ALL, "all" },
727 { LC_COLLATE, "collate" },
728 { LC_CTYPE, "ctype" },
729 #ifdef LC_MESSAGES
730 { LC_MESSAGES, "messages" },
731 #endif
732 { LC_MONETARY, "monetary" },
733 { LC_NUMERIC, "numeric" },
734 { LC_TIME, "time" },
735 { 0, NULL }
736 };
737
738
739 static
740 PRED_IMPL("setlocale", 3, setlocale, 0)
741 { PRED_LD
742 char *what;
743 char *locale;
744 const lccat *lcp;
745
746
747 if ( !PL_get_chars_ex(A1, &what, CVT_ATOM) )
748 fail;
749 if ( PL_is_variable(A3) )
750 locale = NULL;
751 else if ( !PL_get_chars_ex(A3, &locale, CVT_ATOM) )
752 fail;
753
754 for ( lcp = lccats; lcp->name; lcp++ )
755 { if ( streq(lcp->name, what) )
756 { char *old = setlocale(lcp->category, NULL);
757
758 if ( !PL_unify_chars(A2, PL_ATOM, -1, old) )
759 fail;
760
761 if ( PL_compare(A2, A3) != 0 )
762 { if ( !setlocale(lcp->category, locale) )
763 return PL_error(NULL, 0, MSG_ERRNO, ERR_SYSCALL, "setlocale");
764 }
765
766 succeed;
767 }
768 }
769
770 return PL_error(NULL, 0, NULL, ERR_DOMAIN,
771 PL_new_atom("category"), A1);
772 }
773
774 #else
775
776 #define initLocale() 1
777
778 static
779 PRED_IMPL("setlocale", 3, setlocale, 0)
780 { return notImplemented("setlocale", 3);
781 }
782
783 #endif
784
785
786 /*******************************
787 * PUBLISH PREDICATES *
788 *******************************/
789
790 BeginPredDefs(ctype)
791 PRED_DEF("swi_char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
792 PRED_DEF("swi_code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
793 PRED_DEF("setlocale", 3, setlocale, 0)
794 PRED_DEF("swi_downcase_atom", 2, downcase_atom, 0)
795 PRED_DEF("swi_upcase_atom", 2, upcase_atom, 0)
796 PRED_DEF("normalize_space", 2, normalize_space, 0)
797 EndPredDefs
798
799
800 /*******************************
801 * PROLOG CHARACTERS *
802 *******************************/
803
804 const char _PL_char_types[] = {
805 /* ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N ^O 0-15 */
806 CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
807 /* ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ 16-31 */
808 CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
809 /* sp ! " # $ % & ' ( ) * + , - . / 32-47 */
810 SP, SO, DQ, SY, SY, SO, SY, SQ, PU, PU, SY, SY, PU, SY, SY, SY,
811 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 48-63 */
812 DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, SY, SO, SY, SY, SY, SY,
813 /* @ A B C D E F G H I J K L M N O 64-79 */
814 SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
815 /* P Q R S T U V W X Y Z [ \ ] ^ _ 80-95 */
816 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, SY, PU, SY, UC,
817 /* ` a b c d e f g h i j k l m n o 96-111 */
818 SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
819 /* p q r s t u v w x y z { | } ~ ^? 112-127 */
820 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, PU, PU, PU, SY, CT,
821 /* 128-159 (C1 controls) */
822 CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
823 CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
824 /* 160-255 (G1 graphics) */
825 /* ISO Latin 1 is assumed */
826 SP, SO, SO, SO, SO, SO, SO, SO, SO, SO, LC, SO, SO, SO, SO, SO,
827 SO, SO, SO, SO, SO, SO, SO, SO, SO, SO, LC, SO, SO, SO, SO, SO,
828 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
829 UC, UC, UC, UC, UC, UC, UC, SO, UC, UC, UC, UC, UC, UC, UC, LC,
830 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
831 LC, LC, LC, LC, LC, LC, LC, SO, LC, LC, LC, LC, LC, LC, LC, LC
832 };
833
834
835 typedef struct
836 { const char *name;
837 IOENC encoding;
838 } enc_map;
839
840 static const enc_map map[] =
841 { { "UTF-8", ENC_UTF8 },
842 { "utf8", ENC_UTF8 },
843 { "ISO8859-1", ENC_ISO_LATIN_1 },
844 { "ISO8859_1", ENC_ISO_LATIN_1 },
845 { "iso88591", ENC_ISO_LATIN_1 },
846 { "iso_8859_1", ENC_ISO_LATIN_1 },
847 { NULL, ENC_UNKNOWN }
848 };
849
850 IOENC
initEncoding(void)851 initEncoding(void)
852 { GET_LD
853
854 if ( LD )
855 { if ( !LD->encoding )
856 { char *enc;
857
858 if ( !initLocale() )
859 { LD->encoding = ENC_ISO_LATIN_1;
860 } else if ( (enc = setlocale(LC_CTYPE, NULL)) )
861 { LD->encoding = ENC_ANSI; /* text encoding */
862
863 if ( (enc = strchr(enc, '.')) )
864 { const enc_map *m;
865 enc++; /* skip '.' */
866
867 for ( m=map; m->name; m++ )
868 { if ( strcmp(enc, m->name) == 0 )
869 { LD->encoding = m->encoding;
870 break;
871 }
872 }
873 }
874 } else
875 { LD->encoding = ENC_ISO_LATIN_1;
876 }
877 }
878
879 #if __YAP_PROLOG__
880 PL_register_extensions(PL_predicates_from_ctype);
881 #endif
882 return LD->encoding;
883 }
884
885 return ENC_ANSI;
886 }
887
888
889 void
initCharTypes(void)890 initCharTypes(void)
891 {
892 initEncoding();
893 }
894
895 #if __SWI_PROLOG__
896 bool
systemMode(bool accept)897 systemMode(bool accept)
898 { GET_LD
899 bool old = SYSTEM_MODE ? TRUE : FALSE;
900
901 if ( accept )
902 debugstatus.styleCheck |= DOLLAR_STYLE;
903 else
904 debugstatus.styleCheck &= ~DOLLAR_STYLE;
905
906 return old;
907 }
908
909 #endif
910