1 /*  $Id$
2 
3     Part of SWI-Prolog
4 
5     Author:        Jan Wielemaker
6     E-mail:        wielemak@science.uva.nl
7     WWW:           http://www.swi-prolog.org
8     Copyright (C): 1985-2007, University of Amsterdam
9 
10     This library is free software; you can redistribute it and/or
11     modify it under the terms of the GNU Lesser General Public
12     License as published by the Free Software Foundation; either
13     version 2.1 of the License, or (at your option) any later version.
14 
15     This library is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18     Lesser General Public License for more details.
19 
20     You should have received a copy of the GNU Lesser General Public
21     License along with this library; if not, write to the Free Software
22     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24 
25 #include "pl-incl.h"
26 #include <ctype.h>
27 #include "pl-ctype.h"
28 
29 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
30 This module defines:
31 
32 	char_type(?Char, ?Type)
33 	code_type(?Char, ?Type)
34 
35 See manual for details.
36 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
37 
38 #define CHAR_MODE 0
39 #define CODE_MODE 1
40 
41 #define CTX_CHAR 0			/* Class(Char) */
42 #define CTX_CODE 1			/* Class(Int) */
43 
44 typedef struct
45 { atom_t	name;			/* name of the class */
46   int (*test)(wint_t chr);		/* boolean */
47   int (*reverse)(wint_t chr);		/* reverse mapping */
48   short		arity;			/* arity of class (i.e. lower('A')) */
49   short		ctx_type;		/* CTX_* */
50 } char_type;
51 
52 #define ENUM_NONE	0x00
53 #define ENUM_CHAR	0x01
54 #define ENUM_CLASS	0x02
55 #define ENUM_BOTH	0x03
56 
57 typedef struct
58 { int		current;		/* current character */
59   const char_type   *class;		/* current class */
60   int   	do_enum;		/* what to enumerate */
61 } generator;
62 
63 
64 static  int unicode_separator(wint_t c);
65 
66 static int
iswhite(wint_t chr)67 iswhite(wint_t chr)
68 { return chr == ' ' || chr == '\t';
69 }
70 
71 
72 #ifdef __YAP_PROLOG__
73 #include "pl-umap.c"			/* Unicode map */
74 
75 #define CharTypeW(c, t, w) \
76 	((unsigned)(c) <= 0xff ? (_PL_char_types[(unsigned)(c)] t) \
77 			       : (uflagsW(c) & w))
78 
79 #define PlBlankW(c)	CharTypeW(c, <= SP, U_SEPARATOR)
80 
81 
82 inline int
unicode_separator(wint_t c)83 unicode_separator(wint_t c)
84 { return PlBlankW(c);
85 }
86 #endif
87 
88 static int
fiscsym(wint_t chr)89 fiscsym(wint_t chr)
90 { return iswalnum(chr) || chr == '_';
91 }
92 
93 
94 static int
fiscsymf(wint_t chr)95 fiscsymf(wint_t chr)
96 { return iswalpha(chr) || chr == '_';
97 }
98 
99 static int
iseof(wint_t chr)100 iseof(wint_t chr)
101 { return chr == (wint_t)-1;
102 }
103 
104 static int
iseol(wint_t chr)105 iseol(wint_t chr)
106 { return chr >= 10 && chr <= 13;
107 }
108 
109 static int
isnl(wint_t chr)110 isnl(wint_t chr)
111 { return chr == '\n';
112 }
113 
114 static int
isperiod(wint_t chr)115 isperiod(wint_t chr)
116 { return chr && strchr(".?!", chr) != NULL;
117 }
118 
119 static int
isquote(wint_t chr)120 isquote(wint_t chr)
121 { return chr && strchr("'`\"", chr) != NULL;
122 }
123 
124 static int
fupper(wint_t chr)125 fupper(wint_t chr)
126 { return iswlower(chr) ? (int)towupper(chr) : -1;
127 }
128 
129 static int
flower(wint_t chr)130 flower(wint_t chr)
131 { return iswupper(chr) ? (int)towlower(chr) : -1;
132 }
133 
134 static int
ftoupper(wint_t chr)135 ftoupper(wint_t chr)
136 { return towupper(chr);
137 }
138 
139 static int
ftolower(wint_t chr)140 ftolower(wint_t chr)
141 { return towlower(chr);
142 }
143 
144 static int
fparen(wint_t chr)145 fparen(wint_t chr)
146 { switch(chr)
147   { case '(':
148       return ')';
149     case '{':
150       return '}';
151     case '[':
152       return ']';
153     default:
154       return -1;
155   }
156 }
157 
158 
159 static int
rparen(wint_t chr)160 rparen(wint_t chr)
161 { switch(chr)
162   { case ')':
163       return '(';
164     case '}':
165       return '{';
166     case ']':
167       return '[';
168     default:
169       return -1;
170   }
171 }
172 
173 
174 static int
fdigit(wint_t chr)175 fdigit(wint_t chr)
176 { if ( chr <= 0xff && isdigit(chr) )
177     return chr - '0';
178   return -1;
179 }
180 
181 
182 static int
rdigit(wint_t d)183 rdigit(wint_t d)
184 { if ( (int)d >= 0 && d <= 9 )
185     return d+'0';
186   return -1;
187 }
188 
189 
190 static int
fxdigit(wint_t chr)191 fxdigit(wint_t chr)
192 { if ( chr > 0xff )
193     return -1;
194   if ( isdigit(chr) )
195     return chr - '0';
196   if ( chr >= 'a' && chr <= 'f' )
197     return chr - 'a' + 10;
198   if ( chr >= 'A' && chr <= 'F' )
199     return chr - 'A' + 10;
200   return -1;
201 }
202 
203 
204 static int
rxdigit(wint_t d)205 rxdigit(wint_t d)
206 { if ( (int)d >= 0 && d <= 9 )
207     return d+'0';
208   if ( d >= 10 && d <= 15 )
209     return d-10+'a';
210   return -1;
211 }
212 
213 
214 
215 #define mkfunction(name) \
216 	static int f ## name(wint_t chr) { return name(chr); }
217 
218 mkfunction(iswalnum)
219 mkfunction(iswalpha)
220 mkfunction(isascii)
221 mkfunction(iswcntrl)
222 mkfunction(iswdigit)
223 mkfunction(iswgraph)
224 mkfunction(iswlower)
225 mkfunction(iswupper)
226 mkfunction(iswpunct)
227 mkfunction(iswspace)
228 
229 static const char_type char_types[] =
230 { { ATOM_alnum,		fiswalnum },
231   { ATOM_alpha,		fiswalpha },
232   { ATOM_csym,		fiscsym },
233   { ATOM_csymf,		fiscsymf },
234   { ATOM_ascii,		fisascii },
235   { ATOM_white,		iswhite },
236   { ATOM_cntrl,		fiswcntrl },
237   { ATOM_digit,		fiswdigit },
238   { ATOM_graph,		fiswgraph },
239   { ATOM_lower,		fiswlower },
240   { ATOM_upper,		fiswupper },
241   { ATOM_punct,		fiswpunct },
242   { ATOM_space,		fiswspace },
243   { ATOM_end_of_file,	iseof },
244   { ATOM_end_of_line,	iseol },
245   { ATOM_newline,	isnl },
246   { ATOM_period,	isperiod },
247   { ATOM_quote,	        isquote },
248   { ATOM_lower,		fupper,		flower,   1, CTX_CHAR },
249   { ATOM_upper,		flower,		fupper,   1, CTX_CHAR },
250   { ATOM_to_lower,	ftoupper,	ftolower, 1, CTX_CHAR },
251   { ATOM_to_upper,	ftolower,	ftoupper, 1, CTX_CHAR },
252   { ATOM_paren,		fparen,		rparen,   1, CTX_CHAR },
253   { ATOM_digit,		fdigit,		rdigit,   1, CTX_CODE  },
254   { ATOM_xdigit,	fxdigit,	rxdigit,  1, CTX_CODE  },
255   { NULL_ATOM,		NULL }
256 };
257 
258 static const char_type *
char_type_by_name(atom_t name,int arity)259 char_type_by_name(atom_t name, int arity)
260 { const char_type *cc;
261 
262   for(cc = char_types; cc->name; cc++)
263   { if ( cc->name == name && cc->arity == arity )
264       return cc;
265   }
266 
267   return NULL;
268 }
269 
270 
271 static int
advanceGen(generator * gen)272 advanceGen(generator *gen)
273 { if ( gen->do_enum & ENUM_CHAR )
274   { if ( ++gen->current == 256 )
275       fail;
276   } else
277   { gen->class++;
278     if ( !gen->class->name )
279       fail;
280   }
281 
282   succeed;
283 }
284 
285 
286 static int
unify_char_type(term_t type,const char_type * ct,int context,int how)287 unify_char_type(term_t type, const char_type *ct, int context, int how)
288 { if ( ct->arity == 0 )
289     return PL_unify_atom(type, ct->name);
290   else /*if ( ct->arity == 1 )*/
291   { if ( PL_unify_functor(type, PL_new_functor(ct->name, 1)) )
292     { term_t a = PL_new_term_ref();
293 
294       _PL_get_arg(1, type, a);
295 
296       if ( ct->ctx_type == CTX_CHAR )
297 	return PL_unify_char(a, context, how);
298       else
299 	return PL_unify_integer(a, context);
300     }
301   }
302 
303   fail;
304 }
305 
306 
307 static foreign_t
do_char_type(term_t chr,term_t class,control_t h,int how)308 do_char_type(term_t chr, term_t class, control_t h, int how)
309 { GET_LD
310   generator *gen;
311   fid_t fid;
312 
313   switch( ForeignControl(h) )
314   { case FRG_FIRST_CALL:
315     { const char_type *cc = NULL;
316       int c;
317       int do_enum = ENUM_NONE;
318       atom_t cn;
319       int arity;
320 
321       if ( PL_is_variable(chr) )
322 	do_enum |= ENUM_CHAR;
323       if ( PL_is_variable(class) )
324 	do_enum |= ENUM_CLASS;
325 
326       if ( do_enum == ENUM_BOTH )
327 	return PL_error("char_type", 2, NULL, ERR_INSTANTIATION);
328 
329       if ( !(do_enum & ENUM_CHAR) )
330       { if ( !PL_get_char(chr, &c, TRUE) )
331 	  fail;
332 	if ( c == -1 )
333 	  return PL_unify_atom(class, ATOM_end_of_file);
334       }
335 
336       if ( !(do_enum & ENUM_CLASS) )
337       { if ( !PL_get_name_arity(class, &cn, &arity) ||
338 	     !(cc = char_type_by_name(cn, arity)) )
339 	  return PL_error("char_type", 2, NULL,
340 			  ERR_TYPE, ATOM_char_type, class);
341       }
342 
343       if ( do_enum == ENUM_NONE )
344       { if ( arity == 0 )
345 	  return (*cc->test)((wint_t)c) ? TRUE : FALSE;
346 	else
347 	{ int rval = (*cc->test)((wint_t)c);
348 
349 	  if ( rval >= 0 )
350 	  { term_t a = PL_new_term_ref();
351 	    int ok;
352 
353 	    _PL_get_arg(1, class, a);
354 
355 	    if ( cc->ctx_type == CTX_CHAR )
356 	      ok = PL_unify_char(a, rval, how);
357 	    else
358 	      ok = PL_unify_integer(a, rval);
359 
360 	    if ( ok )
361 	      return TRUE;
362 	    else
363 	      do_enum = ENUM_CHAR;	/* try the other way around */
364 	  } else
365 	    fail;
366 	}
367       }
368 
369       if ( do_enum == ENUM_CHAR && arity == 1 )
370       {	term_t a = PL_new_term_ref();	/* char_type(X, lower('A')) */
371 	int ca;
372 
373 	_PL_get_arg(1, class, a);
374 	if ( !PL_is_variable(a) )
375 	{ if ( PL_get_char(a, &ca, FALSE) )
376 	  { int c = (*cc->reverse)((wint_t)ca);
377 
378 	    if ( c < 0 )
379 	      fail;
380 
381 	    return PL_unify_char(chr, c, how);
382 	  }
383 	  fail;				/* error */
384 	}
385       }
386 
387       gen = allocHeap(sizeof(*gen));
388       gen->do_enum = do_enum;
389 
390       if ( do_enum & ENUM_CHAR )
391       { gen->class      = cc;
392 	gen->current    = -1;
393       } else if ( do_enum & ENUM_CLASS )
394       { gen->class	= char_types;
395 	gen->current    = c;
396       }
397 
398       break;
399     }
400     case FRG_REDO:
401       gen = ForeignContextPtr(h);
402       break;
403     case FRG_CUTTED:
404       gen = ForeignContextPtr(h);
405       if ( gen )
406 	freeHeap(gen, sizeof(*gen));
407     default:
408       succeed;
409   }
410 
411   if ( !(fid = PL_open_foreign_frame()) )
412     goto error;
413 
414   for(;;)
415   { int rval;
416 
417     if ( (rval = (*gen->class->test)((wint_t)gen->current)) )
418     { if ( gen->do_enum & ENUM_CHAR )
419       { if ( !PL_unify_char(chr, gen->current, how) )
420 	  goto next;
421       }
422       if ( gen->class->arity > 0 )
423       { if ( rval < 0 ||
424 	     !unify_char_type(class, gen->class, rval, how) )
425 	  goto next;
426 
427       } else if ( gen->do_enum & ENUM_CLASS )
428       { if ( !unify_char_type(class, gen->class, rval, how) )
429 	  goto next;
430       }
431 
432       if ( advanceGen(gen) )		/* ok, found one */
433 	ForeignRedoPtr(gen);
434       else
435       { freeHeap(gen, sizeof(*gen));	/* the only one */
436 	succeed;
437       }
438     }
439   next:
440     PL_rewind_foreign_frame(fid);
441 
442     if ( !advanceGen(gen) )
443       break;
444   }
445 
446 error:
447   freeHeap(gen, sizeof(*gen));
448   fail;
449 }
450 
451 
452 
453 static
454 PRED_IMPL("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
455 { return do_char_type(A1, A2, PL__ctx, PL_CHAR);
456 }
457 
458 
459 static
460 PRED_IMPL("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
461 { return do_char_type(A1, A2, PL__ctx, PL_CODE);
462 }
463 
464 
465 #if 0
466 static
467 PRED_IMPL("iswctype", 2, iswctype, 0)
468 { char *s;
469   int chr;
470   wctype_t t;
471 
472   if ( !PL_get_char_ex(A1, &chr, FALSE) ||
473        !PL_get_chars_ex(A2, &s, CVT_ATOM) )
474     return FALSE;
475 
476   if ( !(t=wctype(s)) )
477     return PL_error(NULL, 0, NULL, ERR_EXISTENCE, ATOM_type, A2);
478 
479   return iswctype(chr, t) ? TRUE : FALSE;
480 }
481 #endif
482 
483 
484 static int
init_tout(PL_chars_t * t,size_t len)485 init_tout(PL_chars_t *t, size_t len)
486 { switch(t->encoding)
487   { case ENC_ISO_LATIN_1:
488       if ( len < sizeof(t->buf) )
489       { t->text.t = t->buf;
490 	t->storage = PL_CHARS_LOCAL;
491       } else
492       { t->text.t = PL_malloc(len);
493 	t->storage = PL_CHARS_MALLOC;
494       }
495       succeed;
496     case ENC_WCHAR:
497       if ( len*sizeof(pl_wchar_t) < sizeof(t->buf) )
498       { t->text.w = (pl_wchar_t*)t->buf;
499 	t->storage = PL_CHARS_LOCAL;
500       } else
501       { t->text.w = PL_malloc(len*sizeof(pl_wchar_t));
502 	t->storage = PL_CHARS_MALLOC;
503       }
504       succeed;
505     default:
506       assert(0);
507       fail;
508   }
509 }
510 
511 
512 static inline wint_t
get_chr_from_text(const PL_chars_t * t,size_t index)513 get_chr_from_text(const PL_chars_t *t, size_t index)
514 { switch(t->encoding)
515   { case ENC_ISO_LATIN_1:
516       return t->text.t[index]&0xff;
517     case ENC_WCHAR:
518       return t->text.w[index];
519     default:
520       assert(0);
521       return 0;
522   }
523 }
524 
525 
526 static foreign_t
modify_case_atom(term_t in,term_t out,int down)527 modify_case_atom(term_t in, term_t out, int down)
528 { GET_LD
529   PL_chars_t tin, tout;
530 
531   if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
532     return FALSE;
533 
534   if ( PL_get_text(out, &tout, CVT_ATOMIC) )
535   { unsigned int i;
536 
537     if ( tin.length != tout.length )
538       fail;
539 
540     for(i=0; i<tin.length; i++)
541     { wint_t ci = get_chr_from_text(&tin, i);
542       wint_t co = get_chr_from_text(&tout, i);
543 
544       if ( down )
545       { if ( co != towlower(ci) )
546 	  fail;
547       } else
548       { if ( co != towupper(ci) )
549 	  fail;
550       }
551     }
552 
553     succeed;
554   } else if ( PL_is_variable(out) )
555   { unsigned int i;
556 
557     tout.encoding  = tin.encoding;
558     tout.length    = tin.length;
559     tout.canonical = FALSE;		/* or TRUE? Can WCHAR map to ISO? */
560 
561     init_tout(&tout, tin.length);
562 
563     if ( tin.encoding == ENC_ISO_LATIN_1 )
564     { const unsigned char *in = (const unsigned char*)tin.text.t;
565 
566       if ( down )
567       { for(i=0; i<tin.length; i++)
568 	{ wint_t c = towlower(in[i]);
569 
570 	  if ( c > 255 )
571 	  { PL_promote_text(&tout);
572 	    for( ; i<tin.length; i++)
573 	    { tout.text.w[i] = towlower(in[i]);
574 	    }
575 	    break;
576 	  } else
577 	  { tout.text.t[i] = (char)c;
578 	  }
579 	}
580       } else				/* upcase */
581       { for(i=0; i<tin.length; i++)
582 	{ wint_t c = towupper(in[i]);
583 
584 	  if ( c > 255 )
585 	  { PL_promote_text(&tout);
586 	    for( ; i<tin.length; i++)
587 	    { tout.text.w[i] = towupper(in[i]);
588 	    }
589 	    break;
590 	  } else
591 	  { tout.text.t[i] = (char)c;
592 	  }
593 	}
594       }
595     } else
596     { if ( down )
597       { for(i=0; i<tin.length; i++)
598 	{ tout.text.w[i] = towlower(tin.text.w[i]);
599 	}
600       } else
601       { for(i=0; i<tin.length; i++)
602 	{ tout.text.w[i] = towupper(tin.text.w[i]);
603 	}
604       }
605     }
606 
607     PL_unify_text(out, 0, &tout, PL_ATOM);
608     PL_free_text(&tout);
609 
610     succeed;
611   } else
612   { return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_atom, out);
613   }
614 }
615 
616 
617 static
618 PRED_IMPL("downcase_atom", 2, downcase_atom, 0)
619 { return modify_case_atom(A1, A2, TRUE);
620 }
621 
622 
623 static
624 PRED_IMPL("upcase_atom", 2, upcase_atom, 0)
625 { return modify_case_atom(A1, A2, FALSE);
626 }
627 
628 
629 		 /*******************************
630 		 *	    WHITE SPACE		*
631 		 *******************************/
632 
633 static int
write_normalize_space(IOSTREAM * out,term_t in)634 write_normalize_space(IOSTREAM *out, term_t in)
635 { GET_LD
636   PL_chars_t tin;
637   size_t i, end;
638 
639   if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
640     return FALSE;
641 
642   end = tin.length;
643   i = 0;
644 
645   while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
646     i++;
647   while( i<end )
648   { wint_t c;
649 
650     while(i<end && !unicode_separator((c=get_chr_from_text(&tin, i))))
651     { if ( Sputcode(c, out) < 0 )
652 	fail;
653       i++;
654     }
655     while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
656       i++;
657     if ( i < end )
658     { if (  Sputcode(' ', out) < 0 )
659 	fail;
660     }
661   }
662 
663   succeed;
664 }
665 
666 
667 static
668 PRED_IMPL("normalize_space", 2, normalize_space, 0)
669 { redir_context ctx;
670   word rc;
671 
672   if ( (rc = setupOutputRedirect(A1, &ctx, FALSE)) )
673   { if ( (rc = write_normalize_space(ctx.stream, A2)) )
674       rc = closeOutputRedirect(&ctx);
675     else
676       discardOutputRedirect(&ctx);
677   }
678 
679   return rc;
680 }
681 
682 
683 
684 		 /*******************************
685 		 *	       LOCALE		*
686 		 *******************************/
687 
688 #if defined(HAVE_LOCALE_H) && defined(HAVE_SETLOCALE)
689 #include <locale.h>
690 
691 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
692 Note: on some installations, locale doesn't   work correctly. Printing a
693 message isn't really cute. It would be better to use printMessage(), but
694 the system isn't yet initialised far enough.   Maybe we should store the
695 failure and print a message at the end of the initialisation?
696 
697 We only return FALSE if LC_CTYPE  fails.   This  is a serious indication
698 that locale support is broken. We don't   depend too much on the others,
699 so we ignore possible problems.
700 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
701 
702 static int
initLocale()703 initLocale()
704 { int rc = TRUE;
705 
706   if ( !setlocale(LC_CTYPE, "") )
707   { rc = FALSE;
708     DEBUG(0, Sdprintf("Failed to set LC_CTYPE locale\n"));
709   }
710   if ( !setlocale(LC_TIME, "") )
711   { DEBUG(0, Sdprintf("Failed to set LC_TIME locale\n"));
712   }
713   if ( !setlocale(LC_COLLATE, "") )
714   { DEBUG(0, Sdprintf("Failed to set LC_COLLATE locale\n"));
715   }
716 
717   return rc;
718 }
719 
720 typedef struct
721 { int category;
722   const char *name;
723 } lccat;
724 
725 static lccat lccats[] =
726 { { LC_ALL,      "all" },
727   { LC_COLLATE,  "collate" },
728   { LC_CTYPE,    "ctype" },
729 #ifdef LC_MESSAGES
730   { LC_MESSAGES, "messages" },
731 #endif
732   { LC_MONETARY, "monetary" },
733   { LC_NUMERIC,  "numeric" },
734   { LC_TIME,     "time" },
735   { 0,           NULL }
736 };
737 
738 
739 static
740 PRED_IMPL("setlocale", 3, setlocale, 0)
741 { PRED_LD
742   char *what;
743   char *locale;
744   const lccat *lcp;
745 
746 
747   if ( !PL_get_chars_ex(A1, &what, CVT_ATOM) )
748     fail;
749   if ( PL_is_variable(A3) )
750     locale = NULL;
751   else if ( !PL_get_chars_ex(A3, &locale, CVT_ATOM) )
752     fail;
753 
754   for ( lcp = lccats; lcp->name; lcp++ )
755   { if ( streq(lcp->name, what) )
756     { char *old = setlocale(lcp->category, NULL);
757 
758       if ( !PL_unify_chars(A2, PL_ATOM, -1, old) )
759 	fail;
760 
761       if ( PL_compare(A2, A3) != 0 )
762       { if ( !setlocale(lcp->category, locale) )
763 	  return PL_error(NULL, 0, MSG_ERRNO, ERR_SYSCALL, "setlocale");
764       }
765 
766       succeed;
767     }
768   }
769 
770   return PL_error(NULL, 0, NULL, ERR_DOMAIN,
771 		  PL_new_atom("category"), A1);
772 }
773 
774 #else
775 
776 #define initLocale() 1
777 
778 static
779 PRED_IMPL("setlocale", 3, setlocale, 0)
780 { return notImplemented("setlocale", 3);
781 }
782 
783 #endif
784 
785 
786 		 /*******************************
787 		 *      PUBLISH PREDICATES	*
788 		 *******************************/
789 
790 BeginPredDefs(ctype)
791   PRED_DEF("swi_char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
792   PRED_DEF("swi_code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
793   PRED_DEF("setlocale", 3, setlocale, 0)
794   PRED_DEF("swi_downcase_atom", 2, downcase_atom, 0)
795   PRED_DEF("swi_upcase_atom", 2, upcase_atom, 0)
796   PRED_DEF("normalize_space", 2, normalize_space, 0)
797 EndPredDefs
798 
799 
800 		 /*******************************
801 		 *	PROLOG CHARACTERS	*
802 		 *******************************/
803 
804 const char _PL_char_types[] = {
805 /* ^@  ^A  ^B  ^C  ^D  ^E  ^F  ^G  ^H  ^I  ^J  ^K  ^L  ^M  ^N  ^O    0-15 */
806    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
807 /* ^P  ^Q  ^R  ^S  ^T  ^U  ^V  ^W  ^X  ^Y  ^Z  ^[  ^\  ^]  ^^  ^_   16-31 */
808    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
809 /* sp   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   32-47 */
810    SP, SO, DQ, SY, SY, SO, SY, SQ, PU, PU, SY, SY, PU, SY, SY, SY,
811 /*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   48-63 */
812    DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, SY, SO, SY, SY, SY, SY,
813 /*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   64-79 */
814    SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
815 /*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   80-95 */
816    UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, SY, PU, SY, UC,
817 /*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   96-111 */
818    SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
819 /*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  ^?   112-127 */
820    LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, PU, PU, PU, SY, CT,
821 			  /* 128-159 (C1 controls) */
822    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
823    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT,
824 			  /* 160-255 (G1 graphics) */
825 			  /* ISO Latin 1 is assumed */
826    SP, SO, SO, SO, SO, SO, SO, SO, SO, SO, LC, SO, SO, SO, SO, SO,
827    SO, SO, SO, SO, SO, SO, SO, SO, SO, SO, LC, SO, SO, SO, SO, SO,
828    UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
829    UC, UC, UC, UC, UC, UC, UC, SO, UC, UC, UC, UC, UC, UC, UC, LC,
830    LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
831    LC, LC, LC, LC, LC, LC, LC, SO, LC, LC, LC, LC, LC, LC, LC, LC
832 };
833 
834 
835 typedef struct
836 { const char *name;
837   IOENC encoding;
838 } enc_map;
839 
840 static const enc_map map[] =
841 { { "UTF-8",	  ENC_UTF8 },
842   { "utf8",	  ENC_UTF8 },
843   { "ISO8859-1",  ENC_ISO_LATIN_1 },
844   { "ISO8859_1",  ENC_ISO_LATIN_1 },
845   { "iso88591",   ENC_ISO_LATIN_1 },
846   { "iso_8859_1", ENC_ISO_LATIN_1 },
847   { NULL, ENC_UNKNOWN }
848 };
849 
850 IOENC
initEncoding(void)851 initEncoding(void)
852 { GET_LD
853 
854   if ( LD )
855   { if ( !LD->encoding )
856     { char *enc;
857 
858       if ( !initLocale() )
859       { LD->encoding = ENC_ISO_LATIN_1;
860       } else if ( (enc = setlocale(LC_CTYPE, NULL)) )
861       { LD->encoding = ENC_ANSI;		/* text encoding */
862 
863 	if ( (enc = strchr(enc, '.')) )
864 	{ const enc_map *m;
865 	  enc++;				/* skip '.' */
866 
867 	  for ( m=map; m->name; m++ )
868 	  { if ( strcmp(enc, m->name) == 0 )
869 	    { LD->encoding = m->encoding;
870 	      break;
871 	    }
872 	  }
873 	}
874       } else
875       { LD->encoding = ENC_ISO_LATIN_1;
876       }
877     }
878 
879 #if __YAP_PROLOG__
880     PL_register_extensions(PL_predicates_from_ctype);
881 #endif
882     return LD->encoding;
883   }
884 
885   return ENC_ANSI;
886 }
887 
888 
889 void
initCharTypes(void)890 initCharTypes(void)
891 {
892   initEncoding();
893 }
894 
895 #if __SWI_PROLOG__
896 bool
systemMode(bool accept)897 systemMode(bool accept)
898 { GET_LD
899   bool old = SYSTEM_MODE ? TRUE : FALSE;
900 
901   if ( accept )
902     debugstatus.styleCheck |= DOLLAR_STYLE;
903   else
904     debugstatus.styleCheck &= ~DOLLAR_STYLE;
905 
906   return old;
907 }
908 
909 #endif
910