1 /*** type.h *******************************************************************
2 **
3 ** This file is part of BibTool.
4 ** It is distributed under the GNU General Public License.
5 ** See the file COPYING for details.
6 **
7 ** (c) 1996-2020 Gerd Neugebauer
8 **
9 ** Net: gene@gerd-neugebauer.de
10 **
11 ** This program is free software; you can redistribute it and/or modify
12 ** it under the terms of the GNU General Public License as published by
13 ** the Free Software Foundation; either version 2, or (at your option)
14 ** any later version.
15 **
16 ** This program is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ** GNU General Public License for more details.
20 **
21 ** You should have received a copy of the GNU General Public License
22 ** along with this program; if not, write to the Free Software
23 ** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 **
25 **-----------------------------------------------------------------------------
26 ** Description:
27 **	This module is a replacement for the system header file
28 **	|ctype.h|. In contrast to some implementations of the |isalpha|
29 **	and friends the macros in this header are stable. This means
30 **	that the argument is evaluated exactly once and each macro
31 **	consists of exactly one C statement. Thus these macros can
32 **	be used even at those places where only a single statement is
33 **	allowed (conditionals without braces) or with arguments
34 **	containing side effects.
35 **
36 **	In addition this is a starting point to implement an xord
37 **	array like \TeX{} has one (some day\dots)
38 **
39 **	This header file requires the initialization function
40 **	|init_type()| to be called before the macros will work as
41 **	described.
42 **
**	This header file also provides the functions and variables
**	defined in |type.c|.
45 **
46 ******************************************************************************/
47 
48 #ifndef TYPE_H_LOADED
49 #define TYPE_H_LOADED
50 
51 #include <bibtool/general.h>
52 
53  typedef unsigned char Uchar;
54  typedef Uchar* String;
55 
56 #define StringNULL (String)NULL
57 
/*
** Character class flags. Every class owns one bit, so the classes of a
** character can be combined with | and tested with &.
*/
#define T__None     0x00	/* member of no class                      */
#define T__Upper    0x01	/* upper case letter                       */
#define T__Lower    0x02	/* lower case letter                       */
#define T__Allowed  0x04	/* allowed character in the BibTeX sense   */
#define T__Number   0x08	/* digit                                   */
#define T__Space    0x10	/* space character                         */
#define T__Extended 0x20	/* character outside the ASCII range       */
#define T__WordSep  0x40	/* word separator                          */
66 
67 #ifdef INIT_TYPE
68 
 /*
 ** type__allowed
 **	Classification table indexed by character code. Each entry is the
 **	bitwise OR of the T__* class flags which apply to that character.
 **
 **	The 256 initializers cover the codes 0x00 up to 0xff. The array is
 **	declared with 257 elements; the last one (index 256) has no explicit
 **	initializer and is therefore 0, i.e. T__None.
 **
 **	The comment in front of each entry shows the character code in
 **	hexadecimal and, for printable ASCII characters, the character.
 **	0x00 is NUL, 0x20 is the blank and 0x7f is DEL.
 */
 int type__allowed[257] = {
  /* 0x00   */ T__None,
  /* 0x01   */ T__Space|T__WordSep,
  /* 0x02   */ T__Space|T__WordSep,
  /* 0x03   */ T__Space|T__WordSep,
  /* 0x04   */ T__Space|T__WordSep,
  /* 0x05   */ T__Space|T__WordSep,
  /* 0x06   */ T__Space|T__WordSep,
  /* 0x07   */ T__Space|T__WordSep,
  /* 0x08   */ T__Space|T__WordSep,
  /* 0x09   */ T__Space|T__WordSep,
  /* 0x0a   */ T__Space|T__WordSep,
  /* 0x0b   */ T__Space|T__WordSep,
  /* 0x0c   */ T__Space|T__WordSep,
  /* 0x0d   */ T__Space|T__WordSep,
  /* 0x0e   */ T__Space|T__WordSep,
  /* 0x0f   */ T__Space|T__WordSep,
  /* 0x10   */ T__Space|T__WordSep,
  /* 0x11   */ T__Space|T__WordSep,
  /* 0x12   */ T__Space|T__WordSep,
  /* 0x13   */ T__Space|T__WordSep,
  /* 0x14   */ T__Space|T__WordSep,
  /* 0x15   */ T__Space|T__WordSep,
  /* 0x16   */ T__Space|T__WordSep,
  /* 0x17   */ T__Space|T__WordSep,
  /* 0x18   */ T__Space|T__WordSep,
  /* 0x19   */ T__Space|T__WordSep,
  /* 0x1a   */ T__Space|T__WordSep,
  /* 0x1b   */ T__Space|T__WordSep,
  /* 0x1c   */ T__Space|T__WordSep,
  /* 0x1d   */ T__Space|T__WordSep,
  /* 0x1e   */ T__Space|T__WordSep,
  /* 0x1f   */ T__Space|T__WordSep,
  /* 0x20   */ T__Space|T__WordSep,
  /* 0x21 ! */ T__Allowed|T__WordSep,
  /* 0x22 " */ T__None,
  /* 0x23 # */ T__None,
  /* 0x24 $ */ T__Allowed,
  /* 0x25 % */ T__None,
  /* 0x26 & */ T__Allowed,
  /* 0x27 ' */ T__None,
  /* 0x28 ( */ T__None,
  /* 0x29 ) */ T__None,
  /* 0x2a * */ T__Allowed,
  /* 0x2b + */ T__Allowed,
  /* 0x2c , */ T__None|T__WordSep,
  /* 0x2d - */ T__Allowed,
  /* 0x2e . */ T__Allowed|T__WordSep,
  /* 0x2f / */ T__Allowed,
  /* 0x30 0 */ T__Allowed|T__Number,
  /* 0x31 1 */ T__Allowed|T__Number,
  /* 0x32 2 */ T__Allowed|T__Number,
  /* 0x33 3 */ T__Allowed|T__Number,
  /* 0x34 4 */ T__Allowed|T__Number,
  /* 0x35 5 */ T__Allowed|T__Number,
  /* 0x36 6 */ T__Allowed|T__Number,
  /* 0x37 7 */ T__Allowed|T__Number,
  /* 0x38 8 */ T__Allowed|T__Number,
  /* 0x39 9 */ T__Allowed|T__Number,
  /* 0x3a : */ T__Allowed|T__WordSep,
  /* 0x3b ; */ T__Allowed|T__WordSep,
  /* 0x3c < */ T__Allowed,
  /* 0x3d = */ T__None,
  /* 0x3e > */ T__Allowed,
  /* 0x3f ? */ T__Allowed,
  /* 0x40 @ */ T__Allowed|T__WordSep,
  /* 0x41 A */ T__Allowed|T__Upper,
  /* 0x42 B */ T__Allowed|T__Upper,
  /* 0x43 C */ T__Allowed|T__Upper,
  /* 0x44 D */ T__Allowed|T__Upper,
  /* 0x45 E */ T__Allowed|T__Upper,
  /* 0x46 F */ T__Allowed|T__Upper,
  /* 0x47 G */ T__Allowed|T__Upper,
  /* 0x48 H */ T__Allowed|T__Upper,
  /* 0x49 I */ T__Allowed|T__Upper,
  /* 0x4a J */ T__Allowed|T__Upper,
  /* 0x4b K */ T__Allowed|T__Upper,
  /* 0x4c L */ T__Allowed|T__Upper,
  /* 0x4d M */ T__Allowed|T__Upper,
  /* 0x4e N */ T__Allowed|T__Upper,
  /* 0x4f O */ T__Allowed|T__Upper,
  /* 0x50 P */ T__Allowed|T__Upper,
  /* 0x51 Q */ T__Allowed|T__Upper,
  /* 0x52 R */ T__Allowed|T__Upper,
  /* 0x53 S */ T__Allowed|T__Upper,
  /* 0x54 T */ T__Allowed|T__Upper,
  /* 0x55 U */ T__Allowed|T__Upper,
  /* 0x56 V */ T__Allowed|T__Upper,
  /* 0x57 W */ T__Allowed|T__Upper,
  /* 0x58 X */ T__Allowed|T__Upper,
  /* 0x59 Y */ T__Allowed|T__Upper,
  /* 0x5a Z */ T__Allowed|T__Upper,
  /* 0x5b [ */ T__Allowed,
  /* 0x5c \ */ T__Allowed,
  /* 0x5d ] */ T__Allowed,
  /* 0x5e ^ */ T__Allowed,
  /* 0x5f _ */ T__Allowed,
  /* 0x60 ` */ T__Allowed,
  /* 0x61 a */ T__Allowed|T__Lower,
  /* 0x62 b */ T__Allowed|T__Lower,
  /* 0x63 c */ T__Allowed|T__Lower,
  /* 0x64 d */ T__Allowed|T__Lower,
  /* 0x65 e */ T__Allowed|T__Lower,
  /* 0x66 f */ T__Allowed|T__Lower,
  /* 0x67 g */ T__Allowed|T__Lower,
  /* 0x68 h */ T__Allowed|T__Lower,
  /* 0x69 i */ T__Allowed|T__Lower,
  /* 0x6a j */ T__Allowed|T__Lower,
  /* 0x6b k */ T__Allowed|T__Lower,
  /* 0x6c l */ T__Allowed|T__Lower,
  /* 0x6d m */ T__Allowed|T__Lower,
  /* 0x6e n */ T__Allowed|T__Lower,
  /* 0x6f o */ T__Allowed|T__Lower,
  /* 0x70 p */ T__Allowed|T__Lower,
  /* 0x71 q */ T__Allowed|T__Lower,
  /* 0x72 r */ T__Allowed|T__Lower,
  /* 0x73 s */ T__Allowed|T__Lower,
  /* 0x74 t */ T__Allowed|T__Lower,
  /* 0x75 u */ T__Allowed|T__Lower,
  /* 0x76 v */ T__Allowed|T__Lower,
  /* 0x77 w */ T__Allowed|T__Lower,
  /* 0x78 x */ T__Allowed|T__Lower,
  /* 0x79 y */ T__Allowed|T__Lower,
  /* 0x7a z */ T__Allowed|T__Lower,
  /* 0x7b { */ T__None,
  /* 0x7c | */ T__Allowed,
  /* 0x7d } */ T__None,
  /* 0x7e ~ */ T__WordSep,
  /* 0x7f   */ T__Allowed,
  /* 0x80   */ T__Allowed|T__Extended,
  /* 0x81   */ T__Allowed|T__Extended,
  /* 0x82   */ T__Allowed|T__Extended,
  /* 0x83   */ T__Allowed|T__Extended,
  /* 0x84   */ T__Allowed|T__Extended,
  /* 0x85   */ T__Allowed|T__Extended,
  /* 0x86   */ T__Allowed|T__Extended,
  /* 0x87   */ T__Allowed|T__Extended,
  /* 0x88   */ T__Allowed|T__Extended,
  /* 0x89   */ T__Allowed|T__Extended,
  /* 0x8a   */ T__Allowed|T__Extended,
  /* 0x8b   */ T__Allowed|T__Extended,
  /* 0x8c   */ T__Allowed|T__Extended,
  /* 0x8d   */ T__Allowed|T__Extended,
  /* 0x8e   */ T__Allowed|T__Extended,
  /* 0x8f   */ T__Allowed|T__Extended,
  /* 0x90   */ T__Allowed|T__Extended,
  /* 0x91   */ T__Allowed|T__Extended,
  /* 0x92   */ T__Allowed|T__Extended,
  /* 0x93   */ T__Allowed|T__Extended,
  /* 0x94   */ T__Allowed|T__Extended,
  /* 0x95   */ T__Allowed|T__Extended,
  /* 0x96   */ T__Allowed|T__Extended,
  /* 0x97   */ T__Allowed|T__Extended,
  /* 0x98   */ T__Allowed|T__Extended,
  /* 0x99   */ T__Allowed|T__Extended,
  /* 0x9a   */ T__Allowed|T__Extended,
  /* 0x9b   */ T__Allowed|T__Extended,
  /* 0x9c   */ T__Allowed|T__Extended,
  /* 0x9d   */ T__Allowed|T__Extended,
  /* 0x9e   */ T__Allowed|T__Extended,
  /* 0x9f   */ T__Allowed|T__Extended,
  /* 0xa0   */ T__Allowed|T__Extended,
  /* 0xa1   */ T__Allowed|T__Extended,
  /* 0xa2   */ T__Allowed|T__Extended,
  /* 0xa3   */ T__Allowed|T__Extended,
  /* 0xa4   */ T__Allowed|T__Extended,
  /* 0xa5   */ T__Allowed|T__Extended,
  /* 0xa6   */ T__Allowed|T__Extended,
  /* 0xa7   */ T__Allowed|T__Extended,
  /* 0xa8   */ T__Allowed|T__Extended,
  /* 0xa9   */ T__Allowed|T__Extended,
  /* 0xaa   */ T__Allowed|T__Extended,
  /* 0xab   */ T__Allowed|T__Extended,
  /* 0xac   */ T__Allowed|T__Extended,
  /* 0xad   */ T__Allowed|T__Extended,
  /* 0xae   */ T__Allowed|T__Extended,
  /* 0xaf   */ T__Allowed|T__Extended,
  /* 0xb0   */ T__Allowed|T__Extended,
  /* 0xb1   */ T__Allowed|T__Extended,
  /* 0xb2   */ T__Allowed|T__Extended,
  /* 0xb3   */ T__Allowed|T__Extended,
  /* 0xb4   */ T__Allowed|T__Extended,
  /* 0xb5   */ T__Allowed|T__Extended,
  /* 0xb6   */ T__Allowed|T__Extended,
  /* 0xb7   */ T__Allowed|T__Extended,
  /* 0xb8   */ T__Allowed|T__Extended,
  /* 0xb9   */ T__Allowed|T__Extended,
  /* 0xba   */ T__Allowed|T__Extended,
  /* 0xbb   */ T__Allowed|T__Extended,
  /* 0xbc   */ T__Allowed|T__Extended,
  /* 0xbd   */ T__Allowed|T__Extended,
  /* 0xbe   */ T__Allowed|T__Extended,
  /* 0xbf   */ T__Allowed|T__Extended,
  /* 0xc0   */ T__Allowed|T__Extended,
  /* 0xc1   */ T__Allowed|T__Extended,
  /* 0xc2   */ T__Allowed|T__Extended,
  /* 0xc3   */ T__Allowed|T__Extended,
  /* 0xc4   */ T__Allowed|T__Extended,
  /* 0xc5   */ T__Allowed|T__Extended,
  /* 0xc6   */ T__Allowed|T__Extended,
  /* 0xc7   */ T__Allowed|T__Extended,
  /* 0xc8   */ T__Allowed|T__Extended,
  /* 0xc9   */ T__Allowed|T__Extended,
  /* 0xca   */ T__Allowed|T__Extended,
  /* 0xcb   */ T__Allowed|T__Extended,
  /* 0xcc   */ T__Allowed|T__Extended,
  /* 0xcd   */ T__Allowed|T__Extended,
  /* 0xce   */ T__Allowed|T__Extended,
  /* 0xcf   */ T__Allowed|T__Extended,
  /* 0xd0   */ T__Allowed|T__Extended,
  /* 0xd1   */ T__Allowed|T__Extended,
  /* 0xd2   */ T__Allowed|T__Extended,
  /* 0xd3   */ T__Allowed|T__Extended,
  /* 0xd4   */ T__Allowed|T__Extended,
  /* 0xd5   */ T__Allowed|T__Extended,
  /* 0xd6   */ T__Allowed|T__Extended,
  /* 0xd7   */ T__Allowed|T__Extended,
  /* 0xd8   */ T__Allowed|T__Extended,
  /* 0xd9   */ T__Allowed|T__Extended,
  /* 0xda   */ T__Allowed|T__Extended,
  /* 0xdb   */ T__Allowed|T__Extended,
  /* 0xdc   */ T__Allowed|T__Extended,
  /* 0xdd   */ T__Allowed|T__Extended,
  /* 0xde   */ T__Allowed|T__Extended,
  /* 0xdf   */ T__Allowed|T__Extended,
  /* 0xe0   */ T__Allowed|T__Extended,
  /* 0xe1   */ T__Allowed|T__Extended,
  /* 0xe2   */ T__Allowed|T__Extended,
  /* 0xe3   */ T__Allowed|T__Extended,
  /* 0xe4   */ T__Allowed|T__Extended,
  /* 0xe5   */ T__Allowed|T__Extended,
  /* 0xe6   */ T__Allowed|T__Extended,
  /* 0xe7   */ T__Allowed|T__Extended,
  /* 0xe8   */ T__Allowed|T__Extended,
  /* 0xe9   */ T__Allowed|T__Extended,
  /* 0xea   */ T__Allowed|T__Extended,
  /* 0xeb   */ T__Allowed|T__Extended,
  /* 0xec   */ T__Allowed|T__Extended,
  /* 0xed   */ T__Allowed|T__Extended,
  /* 0xee   */ T__Allowed|T__Extended,
  /* 0xef   */ T__Allowed|T__Extended,
  /* 0xf0   */ T__Allowed|T__Extended,
  /* 0xf1   */ T__Allowed|T__Extended,
  /* 0xf2   */ T__Allowed|T__Extended,
  /* 0xf3   */ T__Allowed|T__Extended,
  /* 0xf4   */ T__Allowed|T__Extended,
  /* 0xf5   */ T__Allowed|T__Extended,
  /* 0xf6   */ T__Allowed|T__Extended,
  /* 0xf7   */ T__Allowed|T__Extended,
  /* 0xf8   */ T__Allowed|T__Extended,
  /* 0xf9   */ T__Allowed|T__Extended,
  /* 0xfa   */ T__Allowed|T__Extended,
  /* 0xfb   */ T__Allowed|T__Extended,
  /* 0xfc   */ T__Allowed|T__Extended,
  /* 0xfd   */ T__Allowed|T__Extended,
  /* 0xfe   */ T__Allowed|T__Extended,
  /* 0xff   */ T__Allowed|T__Extended
 };
327 
/*-----------------------------------------------------------------------------
** Variable:	trans_lower
** Type:	Uchar[256]
** Purpose:	Translation table mapping upper case letters to lower
**		case. It is indexed by the character code. Such a
**		translation table can be used as argument to the regular
**		expression functions.
**		The table is defined here without an initializer; the
**		description at the top of this file requires |init_type()|
**		to be called before the macros work as described.
**___________________________________________________			     */
Uchar trans_lower[256];			   	   /*                        */
336 
/*-----------------------------------------------------------------------------
** Variable:	trans_upper
** Type:	Uchar[256]
** Purpose:	Translation table mapping lower case letters to upper
**		case. It is indexed by the character code. Such a
**		translation table can be used as argument to the regular
**		expression functions.
**		The table is defined here without an initializer; the
**		description at the top of this file requires |init_type()|
**		to be called before the macros work as described.
**___________________________________________________			     */
Uchar trans_upper[256];			   	   /*                        */
345 
/*-----------------------------------------------------------------------------
** Variable:	trans_id
** Type:	Uchar[256]
** Purpose:	Translation table performing no translation, i.e. it
**		implements the identity. Such a translation table can
**		be used as argument to the regular expression functions.
**		The table is defined here without an initializer.
**		NOTE(review): presumably it is filled by |init_type()|
**		like the other tables -- confirm in type.c.
**___________________________________________________			     */
Uchar trans_id[256];			   	   /*                        */
354 
355 #else
356 
 /*
 ** Declarations seen when INIT_TYPE is not defined. The objects themselves
 ** are defined in the INIT_TYPE branch of this conditional.
 ** type__allowed is declared without a size here, so in this branch it has
 ** an incomplete array type and sizeof cannot be applied to it.
 */
 extern int  type__allowed[];
 extern Uchar trans_lower[256];
 extern Uchar trans_upper[256];
 extern Uchar trans_id[256];
361 #endif
362 
/*-----------------------------------------------------------------------------
** Macro:	is_allowed()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is an
**		allowed character in the sense of \BibTeX.
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bit |T__Allowed|) iff the argument
**		is an allowed character and 0 otherwise.
**___________________________________________________			     */
#define is_allowed(C)	  (type__allowed[(Uchar)(C)]&T__Allowed)
373 
/*-----------------------------------------------------------------------------
** Macro:	is_upper()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is an
**		upper case letter.
**		(Characters outside the ASCII range are not considered
**		letters yet)
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bit |T__Upper|) iff the character
**		is an uppercase letter and 0 otherwise.
**___________________________________________________			     */
#define is_upper(C)	  (type__allowed[(Uchar)(C)]&T__Upper)
386 
/*-----------------------------------------------------------------------------
** Macro:	is_lower()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is a
**		lower case letter.
**		(Characters outside the ASCII range are not considered
**		letters yet)
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bit |T__Lower|) iff the character
**		is a lowercase letter and 0 otherwise.
**___________________________________________________			     */
#define is_lower(C)	  (type__allowed[(Uchar)(C)]&T__Lower)
399 
/*-----------------------------------------------------------------------------
** Macro:	is_alpha()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is a
**		letter, i.e. an upper case or a lower case letter.
**		(Characters outside the ASCII range are not considered
**		letters yet)
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bits |T__Upper| or |T__Lower| found
**		in the table) iff the character is a letter and 0
**		otherwise.
**___________________________________________________			     */
#define is_alpha(C)	  (type__allowed[(Uchar)(C)]&(T__Upper|T__Lower))
412 
/*-----------------------------------------------------------------------------
** Macro:	is_digit()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is a
**		digit.
**		(Characters outside the ASCII range are not considered
**		digits yet)
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bit |T__Number|) iff the character
**		is a digit and 0 otherwise.
**___________________________________________________			     */
#define is_digit(C)	  (type__allowed[(Uchar)(C)]&T__Number)
425 
/*-----------------------------------------------------------------------------
** Macro:	is_space()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is a
**		space character. |'\0'| is not a space character.
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bit |T__Space|) iff the character
**		is a space character and 0 otherwise.
**___________________________________________________			     */
#define is_space(C)	  (type__allowed[(Uchar)(C)]&T__Space)
436 
/*-----------------------------------------------------------------------------
** Macro:	is_extended()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is an
**		extended character outside the ASCII range.
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bit |T__Extended|) iff the character
**		is an extended character and 0 otherwise.
**___________________________________________________			     */
#define is_extended(C)	  (type__allowed[(Uchar)(C)]&T__Extended)
447 
/*-----------------------------------------------------------------------------
** Macro:	is_wordsep()
** Type:	int (to be used as truth value)
** Purpose:	Decide whether the character given as argument is a
**		word separator which denotes no word constituent.
**		The argument is evaluated exactly once. It is converted
**		to |Uchar| as a whole before it is used as table index,
**		thus arbitrary expressions can be passed in.
** Arguments:
**	C	Character to consider
** Returns:	A non-zero value (the bit |T__WordSep|) iff the character
**		is a word separator and 0 otherwise.
**___________________________________________________			     */
#define is_wordsep(C)	  (type__allowed[(Uchar)(C)]&T__WordSep)
458 
/*-----------------------------------------------------------------------------
** Macro:	ToLower()
** Type:	Uchar
** Purpose:	Look up the lower case counterpart of a character in
**		the table |trans_lower|. A character which is no upper
**		case letter is meant to be mapped to itself.
**		The argument is converted to |Uchar| and evaluated
**		exactly once.
** Arguments:
**	C	Character to translate
** Returns:	The entry of |trans_lower| for the character, i.e. its
**		lower case letter or the character itself.
**___________________________________________________			     */
#define ToLower(C)	  (trans_lower[(Uchar)(C)])
470 
/*-----------------------------------------------------------------------------
** Macro:	ToUpper()
** Type:	Uchar
** Purpose:	Look up the upper case counterpart of a character in
**		the table |trans_upper|. A character which is no lower
**		case letter is meant to be mapped to itself.
**		The argument is converted to |Uchar| and evaluated
**		exactly once.
** Arguments:
**	C	Character to translate
** Returns:	The entry of |trans_upper| for the character, i.e. its
**		upper case letter or the character itself.
**___________________________________________________			     */
#define ToUpper(C)	  (trans_upper[(Uchar)(C)])
482 
/*
** Symbol type selectors.
** NOTE(review): the code using these constants is not part of this header.
** Judging from the names they presumably select lower case, upper case and
** case preserving treatment of symbols -- confirm against the callers.
*/
#define SYMBOL_TYPE_LOWER 0
#define SYMBOL_TYPE_UPPER 1
#define SYMBOL_TYPE_CASED 2
486 
487 #endif
488 
/*
** _ARG() wraps the parenthesized parameter list of a prototype.
** If the compiler defines __STDC__ the list is kept as written. Otherwise
** the list is dropped and an empty pair of parentheses remains, which yields
** a declaration without parameter information.
** The replacement lists are left exactly as they were, so an identical
** definition of _ARG elsewhere does not clash with this one.
*/
#if defined(__STDC__)
#define _ARG(A) A
#else
#define _ARG(A) ()
#endif
 /*
 ** Prototypes of the functions defined in type.c. The parameter lists are
 ** wrapped in _ARG() so that they vanish for compilers without __STDC__.
 ** These declarations follow the #endif of the include guard and are thus
 ** processed on every inclusion of this header.
 ** NOTE(review): the type bool is not declared in this file; presumably it
 ** comes from <bibtool/general.h> -- confirm.
 ** NOTE(review): the contracts of these functions are documented in type.c
 ** and cannot be derived from this header.
 */
 String lower _ARG((String s));		   	   /* type.c                 */
 bool case_eq _ARG((String s, String t));	   /* type.c                 */
 int cmp _ARG((String s, String t));	   	   /* type.c                 */
 void add_word_sep _ARG((String s));		   /* type.c                 */
 void init_type _ARG((void));			   /* type.c                 */
499 
500 /*---------------------------------------------------------------------------*/
501