1 /*   (C) Copyright 2005, 2006, 2007, 2008, 2009 Stijn van Dongen
2  *
3  * This file is part of tingea.  You can redistribute and/or modify tingea under
4  * the terms of the GNU General Public License; either version 3 of the License
5  * or (at your option) any later version.  You should have received a copy of
6  * the GPL along with tingea, in the file COPYING.
7 */
8 
9 #ifndef tingea_tr
10 #define tingea_tr
11 
12 #include <string.h>
13 
14 #include "ting.h"
15 #include "types.h"
16 #include "inttypes.h"
17 
18 
19 /*
20  * README
 *    This interface is not POSIX compliant. It might evolve to
 *    optionally become so.
 *    However, given some of the braindeadliness of POSIX tr compliance,
 *    I don't think the world needs another tr implementation.
25  *    My gripe is mainly about derailed syntax such as '[:alpha:0'.
26  *    It should go down in a ball of flames, not happily parse.
27  *    To be honest, I don't know for sure whether this is a POSIX
28  *    lack of requirement or an implementation choice.
29  *
30  *    I did choose to follow most of the POSIX syntax. It is probably
31  *    a sign of weakness.
32  *    This interface should be able to do everything a POSIX interface can,
33  *    possibly more.
34  *
35  * -  It allows separate specification of src, dst, del and squash sets.
36  * -  Provisionally we accept "^spec" to indicate complement,
37  *       for any of src dst del squash sets.
38  * -  It uses [*c*20] to denote repeats, rather than [c*20].
39  *       rationale: do not slam door shut on new syntax.
40  * -  It does not recognize '[a-z]' ranges, only 'a-z'.
41  *       rationale: none. If ever, notation will be [-a-z] or similar.
42  * -  The magic repeat operator [*c#] stops on boundaries
43  *       rationale: I like it.
44  *       A boundary is introduced by stop/start of ranges and classes.
45  * -  The magic repeat operator [*c*] does not stop on boundaries.
46  * -  For now, the interface does 1) deletion, 2) translation, 3) squashing.
47  *       in the future it may provide a custom order of doing things.
48  *
49  *
50  * Apart from the fact that you cannot have '\0' in C strings, everything
51  * here should work for '\0' as well - specifically the mcxTrTable structure.
52  * However, the current interface uses C strings for dst and src and C strings
53  * for data.
54  *
55  * More documentation to follow.
56  *
57 */
58 
59 extern const char* mcx_tr_err;
60 extern mcxbool     mcx_tr_debug;
61 
62 
63 typedef struct
64 {  u32      tlt[256]
65 ;  mcxbits  modes
66 ;
67 }  mcxTR    ;
68 
69 
/*
 * Flag bits passed as mcxbits arguments to the routines in this header.
 *
 * Every expansion is wrapped in parentheses so that a flag remains a single
 * operand when it sits next to an operator that binds tighter than <<
 * (unary ~ and !, casts, * / % + -).  Without them, e.g.
 * MCX_TR_SOURCE + 1 would expand to 1 << 2 + 1, which is 1 << 3.
 *
 * The numeric values are unchanged.  Bit 0 is not assigned to any flag here.
*/
#define MCX_TR_DEFAULT     0
#define MCX_TR_TRANSLATE   (1 <<  1)

/* Selectors for the four sets (see the list accepted by mcxTRsplash). */
#define MCX_TR_SOURCE      (1 <<  2)
#define MCX_TR_DEST        (1 <<  3)
#define MCX_TR_SQUASH      (1 <<  4)
#define MCX_TR_DELETE      (1 <<  5)

/* Complement counterparts of the four selectors above. */
#define MCX_TR_SOURCE_C    (1 <<  6)
#define MCX_TR_DEST_C      (1 <<  7)
#define MCX_TR_DELETE_C    (1 <<  8)
#define MCX_TR_SQUASH_C    (1 <<  9)


/* NOTE(review): exact role not visible from this header -- confirm in tr.c. */
#define MCX_TR_COMPLEMENT  (1 << 10)
85 
86 
87 mcxstatus mcxTRloadTable
88 (  mcxTR*      tr
89 ,  const char* src
90 ,  const char* dst
91 ,  const char* set_delete
92 ,  const char* set_squash
93 ,  mcxbits     modes
94 )  ;
95 
96 
97   /*  returns new length of string.
98    *  fixme: document map/squash semantics.
99   */
100 ofs mcxTRtranslate
101 (  char*    src
102 ,  mcxTR*   tr
103 )  ;
104 
105 
106 ofs mcxTingTranslate
107 (  mcxTing*       src
108 ,  mcxTR*         tr
109 )  ;
110 
111 ofs mcxTingTr
112 (  mcxTing*       txt
113 ,  const char*    src
114 ,  const char*    dst
115 ,  const char*    set_delete
116 ,  const char*    set_squash
117 ,  mcxbits        flags
118 )  ;
119 
120 
/* Decodes an escape sequence and stores its numeric value in *value.
 *
 * Understood forms:
 *    octal, e.g. \012 (stores 10)
 *    hexadecimal, e.g. \xa0
 *    single-letter escapes such as \n, \t, \r, \b
 * Not understood (yet): \0xa0
 *
 * The return value points at the next character to be parsed.
 * NOTE(review): z presumably marks the end of the input -- confirm.
 *
 * This interface should be moved to ding.
*/

char *mcxStrEscapedValue(const char *p, const char *z, int *value);
135 
136 
137 
138 /*
139  * returns a ting containing all the characters according to bits.
140  * bits accept
141  *    MCX_TR_SOURCE
142  *    MCX_TR_SOURCE_C
143  *    MCX_TR_SQUASH
144  *    MCX_TR_SQUASH_C
145  *    MCX_TR_DELETE
146  *    MCX_TR_DELETE_C
147  *
148  * NOTE
149  *    MCX_TR_DEST
150  *    MCX_TR_DEST_C
151  *    are not yet implemented.
152  *
153  * NOTE DANGER SIGN
154  *    tr no longer contains information on complements that were
155  *    used in constructing it.
156  *    The complements that bits refer to is simply the information
157  *    present in tr.
158  *    So a   source of "^a-z"  given to mcxTRloadTable
159  *    and    MCX_TR_SOURCE_C given to mcxTRsplash
160  *    result in a string containing all of a-z.
161 */
162 
163 mcxTing* mcxTRsplash
164 (  mcxTR*   tr
165 ,  mcxbits  bits
166 )  ;
167 
168 
169 #endif
170 
171