1 /*
2 
3 					Parser for libpics
4 
5 
6 
7 
8 
9 !Parser for libpics!
10 
11 */
12 
13 /*
14 **	(c) COPYRIGHT MIT 1996.
15 **	Please first read the full copyright statement in the file COPYRIGH.
16 */
17 
18 /*
19 
20 This module provides the interface to CSParse.c.
21 The parser is used to parse labels,
22 machine-readable descriptions, and
users. The application creates one of these and iteratively calls CSParse_parseChunk until it returns a done or an error.
24 
25 
26 */
27 
28 #ifndef CSPARSE_H
29 #define CSPARSE_H
30 #include "CSLUtils.h"
31 #include "HTChunk.h"
32 
33 /*
34 
35 .NowIn.
36 tells CSParse where it is in the task of tokenizing
37 */
38 
/*
 * NowIn_t - tells CSParse where it is in the task of tokenizing.
 *
 * The enumerators are consecutive integers starting at NowIn_INVALID == 0;
 * the values are spelled out so the numbering is visible at a glance.
 */
typedef enum {
    NowIn_INVALID    = 0,
    NowIn_NEEDOPEN   = 1,
    NowIn_ENGINE     = 2,
    NowIn_NEEDCLOSE  = 3,
    NowIn_END        = 4,
    NowIn_MATCHCLOSE = 5,
    NowIn_ERROR      = 6,
    NowIn_CHAIN      = 7
} NowIn_t;
49 
50 /*
51 
52 
53 (Construction/Destruction)
54 
55 The parse objects are never created by the application, but instead by one of
56 the objects that it is used to parse.
57 
58 */
59 
60 extern CSParse_t * CSParse_new(void);
61 extern void CSParse_delete(CSParse_t * me);
62 
63 /*
64 
65 (some handy definitions)
66 
67 */
68 
/* Bracketing characters, as character constants. */
#define LPAREN    '('
#define RPAREN    ')'
#define LCURLY    '{'
#define RCURLY    '}'
#define LBRACKET  '['
#define RBRACKET  ']'

/*
 * Quote characters, written as hex codes rather than character literals
 * so that editors which check paren/quote balance are not confused.
 */
#define SQUOTE    0x27
#define DQUOTE    0x22

/* The parentheses again, this time as one-character string literals. */
#define LPARENSTR "("
#define RPARENSTR ")"
/*
 * raysize(A) - number of elements in the array A.
 *
 * A must be a real array object; if it has decayed to a pointer (e.g. an
 * array function parameter) the result is meaningless.  The argument is
 * parenthesized before subscripting so that the expansion never depends
 * on operator precedence inside the caller's expression.
 */
#define raysize(A) (sizeof(A) / sizeof((A)[0]))
80 
81 /*
82 
83 !subparser data!
84 .Punct.
85 valid punctuation
86 
87 */
88 
/*
 * Punct_t - valid punctuation.
 *
 * Each enumerator is a distinct bit, so several may be OR-ed together.
 * Punct_ALL is the union of the four individual flags (0xf).
 */
typedef enum {
    Punct_ZERO   = 1,
    Punct_WHITE  = 2,
    Punct_LPAREN = 4,
    Punct_RPAREN = 8,
    Punct_ALL    = Punct_ZERO | Punct_WHITE | Punct_LPAREN | Punct_RPAREN
} Punct_t;
91 
92 /*
93 
94 .SubState.
95 Enumerated bits that are used to mark a parsing state. Because they are bits,
as opposed to sequential numbers, a StateToken may
OR more than one together and serve more than one state. They must have
98 identical outcomes if this is to be exploited.
99 
By convention, the following SubState names are used:
o X - has no state
	 o N - is a newly created object
	 o A-H - substate definitions. Because they are non-conflicting bits, a
	 subparser may have options that sit in more than one state. For instance, the
	 string "error" may be matched in states A and C with:
106 {"error test", SubState_A|SubState_C, Punct_LPAREN, 0, "error"}
107 
*probs* I meant to keep these 16 bit compatible, but ran up short at the end
109 of one StateToken list. This can be fixed if anyone needs a 16 bit enum.
110 
111 */
112 
/*
 * SubState_t - bits that mark a parsing state.
 *
 * SubState_A .. SubState_I and SubState_N are single, non-overlapping bits
 * and can therefore be OR-ed together; SubState_X is -1.
 * Every enumerator carries an explicit value, listed here in ascending order.
 */
typedef enum {
    SubState_X = -1,
    SubState_A = 0x0001,
    SubState_B = 0x0002,
    SubState_C = 0x0004,
    SubState_D = 0x0008,
    SubState_E = 0x0010,
    SubState_F = 0x0020,
    SubState_G = 0x0040,
    SubState_H = 0x0080,
    SubState_I = 0x0100,
    SubState_N = 0x4000
} SubState_t;
117 
118 /*
119 
120 forward declaration for StateToken_t
121 
122 */
123 
/*
 * Forward declaration: introduce the struct tag first, then its typedef
 * alias, so StateToken_t pointers can be used before the struct body
 * is defined.
 */
struct StateToken_s;
typedef struct StateToken_s StateToken_t;
125 
126 /*
127 
128 .Engine.
129 called by CSParse to process tokens and punctuation
130 */
131 
132 typedef NowIn_t Engine_t(CSParse_t * pCSParse, char demark, void * pVoid);
133 
134 /*
135 
136 Engine employed by the Label, MacRed, and User parsers
137 */
138 
139 Engine_t CSParse_targetParser;
140 
141 /*
142 
143 .substate methods.
144 All methods return a StateRet.
145 
146 (Check)
147 see if a value is legitimate, may also record it
148 */
149 
150 typedef StateRet_t Check_t(CSParse_t * pCSParse, StateToken_t * pStateToken,
151 			   char * token, char demark);
152 
153 /*
154 
155 Punctuation checker to be employed by Check_t functions
156 */
157 
158 extern BOOL Punct_badDemark(Punct_t validPunctuation, char demark);
159 
160 /*
161 
162 (Open)
163 create a new data structure to be filled by the parser
164 */
165 
166 typedef StateRet_t Open_t(CSParse_t * pCSParse, char * token, char demark);
167 
168 /*
169 
170 (Close)
171 tell the state that the data structure is no longer current
172 */
173 
174 typedef StateRet_t Close_t(CSParse_t * pCSParse, char * token, char demark);
175 
176 /*
177 
178 (Prep)
179 get ready for next state
180 */
181 
182 typedef StateRet_t Prep_t(CSParse_t * pCSParse, char * token, char demark);
183 
184 /*
185 
186 (Destroy)
187 something went wrong, throw away the current object
188 */
189 
190 typedef void Destroy_t(CSParse_t * pCSParse);
191 
192 /*
193 
194 
195 .Command.
196 substate commands
197 
o open - call the open function for the current data structure
	 o close - call the close function for the current data structure
200 	 o chain - call again on the next state without re-reading data
201 	 o notoken - clear the token before a chain (so next state just gets punct)
202 	 o matchany - match any string
203 
204 	 */
205 
/*
 * Command_t - substate commands.
 *
 * Apart from Command_NONE (0) every enumerator is a distinct bit, so
 * commands can be OR-ed together.
 */
typedef enum {
    Command_NONE     = 0x00,
    Command_OPEN     = 0x01,    /* call the open function */
    Command_CLOSE    = 0x02,    /* call the close function */
    Command_CHAIN    = 0x04,    /* call again on the next state without re-reading data */
    Command_NOTOKEN  = 0x08,    /* clear the token before a chain */
    Command_MATCHANY = 0x10     /* match any string */
} Command_t;
209 
210 /*
211 
212 .StateToken structure.
213 Contains all the information about what tokens are expected in what substates.
The StateTokens are kept in an array referenced by a TargetObject.
215 
216 */
217 
218 struct StateToken_s {
219     char * note;		/* some usefull text that describes the state - usefulll for debugging */
220     SubState_t validSubStates;
221     Punct_t validPunctuation;
222     Check_t * pCheck;   /* call this function to check token */
223     char * name1;       /* or compare to this name */
224     char * name2;		/* many strings have 2 spellings ("ratings" vs. "r") */
225     CSParseTC_t targetChange; /* whether target change implies diving or climbing from current state */
226     TargetObject_t * pNextTargetObject;
227     SubState_t nextSubState;
228     Command_t command;	/* open, close, chain, etc. */
229     Prep_t * pPrep;		/* prepare for next state */
230     };
231 
232 /*
233 
234 .TargetObject structure.
Methods and a list of StateTokens associated with a data structure. The
236 methods know how to read data into current object and the StateTokens tell
237 when to proceed to the next object.
238 
239 */
240 
241 struct TargetObject_s {
242     char * note;
243     Open_t * pOpen;   /* call this function to open structure */
244     Close_t * pClose;   /* call this function to close structure */
245     Destroy_t * pDestroy;
246     StateToken_t * stateTokens; /* array of sub states */
247     int stateTokenCount;        /* number of sub states */
248     CSParseTC_t targetChange; /* target change signal for opening this parse state */
249     };
250 
251 /*
252 
253 .ValTarget.
254 
255 */
256 
257 typedef union {
258     BVal_t * pTargetBVal;
259     FVal_t * pTargetFVal;
260     SVal_t * pTargetSVal;
261     DVal_t * pTargetDVal;
262     HTList ** pTargetList;
263     } ValTarget_t;
264 
265 /*
266 
267 .ValType.
268 Write down what value is to be read, and what type it is
269 
270 */
271 
/*
 * ValType_t - records what type of value is to be read.
 * Enumerators are consecutive integers starting at ValType_NONE == 0.
 */
typedef enum {
    ValType_NONE    = 0,
    ValType_BVAL    = 1,
    ValType_FVAL    = 2,
    ValType_SVAL    = 3,
    ValType_DVAL    = 4,
    ValType_COMMENT = 5
} ValType_t;
275 
276 /*
277 
278 .ParseContext.
Part of a CSParse. The boundary is a little fuzzy. Maybe it should not exist.
280 
281 */
282 
283 typedef struct {
284     Engine_t * engineOf;
285     TargetChangeCallback_t * pTargetChangeCallback;
286     ParseErrorHandler_t * pParseErrorHandler;
287 
288     /* for reading [BFSD]Val_t */
289     ValTarget_t valTarget;
290     ValType_t valType;
291 
292     char * pTokenError;
293 
294     BOOL observeQuotes;
295     BOOL observedQuotes;
296     char * legalChars;
297     int legalCharCount;
298     } ParseContext_t;
299 
300 /*
301 
302 .CSParse structure.
303 Full parser state and pointer to the object that it is reading.
304 
305 */
306 
307 struct CSParse_s {
308     char quoteState;
309     NowIn_t nowIn;
310     HTChunk * token;
311     char demark;
312     int offset;
313     int depth;
314     ParseContext_t * pParseContext;
315     union { /* all the types this parse engine fills */
316         CSMachRead_t * pCSMachRead; /* defined in CSMacRed.c */
317         CSLabel_t * pCSLabel; /* defined in CSLabel.c */
318         CSUser_t * pCSUser; /* defined in CSUser.c */
319         } target;
320     TargetObject_t * pTargetObject;
321     SubState_t currentSubState;
322     StateToken_t * pStateToken;
323     };
324 
325 /*
326 
327 */
328 
329 #endif /* CSPARSE_H */
330 
331 /*
332 
333 End of Declaration
334 
335 */
336