/** \file
 * \brief Defines the interface for a common token.
 *
 * All token streams should provide their tokens using an instance
 * of this common token. A custom pointer is provided, where you may attach
 * a further structure to enhance the common token if you feel the need
 * to do so. The C runtime will assume that a token provides implementations
 * of the interface functions, but all of them may be replaced by your own
 * implementation if you require it.
 */
11 #ifndef	_ANTLR3_COMMON_TOKEN_H
12 #define	_ANTLR3_COMMON_TOKEN_H
13 
14 // [The "BSD licence"]
15 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
16 // http://www.temporal-wave.com
17 // http://www.linkedin.com/in/jimidle
18 //
19 // All rights reserved.
20 //
21 // Redistribution and use in source and binary forms, with or without
22 // modification, are permitted provided that the following conditions
23 // are met:
24 // 1. Redistributions of source code must retain the above copyright
25 //    notice, this list of conditions and the following disclaimer.
26 // 2. Redistributions in binary form must reproduce the above copyright
27 //    notice, this list of conditions and the following disclaimer in the
28 //    documentation and/or other materials provided with the distribution.
29 // 3. The name of the author may not be used to endorse or promote products
30 //    derived from this software without specific prior written permission.
31 //
32 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
33 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
35 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
36 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
41 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 
43 #include    <antlr3defs.h>
44 
/** Number of tokens that the token factory allocates at once
 *  (i.e. the size of one allocation batch in the factory).
 */
#define	ANTLR3_FACTORY_POOL_SIZE    1024
48 
/* Base token types, which all lexer/parser tokens come after in sequence.
 */

/** Indicator of an invalid token
 */
#define	ANTLR3_TOKEN_INVALID	0

/** Reserved base token type 1.
 *  NOTE(review): the name suggests "end of rule" - confirm against the
 *  runtime sources that consume it.
 */
#define	ANTLR3_EOR_TOKEN_TYPE	1

/** Imaginary token type to cause a traversal of child nodes in a tree parser
 */
#define	ANTLR3_TOKEN_DOWN		2

/** Imaginary token type to signal the end of a stream of child nodes.
 */
#define	ANTLR3_TOKEN_UP		3

/** First token type that can be used by users/generated code.
 *
 *  The expansion is parenthesized so that the macro behaves as a single
 *  value inside larger expressions (e.g. ANTLR3_MIN_TOKEN_TYPE * 2 or
 *  x - ANTLR3_MIN_TOKEN_TYPE), rather than having its '+' re-associated
 *  with the surrounding operators.
 */
#define	ANTLR3_MIN_TOKEN_TYPE	(ANTLR3_TOKEN_UP + 1)
70 
/** End of file token: the character stream EOF marker (ANTLR3_CHARSTREAM_EOF,
 *  defined outside this header) masked down to its low 32 bits.
 */
#define	ANTLR3_TOKEN_EOF	(ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF)
74 
/** Default channel for a token
 */
#define	ANTLR3_TOKEN_DEFAULT_CHANNEL	0

/** Reserved channel number for a HIDDEN token - a token that
 *  is hidden from the parser.
 */
#define	HIDDEN				99
83 
84 #ifdef __cplusplus
85 extern "C" {
86 #endif
87 
// Values for the textState element of a common token. They indicate
// whether the token is carrying:
//
// State | Meaning
// ------+--------------------------------------
//     0 | Nothing (neither rewrite text, nor setText)
//     1 | char * to user supplied rewrite text
//     2 | pANTLR3_STRING because of setText or similar action
//
#define	ANTLR3_TEXT_NONE	0
#define	ANTLR3_TEXT_CHARP	1
#define	ANTLR3_TEXT_STRING	2
99 
100 /** The definition of an ANTLR3 common token structure, which all implementations
101  * of a token stream should provide, installing any further structures in the
102  * custom pointer element of this structure.
103  *
104  * \remark
105  * Token streams are in essence provided by lexers or other programs that serve
106  * as lexers.
107  */
108 typedef	struct ANTLR3_COMMON_TOKEN_struct
109 {
110     /** The actual type of this token
111      */
112     ANTLR3_UINT32   type;
113 
114     /** Indicates that a token was produced from the token factory and therefore
115      *  the the freeToken() method should not do anything itself because
116      *  token factory is responsible for deleting it.
117      */
118     ANTLR3_BOOLEAN  factoryMade;
119 
120 	/// A string factory that we can use if we ever need the text of a token
121 	/// and need to manufacture a pANTLR3_STRING
122 	///
123 	pANTLR3_STRING_FACTORY	strFactory;
124 
125     /** The line number in the input stream where this token was derived from
126      */
127     ANTLR3_UINT32   line;
128 
129     /** The offset into the input stream that the line in which this
130      *  token resides starts.
131      */
132     void	    * lineStart;
133 
134     /** The character position in the line that this token was derived from
135      */
136     ANTLR3_INT32    charPosition;
137 
138     /** The virtual channel that this token exists in.
139      */
140     ANTLR3_UINT32   channel;
141 
142     /** Pointer to the input stream that this token originated in.
143      */
144     pANTLR3_INPUT_STREAM    input;
145 
146     /** What the index of this token is, 0, 1, .., n-2, n-1 tokens
147      */
148     ANTLR3_MARKER   index;
149 
150     /** The character offset in the input stream where the text for this token
151      *  starts.
152      */
153     ANTLR3_MARKER   start;
154 
155     /** The character offset in the input stream where the text for this token
156      *  stops.
157      */
158     ANTLR3_MARKER   stop;
159 
160 	/// Indicates whether this token is carrying:
161 	///
162 	/// State | Meaning
163 	/// ------+--------------------------------------
164 	///     0 | Nothing (neither rewrite text, nor setText)
165 	///     1 | char * to user supplied rewrite text
166 	///     2 | pANTLR3_STRING because of setText or similar action
167 	///
168 	/// Affects the union structure tokText below
169 	/// (uses 32 bit so alignment is always good)
170 	///
171 	ANTLR3_UINT32	textState;
172 
173 	union
174 	{
175 		/// Pointer that is used when the token just has a pointer to
176 		/// a char *, such as when a rewrite of an imaginary token supplies
177 		/// a string in the grammar. No sense in constructing a pANTLR3_STRING just
178 		/// for that, as mostly the text will not be accessed - if it is, then
179 		/// we will build a pANTLR3_STRING for it a that point.
180 		///
181 		pANTLR3_UCHAR	chars;
182 
183 		/// Some token types actually do carry around their associated text, hence
184 		/// (*getText)() will return this pointer if it is not NULL
185 		///
186 		pANTLR3_STRING	text;
187 	}
188 		tokText;
189 
190     /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
191      *   as the standard structure for a token, a number of user programmable
192      *	 elements are allowed in a token. This is one of them.
193      */
194     ANTLR3_UINT32   user1;
195 
196     /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
197      *   as the standard structure for a token, a number of user programmable
198      *	 elements are allowed in a token. This is one of them.
199      */
200     ANTLR3_UINT32   user2;
201 
202     /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
203      *   as the standard structure for a token, a number of user programmable
204      *	 elements are allowed in a token. This is one of them.
205      */
206     ANTLR3_UINT32   user3;
207 
208     /** Pointer to a custom element that the ANTLR3 programmer may define and install
209      */
210     void    * custom;
211 
212     /** Pointer to a function that knows how to free the custom structure when the
213      *  token is destroyed.
214      */
215     void    (*freeCustom)(void * custom);
216 
217     /* ==============================
218      * API
219      */
220 
221     /** Pointer to function that returns the text pointer of a token, use
222      *  toString() if you want a pANTLR3_STRING version of the token.
223      */
224     pANTLR3_STRING  (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token);
225 
226     /** Pointer to a function that 'might' be able to set the text associated
227      *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
228      *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have
229      *  strings associated with them but just point into the current input stream. These
230      *  tokens will implement this function with a function that errors out (probably
231      *  drastically.
232      */
233     void	    (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text);
234 
235     /** Pointer to a function that 'might' be able to set the text associated
236      *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
237      *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have
238      *  strings associated with them but just point into the current input stream. These
239      *  tokens will implement this function with a function that errors out (probably
240      *  drastically.
241      */
242     void	    (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text);
243 
244     /** Pointer to a function that returns the token type of this token
245      */
246     ANTLR3_UINT32   (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token);
247 
248     /** Pointer to a function that sets the type of this token
249      */
250     void	    (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype);
251 
252     /** Pointer to a function that gets the 'line' number where this token resides
253      */
254     ANTLR3_UINT32   (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token);
255 
256     /** Pointer to a function that sets the 'line' number where this token reside
257      */
258     void	    (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line);
259 
260     /** Pointer to a function that gets the offset in the line where this token exists
261      */
262     ANTLR3_INT32    (*getCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token);
263 
264     /** Pointer to a function that sets the offset in the line where this token exists
265      */
266     void	    (*setCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos);
267 
268     /** Pointer to a function that gets the channel that this token was placed in (parsers
269      *  can 'tune' to these channels.
270      */
271     ANTLR3_UINT32   (*getChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token);
272 
273     /** Pointer to a function that sets the channel that this token should belong to
274      */
275     void	    (*setChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel);
276 
277     /** Pointer to a function that returns an index 0...n-1 of the token in the token
278      *  input stream.
279      */
280     ANTLR3_MARKER   (*getTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
281 
282     /** Pointer to a function that can set the token index of this token in the token
283      *  input stream.
284      */
285     void			(*setTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER);
286 
287     /** Pointer to a function that gets the start index in the input stream for this token.
288      */
289     ANTLR3_MARKER   (*getStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
290 
291     /** Pointer to a function that sets the start index in the input stream for this token.
292      */
293     void			(*setStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
294 
295     /** Pointer to a function that gets the stop index in the input stream for this token.
296      */
297     ANTLR3_MARKER   (*getStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
298 
299     /** Pointer to a function that sets the stop index in the input stream for this token.
300      */
301     void			(*setStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
302 
303     /** Pointer to a function that returns this token as a text representation that can be
304      *  printed with embedded control codes such as \n replaced with the printable sequence "\\n"
305      *  This also yields a string structure that can be used more easily than the pointer to
306      *  the input stream in certain situations.
307      */
308     pANTLR3_STRING  (*toString)		(struct ANTLR3_COMMON_TOKEN_struct * token);
309 }
310     ANTLR3_COMMON_TOKEN;
311 
312 /** \brief ANTLR3 Token factory interface to create lots of tokens efficiently
313  *  rather than creating and freeing lots of little bits of memory.
314  */
315 typedef	struct ANTLR3_TOKEN_FACTORY_struct
316 {
317     /** Pointers to the array of tokens that this factory has produced so far
318      */
319     pANTLR3_COMMON_TOKEN    *pools;
320 
321     /** Current pool tokens we are allocating from
322      */
323     ANTLR3_INT32	    thisPool;
324 
325     /** Maximum pool count we have available
326      */
327     ANTLR3_INT32            maxPool;
328 
329     /** The next token to throw out from the pool, will cause a new pool allocation
330      *  if this exceeds the available tokenCount
331      */
332     ANTLR3_UINT32	    nextToken;
333 
334     /** Trick to initialize tokens and their API quickly, we set up this token when the
335      *  factory is created, then just copy the memory it uses into the new token.
336      */
337     ANTLR3_COMMON_TOKEN	    unTruc;
338 
339     /** Pointer to an input stream that is using this token factory (may be NULL)
340      *  which will be assigned to the tokens automatically.
341      */
342     pANTLR3_INPUT_STREAM    input;
343 
344     /** Pointer to a function that returns a new token
345      */
346     pANTLR3_COMMON_TOKEN    (*newToken)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
347 
348     /** Pointer to a function that resets the factory so you can reuse the pools it
349      *  has laready allocated
350      */
351     void                    (*reset)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
352 
353     /** Pointer to a function that changes teh curent inptu stream so that
354      *  new tokens are created with reference to their originating text.
355      */
356     void		    (*setInputStream)	(struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input);
357     /** Pointer to a function the destroys the factory
358      */
359     void		    (*close)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
360 }
361     ANTLR3_TOKEN_FACTORY;
362 
363 #ifdef __cplusplus
364 }
365 #endif
366 
367 #endif
368