/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
/*
 * anjuta-token-stream.c
 * Copyright (C) Sébastien Granjoux 2009 <seb.sfo@free.fr>
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "anjuta-token-stream.h"

#include "anjuta-debug.h"

#include <glib-object.h>

#include <stdio.h>
#include <string.h>

/**
 * SECTION:anjuta-token-stream
 * @title: Anjuta token stream
 * @short_description: Anjuta token stream
 * @see_also:
 * @stability: Unstable
 * @include: libanjuta/anjuta-token-stream.h
 *
 * A #AnjutaTokenStream object reads and writes a list of tokens. It uses two
 * lists. The first list is assigned when the object is created; its tokens are
 * read as a stream of characters, discarding the separation between tokens.
 * The second list is written using the data of the first list, so no new
 * memory is allocated, in order to create a new list of tokens.
 *
 * This is used when the lexer needs several passes. At the beginning the file
 * is read as a single token containing the whole file content. The first pass
 * splits this content into tokens. Additional passes are done on some parts of
 * the token list to get a more precise splitting.
 *
 * It is important not to allocate new memory and to keep the same character
 * pointers in the additional passes, because the token list does not own the
 * memory. The address of each character is used to find the position of the
 * changed data in the file.
 *
 * Several objects can be linked together to create a stack. This is used for
 * included files or variable expansion.
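 *
 * A minimal usage sketch (error handling is omitted; the content token list,
 * the token_type value and the root variable are placeholders supplied by the
 * caller):
 *
 * |[
 * AnjutaTokenStream *stream;
 * AnjutaToken *root;
 * gchar buffer[64];
 * gint length;
 *
 * stream = anjuta_token_stream_push (NULL, NULL, content, NULL);
 * length = anjuta_token_stream_read (stream, buffer, sizeof (buffer));
 * if (length > 0)
 * {
 *     anjuta_token_stream_tokenize (stream, token_type, length);
 * }
 * root = anjuta_token_stream_get_root (stream);
 * anjuta_token_stream_pop (stream);
 * ]|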
 */

/* Types declarations
 *---------------------------------------------------------------------------*/

struct _AnjutaTokenStream
{
	/* Input stream */
	AnjutaToken *first;
	AnjutaToken *last;

	/* Read position in input stream */
	AnjutaToken *token;
	gsize pos;

	/* Write position in input stream */
	AnjutaToken *start;
	gsize begin;

	/* Output stream */
	AnjutaToken *root;

	/* Parent stream */
	AnjutaTokenStream *parent;

	/* Current directory */
	GFile *current_directory;

	/* Current file */
	GFile *current_file;

	/* Content */
	AnjutaToken *content;
};

/* Helper functions
 *---------------------------------------------------------------------------*/

/* Private functions
 *---------------------------------------------------------------------------*/

/* Public functions
 *---------------------------------------------------------------------------*/

/**
 * anjuta_token_stream_append_token:
 * @stream: a #AnjutaTokenStream object.
 * @token: a #AnjutaToken object.
 *
 * Append an already existing token to the output stream.
 */
void
anjuta_token_stream_append_token (AnjutaTokenStream *stream, AnjutaToken *token)
{
	anjuta_token_append_child (stream->root, token);
}

/**
 * anjuta_token_stream_tokenize:
 * @stream: a #AnjutaTokenStream object.
 * @type: a token type.
 * @length: the token length in characters.
 *
 * Create a token of type @type from the last @length characters previously
 * read and append it to the output stream. The characters are not copied into
 * the output stream; the new token uses the same characters.
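 *
 * For example, after reading characters with anjuta_token_stream_read(), a
 * scanner rule can emit the characters it has just matched as a single token
 * (a sketch; the type and matched_length values are placeholders provided by
 * the scanner):
 *
 * |[
 * AnjutaToken *token;
 *
 * token = anjuta_token_stream_tokenize (stream, type, matched_length);
 * ]|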
 *
 * Return value: The created token.
 */
AnjutaToken*
anjuta_token_stream_tokenize (AnjutaTokenStream *stream, gint type, gsize length)
{
    AnjutaToken *frag;
    AnjutaToken *end;

    frag = anjuta_token_new_static (type, NULL);

    /* Walk the input tokens from the current write position, cutting out the
     * requested number of characters and merging them into the new token. */
    for (end = stream->start; end != NULL;)
    {
        if ((anjuta_token_get_type (end) < ANJUTA_TOKEN_PARSED) || (anjuta_token_get_length (end) == 0))
        {
            gint toklen = anjuta_token_get_length (end);
            AnjutaToken *copy = anjuta_token_cut (end, stream->begin, length);

            if (toklen >= (length + stream->begin))
            {
                if (end == stream->start)
                {
                    /* Get whole token */
                    anjuta_token_free (frag);
                    anjuta_token_set_type (copy, type);
                    frag = copy;
                }
                else
                {
                    /* Get several tokens */
                    anjuta_token_insert_after (frag, copy);
                    anjuta_token_merge (frag, copy);
                }

                if (toklen == (length + stream->begin))
                {
                    stream->start = anjuta_token_next (end);
                    stream->begin = 0;
                }
                else
                {
                    stream->start = end;
                    stream->begin += length;
                }
                break;
            }
            else
            {
                /* This input token does not provide enough characters, take
                 * them all and continue with the next one. */
                anjuta_token_insert_after (frag, copy);
                anjuta_token_merge (frag, copy);
                length -= toklen - stream->begin;
                end = anjuta_token_next (end);
                stream->begin = 0;
            }
        }
        else
        {
            /* Skip already parsed tokens */
            end = anjuta_token_next (end);
            stream->begin = 0;
        }
    }

    anjuta_token_stream_append_token (stream, frag);

    return frag;
}

/**
 * anjuta_token_stream_read:
 * @stream: a #AnjutaTokenStream object.
 * @buffer: a character buffer to fill with token data.
 * @max_size: the size of the buffer.
 *
 * Read characters from the input stream and copy them into the buffer passed
 * as argument. At most @max_size characters are copied and the data is not
 * NUL-terminated.
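 *
 * This function is typically used as the input routine of a generated lexer,
 * for example through flex's YY_INPUT macro (a sketch; how the current stream
 * is obtained depends on the scanner):
 *
 * |[
 * #define YY_INPUT(buffer, result, max_size) \
 *     result = anjuta_token_stream_read (current_stream, buffer, max_size)
 * ]|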
 *
 * Return value: The number of characters written in the buffer.
 */
gint
anjuta_token_stream_read (AnjutaTokenStream *stream, gchar *buffer, gsize max_size)
{
    gint result = 0;

    if (stream->token != NULL)
    {
        gsize length = anjuta_token_get_length (stream->token);

        if ((anjuta_token_get_type (stream->token) >= ANJUTA_TOKEN_PARSED) || (stream->pos >= length))
        {
            /* The current token is exhausted or already parsed, move on to the
             * next token containing raw data. */
            for (;;)
            {
                /* Last token */
                if (stream->token == stream->last) return 0;

                stream->token = anjuta_token_next (stream->token);

                if ((stream->token == NULL) || (anjuta_token_get_type (stream->token) == ANJUTA_TOKEN_EOV))
                {
                    /* Last token */
                    return 0;
                }
                else if ((anjuta_token_get_length (stream->token) != 0) && (anjuta_token_get_type (stream->token) < ANJUTA_TOKEN_PARSED))
                {
                    /* Found some data */
                    stream->pos = 0;
                    length = anjuta_token_get_length (stream->token);
                    break;
                }
            }
        }

        if (stream->pos < length)
        {
            const gchar *start = anjuta_token_get_string (stream->token);

            length -= stream->pos;

            if (length > max_size) length = max_size;
            memcpy (buffer, start + stream->pos, length);
            stream->pos += length;
            result = length;
        }
    }

    return result;
}


/**
 * anjuta_token_stream_get_root:
 * @stream: a #AnjutaTokenStream object.
 *
 * Return the root token for the output stream.
 *
 * Return value: (transfer none): The output root token.
 */
AnjutaToken*
anjuta_token_stream_get_root (AnjutaTokenStream *stream)
{
	g_return_val_if_fail (stream != NULL, NULL);

	return stream->root;
}

/**
 * anjuta_token_stream_get_current_directory:
 * @stream: a #AnjutaTokenStream object.
 *
 * Return the current directory.
 *
 * Return value: (transfer none): The current directory.
 */
GFile*
anjuta_token_stream_get_current_directory (AnjutaTokenStream *stream)
{
	g_return_val_if_fail (stream != NULL, NULL);

	return stream->current_directory;
}

/**
 * anjuta_token_stream_get_current_file:
 * @stream: a #AnjutaTokenStream object.
 *
 * Return the current file.
 *
 * Return value: (transfer none): The current file.
 */
GFile*
anjuta_token_stream_get_current_file (AnjutaTokenStream *stream)
{
	g_return_val_if_fail (stream != NULL, NULL);

	return stream->current_file;
}

/* Constructor & Destructor
 *---------------------------------------------------------------------------*/

/**
 * anjuta_token_stream_push:
 * @parent: (allow-none): a parent #AnjutaTokenStream object or %NULL.
 * @root: (allow-none): a token or %NULL.
 * @content: a token list.
 * @file: (allow-none): the #GFile of the file being parsed or %NULL.
 *
 * Create a new stream from a list of tokens. If a parent stream is passed,
 * the new stream keeps a link to it, so it can be returned when the new
 * stream is destroyed.
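 *
 * A %NULL return value means @content is already being parsed by an ancestor
 * stream. A sketch of stacking a stream for an included file (current_stream,
 * include_content and include_file are placeholder variables owned by the
 * caller):
 *
 * |[
 * AnjutaTokenStream *child;
 *
 * child = anjuta_token_stream_push (current_stream, NULL, include_content, include_file);
 * if (child != NULL)
 * {
 *     current_stream = child;
 * }
 * ]|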
 *
 * Return value: The newly created stream.
 */
AnjutaTokenStream *
anjuta_token_stream_push (AnjutaTokenStream *parent, AnjutaToken *root, AnjutaToken *content, GFile *file)
{
	AnjutaTokenStream *child;
	AnjutaTokenStream *stream;

	/* Check if content is not already parsed to avoid endless parsing loop */
	for (stream = parent; stream != NULL; stream = stream->parent)
	{
		if (stream->content == content) return NULL;
	}

	/* Create new stream */
	child = g_new (AnjutaTokenStream, 1);
	child->first = content;
	child->pos = 0;
	child->begin = 0;
	child->parent = parent;
	child->content = content;
	child->token = content;
	child->start = child->token;
	child->last = content == NULL ? NULL : anjuta_token_last (content);

	child->root = root == NULL ? anjuta_token_new_static (ANJUTA_TOKEN_FILE, NULL) : root;
	if (file == NULL)
	{
		child->current_directory = parent == NULL ? NULL : (parent->current_directory == NULL ? NULL : g_object_ref (parent->current_directory));
		child->current_file = NULL;
	}
	else
	{
		child->current_directory = g_file_get_parent (file);
		child->current_file = g_object_ref (file);
	}

	return child;
}

/**
 * anjuta_token_stream_pop:
 * @stream: a #AnjutaTokenStream object.
 *
 * Destroy the stream object and return the parent stream if it exists.
 *
 * Return value: (transfer none) (allow-none): The parent stream or %NULL if
 * there is no parent.
 */
AnjutaTokenStream *
anjuta_token_stream_pop (AnjutaTokenStream *stream)
{
	AnjutaTokenStream *parent;

	g_return_val_if_fail (stream != NULL, NULL);

	if (stream->current_directory) g_object_unref (stream->current_directory);
	if (stream->current_file) g_object_unref (stream->current_file);
	parent = stream->parent;
	g_free (stream);

	return parent;
}

/**
 * anjuta_token_stream_get_parent:
 * @stream: a #AnjutaTokenStream object.
 *
 * Return the parent stream.
 *
 * Return value: (transfer none) (allow-none): The parent stream or %NULL if
 * there is no parent.
 */
AnjutaTokenStream *
anjuta_token_stream_get_parent (AnjutaTokenStream *stream)
{
	g_return_val_if_fail (stream != NULL, NULL);

	return stream->parent;
}