1 /* ========================================================================== **
2 * debugparse.c
3 *
4 * Copyright (C) 1998 by Christopher R. Hertel
5 *
6 * Email: crh@ubiqx.mn.org
7 *
8 * -------------------------------------------------------------------------- **
9 * This module is a very simple parser for Samba debug log files.
10 * -------------------------------------------------------------------------- **
11 *
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 3 of the License, or (at your option) any later version.
16 *
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
24 *
25 * -------------------------------------------------------------------------- **
26 * The important function in this module is dbg_char2token(). The rest is
27 * basically fluff. (Potentially useful fluff, but still fluff.)
28 * ========================================================================== **
29 */
30
31 #include "debugparse.h"
32
/* -------------------------------------------------------------------------- **
 * Constants...
 *
 *  DBG_BSIZE - This internal constant is used only by dbg_test().  It is the
 *              size, in chars, of the read buffer that dbg_test() passes to
 *              fgets().  I've tested the function using a DBG_BSIZE value
 *              of 2.  Small values work because the parser state is kept
 *              between reads, so a log line that does not fit in the
 *              buffer is simply processed in several pieces.
 */

#define DBG_BSIZE 128
42
43 /* -------------------------------------------------------------------------- **
44 * Functions...
45 */
46
const char *dbg_token2string( dbg_Token tok )
  /* ------------------------------------------------------------------------ **
   * Map a parser token onto a short, human-readable label.
   *
   *  Input:  tok - One of the dbg_Token values defined in debugparse.h.
   *
   *  Output: A pointer to a constant string naming the token.  The string
   *          is a literal; the caller must not modify or free it.
   *
   *  Note:   Any value that is not one of the tokens listed below yields
   *          the string "<unknown>".
   *
   * ------------------------------------------------------------------------ **
   */
  {
  /* Start with the fallback label; a matching case overrides it. */
  const char *label = "<unknown>";

  switch( tok )
    {
    case dbg_null:       label = "null";        break;
    case dbg_ignore:     label = "ignore";      break;
    case dbg_header:     label = "header";      break;
    case dbg_timestamp:  label = "time stamp";  break;
    case dbg_level:      label = "level";       break;
    case dbg_sourcefile: label = "source file"; break;
    case dbg_function:   label = "function";    break;
    case dbg_lineno:     label = "line number"; break;
    case dbg_message:    label = "message";     break;
    case dbg_eof:        label = "[EOF]";       break;
    }

  return( label );
  } /* dbg_token2string */
87
dbg_Token dbg_char2token( dbg_Token *state, int c )
  /* ------------------------------------------------------------------------ **
   * Parse input one character at a time.
   *
   *  Input:  state - A pointer to a token variable.  This is used to
   *                  maintain the parser state between calls.  For
   *                  each input stream, you should set up a separate
   *                  state variable and initialize it to dbg_null.
   *                  Pass a pointer to it into this function with each
   *                  character in the input stream.  See dbg_test()
   *                  for an example.
   *          c     - The "current" character in the input stream.  This
   *                  should be EOF or a character value in the range of
   *                  unsigned char (which is what fgetc() returns).  A
   *                  negative value other than EOF is reduced to unsigned
   *                  char before it is classified as whitespace.
   *
   *  Output: A token.
   *          The token value will change when delimiters are found,
   *          which indicate a transition between syntactical objects.
   *          Possible return values are:
   *
   *          dbg_null        - The input character was an end-of-line.
   *                            This resets the parser to its initial state
   *                            in preparation for parsing the next line.
   *          dbg_eof         - Same as dbg_null, except that the character
   *                            was an end-of-file.
   *          dbg_ignore      - Returned for whitespace and delimiters.
   *                            These lexical tokens are only of interest
   *                            to the parser.
   *          dbg_header      - Indicates the start of a header line.  The
   *                            input character was '[' and was the first on
   *                            the line.
   *          dbg_timestamp   - Indicates that the input character was part
   *                            of a header timestamp.
   *          dbg_level       - Indicates that the input character was part
   *                            of the debug-level value in the header.
   *          dbg_sourcefile  - Indicates that the input character was part
   *                            of the sourcefile name in the header.
   *          dbg_function    - Indicates that the input character was part
   *                            of the function name in the header.
   *          dbg_lineno      - Indicates that the input character was part
   *                            of the DEBUG call line number in the header.
   *          dbg_message     - Indicates that the input character was part
   *                            of the DEBUG message text.
   *
   * ------------------------------------------------------------------------ **
   */
  {
  /* The terminating characters that we see will greatly depend upon
   * how they are read.  For example, if gets() is used instead of
   * fgets(), then we will not see newline characters.  A lot also
   * depends on the calling function, which may handle terminators
   * itself.
   *
   * '\n', '\0', and EOF are all considered line terminators.  The
   * dbg_eof token is sent back if an EOF is encountered.
   *
   * Warning:  only allow the '\0' character to be sent if you are
   *           reading whole lines with a function that replaces '\n'
   *           with '\0'.  Sending '\0' at the wrong time will mess
   *           up the parsing.
   */
  switch( c )
    {
    case EOF:
      *state = dbg_null;   /* Set state to null (initial state) so */
      return( dbg_eof );   /* that we can restart with new input.  */
    case '\n':
    case '\0':
      *state = dbg_null;   /* A newline or eoln resets to the null state. */
      return( dbg_null );
    }

  /* When within the body of the message, only a line terminator
   * can cause a change of state.  We've already checked for line
   * terminators, so if the current state is dbg_message, simply
   * return that as our current token.
   */
  if( dbg_message == *state )
    return( dbg_message );

  /* If we are at the start of a new line, and the input character
   * is an opening bracket, then the line is a header line, otherwise
   * it's a message body line.
   *
   * The closing ')' of a header also puts the parser back into the
   * dbg_null state (see below), so anything that follows the line
   * number on the same line passes through here as well and, unless
   * it is a '[', is treated as message text.
   */
  if( dbg_null == *state )
    {
    if( '[' == c )
      {
      *state = dbg_timestamp;
      return( dbg_header );
      }
    *state = dbg_message;
    return( dbg_message );
    }

  /* We've taken care of terminators, text blocks and new lines.
   * The remaining possibilities are all within the header line
   * itself.
   */

  /* Within the header line, whitespace can be ignored *except*
   * within the timestamp.
   *
   * The <ctype.h> functions are only defined for EOF and for values
   * representable as unsigned char.  EOF has already been dealt with
   * above, so convert before classifying; this keeps a sign-extended
   * 8-bit character from invoking undefined behaviour.
   */
  if( isspace( (unsigned char)c ) )
    {
    /* Fudge.  The timestamp may contain space characters. */
    if( (' ' == c) && (dbg_timestamp == *state) )
      return( dbg_timestamp );
    /* Otherwise, ignore whitespace. */
    return( dbg_ignore );
    }

  /* Okay, at this point we know we're somewhere in the header.
   * Valid header *states* are: dbg_timestamp, dbg_level,
   * dbg_sourcefile, dbg_function, and dbg_lineno.
   *
   * Each delimiter only counts when it is seen in the one state it
   * terminates.  In any other state it is ordinary field content and
   * falls through to the final return.
   */
  switch( c )
    {
    case ',':
      if( dbg_timestamp == *state )
        {
        *state = dbg_level;
        return( dbg_ignore );
        }
      break;
    case ']':
      if( dbg_level == *state )
        {
        *state = dbg_sourcefile;
        return( dbg_ignore );
        }
      break;
    case ':':
      if( dbg_sourcefile == *state )
        {
        *state = dbg_function;
        return( dbg_ignore );
        }
      break;
    case '(':
      if( dbg_function == *state )
        {
        *state = dbg_lineno;
        return( dbg_ignore );
        }
      break;
    case ')':
      if( dbg_lineno == *state )
        {
        *state = dbg_null;
        return( dbg_ignore );
        }
      break;
    }

  /* If the previous block did not result in a state change, then
   * return the current state as the current token.
   */
  return( *state );
  } /* dbg_char2token */
246
247 void dbg_test( void );
dbg_test(void)248 void dbg_test( void )
249 /* ------------------------------------------------------------------------ **
250 * Simple test function.
251 *
252 * Input: none.
253 * Output: none.
254 * Notes: This function was used to test dbg_char2token(). It reads a
255 * Samba log file from stdin and prints parsing info to stdout.
256 * It also serves as a simple example.
257 *
258 * ------------------------------------------------------------------------ **
259 */
260 {
261 char bufr[DBG_BSIZE];
262 int i;
263 int linecount = 1;
264 dbg_Token old = dbg_null,
265 newtok= dbg_null,
266 state = dbg_null;
267
268 while( fgets( bufr, DBG_BSIZE, stdin ) )
269 {
270 for( i = 0; bufr[i]; i++ )
271 {
272 old = newtok;
273 newtok = dbg_char2token( &state, bufr[i] );
274 switch( newtok )
275 {
276 case dbg_header:
277 if( linecount > 1 )
278 (void)putchar( '\n' );
279 break;
280 case dbg_null:
281 linecount++;
282 break;
283 case dbg_ignore:
284 break;
285 default:
286 if( old != newtok )
287 (void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(newtok) );
288 (void)putchar( bufr[i] );
289 }
290 }
291 }
292 (void)putchar( '\n' );
293 } /* dbg_test */
294
295
296 /* -------------------------------------------------------------------------- **
297 * This simple main line can be uncommented and used to test the parser.
298 */
299
300 /*
301 * int main( void )
302 * {
303 * dbg_test();
304 * return( 0 );
305 * }
306 */
307
308 /* ========================================================================== */
309