1 /* pmrfc3164.c
2  * This is a parser module for RFC3164(legacy syslog)-formatted messages.
3  *
4  * NOTE: read comments in module-template.h to understand how this file
5  *       works!
6  *
7  * File begun on 2009-11-04 by RGerhards
8  *
9  * Copyright 2007-2017 Rainer Gerhards and Adiscon GmbH.
10  *
11  * This file is part of rsyslog.
12  *
13  * Licensed under the Apache License, Version 2.0 (the "License");
14  * you may not use this file except in compliance with the License.
15  * You may obtain a copy of the License at
16  *
17  *       http://www.apache.org/licenses/LICENSE-2.0
18  *       -or-
19  *       see COPYING.ASL20 in the source distribution
20  *
21  * Unless required by applicable law or agreed to in writing, software
22  * distributed under the License is distributed on an "AS IS" BASIS,
23  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24  * See the License for the specific language governing permissions and
25  * limitations under the License.
26  */
27 #include "config.h"
28 #include "rsyslog.h"
29 #include <stdlib.h>
30 #include <string.h>
31 #include <assert.h>
32 #include <errno.h>
33 #include <ctype.h>
34 #include "syslogd.h"
35 #include "conf.h"
36 #include "syslogd-types.h"
37 #include "template.h"
38 #include "msg.h"
39 #include "module-template.h"
40 #include "glbl.h"
41 #include "errmsg.h"
42 #include "parser.h"
43 #include "datetime.h"
44 #include "unicode-helper.h"
/* Module declaration macros (provided by the project's module template
 * headers). NOTE(review): presumably these mark the module as a parser
 * module, set its keep/no-keep type, and give the parser name
 * "rsyslog.rfc3164" and the config name "pmrfc3164" -- confirm the exact
 * semantics in module-template.h.
 */
45 MODULE_TYPE_PARSER
46 MODULE_TYPE_NOKEEP
47 PARSER_NAME("rsyslog.rfc3164")
48 MODULE_CNFNAME("pmrfc3164")
49 
50 /* internal structures
51  */
/* Static module data plus the object interfaces this file uses.
 * glbl, parser and datetime are acquired with objUse() in modInit and
 * released with objRelease() in modExit; glbl supplies
 * GetParseHOSTNAMEandTAG() and datetime supplies the ParseTIMESTAMP3339 /
 * ParseTIMESTAMP3164 routines called by the parse function.
 */
52 DEF_PMOD_STATIC_DATA
53 DEFobjCurrIf(glbl)
54 DEFobjCurrIf(parser)
55 DEFobjCurrIf(datetime)
56 
57 
58 /* static data */
/* Cached result of glbl.GetParseHOSTNAMEandTAG(). It is refreshed in
 * createInstance() and in modInit, and tested by the parse function to
 * decide whether HOSTNAME and TAG are extracted at all.
 */
59 static int bParseHOSTNAMEandTAG;	/* cache for the equally-named global param - performance enhancement */
60 
61 
62 /* parser instance parameters */
63 static struct cnfparamdescr parserpdescr[] = {
64 	{ "detect.yearaftertimestamp", eCmdHdlrBinary, 0 },
65 	{ "permit.squarebracketsinhostname", eCmdHdlrBinary, 0 },
66 	{ "permit.slashesinhostname", eCmdHdlrBinary, 0 },
67 	{ "permit.atsignsinhostname", eCmdHdlrBinary, 0 },
68 	{ "force.tagendingbycolon", eCmdHdlrBinary, 0},
69 	{ "remove.msgfirstspace", eCmdHdlrBinary, 0},
70 };
71 static struct cnfparamblk parserpblk =
72 	{ CNFPARAMBLK_VERSION,
73 	  sizeof(parserpdescr)/sizeof(struct cnfparamdescr),
74 	  parserpdescr
75 	};
76 
/* Per-instance parser settings. createInstance() initializes every field
 * to 0 and newParserInst() overrides them from the configuration.
 */
77 struct instanceConf_s {
78 	int bDetectYearAfterTimestamp;	/* passed on to datetime.ParseTIMESTAMP3164() */
79 	int bPermitSquareBracketsInHostname;	/* accept a hostname enclosed in '[' and ']' */
80 	int bPermitSlashesInHostname;	/* accept '/' as a hostname character */
81 	int bPermitAtSignsInHostname;	/* accept '@' as a hostname character */
82 	int bForceTagEndingByColon;	/* a TAG not ended by ':' is replaced by "-" and its text left in MSG */
83 	int bRemoveMsgFirstSpace;	/* skip one leading space of the MSG part */
84 };
85 
86 
87 BEGINisCompatibleWithFeature
88 CODESTARTisCompatibleWithFeature
89 	if(eFeat == sFEATUREAutomaticSanitazion)
90 		iRet = RS_RET_OK;
91 	if(eFeat == sFEATUREAutomaticPRIParsing)
92 		iRet = RS_RET_OK;
93 ENDisCompatibleWithFeature
94 
95 
96 /* create input instance, set default parameters, and
97  * add it to the list of instances.
98  */
99 static rsRetVal
createInstance(instanceConf_t ** pinst)100 createInstance(instanceConf_t **pinst)
101 {
102 	instanceConf_t *inst;
103 	DEFiRet;
104 	CHKmalloc(inst = malloc(sizeof(instanceConf_t)));
105 	inst->bDetectYearAfterTimestamp = 0;
106 	inst->bPermitSquareBracketsInHostname = 0;
107 	inst->bPermitSlashesInHostname = 0;
108 	inst->bPermitAtSignsInHostname = 0;
109 	inst->bForceTagEndingByColon = 0;
110 	inst->bRemoveMsgFirstSpace = 0;
111 	bParseHOSTNAMEandTAG=glbl.GetParseHOSTNAMEandTAG();
112 	*pinst = inst;
113 finalize_it:
114 	RETiRet;
115 }
116 
117 BEGINnewParserInst
118 	struct cnfparamvals *pvals = NULL;
119 	int i;
120 CODESTARTnewParserInst
121 	DBGPRINTF("newParserInst (pmrfc3164)\n");
122 
123 	inst = NULL;
124 	CHKiRet(createInstance(&inst));
125 
126 	if(lst == NULL)
127 		FINALIZE;  /* just set defaults, no param block! */
128 
129 	if((pvals = nvlstGetParams(lst, &parserpblk, NULL)) == NULL) {
130 		ABORT_FINALIZE(RS_RET_MISSING_CNFPARAMS);
131 	}
132 
133 	if(Debug) {
134 		dbgprintf("parser param blk in pmrfc3164:\n");
135 		cnfparamsPrint(&parserpblk, pvals);
136 	}
137 
138 	for(i = 0 ; i < parserpblk.nParams ; ++i) {
139 		if(!pvals[i].bUsed)
140 			continue;
141 		if(!strcmp(parserpblk.descr[i].name, "detect.yearaftertimestamp")) {
142 			inst->bDetectYearAfterTimestamp = (int) pvals[i].val.d.n;
143 		} else if(!strcmp(parserpblk.descr[i].name, "permit.squarebracketsinhostname")) {
144 			inst->bPermitSquareBracketsInHostname = (int) pvals[i].val.d.n;
145 		} else if(!strcmp(parserpblk.descr[i].name, "permit.slashesinhostname")) {
146 			inst->bPermitSlashesInHostname = (int) pvals[i].val.d.n;
147 		} else if(!strcmp(parserpblk.descr[i].name, "permit.atsignsinhostname")) {
148 			inst->bPermitAtSignsInHostname = (int) pvals[i].val.d.n;
149 		} else if(!strcmp(parserpblk.descr[i].name, "force.tagendingbycolon")) {
150 			inst->bForceTagEndingByColon = (int) pvals[i].val.d.n;
151 		} else if(!strcmp(parserpblk.descr[i].name, "remove.msgfirstspace")) {
152 			inst->bRemoveMsgFirstSpace = (int) pvals[i].val.d.n;
153 		} else {
154 			dbgprintf("pmrfc3164: program error, non-handled "
155 			  "param '%s'\n", parserpblk.descr[i].name);
156 		}
157 	}
158 finalize_it:
159 CODE_STD_FINALIZERnewParserInst
160 	if(lst != NULL)
161 		cnfparamvalsDestruct(pvals, &parserpblk);
162 	if(iRet != RS_RET_OK)
163 		free(inst);
164 ENDnewParserInst
165 
166 
/* Release a parser instance. The visible body only logs the instance
 * pointer; it performs no explicit free.
 * NOTE(review): the instance memory is presumably released by the
 * BEGIN/END template macros -- confirm in module-template.h.
 */
167 BEGINfreeParserInst
168 CODESTARTfreeParserInst
169 	dbgprintf("pmrfc3164: free parser instance %p\n", pInst);
170 ENDfreeParserInst
171 
172 
173 /* parse a legay-formatted syslog message.
174  */
175 BEGINparse2
176 	uchar *p2parse;
177 	int lenMsg;
178 	int i;	/* general index for parsing */
179 	uchar bufParseTAG[CONF_TAG_MAXSIZE];
180 	uchar bufParseHOSTNAME[CONF_HOSTNAME_MAXSIZE];
181 CODESTARTparse
182 	assert(pMsg != NULL);
183 	assert(pMsg->pszRawMsg != NULL);
184 	lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI;
185 	DBGPRINTF("Message will now be parsed by the legacy syslog parser (offAfterPRI=%d, lenMsg=%d.\n",
186 		pMsg->offAfterPRI, lenMsg);
187 	/* note: offAfterPRI is already the number of PRI chars (do not add one!) */
188 	p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */
189 	setProtocolVersion(pMsg, MSG_LEGACY_PROTOCOL);
190 	if(pMsg->iFacility == (LOG_INVLD>>3)) {
191 		DBGPRINTF("facility LOG_INVLD, do not parse\n");
192 		FINALIZE;
193 	}
194 
195 	/* now check if we have a completely headerless message. This is indicated
196 	 * by spaces or tabs followed '{' or '['.
197 	 */
198 	i = 0;
199 	while(i < lenMsg && (p2parse[i] == ' ' || p2parse[i] == '\t')) {
200 		++i;
201 	}
202 	if(i < lenMsg && (p2parse[i] == '{' || p2parse[i] == '[')) {
203 		DBGPRINTF("msg seems to be headerless, treating it as such\n");
204 		FINALIZE;
205 	}
206 
207 
208 	/* Check to see if msg contains a timestamp. We start by assuming
209 	 * that the message timestamp is the time of reception (which we
210 	 * generated ourselfs and then try to actually find one inside the
211 	 * message. There we go from high-to low precison and are done
212 	 * when we find a matching one. -- rgerhards, 2008-09-16
213 	 */
214 	if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) {
215 		/* we are done - parse pointer is moved by ParseTIMESTAMP3339 */;
216 	} else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg,
217 		NO_PARSE3164_TZSTRING, pInst->bDetectYearAfterTimestamp) == RS_RET_OK) {
218 		if(pMsg->dfltTZ[0] != '\0')
219 			applyDfltTZ(&pMsg->tTIMESTAMP, pMsg->dfltTZ);
220 		/* we are done - parse pointer is moved by ParseTIMESTAMP3164 */;
221 	} else if(*p2parse == ' ' && lenMsg > 1) {
222 	/* try to see if it is slighly malformed - HP procurve seems to do that sometimes */
223 		++p2parse;	/* move over space */
224 		--lenMsg;
225 		if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg,
226 			NO_PARSE3164_TZSTRING, pInst->bDetectYearAfterTimestamp) == RS_RET_OK) {
227 			/* indeed, we got it! */
228 			/* we are done - parse pointer is moved by ParseTIMESTAMP3164 */;
229 		} else {/* parse pointer needs to be restored, as we moved it off-by-one
230 			 * for this try.
231 			 */
232 			--p2parse;
233 			++lenMsg;
234 		}
235 	}
236 
237 	if(pMsg->msgFlags & IGNDATE) {
238 		/* we need to ignore the msg data, so simply copy over reception date */
239 		memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime));
240 	}
241 
242 	/* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we
243 	 * do this only when the user has not forbidden this. I now introduce some
244 	 * code that allows a user to configure rsyslogd to treat the rest of the
245 	 * message as MSG part completely. In this case, the hostname will be the
246 	 * machine that we received the message from and the tag will be empty. This
247 	 * is meant to be an interim solution, but for now it is in the code.
248 	 */
249 	if(bParseHOSTNAMEandTAG && !(pMsg->msgFlags & INTERNAL_MSG)) {
250 		/* parse HOSTNAME - but only if this is network-received!
251 		 * rger, 2005-11-14: we still have a problem with BSD messages. These messages
252 		 * do NOT include a host name. In most cases, this leads to the TAG to be treated
253 		 * as hostname and the first word of the message as the TAG. Clearly, this is not
254 		 * of advantage ;) I think I have now found a way to handle this situation: there
255 		 * are certain characters which are frequently used in TAG (e.g. ':'), which are
256 		 * *invalid* in host names. So while parsing the hostname, I check for these characters.
257 		 * If I find them, I set a simple flag but continue. After parsing, I check the flag.
258 		 * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change
259 		 * the fields. I think this logic shall work with any type of syslog message.
260 		 * rgerhards, 2009-06-23: and I now have extended this logic to every character
261 		 * that is not a valid hostname.
262 		 * A "hostname" can validly include "[]" at the beginning and end. This sometimes
263 		 * happens with IP address (e.g. "[192.168.0.1]"). This must be turned on via
264 		 * an option as it may interfere with non-hostnames in some message formats.
265 		 * rgerhards, 2015-04-20
266 		 */
267 		if(lenMsg > 0 && pMsg->msgFlags & PARSE_HOSTNAME) {
268 			i = 0;
269 			int bHadSBracket = 0;
270 			if(pInst->bPermitSquareBracketsInHostname) {
271 				assert(i < lenMsg);
272 				if(p2parse[i] == '[') {
273 					bHadSBracket = 1;
274 					bufParseHOSTNAME[0] = '[';
275 					++i;
276 				}
277 			}
278 			while(i < lenMsg
279 			        && (isalnum(p2parse[i]) || p2parse[i] == '.'
280 					|| p2parse[i] == '_' || p2parse[i] == '-'
281 					|| (p2parse[i] == ']' && bHadSBracket)
282 					|| (p2parse[i] == '@' && pInst->bPermitAtSignsInHostname)
283 					|| (p2parse[i] == '/' && pInst->bPermitSlashesInHostname) )
284 				&& i < (CONF_HOSTNAME_MAXSIZE - 1)) {
285 				bufParseHOSTNAME[i] = p2parse[i];
286 				++i;
287 				if(p2parse[i] == ']')
288 					break;	/* must be closing bracket */
289 			}
290 
291 			if(i == lenMsg) {
292 				/* we have a message that is empty immediately after the hostname,
293 				* but the hostname thus is valid! -- rgerhards, 2010-02-22
294 				*/
295 				p2parse += i;
296 				lenMsg -= i;
297 				bufParseHOSTNAME[i] = '\0';
298 				MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i);
299 			} else {
300 				int isHostName = 0;
301 				if(i > 0) {
302 					if(bHadSBracket) {
303 						if(p2parse[i] == ']') {
304 							bufParseHOSTNAME[i] = ']';
305 							++i;
306 							isHostName = 1;
307 						}
308 					} else {
309 						if(isalnum(p2parse[i-1])) {
310 							isHostName = 1;
311 						}
312 					}
313 					if(p2parse[i] != ' ')
314 						isHostName = 0;
315 				}
316 
317 				if(isHostName) {
318 					/* we got a hostname! */
319 					p2parse += i + 1; /* "eat" it (including SP delimiter) */
320 					lenMsg -= i + 1;
321 					bufParseHOSTNAME[i] = '\0';
322 					MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i);
323 				}
324 			}
325 		}
326 
327 		/* now parse TAG - that should be present in message from all sources.
328 		 * This code is somewhat not compliant with RFC 3164. As of 3164,
329 		 * the TAG field is ended by any non-alphanumeric character. In
330 		 * practice, however, the TAG often contains dashes and other things,
331 		 * which would end the TAG. So it is not desirable. As such, we only
332 		 * accept colon and SP to be terminators. Even there is a slight difference:
333 		 * a colon is PART of the TAG, while a SP is NOT part of the tag
334 		 * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character
335 		 * size limit (from RFC3164) on the tag. This had bad effects on existing
336 		 * envrionments, as sysklogd didn't obey it either (probably another bug
337 		 * in RFC3164...). We now receive the full size, but will modify the
338 		 * outputs so that only 32 characters max are used by default.
339 		 */
340 		i = 0;
341 		while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE - 2) {
342 			bufParseTAG[i++] = *p2parse++;
343 			--lenMsg;
344 		}
345 		if(lenMsg > 0 && *p2parse == ':') {
346 			++p2parse;
347 			--lenMsg;
348 			bufParseTAG[i++] = ':';
349 		}
350 		else if (pInst->bForceTagEndingByColon) {
351 			/* Tag need to be ended by a colon or it's not a tag but the
352 			 * begin of the message
353 			 */
354 			p2parse -= ( i + 1 );
355 			lenMsg += ( i + 1 );
356 			i = 0;
357 			/* Default TAG is dash (without ':')
358 			 */
359 			bufParseTAG[i++] = '-';
360 		}
361 
362 		/* no TAG can only be detected if the message immediatly ends, in which case an empty TAG
363 		 * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23
364 		 */
365 		bufParseTAG[i] = '\0';	/* terminate string */
366 		MsgSetTAG(pMsg, bufParseTAG, i);
367 	} else {/* we enter this code area when the user has instructed rsyslog NOT
368 		 * to parse HOSTNAME and TAG - rgerhards, 2006-03-13
369 		 */
370 		if(!(pMsg->msgFlags & INTERNAL_MSG)) {
371 			DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n");
372 		}
373 	}
374 
375 finalize_it:
376 	if (pInst->bRemoveMsgFirstSpace && *p2parse == ' ') {
377 		/* Bypass first space found in MSG part */
378 	        p2parse++;
379 	        lenMsg--;
380 	}
381 	MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg);
382 ENDparse2
383 
384 
/* Module exit: releases the glbl, parser and datetime object interfaces,
 * mirroring the objUse() calls made in modInit.
 */
385 BEGINmodExit
386 CODESTARTmodExit
387 	/* release what we no longer need */
388 	objRelease(glbl, CORE_COMPONENT);
389 	objRelease(parser, CORE_COMPONENT);
390 	objRelease(datetime, CORE_COMPONENT);
391 ENDmodExit
392 
393 
/* Entry point query, built entirely from template macros.
 * NOTE(review): presumably this exposes the standard parser-module (PMOD2)
 * entry points plus isCompatibleWithFeature to the module loader --
 * confirm against module-template.h.
 */
394 BEGINqueryEtryPt
395 CODESTARTqueryEtryPt
396 CODEqueryEtryPt_STD_PMOD2_QUERIES
397 CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES
398 ENDqueryEtryPt
399 
400 
/* Module initialization: reports CURR_MOD_IF_VERSION as the provided
 * interface version, acquires the glbl, parser and datetime object
 * interfaces (any failing objUse() aborts via CHKiRet), and caches the
 * global "parse HOSTNAME and TAG" setting in bParseHOSTNAMEandTAG.
 */
401 BEGINmodInit(pmrfc3164)
402 CODESTARTmodInit
403 	*ipIFVersProvided = CURR_MOD_IF_VERSION;
404 	/* we only support the current interface specification */
405 CODEmodInit_QueryRegCFSLineHdlr
406 	CHKiRet(objUse(glbl, CORE_COMPONENT));
407 	CHKiRet(objUse(parser, CORE_COMPONENT));
408 	CHKiRet(objUse(datetime, CORE_COMPONENT));
409 
410 	DBGPRINTF("rfc3164 parser init called\n");
411 	bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG();
412 	/* cache value, is set only during rsyslogd option processing */
413 
414 
415 ENDmodInit
416 
417 /* vim:set ai:
418  */
419