1 /* pmrfc3164.c
2 * This is a parser module for RFC3164(legacy syslog)-formatted messages.
3 *
4 * NOTE: read comments in module-template.h to understand how this file
5 * works!
6 *
7 * File begun on 2009-11-04 by RGerhards
8 *
9 * Copyright 2007-2017 Rainer Gerhards and Adiscon GmbH.
10 *
11 * This file is part of rsyslog.
12 *
13 * Licensed under the Apache License, Version 2.0 (the "License");
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 * -or-
19 * see COPYING.ASL20 in the source distribution
20 *
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an "AS IS" BASIS,
23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 * See the License for the specific language governing permissions and
25 * limitations under the License.
26 */
27 #include "config.h"
28 #include "rsyslog.h"
29 #include <stdlib.h>
30 #include <string.h>
31 #include <assert.h>
32 #include <errno.h>
33 #include <ctype.h>
34 #include "syslogd.h"
35 #include "conf.h"
36 #include "syslogd-types.h"
37 #include "template.h"
38 #include "msg.h"
39 #include "module-template.h"
40 #include "glbl.h"
41 #include "errmsg.h"
42 #include "parser.h"
43 #include "datetime.h"
44 #include "unicode-helper.h"
/* Module declaration macros supplied by the rsyslog module framework
 * (see module-template.h, as the file header advises). They register this
 * file as a parser module whose parser name is "rsyslog.rfc3164" and whose
 * configuration name is "pmrfc3164".
 * NOTE(review): the exact meaning of MODULE_TYPE_NOKEEP is defined in
 * module-template.h and is not visible here - confirm before relying on it.
 */
MODULE_TYPE_PARSER
MODULE_TYPE_NOKEEP
PARSER_NAME("rsyslog.rfc3164")
MODULE_CNFNAME("pmrfc3164")
49
/* internal structures
 *
 * Static module data plus the object interfaces used by this module.
 * The glbl, parser and datetime interfaces are acquired in modInit via
 * objUse() and given back in modExit via objRelease().
 */
DEF_PMOD_STATIC_DATA
DEFobjCurrIf(glbl)
DEFobjCurrIf(parser)
DEFobjCurrIf(datetime)
56
57
/* static data */
/* Cached copy of glbl.GetParseHOSTNAMEandTAG(). It is loaded in modInit and
 * refreshed whenever createInstance() runs; the parse routine reads it to
 * decide whether HOSTNAME and TAG are extracted at all.
 */
static int bParseHOSTNAMEandTAG;	/* cache for the equally-named global param - performance enhancement */
60
61
/* parser instance parameters
 *
 * Every parameter is declared with the eCmdHdlrBinary handler; newParserInst
 * copies the parsed value into the equally-purposed field of the instance
 * configuration. Effect of each option inside the parse routine:
 *  - detect.yearaftertimestamp:       handed through to
 *                                     datetime.ParseTIMESTAMP3164()
 *  - permit.squarebracketsinhostname: a hostname may be enclosed in '[' ']'
 *  - permit.slashesinhostname:        '/' is accepted as a hostname character
 *  - permit.atsignsinhostname:        '@' is accepted as a hostname character
 *  - force.tagendingbycolon:          a candidate TAG not terminated by ':'
 *                                     is dropped (TAG becomes "-") and its
 *                                     text stays part of MSG
 *  - remove.msgfirstspace:            one leading space of MSG is skipped
 * NOTE(review): the third initializer (0) is presumably a flags field of
 * struct cnfparamdescr - confirm against its declaration.
 */
static struct cnfparamdescr parserpdescr[] = {
	{ "detect.yearaftertimestamp", eCmdHdlrBinary, 0 },
	{ "permit.squarebracketsinhostname", eCmdHdlrBinary, 0 },
	{ "permit.slashesinhostname", eCmdHdlrBinary, 0 },
	{ "permit.atsignsinhostname", eCmdHdlrBinary, 0 },
	{ "force.tagendingbycolon", eCmdHdlrBinary, 0},
	{ "remove.msgfirstspace", eCmdHdlrBinary, 0},
};
/* Parameter block handed to nvlstGetParams()/cnfparamsPrint(): block
 * version, number of descriptors (computed from the array size) and the
 * descriptor table itself.
 */
static struct cnfparamblk parserpblk =
	{ CNFPARAMBLK_VERSION,
	  sizeof(parserpdescr)/sizeof(struct cnfparamdescr),
	  parserpdescr
	};
76
/* Per-instance parser configuration. All members are used as booleans
 * (0 = off, which is the default set by createInstance()); each one mirrors
 * one entry of the parserpdescr parameter table.
 */
struct instanceConf_s {
	int bDetectYearAfterTimestamp;		/* "detect.yearaftertimestamp" */
	int bPermitSquareBracketsInHostname;	/* "permit.squarebracketsinhostname" */
	int bPermitSlashesInHostname;		/* "permit.slashesinhostname" */
	int bPermitAtSignsInHostname;		/* "permit.atsignsinhostname" */
	int bForceTagEndingByColon;		/* "force.tagendingbycolon" */
	int bRemoveMsgFirstSpace;		/* "remove.msgfirstspace" */
};
85
86
87 BEGINisCompatibleWithFeature
88 CODESTARTisCompatibleWithFeature
89 if(eFeat == sFEATUREAutomaticSanitazion)
90 iRet = RS_RET_OK;
91 if(eFeat == sFEATUREAutomaticPRIParsing)
92 iRet = RS_RET_OK;
93 ENDisCompatibleWithFeature
94
95
96 /* create input instance, set default parameters, and
97 * add it to the list of instances.
98 */
99 static rsRetVal
createInstance(instanceConf_t ** pinst)100 createInstance(instanceConf_t **pinst)
101 {
102 instanceConf_t *inst;
103 DEFiRet;
104 CHKmalloc(inst = malloc(sizeof(instanceConf_t)));
105 inst->bDetectYearAfterTimestamp = 0;
106 inst->bPermitSquareBracketsInHostname = 0;
107 inst->bPermitSlashesInHostname = 0;
108 inst->bPermitAtSignsInHostname = 0;
109 inst->bForceTagEndingByColon = 0;
110 inst->bRemoveMsgFirstSpace = 0;
111 bParseHOSTNAMEandTAG=glbl.GetParseHOSTNAMEandTAG();
112 *pinst = inst;
113 finalize_it:
114 RETiRet;
115 }
116
117 BEGINnewParserInst
118 struct cnfparamvals *pvals = NULL;
119 int i;
120 CODESTARTnewParserInst
121 DBGPRINTF("newParserInst (pmrfc3164)\n");
122
123 inst = NULL;
124 CHKiRet(createInstance(&inst));
125
126 if(lst == NULL)
127 FINALIZE; /* just set defaults, no param block! */
128
129 if((pvals = nvlstGetParams(lst, &parserpblk, NULL)) == NULL) {
130 ABORT_FINALIZE(RS_RET_MISSING_CNFPARAMS);
131 }
132
133 if(Debug) {
134 dbgprintf("parser param blk in pmrfc3164:\n");
135 cnfparamsPrint(&parserpblk, pvals);
136 }
137
138 for(i = 0 ; i < parserpblk.nParams ; ++i) {
139 if(!pvals[i].bUsed)
140 continue;
141 if(!strcmp(parserpblk.descr[i].name, "detect.yearaftertimestamp")) {
142 inst->bDetectYearAfterTimestamp = (int) pvals[i].val.d.n;
143 } else if(!strcmp(parserpblk.descr[i].name, "permit.squarebracketsinhostname")) {
144 inst->bPermitSquareBracketsInHostname = (int) pvals[i].val.d.n;
145 } else if(!strcmp(parserpblk.descr[i].name, "permit.slashesinhostname")) {
146 inst->bPermitSlashesInHostname = (int) pvals[i].val.d.n;
147 } else if(!strcmp(parserpblk.descr[i].name, "permit.atsignsinhostname")) {
148 inst->bPermitAtSignsInHostname = (int) pvals[i].val.d.n;
149 } else if(!strcmp(parserpblk.descr[i].name, "force.tagendingbycolon")) {
150 inst->bForceTagEndingByColon = (int) pvals[i].val.d.n;
151 } else if(!strcmp(parserpblk.descr[i].name, "remove.msgfirstspace")) {
152 inst->bRemoveMsgFirstSpace = (int) pvals[i].val.d.n;
153 } else {
154 dbgprintf("pmrfc3164: program error, non-handled "
155 "param '%s'\n", parserpblk.descr[i].name);
156 }
157 }
158 finalize_it:
159 CODE_STD_FINALIZERnewParserInst
160 if(lst != NULL)
161 cnfparamvalsDestruct(pvals, &parserpblk);
162 if(iRet != RS_RET_OK)
163 free(inst);
164 ENDnewParserInst
165
166
/* Destructor hook for a parser instance. The visible body only emits a
 * debug line with the instance address.
 * NOTE(review): the actual release of pInst, if any, must happen inside the
 * BEGIN/CODESTART/END macros from module-template.h - confirm there.
 */
BEGINfreeParserInst
CODESTARTfreeParserInst
	dbgprintf("pmrfc3164: free parser instance %p\n", pInst);
ENDfreeParserInst
171
172
173 /* parse a legay-formatted syslog message.
174 */
175 BEGINparse2
176 uchar *p2parse;
177 int lenMsg;
178 int i; /* general index for parsing */
179 uchar bufParseTAG[CONF_TAG_MAXSIZE];
180 uchar bufParseHOSTNAME[CONF_HOSTNAME_MAXSIZE];
181 CODESTARTparse
182 assert(pMsg != NULL);
183 assert(pMsg->pszRawMsg != NULL);
184 lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI;
185 DBGPRINTF("Message will now be parsed by the legacy syslog parser (offAfterPRI=%d, lenMsg=%d.\n",
186 pMsg->offAfterPRI, lenMsg);
187 /* note: offAfterPRI is already the number of PRI chars (do not add one!) */
188 p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */
189 setProtocolVersion(pMsg, MSG_LEGACY_PROTOCOL);
190 if(pMsg->iFacility == (LOG_INVLD>>3)) {
191 DBGPRINTF("facility LOG_INVLD, do not parse\n");
192 FINALIZE;
193 }
194
195 /* now check if we have a completely headerless message. This is indicated
196 * by spaces or tabs followed '{' or '['.
197 */
198 i = 0;
199 while(i < lenMsg && (p2parse[i] == ' ' || p2parse[i] == '\t')) {
200 ++i;
201 }
202 if(i < lenMsg && (p2parse[i] == '{' || p2parse[i] == '[')) {
203 DBGPRINTF("msg seems to be headerless, treating it as such\n");
204 FINALIZE;
205 }
206
207
208 /* Check to see if msg contains a timestamp. We start by assuming
209 * that the message timestamp is the time of reception (which we
210 * generated ourselfs and then try to actually find one inside the
211 * message. There we go from high-to low precison and are done
212 * when we find a matching one. -- rgerhards, 2008-09-16
213 */
214 if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) {
215 /* we are done - parse pointer is moved by ParseTIMESTAMP3339 */;
216 } else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg,
217 NO_PARSE3164_TZSTRING, pInst->bDetectYearAfterTimestamp) == RS_RET_OK) {
218 if(pMsg->dfltTZ[0] != '\0')
219 applyDfltTZ(&pMsg->tTIMESTAMP, pMsg->dfltTZ);
220 /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */;
221 } else if(*p2parse == ' ' && lenMsg > 1) {
222 /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */
223 ++p2parse; /* move over space */
224 --lenMsg;
225 if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg,
226 NO_PARSE3164_TZSTRING, pInst->bDetectYearAfterTimestamp) == RS_RET_OK) {
227 /* indeed, we got it! */
228 /* we are done - parse pointer is moved by ParseTIMESTAMP3164 */;
229 } else {/* parse pointer needs to be restored, as we moved it off-by-one
230 * for this try.
231 */
232 --p2parse;
233 ++lenMsg;
234 }
235 }
236
237 if(pMsg->msgFlags & IGNDATE) {
238 /* we need to ignore the msg data, so simply copy over reception date */
239 memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime));
240 }
241
242 /* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we
243 * do this only when the user has not forbidden this. I now introduce some
244 * code that allows a user to configure rsyslogd to treat the rest of the
245 * message as MSG part completely. In this case, the hostname will be the
246 * machine that we received the message from and the tag will be empty. This
247 * is meant to be an interim solution, but for now it is in the code.
248 */
249 if(bParseHOSTNAMEandTAG && !(pMsg->msgFlags & INTERNAL_MSG)) {
250 /* parse HOSTNAME - but only if this is network-received!
251 * rger, 2005-11-14: we still have a problem with BSD messages. These messages
252 * do NOT include a host name. In most cases, this leads to the TAG to be treated
253 * as hostname and the first word of the message as the TAG. Clearly, this is not
254 * of advantage ;) I think I have now found a way to handle this situation: there
255 * are certain characters which are frequently used in TAG (e.g. ':'), which are
256 * *invalid* in host names. So while parsing the hostname, I check for these characters.
257 * If I find them, I set a simple flag but continue. After parsing, I check the flag.
258 * If it was set, then we most probably do not have a hostname but a TAG. Thus, I change
259 * the fields. I think this logic shall work with any type of syslog message.
260 * rgerhards, 2009-06-23: and I now have extended this logic to every character
261 * that is not a valid hostname.
262 * A "hostname" can validly include "[]" at the beginning and end. This sometimes
263 * happens with IP address (e.g. "[192.168.0.1]"). This must be turned on via
264 * an option as it may interfere with non-hostnames in some message formats.
265 * rgerhards, 2015-04-20
266 */
267 if(lenMsg > 0 && pMsg->msgFlags & PARSE_HOSTNAME) {
268 i = 0;
269 int bHadSBracket = 0;
270 if(pInst->bPermitSquareBracketsInHostname) {
271 assert(i < lenMsg);
272 if(p2parse[i] == '[') {
273 bHadSBracket = 1;
274 bufParseHOSTNAME[0] = '[';
275 ++i;
276 }
277 }
278 while(i < lenMsg
279 && (isalnum(p2parse[i]) || p2parse[i] == '.'
280 || p2parse[i] == '_' || p2parse[i] == '-'
281 || (p2parse[i] == ']' && bHadSBracket)
282 || (p2parse[i] == '@' && pInst->bPermitAtSignsInHostname)
283 || (p2parse[i] == '/' && pInst->bPermitSlashesInHostname) )
284 && i < (CONF_HOSTNAME_MAXSIZE - 1)) {
285 bufParseHOSTNAME[i] = p2parse[i];
286 ++i;
287 if(p2parse[i] == ']')
288 break; /* must be closing bracket */
289 }
290
291 if(i == lenMsg) {
292 /* we have a message that is empty immediately after the hostname,
293 * but the hostname thus is valid! -- rgerhards, 2010-02-22
294 */
295 p2parse += i;
296 lenMsg -= i;
297 bufParseHOSTNAME[i] = '\0';
298 MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i);
299 } else {
300 int isHostName = 0;
301 if(i > 0) {
302 if(bHadSBracket) {
303 if(p2parse[i] == ']') {
304 bufParseHOSTNAME[i] = ']';
305 ++i;
306 isHostName = 1;
307 }
308 } else {
309 if(isalnum(p2parse[i-1])) {
310 isHostName = 1;
311 }
312 }
313 if(p2parse[i] != ' ')
314 isHostName = 0;
315 }
316
317 if(isHostName) {
318 /* we got a hostname! */
319 p2parse += i + 1; /* "eat" it (including SP delimiter) */
320 lenMsg -= i + 1;
321 bufParseHOSTNAME[i] = '\0';
322 MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i);
323 }
324 }
325 }
326
327 /* now parse TAG - that should be present in message from all sources.
328 * This code is somewhat not compliant with RFC 3164. As of 3164,
329 * the TAG field is ended by any non-alphanumeric character. In
330 * practice, however, the TAG often contains dashes and other things,
331 * which would end the TAG. So it is not desirable. As such, we only
332 * accept colon and SP to be terminators. Even there is a slight difference:
333 * a colon is PART of the TAG, while a SP is NOT part of the tag
334 * (it is CONTENT). Starting 2008-04-04, we have removed the 32 character
335 * size limit (from RFC3164) on the tag. This had bad effects on existing
336 * envrionments, as sysklogd didn't obey it either (probably another bug
337 * in RFC3164...). We now receive the full size, but will modify the
338 * outputs so that only 32 characters max are used by default.
339 */
340 i = 0;
341 while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE - 2) {
342 bufParseTAG[i++] = *p2parse++;
343 --lenMsg;
344 }
345 if(lenMsg > 0 && *p2parse == ':') {
346 ++p2parse;
347 --lenMsg;
348 bufParseTAG[i++] = ':';
349 }
350 else if (pInst->bForceTagEndingByColon) {
351 /* Tag need to be ended by a colon or it's not a tag but the
352 * begin of the message
353 */
354 p2parse -= ( i + 1 );
355 lenMsg += ( i + 1 );
356 i = 0;
357 /* Default TAG is dash (without ':')
358 */
359 bufParseTAG[i++] = '-';
360 }
361
362 /* no TAG can only be detected if the message immediatly ends, in which case an empty TAG
363 * is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23
364 */
365 bufParseTAG[i] = '\0'; /* terminate string */
366 MsgSetTAG(pMsg, bufParseTAG, i);
367 } else {/* we enter this code area when the user has instructed rsyslog NOT
368 * to parse HOSTNAME and TAG - rgerhards, 2006-03-13
369 */
370 if(!(pMsg->msgFlags & INTERNAL_MSG)) {
371 DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n");
372 }
373 }
374
375 finalize_it:
376 if (pInst->bRemoveMsgFirstSpace && *p2parse == ' ') {
377 /* Bypass first space found in MSG part */
378 p2parse++;
379 lenMsg--;
380 }
381 MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg);
382 ENDparse2
383
384
/* Module exit hook: hands back the three object interfaces (glbl, parser,
 * datetime) that modInit acquired with objUse().
 */
BEGINmodExit
CODESTARTmodExit
	/* release what we no longer need */
	objRelease(glbl, CORE_COMPONENT);
	objRelease(parser, CORE_COMPONENT);
	objRelease(datetime, CORE_COMPONENT);
ENDmodExit
392
393
/* Entry point query function, assembled entirely from framework macros:
 * the standard parser-module (PMOD2) queries plus the
 * isCompatibleWithFeature query.
 * NOTE(review): which entry points each macro actually exports is defined
 * in module-template.h and is not visible here.
 */
BEGINqueryEtryPt
CODESTARTqueryEtryPt
CODEqueryEtryPt_STD_PMOD2_QUERIES
CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES
ENDqueryEtryPt
399
400
/* Module initialization: reports the supported module interface version,
 * acquires the glbl, parser and datetime object interfaces (any failure
 * aborts via CHKiRet) and caches the global ParseHOSTNAMEandTAG setting.
 */
BEGINmodInit(pmrfc3164)
CODESTARTmodInit
	*ipIFVersProvided = CURR_MOD_IF_VERSION;
	/* we only support the current interface specification */
CODEmodInit_QueryRegCFSLineHdlr
	CHKiRet(objUse(glbl, CORE_COMPONENT));
	CHKiRet(objUse(parser, CORE_COMPONENT));
	CHKiRet(objUse(datetime, CORE_COMPONENT));

	DBGPRINTF("rfc3164 parser init called\n");
	bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG();
	/* cache value, is set only during rsyslogd option processing */


ENDmodInit
416
417 /* vim:set ai:
418 */
419