1 /*
2   Copyright 2020 Northern.tech AS
3 
4   This file is part of CFEngine 3 - written and maintained by Northern.tech AS.
5 
6   This program is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by the
8   Free Software Foundation; version 3.
9 
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14 
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
18 
19   To the extent this program is licensed as part of the Enterprise
20   versions of CFEngine, the applicable Commercial Open Source License
21   (COSL) may apply to this file if you as a licensee so wish it. See
22   included file COSL.txt.
23 */
24 
25 #include <matching.h>
26 
27 #include <eval_context.h>
28 #include <vars.h>
29 #include <promises.h>
30 #include <item_lib.h>
31 #include <conversion.h>
32 #include <scope.h>
33 #include <misc_lib.h>
34 #include <rlist.h>
35 #include <regex.h>                          /* CompileRegex,StringMatchFull */
36 #include <string_lib.h>
37 
38 
39 /* Pure, non-thread-safe */
FirstBackReference(pcre * rx,const char * teststring)40 static char *FirstBackReference(pcre *rx, const char *teststring)
41 {
42     static char backreference[CF_BUFSIZE]; /* GLOBAL_R, no initialization needed */
43 
44     int ovector[OVECCOUNT], i, rc;
45 
46     memset(backreference, 0, CF_BUFSIZE);
47 
48     if ((rc = pcre_exec(rx, NULL, teststring, strlen(teststring), 0, 0, ovector, OVECCOUNT)) >= 0)
49     {
50         for (i = 1; i < rc; i++)        /* make backref vars $(1),$(2) etc */
51         {
52             const char *backref_start = teststring + ovector[i * 2];
53             int backref_len = ovector[i * 2 + 1] - ovector[i * 2];
54 
55             if (backref_len < CF_MAXVARSIZE)
56             {
57                 strncpy(backreference, backref_start, backref_len);
58             }
59 
60             break;
61         }
62     }
63 
64     free(rx);
65 
66     return backreference;
67 }
68 
ExtractFirstReference(const char * regexp,const char * teststring)69 char *ExtractFirstReference(const char *regexp, const char *teststring)
70 {
71     char *backreference;
72 
73     pcre *rx;
74 
75     if ((regexp == NULL) || (teststring == NULL))
76     {
77         return "";
78     }
79 
80     rx = CompileRegex(regexp);
81     if (rx == NULL)
82     {
83         return "";
84     }
85 
86     backreference = FirstBackReference(rx, teststring);
87 
88     if (strlen(backreference) == 0)
89     {
90         strlcpy(backreference, "CF_NOMATCH", CF_MAXVARSIZE);
91     }
92 
93     return backreference;
94 }
95 
/*
 * Heuristic: does str look like it was written as a regular expression?
 *
 * Scans str once. The answer is true only if at least one of these was seen
 * outside an escape: a '*' or '+' quantifier, an opening '[', or a '|' while
 * inside parentheses. The answer is false if the text is malformed for this
 * scanner: a quantifier at the start or directly after '(', '|', a leading
 * '^' or another quantifier; a '[' inside a bracket expression; a ']' or ')'
 * without an opener; unbalanced brackets or parentheses at the end; or a
 * trailing backslash.
 */
bool IsRegex(const char *str)
{
    /* What the previously consumed character allows to follow it. */
    enum { AFTER_PLAIN, AFTER_NO_REPEAT, AFTER_BACKSLASH } prev = AFTER_NO_REPEAT;
    bool looks_like_regex = false;
    int open_square = 0;
    int open_round = 0;

    for (const char *p = str; *p != '\0'; p++)
    {
        const char c = *p;

        /* The character after a backslash is taken literally. */
        if (prev == AFTER_BACKSLASH)
        {
            prev = AFTER_PLAIN;
            continue;
        }

        if (c == '\\')
        {
            prev = AFTER_BACKSLASH;
            continue;
        }

        /* Inside [...] everything but the closing ']' is skipped, and a
         * second '[' is rejected. */
        if ((open_square != 0) && (c != ']'))
        {
            if (c == '[')
            {
                return false;
            }
            continue;
        }

        if (c == '^')
        {
            /* Only a leading anchor forbids a following quantifier. */
            prev = (p == str) ? AFTER_NO_REPEAT : AFTER_PLAIN;
        }
        else if ((c == '*') || (c == '+'))
        {
            if (prev == AFTER_NO_REPEAT)
            {
                return false;
            }
            prev = AFTER_NO_REPEAT;
            looks_like_regex = true;
        }
        else if (c == '[')
        {
            prev = AFTER_PLAIN;
            open_square++;
            looks_like_regex = true;
        }
        else if (c == ']')
        {
            if (open_square == 0)
            {
                return false;
            }
            open_square = 0;
            prev = AFTER_PLAIN;
        }
        else if (c == '(')
        {
            prev = AFTER_NO_REPEAT;
            open_round++;
        }
        else if (c == ')')
        {
            prev = AFTER_PLAIN;
            open_round--;
            if (open_round < 0)
            {
                return false;
            }
        }
        else if (c == '|')
        {
            prev = AFTER_NO_REPEAT;
            if (open_round > 0)
            {
                looks_like_regex = true;
            }
        }
        else
        {
            prev = AFTER_PLAIN;
        }
    }

    if ((open_square != 0) || (open_round != 0) || (prev == AFTER_BACKSLASH))
    {
        return false;
    }

    return looks_like_regex;
}
191 
IsPathRegex(const char * str)192 bool IsPathRegex(const char *str)
193 {
194     bool result = IsRegex(str);
195 
196     if (result)
197     {
198         int s = 0, r = 0; /* Count square and round brackets. */
199         for (const char *sp = str; *sp != '\0'; sp++)
200         {
201             switch (*sp)
202             {
203             case '[':
204                 s++;
205                 break;
206             case ']':
207                 s--;
208                 break;
209             case '(':
210                 r++;
211                 break;
212             case ')':
213                 r--;
214                 break;
215             default:
216 
217                 if (*sp == FILE_SEPARATOR && (r || s))
218                 {
219                     Log(LOG_LEVEL_ERR,
220                           "Path regular expression %s seems to use expressions containing the directory symbol %c", str,
221                           FILE_SEPARATOR);
222                     Log(LOG_LEVEL_ERR, "Use a work-around to avoid pathological behaviour");
223                     return false;
224                 }
225                 break;
226             }
227         }
228     }
229 
230     return result;
231 }
232 
233 /* Checks whether item matches a list of wildcards */
234 
IsRegexItemIn(const EvalContext * ctx,const Item * list,const char * regex)235 bool IsRegexItemIn(const EvalContext *ctx, const Item *list, const char *regex)
236 {
237     for (const Item *ptr = list; ptr != NULL; ptr = ptr->next)
238     {
239         if (ctx != NULL && ptr->classes != NULL &&
240             !IsDefinedClass(ctx, ptr->classes))
241         {
242             continue;
243         }
244 
245         /* Cheap pre-test: */
246         if (strcmp(regex, ptr->name) == 0)
247         {
248             return true;
249         }
250 
251         /* Make it commutative */
252 
253         if (StringMatchFull(regex, ptr->name) || StringMatchFull(ptr->name, regex))
254         {
255             return true;
256         }
257     }
258 
259     return false;
260 }
261 
262 /* Escapes non-alphanumeric chars, except sequence given in noEscSeq */
263 
EscapeSpecialChars(const char * str,char * strEsc,int strEscSz,char * noEscSeq,char * noEscList)264 void EscapeSpecialChars(const char *str, char *strEsc, int strEscSz, char *noEscSeq, char *noEscList)
265 {
266     int strEscPos = 0;
267 
268     if (noEscSeq == NULL)
269     {
270         noEscSeq = "";
271     }
272 
273     if (noEscList == NULL)
274     {
275         noEscList = "";
276     }
277 
278     memset(strEsc, 0, strEscSz);
279 
280     for (const char *sp = str; (*sp != '\0') && (strEscPos < strEscSz - 2); sp++)
281     {
282         if (strncmp(sp, noEscSeq, strlen(noEscSeq)) == 0)
283         {
284             if (strEscSz <= strEscPos + strlen(noEscSeq))
285             {
286                 Log(LOG_LEVEL_ERR,
287                       "EscapeSpecialChars: Output string truncated. in='%s' out='%s'",
288                       str, strEsc);
289                 break;
290             }
291 
292             strlcat(strEsc, noEscSeq, strEscSz);
293             strEscPos += strlen(noEscSeq);
294             sp += strlen(noEscSeq);
295         }
296 
297         if (strchr(noEscList,*sp) != NULL)
298         {
299             // Found current char (*sp) in noEscList, do nothing
300         }
301         else if ((*sp != '\0') && (!isalnum((int)*sp)))
302         {
303             strEsc[strEscPos++] = '\\';
304         }
305 
306         strEsc[strEscPos++] = *sp;
307     }
308 }
309 
/*
 * Returns a buffer size for the escaped form of str as produced by
 * EscapeRegexChars(): one byte per character, one more for every '.' or '*'
 * (each gains a backslash), plus 2. The extra 2 lines up with the
 * "strEscSz - 2" bound in EscapeRegexChars(), so a buffer of the returned
 * size holds the complete escaped string and its terminator.
 */
size_t EscapeRegexCharsLen(const char *str)
{
    size_t needed = 2;
    const char *p = str;

    while (*p != '\0')
    {
        if ((*p == '.') || (*p == '*'))
        {
            needed += 2;    /* backslash + the character itself */
        }
        else
        {
            needed += 1;
        }
        p++;
    }

    return needed;
}
330 
/*
 * Copies str into strEsc, inserting a backslash before every '.' and '*'.
 *
 * strEsc (strEscSz bytes) is zeroed first. Copying stops at the end of str
 * or once fewer than three bytes of strEsc remain unused, so the output is
 * silently truncated if the buffer is too small and always stays
 * NUL-terminated.
 */
void EscapeRegexChars(char *str, char *strEsc, int strEscSz)
{
    int out = 0;

    memset(strEsc, 0, strEscSz);

    for (const char *in = str; (*in != '\0') && (out < strEscSz - 2); in++)
    {
        if ((*in == '.') || (*in == '*'))
        {
            strEsc[out++] = '\\';
        }

        strEsc[out++] = *in;
    }
}
353 
354 /* Escapes characters esc in the string str of size strSz  */
355 
EscapeChar(char * str,int strSz,char esc)356 char *EscapeChar(char *str, int strSz, char esc)
357 {
358     char strDup[CF_BUFSIZE];
359     int strPos, strDupPos;
360 
361     if (sizeof(strDup) < strSz)
362     {
363         ProgrammingError("Too large string passed to EscapeCharInplace()");
364     }
365 
366     snprintf(strDup, sizeof(strDup), "%s", str);
367     memset(str, 0, strSz);
368 
369     for (strPos = 0, strDupPos = 0; strPos < strSz - 2; strPos++, strDupPos++)
370     {
371         if (strDup[strDupPos] == esc)
372         {
373             str[strPos] = '\\';
374             strPos++;
375         }
376 
377         str[strPos] = strDup[strDupPos];
378     }
379 
380     return str;
381 }
382 
/*
 * Writes regex wrapped as "^(regex)$" into out (outSz bytes), truncating as
 * snprintf() does. When regex is NULL or empty, out is zero-filled instead,
 * i.e. it becomes the empty string.
 */
void AnchorRegex(const char *regex, char *out, int outSz)
{
    if (!NULL_OR_EMPTY(regex))
    {
        snprintf(out, outSz, "^(%s)$", regex);
        return;
    }

    memset(out, 0, outSz);
}
394 
/*
 * Returns a newly created string holding regex wrapped as "^(regex)$", or
 * "^$" when regex is NULL or empty.
 *
 * NOTE(review): the result comes from xstrdup()/xasprintf(), which are
 * defined elsewhere; presumably the caller owns it and must free() it.
 */
char *AnchorRegexNew(const char *regex)
{
    char *anchored = NULL;

    if (NULL_OR_EMPTY(regex))
    {
        anchored = xstrdup("^$");
    }
    else
    {
        xasprintf(&anchored, "^(%s)$", regex);
    }

    return anchored;
}
407 
/*
 * Returns true when string contains at least one of the characters
 * \ ^ $ { } [ ] ( ) . * + ? | < > - &
 * and false otherwise, including when string is NULL or empty.
 */
bool HasRegexMetaChars(const char *string)
{
    static const char metachars[] = "\\^${}[]().*+?|<>-&";

    return (string != NULL) && (strpbrk(string, metachars) != NULL);
}
422