1 /*
2   Copyright 2020 Northern.tech AS
3 
4   This file is part of CFEngine 3 - written and maintained by Northern.tech AS.
5 
6   This program is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by the
8   Free Software Foundation; version 3.
9 
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14 
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software
17   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
18 
19   To the extent this program is licensed as part of the Enterprise
20   versions of CFEngine, the applicable Commercial Open Source License
21   (COSL) may apply to this file if you as a licensee so wish it. See
22   included file COSL.txt.
23 */
24 
25 #include <platform.h>
26 #include <json-utils.h>
27 #include <logging.h>    // Log()
28 #include <file_lib.h>   // safe_fopen()
29 #include <string_lib.h> // TrimWhitespace()
30 #include <csv_parser.h>
31 #include <json-yaml.h>  // JsonParseYamlFile()
32 #include <alloc.h>
33 #define ENV_BYTE_LIMIT 4096
34 
/**
 * @brief Filters a string according to the env file format
 *
 * Used to parse the part after the equal sign in a line of an env file.
 * Leading and trailing whitespace should already have been removed.
 *
 * Quoting rules:
 * - A single or double quote at src[0] opens a quoted string. Copying stops
 *   at the matching unescaped closing quote, even when more text follows.
 * - The closing quote is optional (implied at the null terminator).
 * - Inside a quoted string, a quote of the other type is copied literally.
 * - In an unquoted string, any unescaped quote is a syntax error.
 *
 * Supported escape sequences: \\ \" \' \n
 * Any other escaped character is copied directly, ex: \x -> x
 * A lone backslash at the very end of the input has nothing to escape and
 * is dropped.
 *
 * Every character written consumes at least one input character, so the
 * output never outgrows the input and src may be the same pointer as dst.
 *
 * @param src Copy from pointer, can be same as dst
 * @param dst Copy to pointer, can be same as src
 * @return dst (start of the filtered, null terminated string), or NULL if
 *         an unescaped quote is found inside an unquoted string
 */
static char *filtered_copy(const char *src, char *dst)
{
    assert(src);
    assert(dst);
    char *beginning = dst;
    char opening_quote = '\0';
    // Check for opening quote, must be at src[0]
    if (*src == '\"' || *src == '\'')
    {
        opening_quote = *src;
        ++src;
    }
    // Loop until null terminator or quote matching opening_quote.
    // For unquoted input opening_quote is '\0', so both tests coincide.
    while (*src != '\0' && (*src != opening_quote))
    {
        if (opening_quote == '\0' && (*src == '\"' || *src == '\''))
        {
            // Return NULL when encountering an unmatched, unescaped quote
            // Invalid input: AB"CD
            // Correct ways:  AB\"CD or 'AB"CD'
            return NULL;
        }
        if (*src == '\\')
        {
            // Backslash escape char, look at the character it escapes
            ++src;
            if (*src == '\0')
            {
                // Trailing backslash: there is no character to escape.
                // Stop here, otherwise src would be advanced past the
                // null terminator and the loop would read out of bounds.
                break;
            }
            // Special case for the newline escape:
            if (*src == 'n')
            {
                *dst = '\n';
                ++src;
                ++dst;
                continue;
            }
            // Otherwise: copy the escaped char directly
        }
        *dst = *src;
        ++src;
        ++dst;
    }
    *dst = '\0';
    return beginning;
}
95 
96 /**
97  * @brief Splits a line of en env file into key and value string
98  *
99  * See filtered_copy for details on how value in (key-value pair) is parsed
100  * Empty lines are skipped
101  * Lines with first nonspace symbol '#' are skipped (comments)
102  * To preserve whitespace use quotes: WHITESPACE="   "
103  *
104  * @param raw_line input from CfReadLine. Will be edited!
105  * @param key_out   Where to store pointer to key in raw_line
106  * @param value_out Where to store pointer to value in raw_line
107  * @param filename_for_log Optional name for logging purposes
108  */
ParseEnvLine(char * raw_line,char ** key_out,char ** value_out,const char * filename_for_log,int linenumber)109  void ParseEnvLine(char *raw_line, char **key_out, char **value_out, const char *filename_for_log, int linenumber)
110  {
111     assert(raw_line);
112     assert(key_out);
113     assert(value_out);
114     char *key = NULL;
115     char *value = NULL;
116     *key_out = NULL;
117     *value_out = NULL;
118 
119     char *line = TrimWhitespace(raw_line);
120     if (NULL_OR_EMPTY(line))
121     {
122         return;
123     }
124 
125     const char *myname = "ParseEnvLine";
126     size_t line_length = strlen(line);
127 
128     if (line[0] == '#' || line_length == 0)
129     {
130         return;
131     }
132     char *next_equal = strchr(line, '=');
133     if (next_equal == NULL)
134     {
135         Log(LOG_LEVEL_DEBUG, "%s: Line %d in ENV file '%s' isn't empty, but was skipped because it's missing an equal sign",
136             myname, linenumber, filename_for_log);
137         return;
138     }
139     long equal_index = next_equal - line;
140     if (equal_index == 0)
141     {
142         Log(LOG_LEVEL_DEBUG, "%s: Line %d in ENV file '%s' was skipped because it's missing a key",
143             myname, linenumber, filename_for_log);
144         return;
145     }
146     *next_equal = '\0';
147     key = TrimWhitespace(line);
148     value = TrimWhitespace(next_equal + 1);
149 
150     // Copy from value to value (dest=src) and return new starting pointer
151     // new_start = filtered_copy(src,dst)
152     // Modifies the string in place, removing enclosing quotes and
153     // obeys backslash escape characters
154     value = filtered_copy(value, value);
155 
156     if (value != NULL && key != NULL)
157     {
158         // Succeeded in finding both key and value, copy to output
159         *key_out = key;
160         *value_out = value;
161     }
162     else if (value != NULL || key != NULL)
163     {
164         // Parsing failed for either key or value, print log message and skip
165         Log(LOG_LEVEL_DEBUG, "%s: Line %d in ENV file '%s' was skipped because it has invalid syntax",
166             myname, linenumber, filename_for_log);
167     }
168 }
169 
170 /**
171  * @brief Parses an env file and creates a key-value pair json element
172  *
173  * Creates JSON element where all keys and values are strings
174  *
175  * @param input_path file to read from ex: "/etc/os-release"
176  * @param size_max   Maximum size of env file (in bytes)
177  * @param json_out   Where to save pointer to new JsonElement, must destroy
178  * @return true for success, false for failure
179  */
JsonParseEnvFile(const char * input_path,size_t size_max,JsonElement ** json_out)180 bool JsonParseEnvFile(const char *input_path, size_t size_max, JsonElement **json_out)
181 {
182     assert(json_out != NULL);
183     assert(input_path != NULL);
184 
185     const char *myname = "JsonParseEnvFile";
186     size_t line_size = ENV_BYTE_LIMIT;
187     char *key, *value;
188     int linenumber = 0;
189     size_t byte_count = 0;
190     FILE *fin = safe_fopen(input_path, "r");
191     if (fin == NULL)
192     {
193         Log(LOG_LEVEL_VERBOSE, "%s cannot open the ENV file '%s' (fopen: %s)",
194             myname, input_path, GetErrorStr());
195         return false;
196     }
197 
198     JsonElement *json = JsonObjectCreate(10);
199 
200     char *raw_line = xmalloc(line_size);
201     while (CfReadLine(&raw_line, &line_size, fin) != -1)
202     {
203         ++linenumber;
204         byte_count += strlen(raw_line);
205         if (byte_count > size_max)
206         {
207             Log(LOG_LEVEL_VERBOSE, "%s: ENV file '%s' exceeded byte limit %zu at line %d",
208                 myname, input_path, size_max, linenumber);
209             Log(LOG_LEVEL_VERBOSE, "Done with ENV file, the rest will not be parsed");
210             break;
211         }
212 
213         ParseEnvLine(raw_line, &key, &value, input_path, linenumber);
214         if (key != NULL && value != NULL)
215         {
216             JsonObjectAppendString(json, key, value);
217         }
218     }
219 
220     bool reached_eof = feof(fin);
221     fclose(fin);
222     free(raw_line);
223 
224     if (!reached_eof && byte_count <= size_max)
225     {
226         Log(LOG_LEVEL_ERR,
227             "%s: failed to read ENV file '%s'. (fread: %s)",
228             myname, input_path, GetErrorStr());
229         JsonDestroy(json);
230         return false;
231     }
232 
233     *json_out = json;
234     return true;
235 }
236 
/**
 * @brief Reads a CSV file into a JSON array of arrays of strings
 *
 * Every line returned by GetCsvLineNext is split with SeqParseCsvString and
 * appended to the top level array as one array of strings. Lines which
 * cannot be split are ignored.
 *
 * Reading stops early, without failing, once the accumulated length of the
 * lines read exceeds size_max. What was parsed before that point is kept.
 *
 * @param input_path file to read from
 * @param size_max   Maximum size of CSV file (in bytes)
 * @param json_out   Where to save pointer to new JsonElement, must destroy.
 *                   Only written on success.
 * @return true for success, false if the file could not be opened or read
 */
bool JsonParseCsvFile(const char *input_path, size_t size_max, JsonElement **json_out)
{
    assert(json_out != NULL);

    const char *myname = "JsonParseCsvFile";
    char *line;
    size_t byte_count = 0;

    int linenumber = 0;

    FILE *fin = safe_fopen(input_path, "r");
    if (fin == NULL)
    {
        Log(LOG_LEVEL_VERBOSE, "%s cannot open the csv file '%s' (fopen: %s)",
            myname, input_path, GetErrorStr());
        return false;
    }

    JsonElement *const json = JsonArrayCreate(50);

    // NOTE(review): fopen() clears the end-of-file indicator, so this is not
    // expected to be true right after opening, assuming safe_fopen behaves
    // like fopen - TODO confirm. Kept as is, but the stream must be closed
    // on this path as well.
    if (feof(fin))
    {
        fclose(fin);
        *json_out = json;
        Log(LOG_LEVEL_VERBOSE,
            "%s: CSV file '%s' was empty, so nothing was parsed",
            myname, input_path);
        return true;
    }

    while ((line = GetCsvLineNext(fin)) != NULL)
    {
        ++linenumber;

        byte_count += strlen(line);
        if (byte_count > size_max)
        {
            Log(LOG_LEVEL_VERBOSE, "%s: CSV file '%s' exceeded byte limit %zu at line %d",
                myname, input_path, size_max, linenumber);
            Log(LOG_LEVEL_VERBOSE, "Done with CSV file, the rest will not be parsed");
            free(line);
            break;
        }

        Seq *list = SeqParseCsvString(line);
        free(line);

        if (list != NULL)
        {
            // One JSON array per CSV line, one string per field
            const size_t field_count = SeqLength(list);
            JsonElement *line_arr = JsonArrayCreate(field_count);

            for (size_t i = 0; i < field_count; i++)
            {
                JsonArrayAppendString(line_arr, SeqAt(list, i));
            }

            SeqDestroy(list);
            JsonArrayAppendArray(json, line_arr);
        }
    }

    bool reached_eof = feof(fin);

    // Stopping before EOF is only acceptable when the byte limit was hit
    if (!reached_eof && byte_count <= size_max)
    {
        // Log before fclose() so the reported error is not overwritten
        Log(LOG_LEVEL_ERR,
            "%s: unable to read line from CSV file '%s'. (fread: %s)",
            myname, input_path, GetErrorStr());
        JsonDestroy(json);
        fclose(fin);
        return false;
    }

    if (JsonLength(json) == 0)
    {
        Log(LOG_LEVEL_WARNING,
            "%s: CSV file '%s' is not empty, but nothing was parsed",
            myname, input_path);
        Log(LOG_LEVEL_WARNING,
            "Make sure the file contains DOS (CRLF) line endings");
    }

    fclose(fin);
    *json_out = json;
    return true;
}
322 
JsonReadDataFile(const char * log_identifier,const char * input_path,const DataFileType requested_mode,size_t size_max)323 JsonElement *JsonReadDataFile(const char *log_identifier, const char *input_path,
324                               const DataFileType requested_mode, size_t size_max)
325 {
326     const char *myname = log_identifier ? log_identifier : "JsonReadDataFile";
327     bool env_mode = (requested_mode == DATAFILETYPE_ENV);
328     bool csv_mode = (requested_mode == DATAFILETYPE_CSV);
329     bool yaml_mode = (requested_mode == DATAFILETYPE_YAML);
330 
331     if (env_mode || csv_mode)
332     {
333         JsonElement *json = NULL;
334         bool success;
335         if (env_mode)
336         {
337             success = JsonParseEnvFile(input_path, size_max, &json);
338         }
339         else
340         {
341             success = JsonParseCsvFile(input_path, size_max, &json);
342         }
343         if (success == false)
344         {
345             return NULL;
346         }
347         return json;
348     }
349 
350     JsonElement *json = NULL;
351     JsonParseError res =
352         JsonParseAnyFile(input_path, size_max, &json, yaml_mode);
353 
354     if ((res == JSON_PARSE_ERROR_NO_DATA) || (res == JSON_PARSE_ERROR_NO_SUCH_FILE))
355     {
356         Log(LOG_LEVEL_ERR,
357             "%s: data error parsing %s file '%s': %s",
358             myname, DataFileTypeToString(requested_mode),
359             input_path, JsonParseErrorToString(res));
360     }
361     else if (res != JSON_PARSE_OK)
362     {
363         Log(LOG_LEVEL_ERR,
364             "%s: error parsing %s file '%s': %s",
365             myname, DataFileTypeToString(requested_mode),
366             input_path, JsonParseErrorToString(res));
367     }
368     else if (JsonGetElementType(json) == JSON_ELEMENT_TYPE_PRIMITIVE)
369     {
370         Log(LOG_LEVEL_ERR,
371             "%s: non-container from parsing %s file '%s'",
372             myname, DataFileTypeToString(requested_mode), input_path);
373         JsonDestroy(json);
374     }
375     else
376     {
377         return json;
378     }
379 
380     return NULL;
381 }
382 
GetDataFileTypeFromString(const char * const requested_mode)383 DataFileType GetDataFileTypeFromString(const char *const requested_mode)
384 {
385     DataFileType type = DATAFILETYPE_UNKNOWN;
386     if (StringEqual_IgnoreCase(requested_mode, "yaml"))
387     {
388         type = DATAFILETYPE_YAML;
389     }
390     else if (StringEqual_IgnoreCase(requested_mode, "csv"))
391     {
392         type = DATAFILETYPE_CSV;
393     }
394     else if (StringEqual_IgnoreCase(requested_mode, "env"))
395     {
396         type = DATAFILETYPE_ENV;
397     }
398     else if (StringEqual_IgnoreCase(requested_mode, "json"))
399     {
400         type = DATAFILETYPE_JSON;
401     }
402 
403     return type;
404 }
405 
GetDataFileTypeFromSuffix(const char * filename)406 DataFileType GetDataFileTypeFromSuffix(const char *filename)
407 {
408     if (StringEndsWithCase(filename, ".csv", true))
409     {
410         return DATAFILETYPE_CSV;
411     }
412     else if (StringEndsWithCase(filename, ".yaml", true) ||
413              StringEndsWithCase(filename, ".yml", true))
414     {
415         return DATAFILETYPE_YAML;
416     }
417     else if (StringEndsWithCase(filename, ".env", true))
418     {
419         return DATAFILETYPE_ENV;
420     }
421     else // always default to JSON
422     {
423         return DATAFILETYPE_JSON;
424     }
425 }
426 
DataFileTypeToString(const DataFileType type)427 const char *DataFileTypeToString(const DataFileType type)
428 {
429     switch (type)
430     {
431     case DATAFILETYPE_CSV:
432         return "CSV";
433     case DATAFILETYPE_YAML:
434         return "YAML";
435     case DATAFILETYPE_ENV:
436         return "ENV";
437     case DATAFILETYPE_JSON:
438         return "JSON";
439     default:
440         return "unknown";
441     }
442 }
443