1 /**
2 
3 	MultiMarkdown -- Lightweight markup processor to produce HTML, LaTeX, and more.
4 
5 	@file file.c
6 
7 	@brief
8 
9 
10 	@author	Fletcher T. Penney
11 	@bug
12 
13 **/
14 
15 /*
16 
17 	Copyright © 2016 - 2017 Fletcher T. Penney.
18 
19 
20 	The `MultiMarkdown 6` project is released under the MIT License..
21 
22 	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
23 
24 		https://github.com/fletcher/MultiMarkdown-4/
25 
26 	MMD 4 is released under both the MIT License and GPL.
27 
28 
29 	CuTest is released under the zlib/libpng license. See CuTest.c for the
30 	text of the license.
31 
32 	uthash library:
33 		Copyright (c) 2005-2016, Troy D. Hanson
34 
35 		Licensed under Revised BSD license
36 
37 	miniz library:
38 		Copyright 2013-2014 RAD Game Tools and Valve Software
39 		Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
40 
41 		Licensed under the MIT license
42 
43 	argtable3 library:
44 		Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann
45 		<sheitmann@users.sourceforge.net>
46 		All rights reserved.
47 
48 		Licensed under the Revised BSD License
49 
50 
51 	## The MIT License ##
52 
53 	Permission is hereby granted, free of charge, to any person obtaining
54 	a copy of this software and associated documentation files (the
55 	"Software"), to deal in the Software without restriction, including
56 	without limitation the rights to use, copy, modify, merge, publish,
57 	distribute, sublicense, and/or sell copies of the Software, and to
58 	permit persons to whom the Software is furnished to do so, subject to
59 	the following conditions:
60 
61 	The above copyright notice and this permission notice shall be
62 	included in all copies or substantial portions of the Software.
63 
64 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
65 	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
66 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
67 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
68 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
69 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
70 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
71 
72 
73 	## Revised BSD License ##
74 
75 	Redistribution and use in source and binary forms, with or without
76 	modification, are permitted provided that the following conditions are
77 	met:
78 	    * Redistributions of source code must retain the above copyright
79 	      notice, this list of conditions and the following disclaimer.
80 	    * Redistributions in binary form must reproduce the above
81 	      copyright notice, this list of conditions and the following
82 	      disclaimer in the documentation and/or other materials provided
83 	      with the distribution.
84 	    * Neither the name of the <organization> nor the
85 	      names of its contributors may be used to endorse or promote
86 	      products derived from this software without specific prior
87 	      written permission.
88 
89 	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
90 	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
91 	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
92 	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
93 	HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
94 	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
95 	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE, DATA, OR
96 	PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
97 	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
98 	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
99 	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
100 
101 
102 */
103 
104 
105 #include <stdio.h>
106 #include <stdlib.h>
107 #include <string.h>
108 
109 #include "d_string.h"
110 #include "file.h"
111 
112 #if defined(__WIN32)
113 	#include <windows.h>
114 #endif
115 
116 #define kBUFFERSIZE 4096	// How many bytes to read at a time
117 
118 
119 /// Scan file into a DString
scan_file(const char * fname)120 DString * scan_file(const char * fname) {
121 	/* Read from stdin and return a DString *
122 	 `buffer` will need to be freed elsewhere */
123 
124 	char chunk[kBUFFERSIZE];
125 	size_t bytes;
126 
127 	FILE * file;
128 
129 #if defined(__WIN32)
130 	int wchars_num = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
131 	wchar_t wstr[wchars_num];
132 	MultiByteToWideChar(CP_UTF8, 0, fname, -1, wstr, wchars_num);
133 
134 	if ((file = _wfopen(wstr, L"rb")) == NULL) {
135 		return NULL;
136 	}
137 
138 #else
139 
140 	if ((file = fopen(fname, "r")) == NULL ) {
141 		return NULL;
142 	}
143 
144 #endif
145 
146 	DString * buffer = d_string_new("");
147 
148 	while ((bytes = fread(chunk, 1, kBUFFERSIZE, file)) > 0) {
149 		d_string_append_c_array(buffer, chunk, bytes);
150 	}
151 
152 	// Strip UTF-8 BOM
153 	if (strncmp(buffer->str, "\xef\xbb\xbf", 3) == 0) {
154 		d_string_erase(buffer, 0, 3);
155 	}
156 
157 	// Strip UTF-16 BOMs
158 	if (strncmp(buffer->str, "\xef\xff", 2) == 0) {
159 		d_string_erase(buffer, 0, 2);
160 	}
161 
162 	if (strncmp(buffer->str, "\xff\xfe", 2) == 0) {
163 		d_string_erase(buffer, 0, 2);
164 	}
165 
166 	fclose(file);
167 
168 	return buffer;
169 }
170 
171 
172 /// Scan from stdin into a DString
stdin_buffer(void)173 DString * stdin_buffer(void) {
174 	/* Read from stdin and return a GString *
175 		`buffer` will need to be freed elsewhere */
176 
177 	char chunk[kBUFFERSIZE];
178 	size_t bytes;
179 
180 	DString * buffer = d_string_new("");
181 
182 	while ((bytes = fread(chunk, 1, kBUFFERSIZE, stdin)) > 0) {
183 		d_string_append_c_array(buffer, chunk, bytes);
184 	}
185 
186 	fclose(stdin);
187 
188 	return buffer;
189 }
190 
191 
/// Report whether `c` is a path separator.
///
/// `/` is always a separator.  Windows can use either `\` or `/`, so on
/// Windows builds `\` is accepted as well -- thanks to t-beckmann on github
/// for suggesting a fix for this.
bool is_separator(char c) {
	if (c == '/') {
		return true;
	}

#if defined(__WIN32)

	if (c == '\\') {
		return true;
	}

#endif

	return false;
}
201 
202 
#ifdef TEST
/// Unit test for is_separator(): `/` is accepted on every platform, `\` only
/// on Windows builds, and an ordinary character never.
void Test_is_separator(CuTest * tc) {
	const char * sample = "a/\\";

	// These two expectations are the same on every platform
	CuAssertIntEquals(tc, false, is_separator(sample[0]));
	CuAssertIntEquals(tc, true, is_separator(sample[1]));

	// The backslash only counts as a separator on Windows
#if defined(__WIN32)
	CuAssertIntEquals(tc, true, is_separator(sample[2]));
#else
	CuAssertIntEquals(tc, false, is_separator(sample[2]));
#endif
}
#endif
218 
219 
220 /// Ensure that path ends in separator
add_trailing_sep(DString * path)221 void add_trailing_sep(DString * path) {
222 #if defined(__WIN32)
223 	char sep = '\\';
224 #else
225 	char sep = '/';
226 #endif
227 
228 	// Ensure that folder ends in separator
229 	if ((path->currentStringLength == 0) || (!is_separator(path->str[path->currentStringLength - 1]))) {
230 		d_string_append_c(path, sep);
231 	}
232 }
233 
234 
/// strndup not available on all platforms
///
/// Returns a newly malloc'd, NUL-terminated copy of at most `n` characters
/// of `source`.  Returns NULL when `source` is NULL or allocation fails.
static char * my_strndup(const char * source, size_t n) {
	char * copy = NULL;

	if (source != NULL) {
		size_t count = 0;

		// Measure by hand rather than with strlen(), which is too slow
		// if strlen(source) >> n
		while ((count < n) && (source[count] != '\0')) {
			count++;
		}

		copy = malloc(count + 1);

		if (copy != NULL) {
			memcpy(copy, source, count);
			copy[count] = '\0';
		}
	}

	return copy;
}
263 
264 
/// strdup() not available on all platforms
///
/// Returns a newly malloc'd copy of `source`, or NULL when `source` is NULL
/// or allocation fails.
static char * my_strdup(const char * source) {
	char * copy = NULL;

	if (source != NULL) {
		size_t size = strlen(source) + 1;	// Room for the terminator as well

		copy = malloc(size);

		if (copy != NULL) {
			memcpy(copy, source, size);
		}
	}

	return copy;
}
279 
280 
281 /// Combine directory and base filename to create a full path */
path_from_dir_base(const char * dir,const char * base)282 char * path_from_dir_base(const char * dir, const char * base) {
283 	if (!dir && !base) {
284 		return NULL;
285 	}
286 
287 	DString * path = NULL;
288 	char * result = NULL;
289 
290 	if ((base != NULL) && (is_separator(base[0]))) {
291 		// We have an absolute path
292 		return my_strdup(base);
293 	}
294 
295 	// We have a directory and relative path
296 	path = d_string_new(dir);
297 
298 	// Ensure that folder ends in separator
299 	add_trailing_sep(path);
300 
301 	// Append filename (if present)
302 	if (base) {
303 		d_string_append(path, base);
304 	}
305 
306 	result = path->str;
307 	d_string_free(path, false);
308 
309 	return result;
310 }
311 
312 
#ifdef TEST
/// Unit test for path_from_dir_base(): relative base, absolute base, and
/// the case where both arguments are NULL.
void Test_path_from_dir_base(CuTest * tc) {
	const char * folder = "/foo";
	char * joined;

	// Relative base -- joined to the directory with the native separator
	joined = path_from_dir_base(folder, "bar");

#if defined(__WIN32)
	CuAssertStrEquals(tc, "/foo\\bar", joined);
#else
	CuAssertStrEquals(tc, "/foo/bar", joined);
#endif

	free(joined);

	// Absolute base -- the directory is ignored
	joined = path_from_dir_base(folder, "/bar");
	CuAssertStrEquals(tc, "/bar", joined);
	free(joined);

	// Nothing to combine
	joined = path_from_dir_base(NULL, NULL);
	CuAssertStrEquals(tc, NULL, joined);
}
#endif
339 
340 
/// Separate filename and directory from a full path
///
/// The split point is the last separator found at or after the second
/// character of `path`; a separator in the very first position is not
/// searched for.  `*dir` receives everything up to and including that
/// separator and `*file` receives the remainder.  When no such separator
/// exists, `*dir` is an empty string and `*file` is a copy of `path`.
///
/// @param dir	Receives a malloc'd directory string (pass NULL to skip)
/// @param file	Receives a malloc'd file name string (pass NULL to skip)
/// @param path	Path to split.  A NULL `path` stores NULL in both outputs;
///				an empty `path` yields two empty strings.
///
/// The caller must free() the strings stored in `*dir` and `*file`.
///
/// See http://stackoverflow.com/questions/1575278/function-to-split-a-filepath-into-path-and-file
void split_path_file(char ** dir, char ** file, const char * path) {
#if defined(__WIN32)
	const char sep[] = "\\/";	// Windows allows either variant
#else
	const char sep[] = "/";
#endif

	if (path == NULL) {
		// Nothing to split -- report that explicitly instead of reading
		// through a NULL pointer
		if (dir) {
			*dir = NULL;
		}

		if (file) {
			*file = NULL;
		}

		return;
	}

	const char * slash = path, * next;

	// Only search when there is at least one character; otherwise
	// `slash + 1` would point beyond the terminating NUL
	if (*path != '\0') {
		while ((next = strpbrk(slash + 1, sep))) {
			slash = next;
		}
	}

	// Step past the separator so that it stays with the directory part
	if (path != slash) {
		slash++;
	}

	if (dir) {
		*dir = my_strndup(path, (size_t)(slash - path));
	}

	if (file) {
		*file = my_strdup(slash);
	}
}
369 
370 
#ifdef TEST
/// Unit test for split_path_file() using `/` and `\` separated paths.
/// Backslashes only split the path on Windows builds.
void Test_split_path_file(CuTest * tc) {
	char * dir = NULL, * file = NULL;

	const char * path = "/foo/bar.txt";
	split_path_file(&dir, &file, path);

	CuAssertStrEquals(tc, "/foo/", dir);
	CuAssertStrEquals(tc, "bar.txt", file);

	// Both strings were allocated by split_path_file() -- release them
	// before the pointers are reused
	free(dir);
	free(file);

	path = "\\foo\\bar.txt";
	split_path_file(&dir, &file, path);

#if defined(__WIN32)
	CuAssertStrEquals(tc, "\\foo\\", dir);
	CuAssertStrEquals(tc, "bar.txt", file);
#else
	CuAssertStrEquals(tc, "", dir);
	CuAssertStrEquals(tc, "\\foo\\bar.txt", file);
#endif

	free(dir);
	free(file);
}
#endif
393 
394 
// Windows does not know realpath(), so we need a "windows port"
// Fix by @f8ttyc8t (<https://github.com/f8ttyc8t>)
#if (defined(_WIN32) || defined(__WIN32__))
// Let compiler know where to find GetFullPathName()
#include <windows.h>

/// Minimal stand-in for POSIX realpath() built on GetFullPathNameA().
///
/// NOTE(review): per the Microsoft documentation GetFullPathName() only
/// builds an absolute path name; it does not check that the file exists.
///
/// @param path				Path to convert to an absolute path
/// @param resolved_path	Buffer that receives the result; it is assumed to
///							hold at least MAX_PATH characters.  When NULL, a
///							buffer of PATH_MAX bytes is malloc'd instead and
///							must be free()d by the caller.
/// @return					The buffer holding the absolute path, or NULL if
///							allocation fails, the path could not be
///							converted, or the result does not fit the buffer.
char * realpath(const char * path, char * resolved_path) {
	DWORD  retval = 0;
	DWORD  dwBufSize = 0; // Just in case MAX_PATH differs from PATH_MAX
	char * buffer = NULL;

	if (resolved_path == NULL) {
		// realpath allocates appropriate bytes if resolved_path is null. This is to mimic realpath behavior
		dwBufSize = PATH_MAX; // Use windows PATH_MAX constant, because we are in Windows context now.
		buffer = malloc(dwBufSize);

		if (buffer == NULL) {
			return NULL; // something really weird is going on...
		}
	} else {
		dwBufSize = MAX_PATH;  // buffer has been allocated using MAX_PATH earlier
		buffer = resolved_path;
	}

	// Use the ANSI entry point explicitly since both `path` and `buffer`
	// are plain char strings
	retval = GetFullPathNameA(path, dwBufSize, buffer, NULL);

	// 0 signals failure; a value of at least the buffer size means that the
	// result did not fit and the buffer does not hold a usable path
	if ((retval == 0) || (retval >= dwBufSize)) {
		fprintf(stderr, "Failed to GetFullPathName()\n");

		if (resolved_path == NULL) {
			// Only release memory that was allocated here
			free(buffer);
		}

		return NULL;
	}

	return buffer;
}
#endif
429 
430 
/// Convert argument to absolute path
///
/// @param arg	Path to resolve with realpath()
/// @return		Newly allocated absolute path that the caller must free(), or
///				NULL when realpath() fails or memory could not be obtained.
char * absolute_path_for_argument(const char * arg) {
	char * result = NULL;
#ifdef PATH_MAX
	// If PATH_MAX defined, use it
	char absolute[PATH_MAX + 1];

	// Only copy the buffer when realpath() succeeded; on failure its
	// contents are not defined and must not be read
	if (realpath(arg, absolute) != NULL) {
		result = my_strdup(absolute);
	}

#else
	// If undefined, then we *should* be able to use a NULL pointer to allocate
	result = realpath(arg, NULL);
#endif

	return result;
}
446 
447