1 #include <stdio.h>
2 #include <string.h>
3 #include <unistd.h>
4 #include <ctype.h>
5 #include <sys/stat.h>
6 #include "lib/mlrutil.h"
7 #include "lib/mlr_globals.h"
8 #include "lib/free_flags.h"
9
10 // ----------------------------------------------------------------
mlr_internal_coding_error(char * file,int line)11 void mlr_internal_coding_error(char* file, int line) {
12 fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n",
13 MLR_GLOBALS.bargv0, file, line);
14 exit(1);
15 }
16
// Aborts via mlr_internal_coding_error(file, line) when v is non-zero;
// otherwise returns without doing anything.
void mlr_internal_coding_error_if(int v, char* file, int line) {
	if (!v)
		return;
	mlr_internal_coding_error(file, line);
}
22
// Aborts via mlr_internal_coding_error(file, line) when v is zero;
// otherwise returns without doing anything.
void mlr_internal_coding_error_unless(int v, char* file, int line) {
	if (v)
		return;
	mlr_internal_coding_error(file, line);
}
28
29 // ----------------------------------------------------------------
// Multi-character-separator analog of strsep.
//
// *pstring is the scan position. Returns the token starting there, or NULL
// if *pstring is already NULL. When sep occurs in the remaining text, its
// first byte is overwritten with NUL (the input is modified in place) and
// *pstring is advanced seplen bytes past the start of the separator.
// When sep does not occur, the remainder is the final token and *pstring
// is set to NULL.
//
// seplen is supplied by the caller and is used as given.
char* mlr_strmsep(char **pstring, const char *sep, int seplen) {
	char* token = *pstring;
	if (token == NULL)
		return NULL;

	char* hit = strstr(token, sep);
	if (hit != NULL) {
		*hit = 0;
		*pstring = hit + seplen;
	} else {
		*pstring = NULL;
	}
	return token;
}
45
46 // ----------------------------------------------------------------
// Binary search for an insertion index.
//
// The early-outs below treat array[0] as the largest element and
// array[size-1] as the smallest, i.e. the array is taken to be sorted in
// descending order.
//
// Returns:
// * 0 if the array is empty or value is greater than the first element;
// * size if value is less than the last element;
// * the index of an element equal to value if the search lands on one;
// * otherwise the index at which the search interval stops shrinking.
int mlr_bsearch_double_for_insert(double* array, int size, double value) {
	if (size == 0)
		return 0;

	int left = 0;
	int right = size - 1;

	if (value > array[0])
		return 0;
	if (value < array[right])
		return size;

	int probe = (left + right) / 2;
	while (left < right) {
		double pivot = array[probe];
		if (value == pivot)
			return probe;

		// Descending order: larger values live toward the left.
		if (value > pivot)
			right = probe;
		else
			left = probe;

		int next_probe = (left + right) / 2;
		if (next_probe == probe) {
			// The interval can't shrink further; decide among its endpoints.
			if (value >= array[left])
				return left;
			if (value >= array[right])
				return right;
			return right + 1;
		}
		probe = next_probe;
	}

	return left;
}
86
87 // ----------------------------------------------------------------
// malloc wrapper which never returns NULL: if malloc fails, a message is
// written to stderr and the process exits with status 1. When compiled with
// MLR_MALLOC_TRACE defined, each successful allocation is logged to stderr.
void* mlr_malloc_or_die(size_t size) {
	void* block = malloc(size);
	if (block != NULL) {
#ifdef MLR_MALLOC_TRACE
		fprintf(stderr, "MALLOC size=%llu,p=%p\n", (unsigned long long)size, block);
#endif
		return block;
	}
	fprintf(stderr, "malloc(%llu) failed.\n", (unsigned long long)size);
	exit(1);
}
99
100 // ----------------------------------------------------------------
// realloc wrapper which never returns NULL: if realloc fails, a message is
// written to stderr and the process exits with status 1. When compiled with
// MLR_MALLOC_TRACE defined, each successful reallocation is logged to stderr.
void* mlr_realloc_or_die(void *optr, size_t size) {
	void* resized = realloc(optr, size);
	if (resized != NULL) {
#ifdef MLR_MALLOC_TRACE
		fprintf(stderr, "REALLOC size=%llu,p=%p\n", (unsigned long long)size, resized);
#endif
		return resized;
	}
	fprintf(stderr, "realloc(%llu) failed.\n", (unsigned long long)size);
	exit(1);
}
112
113 // ----------------------------------------------------------------
// Returns a newly allocated copy of s1 wrapped in double quotes. The
// allocation is strlen(s1) + 3 bytes: the two quote characters plus the
// terminating NUL. No escaping is applied to quotes already inside s1.
// The caller should free the return value.
char * mlr_strdup_quoted_or_die(const char *s1) {
	int len = strlen(s1);
	char* quoted = mlr_malloc_or_die(len+3);
	char* cursor = quoted;

	*cursor++ = '"';
	memcpy(cursor, s1, len);
	cursor += len;
	*cursor++ = '"';
	*cursor = 0;

	return quoted;
}
123
124 // ----------------------------------------------------------------
125 // The caller should free the return value from each of these.
126
// Formats value using the printf-style format fmt (which is expected to
// consume exactly one double argument) into a newly allocated string.
// A first sizing pass with a NULL buffer determines the length needed.
char* mlr_alloc_string_from_double(double value, char* fmt) {
	int length = snprintf(NULL, 0, fmt, value);
	char* buffer = mlr_malloc_or_die(length + 1);
	snprintf(buffer, length + 1, fmt, value);
	return buffer;
}
133
// Returns a newly allocated decimal rendering ("%llu") of value.
// A first sizing pass with a NULL buffer determines the length needed.
char* mlr_alloc_string_from_ull(unsigned long long value) {
	int length = snprintf(NULL, 0, "%llu", value);
	char* buffer = mlr_malloc_or_die(length + 1);
	snprintf(buffer, length + 1, "%llu", value);
	return buffer;
}
140
// Returns a newly allocated decimal rendering ("%lld") of value.
// A first sizing pass with a NULL buffer determines the length needed.
char* mlr_alloc_string_from_ll(long long value) {
	int length = snprintf(NULL, 0, "%lld", value);
	char* buffer = mlr_malloc_or_die(length + 1);
	snprintf(buffer, length + 1, "%lld", value);
	return buffer;
}
147
// Formats value using the printf-style format fmt (which is expected to
// consume exactly one long long argument) into a newly allocated string.
// A first sizing pass with a NULL buffer determines the length needed.
char* mlr_alloc_string_from_ll_and_format(long long value, char* fmt) {
	int length = snprintf(NULL, 0, fmt, value);
	char* buffer = mlr_malloc_or_die(length + 1);
	snprintf(buffer, length + 1, fmt, value);
	return buffer;
}
154
// Returns a newly allocated decimal rendering ("%d") of value.
// A first sizing pass with a NULL buffer determines the length needed.
char* mlr_alloc_string_from_int(int value) {
	int length = snprintf(NULL, 0, "%d", value);
	char* buffer = mlr_malloc_or_die(length + 1);
	snprintf(buffer, length + 1, "%d", value);
	return buffer;
}
161
// Returns a newly allocated, NUL-terminated copy of the num_bytes bytes
// beginning at start. The source range need not itself be NUL-terminated.
char* mlr_alloc_string_from_char_range(char* start, int num_bytes) {
	char* copy = mlr_malloc_or_die(num_bytes+1);
	copy[num_bytes] = 0;
	memcpy(copy, start, num_bytes);
	return copy;
}
168
// Returns a newly allocated "0x"-prefixed lowercase hexadecimal rendering of
// value. Negative values are rendered via their unsigned long long
// conversion (e.g. -1 formats as 0xffffffffffffffff where long long is
// 64 bits).
char* mlr_alloc_hexfmt_from_ll(long long value) {
	// %llx requires an unsigned long long argument; convert once so that the
	// sizing pass and the formatting pass are given the same, correctly-typed
	// argument.
	unsigned long long bits = (unsigned long long)value;
	int n = snprintf(NULL, 0, "0x%llx", bits);
	char* string = mlr_malloc_or_die(n+1);
	sprintf(string, "0x%llx", bits);
	return string;
}
175
// Formats old_value using the printf-style format fmt (which is expected to
// consume exactly one string argument) into a newly allocated string.
// A first sizing pass with a NULL buffer determines the length needed.
char* mlr_alloc_string_from_string_and_format(char* old_value, char* fmt) {
	int length = snprintf(NULL, 0, fmt, old_value);
	char* formatted = mlr_malloc_or_die(length + 1);
	snprintf(formatted, length + 1, fmt, old_value);
	return formatted;
}
182
mlr_double_from_string_or_die(char * string)183 double mlr_double_from_string_or_die(char* string) {
184 double d;
185 if (!mlr_try_float_from_string(string, &d)) {
186 fprintf(stderr, "%s: couldn't parse \"%s\" as number.\n",
187 MLR_GLOBALS.bargv0, string);
188 exit(1);
189 }
190 return d;
191 }
192
// Attempts to parse the whole of string as a floating-point number.
// E.g. "300" is a number; "300ms" is not.
//
// Returns 1 on success, with the value stored in *pval. Returns 0 if nothing
// could be scanned, or if the scan stopped before the end of the string; in
// the latter case *pval has nonetheless been written by sscanf.
int mlr_try_float_from_string(char* string, double* pval) {
	int consumed;
	if (sscanf(string, "%lf%n", pval, &consumed) != 1)
		return 0;
	// Accept only if the scan consumed everything up to the terminator.
	return string[consumed] == 0;
}
203
// Parses the entire string as an integer and returns it. If the string is
// not accepted by mlr_try_int_from_string, an error is written to stderr and
// the process exits with status 1.
long long mlr_int_from_string_or_die(char* string) {
	long long parsed;
	if (mlr_try_int_from_string(string, &parsed))
		return parsed;

	fprintf(stderr, "Couldn't parse \"%s\" as number.\n", string);
	exit(1);
}
212
// Attempts to parse the whole of string as an integer.
// E.g. "300" is a number; "300ms" is not.
//
// Input beginning with "0x" or "0X" is scanned as unsigned hexadecimal and
// then converted to long long, so that hex input with the high bit set maps
// to the corresponding negative value. All other input is scanned with %lli.
//
// Returns 1 on success, with the value stored in *pval. Returns 0 if nothing
// could be scanned, or if the scan stopped before the end of the string; in
// the latter case *pval has nonetheless been written.
int mlr_try_int_from_string(char* string, long long* pval) {
	int num_bytes_scanned = 0;

	// sscanf with %li / %lli doesn't scan correctly when the high bit is set
	// on hex input; it just returns max signed. So we need to special-case hex
	// input.
	if (string[0] == '0' && (string[1] == 'x' || string[1] == 'X')) {
		// %llx requires a pointer to unsigned long long, so scan into a
		// correctly-typed temporary rather than directly through pval.
		unsigned long long bits = 0;
		if (sscanf(string, "%llx%n", &bits, &num_bytes_scanned) != 1)
			return 0;
		*pval = (long long)bits;
	} else {
		if (sscanf(string, "%lli%n", pval, &num_bytes_scanned) != 1)
			return 0;
	}

	// Scanned to end of string?
	return string[num_bytes_scanned] == 0;
}
230
231 // ----------------------------------------------------------------
232 static char* low_int_to_string_data[] = {
233 "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
234 "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
235 "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
236 "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
237 "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
238 "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
239 "60", "61", "62", "63", "64", "65", "66", "67", "68", "69",
240 "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
241 "80", "81", "82", "83", "84", "85", "86", "87", "88", "89",
242 "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100"
243 };
244
low_int_to_string(int idx,char * pfree_flags)245 char* low_int_to_string(int idx, char* pfree_flags) {
246 if ((0 <= idx) && (idx <= 100)) {
247 *pfree_flags = 0;
248 return low_int_to_string_data[idx];
249 } else {
250 char buf[32];
251 sprintf(buf, "%d", idx);
252 *pfree_flags = FREE_ENTRY_KEY;
253 return mlr_strdup_or_die(buf);
254 }
255 }
256
257 // ----------------------------------------------------------------
// Returns a newly allocated concatenation of s1 and s2.
// The caller should free the return value.
char* mlr_paste_2_strings(char* s1, char* s2) {
	int n1 = strlen(s1);
	int n2 = strlen(s2);
	char* joined = mlr_malloc_or_die(n1+n2+1);
	char* cursor = joined;

	memcpy(cursor, s1, n1);
	cursor += n1;
	memcpy(cursor, s2, n2);
	cursor += n2;
	*cursor = 0;

	return joined;
}
266
// Returns a newly allocated concatenation of s1, s2, and s3, in that order.
// The caller should free the return value.
char* mlr_paste_3_strings(char* s1, char* s2, char* s3) {
	int n1 = strlen(s1);
	int n2 = strlen(s2);
	int n3 = strlen(s3);
	char* joined = mlr_malloc_or_die(n1+n2+n3+1);
	char* cursor = joined;

	memcpy(cursor, s1, n1);
	cursor += n1;
	memcpy(cursor, s2, n2);
	cursor += n2;
	memcpy(cursor, s3, n3);
	cursor += n3;
	*cursor = 0;

	return joined;
}
277
// Returns a newly allocated concatenation of s1 through s4, in that order.
// The caller should free the return value.
char* mlr_paste_4_strings(char* s1, char* s2, char* s3, char* s4) {
	int n1 = strlen(s1);
	int n2 = strlen(s2);
	int n3 = strlen(s3);
	int n4 = strlen(s4);
	char* joined = mlr_malloc_or_die(n1+n2+n3+n4+1);
	char* cursor = joined;

	memcpy(cursor, s1, n1);
	cursor += n1;
	memcpy(cursor, s2, n2);
	cursor += n2;
	memcpy(cursor, s3, n3);
	cursor += n3;
	memcpy(cursor, s4, n4);
	cursor += n4;
	*cursor = 0;

	return joined;
}
290
// Returns a newly allocated concatenation of s1 through s5, in that order.
// The caller should free the return value.
char* mlr_paste_5_strings(char* s1, char* s2, char* s3, char* s4, char* s5) {
	int n1 = strlen(s1);
	int n2 = strlen(s2);
	int n3 = strlen(s3);
	int n4 = strlen(s4);
	int n5 = strlen(s5);
	char* joined = mlr_malloc_or_die(n1+n2+n3+n4+n5+1);
	char* cursor = joined;

	memcpy(cursor, s1, n1);
	cursor += n1;
	memcpy(cursor, s2, n2);
	cursor += n2;
	memcpy(cursor, s3, n3);
	cursor += n3;
	memcpy(cursor, s4, n4);
	cursor += n4;
	memcpy(cursor, s5, n5);
	cursor += n5;
	*cursor = 0;

	return joined;
}
305
306 // ----------------------------------------------------------------
// This is djb2: starting from 5381, each byte updates the hash as
// hash * 33 + byte. The accumulated unsigned long is truncated to int on
// return. Bytes are read through plain char, so their signedness follows
// the platform's char type.
int mlr_string_hash_func(char *str) {
	unsigned long hash = 5381;

	for (char* p = str; *p != 0; p++) {
		int c = *p;
		hash = hash * 33 + c;
	}

	return (int)hash;
}
317
// djb2 over the bytes of str1 followed by the bytes of str2, with no
// separator between them: the result equals the single-string hash of their
// concatenation.
int mlr_string_pair_hash_func(char* str1, char* str2) {
	unsigned long hash = 5381;
	char* parts[2] = { str1, str2 };

	for (int i = 0; i < 2; i++) {
		for (char* p = parts[i]; *p != 0; p++) {
			int c = *p;
			hash = hash * 33 + c;
		}
	}

	return (int)hash;
}
329
330 // ----------------------------------------------------------------
// Counts the characters in a UTF-8 string by counting every byte which is
// not a continuation byte:
// * 0x00-0x7f (MSB is 0) are ASCII and add to the length.
// * 0x80-0xbf (MSBs are 10) are continuation bytes and don't add to the length.
// * 0xc0-0xfe (MSBs are 11) are leading bytes and do add to the length.
// (0xff, incidentally, is never a valid UTF-8 byte.)
// The input is not validated as well-formed UTF-8.
int strlen_for_utf8_display(char* str) {
	int count = 0;
	while (*str) {
		int is_continuation = ((*str & 0xc0) == 0x80);
		if (!is_continuation)
			count++;
		str++;
	}
	return count;
}
343
344 // ----------------------------------------------------------------
345 // These are for low-volume, call-at-startup applications. If they get used
346 // record-by-record they should be replaced with pointer-walking logic which
347 // avoids the unnecessary expense of calling strlen.
348
// Returns 1 if the first strlen(prefix) bytes of string match prefix, else 0.
// An empty prefix matches every string.
int string_starts_with(char* string, char* prefix) {
	size_t prefixlen = strlen(prefix);
	return strncmp(string, prefix, prefixlen) == 0;
}
353
// Returns 1 if string ends with suffix, else 0. An empty suffix matches
// every string.
//
// If pstringlen is non-NULL, the length of string is stored through it
// regardless of the outcome, which saves the caller a second strlen.
int string_ends_with(char* string, char* suffix, int* pstringlen) {
	int stringlen = strlen(string);
	int suffixlen = strlen(suffix);

	if (pstringlen != NULL)
		*pstringlen = stringlen;

	// A suffix longer than the string can't match; otherwise compare the tail.
	return stringlen >= suffixlen
		&& strcmp(string + (stringlen - suffixlen), suffix) == 0;
}
363
364 // ----------------------------------------------------------------
// Returns the larger of a and b.
int mlr_imax2(int a, int b) {
	return (a >= b) ? a : b;
}
371
372 // ----------------------------------------------------------------
// Returns the largest of a, b, and c.
int mlr_imax3(int a, int b, int c) {
	int largest = (b >= c) ? b : c;
	if (a >= largest)
		largest = a;
	return largest;
}
376
377 // ----------------------------------------------------------------
// Returns the smallest power of two strictly greater than n, for
// non-negative n small enough that the result fits in an int. Note that an
// input which is already a power of two is doubled (e.g. 8 yields 16).
//
// Works by smearing the highest set bit into every lower bit position and
// then adding one.
int power_of_two_above(int n) {
	for (int shift = 1; shift <= 16; shift <<= 1) {
		n |= (n >> shift);
	}
	return n + 1;
}
386
387 // ----------------------------------------------------------------
// Tests whether input begins with a backslash followed by exactly three
// octal digits (e.g. "\101"). On a match, stores the numeric value of the
// three digits in *pcode and returns non-zero; otherwise returns 0 and
// leaves *pcode untouched.
//
// The checks run left to right and stop at the first failure. A NUL byte
// fails whichever check reaches it, so the function never reads past the
// end of the string and has no need to compute its length. This keeps each
// call constant-time, which matters because the caller invokes it once per
// input character.
static int is_backslash_octal(char* input, int* pcode) {
	int matched = input[0] == '\\'
		&& input[1] >= '0' && input[1] <= '7'
		&& input[2] >= '0' && input[2] <= '7'
		&& input[3] >= '0' && input[3] <= '7';

	if (matched) {
		*pcode = (input[1] - '0') * 64
			+ (input[2] - '0') * 8
			+ (input[3] - '0');
	}
	return matched;
}
404
// Maps a character already known to be a hexadecimal digit to its value
// in the range 0..15.
static int backslash_hex_digit_value(int c) {
	if (c >= '0' && c <= '9')
		return c - '0';
	return tolower(c) - 'a' + 10;
}

// Tests whether input begins with a backslash, a lowercase 'x', and exactly
// two hexadecimal digits of either case (e.g. "\x4f"). On a match, stores
// the numeric value of the two digits in *pcode and returns non-zero;
// otherwise returns 0 and leaves *pcode untouched.
//
// The checks run left to right and stop at the first failure. A NUL byte
// fails whichever check reaches it, so the function never reads past the
// end of the string and has no need to compute its length. Bytes are
// converted to unsigned char before being handed to isxdigit, since the
// <ctype.h> functions are undefined for negative arguments other than EOF.
static int is_backslash_hex(char* input, int* pcode) {
	int matched = input[0] == '\\'
		&& input[1] == 'x'
		&& isxdigit((unsigned char)input[2])
		&& isxdigit((unsigned char)input[3]);

	if (matched) {
		*pcode = backslash_hex_digit_value((unsigned char)input[2]) * 16
			+ backslash_hex_digit_value((unsigned char)input[3]);
	}
	return matched;
}
428
// Returns a newly allocated copy of input in which C-style escape sequences
// have been replaced by the bytes they denote:
// * the two-character escapes \a \b \f \n \r \t \v \\ \' \" \?
// * backslash plus three octal digits
// * backslash-x plus two hex digits
// Anything else, including an unrecognized backslash sequence, is copied
// through unchanged.
//
// The copy is made even if there's nothing to expand, so the caller can
// unconditionally free what we return. Every replacement is shorter than
// the sequence it replaces, so a buffer the size of the input suffices.
//
// See https://en.wikipedia.org/wiki/Escape_sequences_in_C
char* mlr_alloc_unbackslash(char* input) {
	// Checked in this order, ahead of the octal and hex forms.
	static const struct {
		char* sequence;
		char  replacement;
	} simple_escapes[] = {
		{ "\\a",  '\a' },
		{ "\\b",  '\b' },
		{ "\\f",  '\f' },
		{ "\\n",  '\n' },
		{ "\\r",  '\r' },
		{ "\\t",  '\t' },
		{ "\\v",  '\v' },
		{ "\\\\", '\\' },
		{ "\\'",  '\'' },
		{ "\\\"", '"'  },
		{ "\\?",  '?'  },
	};
	const int num_simple_escapes = sizeof(simple_escapes) / sizeof(simple_escapes[0]);

	char* output = mlr_strdup_or_die(input);
	char* src = input;
	char* dst = output;
	int code = 0;

	while (*src) {
		int replaced = 0;
		for (int i = 0; i < num_simple_escapes; i++) {
			if (streqn(src, simple_escapes[i].sequence, 2)) {
				*(dst++) = simple_escapes[i].replacement;
				src += 2;
				replaced = 1;
				break;
			}
		}
		if (replaced)
			continue;

		if (is_backslash_octal(src, &code) || is_backslash_hex(src, &code)) {
			// Both numeric forms occupy four input bytes.
			*(dst++) = code;
			src += 4;
		} else {
			*(dst++) = *(src++);
		}
	}
	*dst = 0;

	return output;
}
487
// Destructively removes one final line ending -- LF, CR, or CR/LF -- from
// the string by overwriting it with NUL. At most one line ending is removed;
// for CR/LF the string is cut at the CR. A NULL or empty string is left
// alone.
void mlr_rstrip(char* s) {
	if (s == NULL)
		return;

	int len = strlen(s);
	if (len == 0)
		return;

	char* tail = &s[len - 1];
	switch (*tail) {
	case '\n':
		if (len >= 2 && tail[-1] == '\r')
			tail[-1] = 0;
		else
			*tail = 0;
		break;
	case '\r':
		*tail = 0;
		break;
	default:
		break;
	}
}
506
// Returns a newly allocated copy of input in which every backslash is
// doubled, except a backslash immediately followed by '.', which is copied
// as-is.
//
// The copy is made even if there's nothing to double, so the caller can
// unconditionally free what we return.
char* mlr_alloc_double_backslash(char* input) {
	// First pass: measure the input and count the backslashes to be doubled.
	int input_length = 0;
	int num_extra = 0;
	for (char* src = input; *src; src++) {
		input_length++;
		if (src[0] == '\\' && src[1] != '.')
			num_extra++;
	}

	// Second pass: copy, emitting an extra backslash where needed.
	char* output = mlr_malloc_or_die(input_length + num_extra + 1);
	char* dst = output;
	for (char* src = input; *src; src++) {
		if (src[0] == '\\' && src[1] != '.')
			*(dst++) = '\\';
		*(dst++) = *src;
	}
	*dst = 0;

	return output;
}
536
537 // ----------------------------------------------------------------
// Returns the size of the named file as reported by stat(), or -1 if the
// stat() call fails. No diagnostic is printed on failure.
ssize_t get_file_size(char* filename) {
	struct stat info;
	if (stat(filename, &info) >= 0)
		return info.st_size;
	return (ssize_t)(-1);
}
547
548 // ----------------------------------------------------------------
read_file_into_memory(char * filename,size_t * psize)549 char* read_file_into_memory(char* filename, size_t* psize) {
550 struct stat statbuf;
551 if (stat(filename, &statbuf) < 0) {
552 perror("stat");
553 fprintf(stderr, "%s: could not stat \"%s\"\n", MLR_GLOBALS.bargv0, filename);
554 exit(1);
555 }
556 char* buffer = mlr_malloc_or_die(statbuf.st_size + 1);
557
558 FILE* fp = fopen(filename, "r");
559 if (fp == NULL) {
560 perror("fopen");
561 fprintf(stderr, "%s: could not fopen \"%s\"\n", MLR_GLOBALS.bargv0, filename);
562 free(buffer);
563 return NULL;
564 }
565
566 size_t rc = fread(buffer, 1, statbuf.st_size, fp);
567 if (rc != statbuf.st_size) {
568 fprintf(stderr, "Unable to read content of %s\n", filename);
569 perror("fread");
570 fprintf(stderr, "%s: could not fread \"%s\"\n", MLR_GLOBALS.bargv0, filename);
571 fclose(fp);
572 free(buffer);
573 return NULL;
574 }
575 fclose(fp);
576 buffer[statbuf.st_size] = 0;
577 if (psize)
578 *psize = statbuf.st_size;
579 return buffer;
580 }
581
582 // ----------------------------------------------------------------
583 #define INITIAL_ALLOC_SIZE 16384
584 #define BLOCK_SIZE 16384
read_fp_into_memory(FILE * fp,size_t * psize)585 char* read_fp_into_memory(FILE* fp, size_t* psize) {
586 size_t file_size = 0;
587 size_t alloc_size = INITIAL_ALLOC_SIZE;
588 char* buffer = mlr_malloc_or_die(alloc_size);
589
590 while (TRUE) {
591 if (file_size + BLOCK_SIZE > alloc_size) {
592 alloc_size *= 2;
593 buffer = mlr_realloc_or_die(buffer, alloc_size);
594 }
595 size_t block_num_bytes_read = fread(&buffer[file_size], 1, BLOCK_SIZE, fp);
596 if (block_num_bytes_read == 0) {
597 if (feof(fp))
598 break;
599 perror("fread");
600 fprintf(stderr, "%s: stdio/popen fread failed\n", MLR_GLOBALS.bargv0);
601 free(buffer);
602 *psize = 0;
603 return NULL;
604 }
605 file_size += block_num_bytes_read;
606 }
607
608 *psize = file_size;
609 return buffer;
610 }
611
612 // ----------------------------------------------------------------
// Returns a newly allocated string consisting of filename, a '.', and six
// characters drawn from [a-zA-Z0-9] using get_mtrand_int32(). Only the name
// is produced; no file is created or checked for existence.
// The caller should free the return value.
char* alloc_suffixed_temp_file_name(char* filename) {
	static char bag[] = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789";
	static const int bag_length = sizeof(bag) - 1;
	const int suffix_length = 6;

	// Room for the name, the dot, the suffix, and the terminating NUL.
	size_t filename_length = strlen(filename);
	char* output = mlr_malloc_or_die(filename_length + 2 + suffix_length);

	memcpy(output, filename, filename_length);
	char* cursor = output + filename_length;
	*(cursor++) = '.';

	for (int i = 0; i < suffix_length; i++) {
		*(cursor++) = bag[get_mtrand_int32() % bag_length];
	}
	*cursor = 0;

	return output;
}
631
632 // ----------------------------------------------------------------
633 // The convention for argv-style arrays is that they're null-terminated.
634 // So we loop through once to find the length.
copy_argv(char ** argv)635 char** copy_argv(char** argv) {
636 int length = 0;
637 int argi;
638 for (argi = 0; argv[argi]; argi++) {
639 length++;
640 }
641
642 char** copy = mlr_malloc_or_die((length + 1) * sizeof(char*));
643 for (argi = 0; argv[argi]; argi++) {
644 copy[argi] = mlr_strdup_or_die(argv[argi]);
645 }
646
647 copy[length] = 0;
648
649 return copy;
650 }
651
// Frees a NULL-terminated array of heap-allocated strings, such as one
// returned by copy_argv: first each string, then the array itself.
// Passing NULL is a no-op, mirroring free(NULL).
void free_argv_copy(char** copy) {
	if (copy == NULL)
		return;
	for (int argi = 0; copy[argi]; argi++) {
		free(copy[argi]);
	}
	free(copy);
}
658