1 #ifndef MLRUTIL_H
2 #define MLRUTIL_H
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <time.h>
8 #include "mtrand.h"
9 
10 #define TRUE  1
11 #define FALSE 0
12 #define NEITHER_TRUE_NOR_FALSE -1
13 
14 //#define MLR_MALLOC_TRACE
15 
16 // ----------------------------------------------------------------
// Internal-consistency checks. Each macro forwards to the like-named function
// declared below, passing the __FILE__ and __LINE__ of the call site; the _IF
// and _UNLESS forms additionally pass the value being tested.
// NOTE(review): the functions are defined elsewhere; presumably they report
// the location and terminate the program -- confirm against the definitions.
#define MLR_INTERNAL_CODING_ERROR() \
	mlr_internal_coding_error(__FILE__, __LINE__)
#define MLR_INTERNAL_CODING_ERROR_IF(v) \
	mlr_internal_coding_error_if((v), __FILE__, __LINE__)
#define MLR_INTERNAL_CODING_ERROR_UNLESS(v) \
	mlr_internal_coding_error_unless((v), __FILE__, __LINE__)

void mlr_internal_coding_error(char *file, int line);
void mlr_internal_coding_error_if(int v, char *file, int line);
void mlr_internal_coding_error_unless(int v, char *file, int line);
23 
24 // ----------------------------------------------------------------
// Remainder of a divided by n. C's % operator can yield a negative result
// when a is negative; in that case n is added so that, for positive n, the
// value returned lies in 0 .. n-1.
// The caller must pass a nonzero n (a % 0 is undefined behavior).
static inline int mlr_canonical_mod(int a, int n) {
	int rem = a % n;
	return (rem < 0) ? rem + n : rem;
}
33 
34 // ----------------------------------------------------------------
35 // strcmp computes signs; we don't need that -- only equality or inequality.
streq(char * a,char * b)36 static inline int streq(char* a, char* b) {
37 #if 0 // performance comparison
38 	return !strcmp(a, b);
39 #else
40 	while (*a && *b) {
41 		if (*a != *b)
42 			return FALSE;
43 		a++;
44 		b++;
45 	}
46 	if (*a || *b)
47 		return FALSE;
48 	return TRUE;
49 #endif
50 }
51 
52 // strncmp computes signs; we don't need that -- only equality or inequality.
streqn(char * a,char * b,int n)53 static inline int streqn(char* a, char* b, int n) {
54 #if 0 // performance comparison
55 	return !strncmp(a, b, n);
56 #else
57 	while (n > 0 && *a && *b) {
58 		if (n-- <= 0) {
59 			return TRUE;
60 		}
61 		if (*a != *b) {
62 			return FALSE;
63 		}
64 		a++;
65 		b++;
66 	}
67 	if (n == 0)
68 		return TRUE;
69 	if (*a || *b) {
70 		return FALSE;
71 	}
72 	return TRUE;
73 #endif
74 }
75 
76 // ----------------------------------------------------------------
// A strsep work-alike in which sep is one multi-character delimiter string
// rather than a set of single-character delimiters.
// NOTE(review): seplen is presumably strlen(sep), supplied by the caller so it
// need not be recomputed on every call -- confirm against the definition.
char *mlr_strmsep(char **pstring, const char *sep, int seplen);
80 
81 // ----------------------------------------------------------------
// NOTE(review): going by the name, a binary search over array[0 .. size-1]
// returning the position at which value should be inserted; the required
// ordering of the array is not visible here -- confirm against the definition.
int mlr_bsearch_double_for_insert(double *array, int size, double value);

// Allocation wrappers.
// NOTE(review): as the _or_die suffix suggests, these presumably terminate the
// program rather than return NULL when allocation fails -- confirm against
// the definitions.
void *mlr_malloc_or_die(size_t size);
void *mlr_realloc_or_die(void *ptr, size_t size);
// Duplicates s1 with strdup. If the allocation fails, prints a message to
// stderr and exits the process with status 1, so the return value is never
// NULL. The caller owns the returned string and should free it.
// When MLR_MALLOC_TRACE is defined, each successful duplication is logged to
// stderr with the string's length and address.
static inline char * mlr_strdup_or_die(const char *s1) {
	char *copy = strdup(s1);
	if (copy != NULL) {
#ifdef MLR_MALLOC_TRACE
		fprintf(stderr, "STRDUP size=%d,p=%p\n", (int)strlen(copy), copy);
#endif
		return copy;
	}
	fprintf(stderr, "malloc/strdup failed\n");
	exit(1);
}
// NOTE(review): going by the name, presumably returns a newly allocated copy
// of s1 wrapped in quote characters and exits on allocation failure -- confirm
// against the definition.
char *mlr_strdup_quoted_or_die(const char *s1);

// Number/string formatting helpers. Each function in this group returns a
// string that the caller should free.
char *mlr_alloc_string_from_double(double value, char *fmt);
char *mlr_alloc_string_from_ull(unsigned long long value);
char *mlr_alloc_string_from_ll(long long value);
char *mlr_alloc_string_from_ll_and_format(long long value, char *fmt);
char *mlr_alloc_string_from_int(int value);
char *mlr_alloc_string_from_string_and_format(char *value, char *fmt);
// The num_bytes input bytes do not include a null terminator; the returned
// string is null-terminated.
char *mlr_alloc_string_from_char_range(char *start, int num_bytes);

// NOTE(review): presumably a caller-freed hexadecimal rendering of value, like
// the helpers above -- confirm against the definition.
char *mlr_alloc_hexfmt_from_ll(long long value);
110 
// String-to-number conversion.
// NOTE(review): going by the names, the *_or_die variants presumably terminate
// the program on unparseable input, while the mlr_try_* variants store the
// parsed value through pval and report success or failure in the return
// value -- confirm against the definitions.
double mlr_double_from_string_or_die(char *string);
long long mlr_int_from_string_or_die(char *string);
int mlr_try_float_from_string(char *string, double *pval);
int mlr_try_int_from_string(char *string, long long *pval);

// Returns a string representation of idx. For small integers (as of this
// writing, 0 .. 100) the result is a static string; for other values it is
// dynamically allocated.
// NOTE(review): pfree_flags is presumably updated to tell the caller which of
// the two cases applied, i.e. whether the result must be freed -- confirm.
char *low_int_to_string(int idx, char *pfree_flags);
119 
// String-pasting helpers. These are inefficient and meant for code paths that
// are called rarely. The caller should free each return value.
// NOTE(review): presumably the result is the arguments concatenated in
// order -- confirm against the definitions.
char *mlr_paste_2_strings(char *s1, char *s2);
char *mlr_paste_3_strings(char *s1, char *s2, char *s3);
char *mlr_paste_4_strings(char *s1, char *s2, char *s3, char *s4);
char *mlr_paste_5_strings(char *s1, char *s2, char *s3, char *s4, char *s5);

// Hash functions over one string and over a pair of strings.
// NOTE(review): the hashing algorithm and the range of the returned int are
// not visible in this header.
int mlr_string_hash_func(char *str);
int mlr_string_pair_hash_func(char *str1, char *str2);
128 
// NOTE(review): presumably the number of displayed characters (as opposed to
// bytes) in a UTF-8 string -- confirm against the definition.
int strlen_for_utf8_display(char *str);

// NOTE(review): presumably nonzero when string begins with prefix -- confirm.
int string_starts_with(char *string, char *prefix);

// NOTE(review): presumably nonzero when string ends with suffix -- confirm.
// If pstringlen is non-null then, after return, it will contain
// strlen(string) for the convenience of the caller.
int string_ends_with(char *string, char *suffix, int *pstringlen);

// Small integer helpers.
// NOTE(review): going by the names, the maximum of two or three ints, and a
// power of two bounding n from above (whether strictly is not visible
// here) -- confirm against the definitions.
int mlr_imax2(int a, int b);
int mlr_imax3(int a, int b, int c);
int power_of_two_above(int n);
138 
// Maps two-character sequences such as "\t", "\n", "\\" to the single
// characters they denote (tab, newline, backslash, etc.). The result is a new
// string which the caller should free.
char *mlr_alloc_unbackslash(char *input);

// Removes a final LF, CR, or CR/LF from s. This is destructive: s is modified
// in place.
void mlr_rstrip(char *s);
145 
// Escapes backslashes in the way the system regex library requires; intended
// to be applied before calling regcomp.
//
// Background: Miller DSL literals are unbackslashed, e.g. the two-character
// sequence "\t" is converted to a tab character, and users need to type "\\t"
// to get a backslash followed by a t. The system regex library, however, does
// not handle backslashes quite as wanted. Without this function,
//
//   echo 'x=a\tb' | mlr put '$x=sub($x,"\\t","TAB")'
//
// (note: not echo -e, but just plain echo) outputs
//
//   a\TABb
//
// while
//
//   echo 'x=a\tb' | mlr put '$x=sub($x,"\\\\t","TAB")'
//
// outputs
//
//   aTABb
//
// With this function applied first,
//
//   echo 'x=a\tb' | mlr put '$x=sub($x,"\\t","TAB")'
//
// outputs
//
//   aTABb
//
// as desired.
// NOTE(review): the alloc_ prefix suggests the result is newly allocated and
// should be freed by the caller -- confirm against the definition.
char *mlr_alloc_double_backslash(char *input);
174 
// Size of the named file; returns -1 on error.
ssize_t get_file_size(char *filename);

// Whole-input readers. The caller should free the return value of each.
// NOTE(review): psize is presumably set to the number of bytes read --
// confirm against the definitions.
char *read_file_into_memory(char *filename, size_t *psize);
char *read_fp_into_memory(FILE *fp, size_t *psize);

// Returns a copy of filename with random characters attached to the end.
// NOTE(review): the alloc_ prefix suggests the copy is caller-freed -- confirm.
char *alloc_suffixed_temp_file_name(char *filename);

// NOTE(review): going by the names, copy_argv duplicates an argv array and
// free_argv_copy releases such a duplicate -- confirm against the definitions.
char **copy_argv(char **argv);
void free_argv_copy(char **argv);
188 
189 #endif // MLRUTIL_H
190