1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #include "buf_text.h"
8 
/*
 * Append `string` to `buf`, writing `esc_with` in front of every
 * character of `string` that also appears in `esc_chars`.
 *
 * A NULL `string` appends nothing and succeeds.  The required space is
 * computed in a first pass so the buffer is grown only once; the second
 * pass copies the data and NUL-terminates the result.
 *
 * Returns 0 on success, -1 if growing the buffer fails.
 */
int git_buf_text_puts_escaped(
	git_buf *buf,
	const char *string,
	const char *esc_chars,
	const char *esc_with)
{
	const char *cursor;
	size_t prefix_len = strlen(esc_with);
	size_t needed = 0, plain, escaped, alloclen;

	if (!string)
		return 0;

	/* pass 1: measure the escaped length */
	cursor = string;
	while (*cursor) {
		/* run of characters copied verbatim */
		plain = strcspn(cursor, esc_chars);
		cursor += plain;

		/* run of characters that each get an escape prefix */
		escaped = strspn(cursor, esc_chars);
		cursor += escaped;

		needed += plain + escaped * (prefix_len + 1);
	}

	/* one extra byte for the trailing NUL */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, needed, 1);
	if (git_buf_grow_by(buf, alloclen) < 0)
		return -1;

	/* pass 2: emit the escaped text at the current end of the buffer */
	cursor = string;
	while (*cursor) {
		plain = strcspn(cursor, esc_chars);

		memmove(buf->ptr + buf->size, cursor, plain);
		buf->size += plain;
		cursor += plain;

		escaped = strspn(cursor, esc_chars);
		while (escaped > 0) {
			/* escape sequence first ... */
			memmove(buf->ptr + buf->size, esc_with, prefix_len);
			buf->size += prefix_len;

			/* ... then the character being escaped */
			buf->ptr[buf->size++] = *cursor++;

			escaped--;
		}
	}

	buf->ptr[buf->size] = '\0';

	return 0;
}
58 
/*
 * Run git__unescape() over the buffer's string and store the value it
 * returns as the buffer's new size.
 *
 * NOTE(review): git__unescape() presumably rewrites the string in place
 * and returns the resulting length -- confirm against its definition.
 */
void git_buf_text_unescape(git_buf *buf)
{
	size_t unescaped_len = git__unescape(buf->ptr);

	buf->size = unescaped_len;
}
63 
/*
 * Write a copy of `src` into `tgt` with every "\r\n" pair collapsed to
 * "\n".  A '\r' that is not directly followed by '\n' (including one at
 * the very end of the input) is preserved.
 *
 * `tgt` and `src` must be different buffers (asserted).  When the input
 * holds no '\r' at all the result is whatever git_buf_set() returns for
 * a plain copy; otherwise returns 0 on success and -1 if `tgt` cannot
 * be grown.
 */
int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
{
	const char *in = src->ptr;
	const char *in_end = src->ptr + src->size;
	const char *cr = memchr(in, '\r', src->size);
	size_t new_size;
	char *out;

	GIT_ASSERT(tgt != src);

	/* no carriage returns: nothing to strip, just copy */
	if (cr == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/* output never exceeds the input, so size once up front (+1 for NUL) */
	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1);
	if (git_buf_grow(tgt, new_size) < 0)
		return -1;

	out = tgt->ptr;
	tgt->size = 0;

	while (cr != NULL) {
		size_t chunk = (size_t)(cr - in);

		/* copy everything between the previous '\r' and this one */
		if (chunk > 0) {
			memcpy(out, in, chunk);
			out += chunk;
		}

		/* keep the '\r' unless a '\n' comes right after it */
		if (cr + 1 == in_end || cr[1] != '\n')
			*out++ = '\r';

		in = cr + 1;
		cr = memchr(in, '\r', (size_t)(in_end - in));
	}

	/* tail after the last '\r' */
	if (in < in_end) {
		size_t tail = (size_t)(in_end - in);

		memcpy(out, in, tail);
		out += tail;
	}

	tgt->size = (size_t)(out - tgt->ptr);
	*out = '\0';

	return 0;
}
110 
/*
 * Write a copy of `src` into `tgt` with every line terminated by
 * "\r\n".  A '\n' that is already preceded by '\r' is emitted as a
 * single "\r\n" (the existing '\r' is not duplicated), so input with
 * mixed line endings is accepted.
 *
 * `tgt` and `src` must be different buffers (asserted).  When the input
 * holds no '\n' the result is whatever git_buf_set() returns for a
 * plain copy.  Otherwise returns -1 if `tgt` cannot be grown, or the
 * result of the final git_buf_put() that appends the text following the
 * last '\n'.
 */
int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src)
{
	const char *src_end = src->ptr + src->size;
	const char *line = src->ptr;
	const char *lf = memchr(line, '\n', src->size);
	size_t alloclen;

	GIT_ASSERT(tgt != src);

	/* no line feeds: nothing to expand, just copy */
	if (lf == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/*
	 * Initial guess: input size plus one sixteenth, plus the NUL.
	 * The loop below still grows the buffer per line if that is short.
	 */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
	if (git_buf_grow(tgt, alloclen) < 0)
		return -1;
	tgt->size = 0;

	while (lf != NULL) {
		size_t line_len = (size_t)(lf - line);

		/* line already ends in "\r\n": do not copy its '\r' twice */
		if (line_len > 0 && lf[-1] == '\r')
			line_len--;

		/* room for the line body, "\r\n" and a NUL */
		GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, line_len, 3);
		if (git_buf_grow_by(tgt, alloclen) < 0)
			return -1;

		if (line_len > 0) {
			memcpy(tgt->ptr + tgt->size, line, line_len);
			tgt->size += line_len;
		}

		tgt->ptr[tgt->size] = '\r';
		tgt->ptr[tgt->size + 1] = '\n';
		tgt->size += 2;

		line = lf + 1;
		lf = memchr(line, '\n', (size_t)(src_end - line));
	}

	tgt->ptr[tgt->size] = '\0';

	/* whatever follows the final '\n' is appended unchanged */
	return git_buf_put(tgt, line, src_end - line);
}
154 
/*
 * Set `buf` to the longest prefix shared by every entry of `strings`.
 *
 * `buf` is cleared first, so a NULL or empty array yields an empty
 * buffer.  Returns 0 on success, -1 if seeding the buffer with the
 * first string fails.
 */
int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
{
	size_t idx;

	git_buf_clear(buf);

	if (!strings || !strings->count)
		return 0;

	/* the first string is the starting candidate for the prefix */
	if (git_buf_sets(buf, strings->strings[0]) < 0)
		return -1;

	/* shrink the candidate against each remaining string */
	for (idx = 1; idx < strings->count; ++idx) {
		const char *candidate = strings->strings[idx];
		size_t matched = 0;

		while (candidate[matched] != '\0' &&
		       candidate[matched] == buf->ptr[matched])
			matched++;

		git_buf_truncate(buf, matched);

		/* nothing in common any more: later strings cannot change that */
		if (buf->size == 0)
			break;
	}

	return 0;
}
184 
git_buf_text_is_binary(const git_buf * buf)185 bool git_buf_text_is_binary(const git_buf *buf)
186 {
187 	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
188 	git_bom_t bom;
189 	int printable = 0, nonprintable = 0;
190 
191 	scan += git_buf_text_detect_bom(&bom, buf);
192 
193 	if (bom > GIT_BOM_UTF8)
194 		return 1;
195 
196 	while (scan < end) {
197 		unsigned char c = *scan++;
198 
199 		/* Printable characters are those above SPACE (0x1F) excluding DEL,
200 		 * and including BS, ESC and FF.
201 		 */
202 		if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
203 			printable++;
204 		else if (c == '\0')
205 			return true;
206 		else if (!git__isspace(c))
207 			nonprintable++;
208 	}
209 
210 	return ((printable >> 7) < nonprintable);
211 }
212 
git_buf_text_contains_nul(const git_buf * buf)213 bool git_buf_text_contains_nul(const git_buf *buf)
214 {
215 	return (memchr(buf->ptr, '\0', buf->size) != NULL);
216 }
217 
/*
 * Look for a Unicode byte-order mark at the start of `buf`.
 *
 * `*bom` is always written: GIT_BOM_NONE when nothing is recognised,
 * otherwise the matching GIT_BOM_* value.  The return value is the
 * length of the mark in bytes (2, 3 or 4), or 0 when there is none.
 *
 * Recognised marks:
 *   00 00 FE FF  UTF-32 BE
 *   EF BB BF     UTF-8
 *   FE FF        UTF-16 BE
 *   FF FE 00 00  UTF-32 LE
 *   FF FE        UTF-16 LE
 */
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf)
{
	const unsigned char *b;
	size_t len;

	*bom = GIT_BOM_NONE;

	/* the shortest mark is two bytes long */
	if (buf->size < 2)
		return 0;

	b = (const unsigned char *)buf->ptr;
	len = buf->size;

	if (b[0] == 0x00 && len >= 4 &&
	    b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) {
		*bom = GIT_BOM_UTF32_BE;
		return 4;
	}

	if (b[0] == 0xEF && len >= 3 && b[1] == 0xBB && b[2] == 0xBF) {
		*bom = GIT_BOM_UTF8;
		return 3;
	}

	if (b[0] == 0xFE && b[1] == 0xFF) {
		*bom = GIT_BOM_UTF16_BE;
		return 2;
	}

	if (b[0] == 0xFF && b[1] == 0xFE) {
		/* FF FE followed by 00 00 is UTF-32 LE, not UTF-16 LE */
		if (len >= 4 && b[2] == 0x00 && b[3] == 0x00) {
			*bom = GIT_BOM_UTF32_LE;
			return 4;
		}

		*bom = GIT_BOM_UTF16_LE;
		return 2;
	}

	return 0;
}
267 
/*
 * Fill `stats` with byte-class counts for `buf` and report whether the
 * content looks binary.
 *
 * `stats` is zeroed first.  The BOM is always detected and recorded in
 * `stats->bom`; its bytes are left out of the counts only when
 * `skip_bom` is true.  A trailing 0x1A byte (the "EOF character") is
 * not counted.
 *
 * Returns true when the buffer has a '\r' that is not part of "\r\n",
 * contains a NUL, or has more nonprintable bytes than the printable
 * count divided by 128.
 */
bool git_buf_text_gather_stats(
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
{
	const char *cur = buf->ptr;
	const char *limit = buf->ptr + buf->size;
	int bom_len;

	memset(stats, 0, sizeof(*stats));

	/* record the BOM; step over it only on request */
	bom_len = git_buf_text_detect_bom(&stats->bom, buf);
	if (skip_bom)
		cur += bom_len;

	/* leave a trailing EOF character out of the scan */
	if (buf->size > 0 && limit[-1] == '\032')
		limit--;

	for (; cur < limit; cur++) {
		unsigned char c = *cur;

		/* everything from SPACE upwards except DEL */
		if (c >= 0x20 && c != 0x7F) {
			stats->printable++;
			continue;
		}

		switch (c) {
		case '\0':
			stats->nul++;
			stats->nonprintable++;
			break;
		case '\n':
			stats->lf++;
			break;
		case '\r':
			stats->cr++;
			/* peek at the following byte to spot a CRLF pair */
			if (cur + 1 < limit && cur[1] == '\n')
				stats->crlf++;
			break;
		case '\t':
		case '\f':
		case '\v':
		case '\b':
		case 0x1b: /* ESC */
			stats->printable++;
			break;
		default:
			stats->nonprintable++;
			break;
		}
	}

	/* a CR outside of a CRLF pair marks the content as binary */
	if (stats->cr != stats->crlf)
		return true;

	if (stats->nul > 0)
		return true;

	return (stats->printable >> 7) < stats->nonprintable;
}
317