1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #include "buffer.h"
8 #include "posix.h"
9 #include "git2/buffer.h"
10 #include <ctype.h>
11 
12 /* Used as default value for git_buf->ptr so that people can always
13  * assume ptr is non-NULL and zero terminated even for new git_bufs.
14  */
15 char git_buf__initbuf[1];
16 
17 char git_buf__oom[1];
18 
19 #define ENSURE_SIZE(b, d) \
20 	if ((b)->ptr == git_buf__oom || \
21 	    ((d) > (b)->asize && git_buf_grow((b), (d)) < 0))\
22 		return -1;
23 
24 
git_buf_init(git_buf * buf,size_t initial_size)25 int git_buf_init(git_buf *buf, size_t initial_size)
26 {
27 	buf->asize = 0;
28 	buf->size = 0;
29 	buf->ptr = git_buf__initbuf;
30 
31 	ENSURE_SIZE(buf, initial_size);
32 
33 	return 0;
34 }
35 
git_buf_try_grow(git_buf * buf,size_t target_size,bool mark_oom)36 int git_buf_try_grow(
37 	git_buf *buf, size_t target_size, bool mark_oom)
38 {
39 	char *new_ptr;
40 	size_t new_size;
41 
42 	if (buf->ptr == git_buf__oom)
43 		return -1;
44 
45 	if (buf->asize == 0 && buf->size != 0) {
46 		git_error_set(GIT_ERROR_INVALID, "cannot grow a borrowed buffer");
47 		return GIT_EINVALID;
48 	}
49 
50 	if (!target_size)
51 		target_size = buf->size;
52 
53 	if (target_size <= buf->asize)
54 		return 0;
55 
56 	if (buf->asize == 0) {
57 		new_size = target_size;
58 		new_ptr = NULL;
59 	} else {
60 		new_size = buf->asize;
61 		/*
62 		 * Grow the allocated buffer by 1.5 to allow
63 		 * re-use of memory holes resulting from the
64 		 * realloc. If this is still too small, then just
65 		 * use the target size.
66 		 */
67 		if ((new_size = (new_size << 1) - (new_size >> 1)) < target_size)
68 			new_size = target_size;
69 		new_ptr = buf->ptr;
70 	}
71 
72 	/* round allocation up to multiple of 8 */
73 	new_size = (new_size + 7) & ~7;
74 
75 	if (new_size < buf->size) {
76 		if (mark_oom) {
77 			if (buf->ptr && buf->ptr != git_buf__initbuf)
78 				git__free(buf->ptr);
79 			buf->ptr = git_buf__oom;
80 		}
81 
82 		git_error_set_oom();
83 		return -1;
84 	}
85 
86 	new_ptr = git__realloc(new_ptr, new_size);
87 
88 	if (!new_ptr) {
89 		if (mark_oom) {
90 			if (buf->ptr && (buf->ptr != git_buf__initbuf))
91 				git__free(buf->ptr);
92 			buf->ptr = git_buf__oom;
93 		}
94 		return -1;
95 	}
96 
97 	buf->asize = new_size;
98 	buf->ptr   = new_ptr;
99 
100 	/* truncate the existing buffer size if necessary */
101 	if (buf->size >= buf->asize)
102 		buf->size = buf->asize - 1;
103 	buf->ptr[buf->size] = '\0';
104 
105 	return 0;
106 }
107 
git_buf_grow(git_buf * buffer,size_t target_size)108 int git_buf_grow(git_buf *buffer, size_t target_size)
109 {
110 	return git_buf_try_grow(buffer, target_size, true);
111 }
112 
git_buf_grow_by(git_buf * buffer,size_t additional_size)113 int git_buf_grow_by(git_buf *buffer, size_t additional_size)
114 {
115 	size_t newsize;
116 
117 	if (GIT_ADD_SIZET_OVERFLOW(&newsize, buffer->size, additional_size)) {
118 		buffer->ptr = git_buf__oom;
119 		return -1;
120 	}
121 
122 	return git_buf_try_grow(buffer, newsize, true);
123 }
124 
git_buf_dispose(git_buf * buf)125 void git_buf_dispose(git_buf *buf)
126 {
127 	if (!buf) return;
128 
129 	if (buf->asize > 0 && buf->ptr != NULL && buf->ptr != git_buf__oom)
130 		git__free(buf->ptr);
131 
132 	git_buf_init(buf, 0);
133 }
134 
135 #ifndef GIT_DEPRECATE_HARD
git_buf_free(git_buf * buf)136 void git_buf_free(git_buf *buf)
137 {
138 	git_buf_dispose(buf);
139 }
140 #endif
141 
git_buf_sanitize(git_buf * buf)142 int git_buf_sanitize(git_buf *buf)
143 {
144 	if (buf->ptr == NULL) {
145 		GIT_ASSERT_ARG(buf->size == 0 && buf->asize == 0);
146 
147 		buf->ptr = git_buf__initbuf;
148 	} else if (buf->asize > buf->size) {
149 		buf->ptr[buf->size] = '\0';
150 	}
151 
152 	return 0;
153 }
154 
git_buf_clear(git_buf * buf)155 void git_buf_clear(git_buf *buf)
156 {
157 	buf->size = 0;
158 
159 	if (!buf->ptr) {
160 		buf->ptr = git_buf__initbuf;
161 		buf->asize = 0;
162 	}
163 
164 	if (buf->asize > 0)
165 		buf->ptr[0] = '\0';
166 }
167 
git_buf_set(git_buf * buf,const void * data,size_t len)168 int git_buf_set(git_buf *buf, const void *data, size_t len)
169 {
170 	size_t alloclen;
171 
172 	if (len == 0 || data == NULL) {
173 		git_buf_clear(buf);
174 	} else {
175 		if (data != buf->ptr) {
176 			GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, len, 1);
177 			ENSURE_SIZE(buf, alloclen);
178 			memmove(buf->ptr, data, len);
179 		}
180 
181 		buf->size = len;
182 		if (buf->asize > buf->size)
183 			buf->ptr[buf->size] = '\0';
184 
185 	}
186 	return 0;
187 }
188 
git_buf_sets(git_buf * buf,const char * string)189 int git_buf_sets(git_buf *buf, const char *string)
190 {
191 	return git_buf_set(buf, string, string ? strlen(string) : 0);
192 }
193 
git_buf_putc(git_buf * buf,char c)194 int git_buf_putc(git_buf *buf, char c)
195 {
196 	size_t new_size;
197 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, 2);
198 	ENSURE_SIZE(buf, new_size);
199 	buf->ptr[buf->size++] = c;
200 	buf->ptr[buf->size] = '\0';
201 	return 0;
202 }
203 
git_buf_putcn(git_buf * buf,char c,size_t len)204 int git_buf_putcn(git_buf *buf, char c, size_t len)
205 {
206 	size_t new_size;
207 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, len);
208 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
209 	ENSURE_SIZE(buf, new_size);
210 	memset(buf->ptr + buf->size, c, len);
211 	buf->size += len;
212 	buf->ptr[buf->size] = '\0';
213 	return 0;
214 }
215 
git_buf_put(git_buf * buf,const char * data,size_t len)216 int git_buf_put(git_buf *buf, const char *data, size_t len)
217 {
218 	if (len) {
219 		size_t new_size;
220 
221 		GIT_ASSERT_ARG(data);
222 
223 		GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, len);
224 		GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
225 		ENSURE_SIZE(buf, new_size);
226 		memmove(buf->ptr + buf->size, data, len);
227 		buf->size += len;
228 		buf->ptr[buf->size] = '\0';
229 	}
230 	return 0;
231 }
232 
git_buf_puts(git_buf * buf,const char * string)233 int git_buf_puts(git_buf *buf, const char *string)
234 {
235 	GIT_ASSERT_ARG(string);
236 
237 	return git_buf_put(buf, string, strlen(string));
238 }
239 
240 static const char base64_encode[] =
241 	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
242 
git_buf_encode_base64(git_buf * buf,const char * data,size_t len)243 int git_buf_encode_base64(git_buf *buf, const char *data, size_t len)
244 {
245 	size_t extra = len % 3;
246 	uint8_t *write, a, b, c;
247 	const uint8_t *read = (const uint8_t *)data;
248 	size_t blocks = (len / 3) + !!extra, alloclen;
249 
250 	GIT_ERROR_CHECK_ALLOC_ADD(&blocks, blocks, 1);
251 	GIT_ERROR_CHECK_ALLOC_MULTIPLY(&alloclen, blocks, 4);
252 	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, buf->size);
253 
254 	ENSURE_SIZE(buf, alloclen);
255 	write = (uint8_t *)&buf->ptr[buf->size];
256 
257 	/* convert each run of 3 bytes into 4 output bytes */
258 	for (len -= extra; len > 0; len -= 3) {
259 		a = *read++;
260 		b = *read++;
261 		c = *read++;
262 
263 		*write++ = base64_encode[a >> 2];
264 		*write++ = base64_encode[(a & 0x03) << 4 | b >> 4];
265 		*write++ = base64_encode[(b & 0x0f) << 2 | c >> 6];
266 		*write++ = base64_encode[c & 0x3f];
267 	}
268 
269 	if (extra > 0) {
270 		a = *read++;
271 		b = (extra > 1) ? *read++ : 0;
272 
273 		*write++ = base64_encode[a >> 2];
274 		*write++ = base64_encode[(a & 0x03) << 4 | b >> 4];
275 		*write++ = (extra > 1) ? base64_encode[(b & 0x0f) << 2] : '=';
276 		*write++ = '=';
277 	}
278 
279 	buf->size = ((char *)write) - buf->ptr;
280 	buf->ptr[buf->size] = '\0';
281 
282 	return 0;
283 }
284 
285 /* The inverse of base64_encode */
286 static const int8_t base64_decode[] = {
287 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
288 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
289 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
290 	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1,  0, -1, -1,
291 	-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
292 	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
293 	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
294 	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
295 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
296 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
297 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
298 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
299 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
300 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
301 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
302 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
303 };
304 
git_buf_decode_base64(git_buf * buf,const char * base64,size_t len)305 int git_buf_decode_base64(git_buf *buf, const char *base64, size_t len)
306 {
307 	size_t i;
308 	int8_t a, b, c, d;
309 	size_t orig_size = buf->size, new_size;
310 
311 	if (len % 4) {
312 		git_error_set(GIT_ERROR_INVALID, "invalid base64 input");
313 		return -1;
314 	}
315 
316 	GIT_ASSERT_ARG(len % 4 == 0);
317 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, (len / 4 * 3), buf->size);
318 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
319 	ENSURE_SIZE(buf, new_size);
320 
321 	for (i = 0; i < len; i += 4) {
322 		if ((a = base64_decode[(unsigned char)base64[i]]) < 0 ||
323 			(b = base64_decode[(unsigned char)base64[i+1]]) < 0 ||
324 			(c = base64_decode[(unsigned char)base64[i+2]]) < 0 ||
325 			(d = base64_decode[(unsigned char)base64[i+3]]) < 0) {
326 			buf->size = orig_size;
327 			buf->ptr[buf->size] = '\0';
328 
329 			git_error_set(GIT_ERROR_INVALID, "invalid base64 input");
330 			return -1;
331 		}
332 
333 		buf->ptr[buf->size++] = ((a << 2) | (b & 0x30) >> 4);
334 		buf->ptr[buf->size++] = ((b & 0x0f) << 4) | ((c & 0x3c) >> 2);
335 		buf->ptr[buf->size++] = (c & 0x03) << 6 | (d & 0x3f);
336 	}
337 
338 	buf->ptr[buf->size] = '\0';
339 	return 0;
340 }
341 
342 static const char base85_encode[] =
343 	"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
344 
git_buf_encode_base85(git_buf * buf,const char * data,size_t len)345 int git_buf_encode_base85(git_buf *buf, const char *data, size_t len)
346 {
347 	size_t blocks = (len / 4) + !!(len % 4), alloclen;
348 
349 	GIT_ERROR_CHECK_ALLOC_MULTIPLY(&alloclen, blocks, 5);
350 	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, buf->size);
351 	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
352 
353 	ENSURE_SIZE(buf, alloclen);
354 
355 	while (len) {
356 		uint32_t acc = 0;
357 		char b85[5];
358 		int i;
359 
360 		for (i = 24; i >= 0; i -= 8) {
361 			uint8_t ch = *data++;
362 			acc |= (uint32_t)ch << i;
363 
364 			if (--len == 0)
365 				break;
366 		}
367 
368 		for (i = 4; i >= 0; i--) {
369 			int val = acc % 85;
370 			acc /= 85;
371 
372 			b85[i] = base85_encode[val];
373 		}
374 
375 		for (i = 0; i < 5; i++)
376 			buf->ptr[buf->size++] = b85[i];
377 	}
378 
379 	buf->ptr[buf->size] = '\0';
380 
381 	return 0;
382 }
383 
384 /* The inverse of base85_encode */
385 static const int8_t base85_decode[] = {
386 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
387 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
388 	-1, 63, -1, 64, 65, 66, 67, -1, 68, 69, 70, 71, -1, 72, -1, -1,
389 	 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, -1, 73, 74, 75, 76, 77,
390 	78, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
391 	26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, -1, -1, -1, 79, 80,
392 	81, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
393 	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 82, 83, 84, 85, -1,
394 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
395 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
396 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
397 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
398 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
399 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
400 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
401 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
402 };
403 
git_buf_decode_base85(git_buf * buf,const char * base85,size_t base85_len,size_t output_len)404 int git_buf_decode_base85(
405 	git_buf *buf,
406 	const char *base85,
407 	size_t base85_len,
408 	size_t output_len)
409 {
410 	size_t orig_size = buf->size, new_size;
411 
412 	if (base85_len % 5 ||
413 		output_len > base85_len * 4 / 5) {
414 		git_error_set(GIT_ERROR_INVALID, "invalid base85 input");
415 		return -1;
416 	}
417 
418 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, output_len, buf->size);
419 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
420 	ENSURE_SIZE(buf, new_size);
421 
422 	while (output_len) {
423 		unsigned acc = 0;
424 		int de, cnt = 4;
425 		unsigned char ch;
426 		do {
427 			ch = *base85++;
428 			de = base85_decode[ch];
429 			if (--de < 0)
430 				goto on_error;
431 
432 			acc = acc * 85 + de;
433 		} while (--cnt);
434 		ch = *base85++;
435 		de = base85_decode[ch];
436 		if (--de < 0)
437 			goto on_error;
438 
439 		/* Detect overflow. */
440 		if (0xffffffff / 85 < acc ||
441 			0xffffffff - de < (acc *= 85))
442 			goto on_error;
443 
444 		acc += de;
445 
446 		cnt = (output_len < 4) ? (int)output_len : 4;
447 		output_len -= cnt;
448 		do {
449 			acc = (acc << 8) | (acc >> 24);
450 			buf->ptr[buf->size++] = acc;
451 		} while (--cnt);
452 	}
453 
454 	buf->ptr[buf->size] = 0;
455 
456 	return 0;
457 
458 on_error:
459 	buf->size = orig_size;
460 	buf->ptr[buf->size] = '\0';
461 
462 	git_error_set(GIT_ERROR_INVALID, "invalid base85 input");
463 	return -1;
464 }
465 
466 #define HEX_DECODE(c) ((c | 32) % 39 - 9)
467 
git_buf_decode_percent(git_buf * buf,const char * str,size_t str_len)468 int git_buf_decode_percent(
469 	git_buf *buf,
470 	const char *str,
471 	size_t str_len)
472 {
473 	size_t str_pos, new_size;
474 
475 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, str_len);
476 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
477 	ENSURE_SIZE(buf, new_size);
478 
479 	for (str_pos = 0; str_pos < str_len; buf->size++, str_pos++) {
480 		if (str[str_pos] == '%' &&
481 			str_len > str_pos + 2 &&
482 			isxdigit(str[str_pos + 1]) &&
483 			isxdigit(str[str_pos + 2])) {
484 			buf->ptr[buf->size] = (HEX_DECODE(str[str_pos + 1]) << 4) +
485 				HEX_DECODE(str[str_pos + 2]);
486 			str_pos += 2;
487 		} else {
488 			buf->ptr[buf->size] = str[str_pos];
489 		}
490 	}
491 
492 	buf->ptr[buf->size] = '\0';
493 	return 0;
494 }
495 
git_buf_vprintf(git_buf * buf,const char * format,va_list ap)496 int git_buf_vprintf(git_buf *buf, const char *format, va_list ap)
497 {
498 	size_t expected_size, new_size;
499 	int len;
500 
501 	GIT_ERROR_CHECK_ALLOC_MULTIPLY(&expected_size, strlen(format), 2);
502 	GIT_ERROR_CHECK_ALLOC_ADD(&expected_size, expected_size, buf->size);
503 	ENSURE_SIZE(buf, expected_size);
504 
505 	while (1) {
506 		va_list args;
507 		va_copy(args, ap);
508 
509 		len = p_vsnprintf(
510 			buf->ptr + buf->size,
511 			buf->asize - buf->size,
512 			format, args
513 		);
514 
515 		va_end(args);
516 
517 		if (len < 0) {
518 			git__free(buf->ptr);
519 			buf->ptr = git_buf__oom;
520 			return -1;
521 		}
522 
523 		if ((size_t)len + 1 <= buf->asize - buf->size) {
524 			buf->size += len;
525 			break;
526 		}
527 
528 		GIT_ERROR_CHECK_ALLOC_ADD(&new_size, buf->size, len);
529 		GIT_ERROR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
530 		ENSURE_SIZE(buf, new_size);
531 	}
532 
533 	return 0;
534 }
535 
git_buf_printf(git_buf * buf,const char * format,...)536 int git_buf_printf(git_buf *buf, const char *format, ...)
537 {
538 	int r;
539 	va_list ap;
540 
541 	va_start(ap, format);
542 	r = git_buf_vprintf(buf, format, ap);
543 	va_end(ap);
544 
545 	return r;
546 }
547 
git_buf_copy_cstr(char * data,size_t datasize,const git_buf * buf)548 int git_buf_copy_cstr(char *data, size_t datasize, const git_buf *buf)
549 {
550 	size_t copylen;
551 
552 	GIT_ASSERT_ARG(data);
553 	GIT_ASSERT_ARG(datasize);
554 	GIT_ASSERT_ARG(buf);
555 
556 	data[0] = '\0';
557 
558 	if (buf->size == 0 || buf->asize <= 0)
559 		return 0;
560 
561 	copylen = buf->size;
562 	if (copylen > datasize - 1)
563 		copylen = datasize - 1;
564 	memmove(data, buf->ptr, copylen);
565 	data[copylen] = '\0';
566 
567 	return 0;
568 }
569 
git_buf_consume_bytes(git_buf * buf,size_t len)570 void git_buf_consume_bytes(git_buf *buf, size_t len)
571 {
572 	git_buf_consume(buf, buf->ptr + len);
573 }
574 
git_buf_consume(git_buf * buf,const char * end)575 void git_buf_consume(git_buf *buf, const char *end)
576 {
577 	if (end > buf->ptr && end <= buf->ptr + buf->size) {
578 		size_t consumed = end - buf->ptr;
579 		memmove(buf->ptr, end, buf->size - consumed);
580 		buf->size -= consumed;
581 		buf->ptr[buf->size] = '\0';
582 	}
583 }
584 
git_buf_truncate(git_buf * buf,size_t len)585 void git_buf_truncate(git_buf *buf, size_t len)
586 {
587 	if (len >= buf->size)
588 		return;
589 
590 	buf->size = len;
591 	if (buf->size < buf->asize)
592 		buf->ptr[buf->size] = '\0';
593 }
594 
git_buf_shorten(git_buf * buf,size_t amount)595 void git_buf_shorten(git_buf *buf, size_t amount)
596 {
597 	if (buf->size > amount)
598 		git_buf_truncate(buf, buf->size - amount);
599 	else
600 		git_buf_clear(buf);
601 }
602 
git_buf_rtruncate_at_char(git_buf * buf,char separator)603 void git_buf_rtruncate_at_char(git_buf *buf, char separator)
604 {
605 	ssize_t idx = git_buf_rfind_next(buf, separator);
606 	git_buf_truncate(buf, idx < 0 ? 0 : (size_t)idx);
607 }
608 
git_buf_swap(git_buf * buf_a,git_buf * buf_b)609 void git_buf_swap(git_buf *buf_a, git_buf *buf_b)
610 {
611 	git_buf t = *buf_a;
612 	*buf_a = *buf_b;
613 	*buf_b = t;
614 }
615 
git_buf_detach(git_buf * buf)616 char *git_buf_detach(git_buf *buf)
617 {
618 	char *data = buf->ptr;
619 
620 	if (buf->asize == 0 || buf->ptr == git_buf__oom)
621 		return NULL;
622 
623 	git_buf_init(buf, 0);
624 
625 	return data;
626 }
627 
git_buf_attach(git_buf * buf,char * ptr,size_t asize)628 int git_buf_attach(git_buf *buf, char *ptr, size_t asize)
629 {
630 	git_buf_dispose(buf);
631 
632 	if (ptr) {
633 		buf->ptr = ptr;
634 		buf->size = strlen(ptr);
635 		if (asize)
636 			buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
637 		else /* pass 0 to fall back on strlen + 1 */
638 			buf->asize = buf->size + 1;
639 	}
640 
641 	ENSURE_SIZE(buf, asize);
642 	return 0;
643 }
644 
git_buf_attach_notowned(git_buf * buf,const char * ptr,size_t size)645 void git_buf_attach_notowned(git_buf *buf, const char *ptr, size_t size)
646 {
647 	if (git_buf_is_allocated(buf))
648 		git_buf_dispose(buf);
649 
650 	if (!size) {
651 		git_buf_init(buf, 0);
652 	} else {
653 		buf->ptr = (char *)ptr;
654 		buf->asize = 0;
655 		buf->size = size;
656 	}
657 }
658 
git_buf_join_n(git_buf * buf,char separator,int nbuf,...)659 int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...)
660 {
661 	va_list ap;
662 	int i;
663 	size_t total_size = 0, original_size = buf->size;
664 	char *out, *original = buf->ptr;
665 
666 	if (buf->size > 0 && buf->ptr[buf->size - 1] != separator)
667 		++total_size; /* space for initial separator */
668 
669 	/* Make two passes to avoid multiple reallocation */
670 
671 	va_start(ap, nbuf);
672 	for (i = 0; i < nbuf; ++i) {
673 		const char* segment;
674 		size_t segment_len;
675 
676 		segment = va_arg(ap, const char *);
677 		if (!segment)
678 			continue;
679 
680 		segment_len = strlen(segment);
681 
682 		GIT_ERROR_CHECK_ALLOC_ADD(&total_size, total_size, segment_len);
683 
684 		if (segment_len == 0 || segment[segment_len - 1] != separator)
685 			GIT_ERROR_CHECK_ALLOC_ADD(&total_size, total_size, 1);
686 	}
687 	va_end(ap);
688 
689 	/* expand buffer if needed */
690 	if (total_size == 0)
691 		return 0;
692 
693 	GIT_ERROR_CHECK_ALLOC_ADD(&total_size, total_size, 1);
694 	if (git_buf_grow_by(buf, total_size) < 0)
695 		return -1;
696 
697 	out = buf->ptr + buf->size;
698 
699 	/* append separator to existing buf if needed */
700 	if (buf->size > 0 && out[-1] != separator)
701 		*out++ = separator;
702 
703 	va_start(ap, nbuf);
704 	for (i = 0; i < nbuf; ++i) {
705 		const char* segment;
706 		size_t segment_len;
707 
708 		segment = va_arg(ap, const char *);
709 		if (!segment)
710 			continue;
711 
712 		/* deal with join that references buffer's original content */
713 		if (segment >= original && segment < original + original_size) {
714 			size_t offset = (segment - original);
715 			segment = buf->ptr + offset;
716 			segment_len = original_size - offset;
717 		} else {
718 			segment_len = strlen(segment);
719 		}
720 
721 		/* skip leading separators */
722 		if (out > buf->ptr && out[-1] == separator)
723 			while (segment_len > 0 && *segment == separator) {
724 				segment++;
725 				segment_len--;
726 			}
727 
728 		/* copy over next buffer */
729 		if (segment_len > 0) {
730 			memmove(out, segment, segment_len);
731 			out += segment_len;
732 		}
733 
734 		/* append trailing separator (except for last item) */
735 		if (i < nbuf - 1 && out > buf->ptr && out[-1] != separator)
736 			*out++ = separator;
737 	}
738 	va_end(ap);
739 
740 	/* set size based on num characters actually written */
741 	buf->size = out - buf->ptr;
742 	buf->ptr[buf->size] = '\0';
743 
744 	return 0;
745 }
746 
git_buf_join(git_buf * buf,char separator,const char * str_a,const char * str_b)747 int git_buf_join(
748 	git_buf *buf,
749 	char separator,
750 	const char *str_a,
751 	const char *str_b)
752 {
753 	size_t strlen_a = str_a ? strlen(str_a) : 0;
754 	size_t strlen_b = strlen(str_b);
755 	size_t alloc_len;
756 	int need_sep = 0;
757 	ssize_t offset_a = -1;
758 
759 	/* not safe to have str_b point internally to the buffer */
760 	if (buf->size)
761 		GIT_ASSERT_ARG(str_b < buf->ptr || str_b >= buf->ptr + buf->size);
762 
763 	/* figure out if we need to insert a separator */
764 	if (separator && strlen_a) {
765 		while (*str_b == separator) { str_b++; strlen_b--; }
766 		if (str_a[strlen_a - 1] != separator)
767 			need_sep = 1;
768 	}
769 
770 	/* str_a could be part of the buffer */
771 	if (buf->size && str_a >= buf->ptr && str_a < buf->ptr + buf->size)
772 		offset_a = str_a - buf->ptr;
773 
774 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, strlen_a, strlen_b);
775 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, need_sep);
776 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1);
777 	ENSURE_SIZE(buf, alloc_len);
778 
779 	/* fix up internal pointers */
780 	if (offset_a >= 0)
781 		str_a = buf->ptr + offset_a;
782 
783 	/* do the actual copying */
784 	if (offset_a != 0 && str_a)
785 		memmove(buf->ptr, str_a, strlen_a);
786 	if (need_sep)
787 		buf->ptr[strlen_a] = separator;
788 	memcpy(buf->ptr + strlen_a + need_sep, str_b, strlen_b);
789 
790 	buf->size = strlen_a + strlen_b + need_sep;
791 	buf->ptr[buf->size] = '\0';
792 
793 	return 0;
794 }
795 
git_buf_join3(git_buf * buf,char separator,const char * str_a,const char * str_b,const char * str_c)796 int git_buf_join3(
797 	git_buf *buf,
798 	char separator,
799 	const char *str_a,
800 	const char *str_b,
801 	const char *str_c)
802 {
803 	size_t len_a = strlen(str_a),
804 		len_b = strlen(str_b),
805 		len_c = strlen(str_c),
806 		len_total;
807 	int sep_a = 0, sep_b = 0;
808 	char *tgt;
809 
810 	/* for this function, disallow pointers into the existing buffer */
811 	GIT_ASSERT(str_a < buf->ptr || str_a >= buf->ptr + buf->size);
812 	GIT_ASSERT(str_b < buf->ptr || str_b >= buf->ptr + buf->size);
813 	GIT_ASSERT(str_c < buf->ptr || str_c >= buf->ptr + buf->size);
814 
815 	if (separator) {
816 		if (len_a > 0) {
817 			while (*str_b == separator) { str_b++; len_b--; }
818 			sep_a = (str_a[len_a - 1] != separator);
819 		}
820 		if (len_a > 0 || len_b > 0)
821 			while (*str_c == separator) { str_c++; len_c--; }
822 		if (len_b > 0)
823 			sep_b = (str_b[len_b - 1] != separator);
824 	}
825 
826 	GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_a, sep_a);
827 	GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, len_b);
828 	GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, sep_b);
829 	GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, len_c);
830 	GIT_ERROR_CHECK_ALLOC_ADD(&len_total, len_total, 1);
831 	ENSURE_SIZE(buf, len_total);
832 
833 	tgt = buf->ptr;
834 
835 	if (len_a) {
836 		memcpy(tgt, str_a, len_a);
837 		tgt += len_a;
838 	}
839 	if (sep_a)
840 		*tgt++ = separator;
841 	if (len_b) {
842 		memcpy(tgt, str_b, len_b);
843 		tgt += len_b;
844 	}
845 	if (sep_b)
846 		*tgt++ = separator;
847 	if (len_c)
848 		memcpy(tgt, str_c, len_c);
849 
850 	buf->size = len_a + sep_a + len_b + sep_b + len_c;
851 	buf->ptr[buf->size] = '\0';
852 
853 	return 0;
854 }
855 
git_buf_rtrim(git_buf * buf)856 void git_buf_rtrim(git_buf *buf)
857 {
858 	while (buf->size > 0) {
859 		if (!git__isspace(buf->ptr[buf->size - 1]))
860 			break;
861 
862 		buf->size--;
863 	}
864 
865 	if (buf->asize > buf->size)
866 		buf->ptr[buf->size] = '\0';
867 }
868 
git_buf_cmp(const git_buf * a,const git_buf * b)869 int git_buf_cmp(const git_buf *a, const git_buf *b)
870 {
871 	int result = memcmp(a->ptr, b->ptr, min(a->size, b->size));
872 	return (result != 0) ? result :
873 		(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
874 }
875 
git_buf_splice(git_buf * buf,size_t where,size_t nb_to_remove,const char * data,size_t nb_to_insert)876 int git_buf_splice(
877 	git_buf *buf,
878 	size_t where,
879 	size_t nb_to_remove,
880 	const char *data,
881 	size_t nb_to_insert)
882 {
883 	char *splice_loc;
884 	size_t new_size, alloc_size;
885 
886 	GIT_ASSERT(buf);
887 	GIT_ASSERT(where <= buf->size);
888 	GIT_ASSERT(nb_to_remove <= buf->size - where);
889 
890 	splice_loc = buf->ptr + where;
891 
892 	/* Ported from git.git
893 	 * https://github.com/git/git/blob/16eed7c/strbuf.c#L159-176
894 	 */
895 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, (buf->size - nb_to_remove), nb_to_insert);
896 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_size, new_size, 1);
897 	ENSURE_SIZE(buf, alloc_size);
898 
899 	memmove(splice_loc + nb_to_insert,
900 		splice_loc + nb_to_remove,
901 		buf->size - where - nb_to_remove);
902 
903 	memcpy(splice_loc, data, nb_to_insert);
904 
905 	buf->size = new_size;
906 	buf->ptr[buf->size] = '\0';
907 	return 0;
908 }
909 
910 /* Quote per http://marc.info/?l=git&m=112927316408690&w=2 */
git_buf_quote(git_buf * buf)911 int git_buf_quote(git_buf *buf)
912 {
913 	const char whitespace[] = { 'a', 'b', 't', 'n', 'v', 'f', 'r' };
914 	git_buf quoted = GIT_BUF_INIT;
915 	size_t i = 0;
916 	bool quote = false;
917 	int error = 0;
918 
919 	/* walk to the first char that needs quoting */
920 	if (buf->size && buf->ptr[0] == '!')
921 		quote = true;
922 
923 	for (i = 0; !quote && i < buf->size; i++) {
924 		if (buf->ptr[i] == '"' || buf->ptr[i] == '\\' ||
925 			buf->ptr[i] < ' ' || buf->ptr[i] > '~') {
926 			quote = true;
927 			break;
928 		}
929 	}
930 
931 	if (!quote)
932 		goto done;
933 
934 	git_buf_putc(&quoted, '"');
935 	git_buf_put(&quoted, buf->ptr, i);
936 
937 	for (; i < buf->size; i++) {
938 		/* whitespace - use the map above, which is ordered by ascii value */
939 		if (buf->ptr[i] >= '\a' && buf->ptr[i] <= '\r') {
940 			git_buf_putc(&quoted, '\\');
941 			git_buf_putc(&quoted, whitespace[buf->ptr[i] - '\a']);
942 		}
943 
944 		/* double quote and backslash must be escaped */
945 		else if (buf->ptr[i] == '"' || buf->ptr[i] == '\\') {
946 			git_buf_putc(&quoted, '\\');
947 			git_buf_putc(&quoted, buf->ptr[i]);
948 		}
949 
950 		/* escape anything unprintable as octal */
951 		else if (buf->ptr[i] != ' ' &&
952 				(buf->ptr[i] < '!' || buf->ptr[i] > '~')) {
953 			git_buf_printf(&quoted, "\\%03o", (unsigned char)buf->ptr[i]);
954 		}
955 
956 		/* yay, printable! */
957 		else {
958 			git_buf_putc(&quoted, buf->ptr[i]);
959 		}
960 	}
961 
962 	git_buf_putc(&quoted, '"');
963 
964 	if (git_buf_oom(&quoted)) {
965 		error = -1;
966 		goto done;
967 	}
968 
969 	git_buf_swap(&quoted, buf);
970 
971 done:
972 	git_buf_dispose(&quoted);
973 	return error;
974 }
975 
976 /* Unquote per http://marc.info/?l=git&m=112927316408690&w=2 */
git_buf_unquote(git_buf * buf)977 int git_buf_unquote(git_buf *buf)
978 {
979 	size_t i, j;
980 	char ch;
981 
982 	git_buf_rtrim(buf);
983 
984 	if (buf->size < 2 || buf->ptr[0] != '"' || buf->ptr[buf->size-1] != '"')
985 		goto invalid;
986 
987 	for (i = 0, j = 1; j < buf->size-1; i++, j++) {
988 		ch = buf->ptr[j];
989 
990 		if (ch == '\\') {
991 			if (j == buf->size-2)
992 				goto invalid;
993 
994 			ch = buf->ptr[++j];
995 
996 			switch (ch) {
997 			/* \" or \\ simply copy the char in */
998 			case '"': case '\\':
999 				break;
1000 
1001 			/* add the appropriate escaped char */
1002 			case 'a': ch = '\a'; break;
1003 			case 'b': ch = '\b'; break;
1004 			case 'f': ch = '\f'; break;
1005 			case 'n': ch = '\n'; break;
1006 			case 'r': ch = '\r'; break;
1007 			case 't': ch = '\t'; break;
1008 			case 'v': ch = '\v'; break;
1009 
1010 			/* \xyz digits convert to the char*/
1011 			case '0': case '1': case '2': case '3':
1012 				if (j == buf->size-3) {
1013 					git_error_set(GIT_ERROR_INVALID,
1014 						"truncated quoted character \\%c", ch);
1015 					return -1;
1016 				}
1017 
1018 				if (buf->ptr[j+1] < '0' || buf->ptr[j+1] > '7' ||
1019 					buf->ptr[j+2] < '0' || buf->ptr[j+2] > '7') {
1020 					git_error_set(GIT_ERROR_INVALID,
1021 						"truncated quoted character \\%c%c%c",
1022 						buf->ptr[j], buf->ptr[j+1], buf->ptr[j+2]);
1023 					return -1;
1024 				}
1025 
1026 				ch = ((buf->ptr[j] - '0') << 6) |
1027 					((buf->ptr[j+1] - '0') << 3) |
1028 					(buf->ptr[j+2] - '0');
1029 				j += 2;
1030 				break;
1031 
1032 			default:
1033 				git_error_set(GIT_ERROR_INVALID, "invalid quoted character \\%c", ch);
1034 				return -1;
1035 			}
1036 		}
1037 
1038 		buf->ptr[i] = ch;
1039 	}
1040 
1041 	buf->ptr[i] = '\0';
1042 	buf->size = i;
1043 
1044 	return 0;
1045 
1046 invalid:
1047 	git_error_set(GIT_ERROR_INVALID, "invalid quoted line");
1048 	return -1;
1049 }
1050 
git_buf_puts_escaped(git_buf * buf,const char * string,const char * esc_chars,const char * esc_with)1051 int git_buf_puts_escaped(
1052 	git_buf *buf,
1053 	const char *string,
1054 	const char *esc_chars,
1055 	const char *esc_with)
1056 {
1057 	const char *scan;
1058 	size_t total = 0, esc_len = strlen(esc_with), count, alloclen;
1059 
1060 	if (!string)
1061 		return 0;
1062 
1063 	for (scan = string; *scan; ) {
1064 		/* count run of non-escaped characters */
1065 		count = strcspn(scan, esc_chars);
1066 		total += count;
1067 		scan += count;
1068 		/* count run of escaped characters */
1069 		count = strspn(scan, esc_chars);
1070 		total += count * (esc_len + 1);
1071 		scan += count;
1072 	}
1073 
1074 	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, total, 1);
1075 	if (git_buf_grow_by(buf, alloclen) < 0)
1076 		return -1;
1077 
1078 	for (scan = string; *scan; ) {
1079 		count = strcspn(scan, esc_chars);
1080 
1081 		memmove(buf->ptr + buf->size, scan, count);
1082 		scan += count;
1083 		buf->size += count;
1084 
1085 		for (count = strspn(scan, esc_chars); count > 0; --count) {
1086 			/* copy escape sequence */
1087 			memmove(buf->ptr + buf->size, esc_with, esc_len);
1088 			buf->size += esc_len;
1089 			/* copy character to be escaped */
1090 			buf->ptr[buf->size] = *scan;
1091 			buf->size++;
1092 			scan++;
1093 		}
1094 	}
1095 
1096 	buf->ptr[buf->size] = '\0';
1097 
1098 	return 0;
1099 }
1100 
git_buf_unescape(git_buf * buf)1101 void git_buf_unescape(git_buf *buf)
1102 {
1103 	buf->size = git__unescape(buf->ptr);
1104 }
1105 
git_buf_crlf_to_lf(git_buf * tgt,const git_buf * src)1106 int git_buf_crlf_to_lf(git_buf *tgt, const git_buf *src)
1107 {
1108 	const char *scan = src->ptr;
1109 	const char *scan_end = src->ptr + src->size;
1110 	const char *next = memchr(scan, '\r', src->size);
1111 	size_t new_size;
1112 	char *out;
1113 
1114 	GIT_ASSERT(tgt != src);
1115 
1116 	if (!next)
1117 		return git_buf_set(tgt, src->ptr, src->size);
1118 
1119 	/* reduce reallocs while in the loop */
1120 	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1);
1121 	if (git_buf_grow(tgt, new_size) < 0)
1122 		return -1;
1123 
1124 	out = tgt->ptr;
1125 	tgt->size = 0;
1126 
1127 	/* Find the next \r and copy whole chunk up to there to tgt */
1128 	for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) {
1129 		if (next > scan) {
1130 			size_t copylen = (size_t)(next - scan);
1131 			memcpy(out, scan, copylen);
1132 			out += copylen;
1133 		}
1134 
1135 		/* Do not drop \r unless it is followed by \n */
1136 		if (next + 1 == scan_end || next[1] != '\n')
1137 			*out++ = '\r';
1138 	}
1139 
1140 	/* Copy remaining input into dest */
1141 	if (scan < scan_end) {
1142 		size_t remaining = (size_t)(scan_end - scan);
1143 		memcpy(out, scan, remaining);
1144 		out += remaining;
1145 	}
1146 
1147 	tgt->size = (size_t)(out - tgt->ptr);
1148 	tgt->ptr[tgt->size] = '\0';
1149 
1150 	return 0;
1151 }
1152 
git_buf_lf_to_crlf(git_buf * tgt,const git_buf * src)1153 int git_buf_lf_to_crlf(git_buf *tgt, const git_buf *src)
1154 {
1155 	const char *start = src->ptr;
1156 	const char *end = start + src->size;
1157 	const char *scan = start;
1158 	const char *next = memchr(scan, '\n', src->size);
1159 	size_t alloclen;
1160 
1161 	GIT_ASSERT(tgt != src);
1162 
1163 	if (!next)
1164 		return git_buf_set(tgt, src->ptr, src->size);
1165 
1166 	/* attempt to reduce reallocs while in the loop */
1167 	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
1168 	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
1169 	if (git_buf_grow(tgt, alloclen) < 0)
1170 		return -1;
1171 	tgt->size = 0;
1172 
1173 	for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) {
1174 		size_t copylen = next - scan;
1175 
1176 		/* if we find mixed line endings, carry on */
1177 		if (copylen && next[-1] == '\r')
1178 			copylen--;
1179 
1180 		GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, copylen, 3);
1181 		if (git_buf_grow_by(tgt, alloclen) < 0)
1182 			return -1;
1183 
1184 		if (copylen) {
1185 			memcpy(tgt->ptr + tgt->size, scan, copylen);
1186 			tgt->size += copylen;
1187 		}
1188 
1189 		tgt->ptr[tgt->size++] = '\r';
1190 		tgt->ptr[tgt->size++] = '\n';
1191 	}
1192 
1193 	tgt->ptr[tgt->size] = '\0';
1194 	return git_buf_put(tgt, scan, end - scan);
1195 }
1196 
git_buf_common_prefix(git_buf * buf,const git_strarray * strings)1197 int git_buf_common_prefix(git_buf *buf, const git_strarray *strings)
1198 {
1199 	size_t i;
1200 	const char *str, *pfx;
1201 
1202 	git_buf_clear(buf);
1203 
1204 	if (!strings || !strings->count)
1205 		return 0;
1206 
1207 	/* initialize common prefix to first string */
1208 	if (git_buf_sets(buf, strings->strings[0]) < 0)
1209 		return -1;
1210 
1211 	/* go through the rest of the strings, truncating to shared prefix */
1212 	for (i = 1; i < strings->count; ++i) {
1213 
1214 		for (str = strings->strings[i], pfx = buf->ptr;
1215 			 *str && *str == *pfx; str++, pfx++)
1216 			/* scanning */;
1217 
1218 		git_buf_truncate(buf, pfx - buf->ptr);
1219 
1220 		if (!buf->size)
1221 			break;
1222 	}
1223 
1224 	return 0;
1225 }
1226 
git_buf_is_binary(const git_buf * buf)1227 int git_buf_is_binary(const git_buf *buf)
1228 {
1229 	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
1230 	git_buf_bom_t bom;
1231 	int printable = 0, nonprintable = 0;
1232 
1233 	scan += git_buf_detect_bom(&bom, buf);
1234 
1235 	if (bom > GIT_BUF_BOM_UTF8)
1236 		return 1;
1237 
1238 	while (scan < end) {
1239 		unsigned char c = *scan++;
1240 
1241 		/* Printable characters are those above SPACE (0x1F) excluding DEL,
1242 		 * and including BS, ESC and FF.
1243 		 */
1244 		if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
1245 			printable++;
1246 		else if (c == '\0')
1247 			return true;
1248 		else if (!git__isspace(c))
1249 			nonprintable++;
1250 	}
1251 
1252 	return ((printable >> 7) < nonprintable);
1253 }
1254 
git_buf_contains_nul(const git_buf * buf)1255 int git_buf_contains_nul(const git_buf *buf)
1256 {
1257 	return (memchr(buf->ptr, '\0', buf->size) != NULL);
1258 }
1259 
git_buf_detect_bom(git_buf_bom_t * bom,const git_buf * buf)1260 int git_buf_detect_bom(git_buf_bom_t *bom, const git_buf *buf)
1261 {
1262 	const char *ptr;
1263 	size_t len;
1264 
1265 	*bom = GIT_BUF_BOM_NONE;
1266 	/* need at least 2 bytes to look for any BOM */
1267 	if (buf->size < 2)
1268 		return 0;
1269 
1270 	ptr = buf->ptr;
1271 	len = buf->size;
1272 
1273 	switch (*ptr++) {
1274 	case 0:
1275 		if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
1276 			*bom = GIT_BUF_BOM_UTF32_BE;
1277 			return 4;
1278 		}
1279 		break;
1280 	case '\xEF':
1281 		if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
1282 			*bom = GIT_BUF_BOM_UTF8;
1283 			return 3;
1284 		}
1285 		break;
1286 	case '\xFE':
1287 		if (*ptr == '\xFF') {
1288 			*bom = GIT_BUF_BOM_UTF16_BE;
1289 			return 2;
1290 		}
1291 		break;
1292 	case '\xFF':
1293 		if (*ptr != '\xFE')
1294 			break;
1295 		if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
1296 			*bom = GIT_BUF_BOM_UTF32_LE;
1297 			return 4;
1298 		} else {
1299 			*bom = GIT_BUF_BOM_UTF16_LE;
1300 			return 2;
1301 		}
1302 		break;
1303 	default:
1304 		break;
1305 	}
1306 
1307 	return 0;
1308 }
1309 
git_buf_gather_text_stats(git_buf_text_stats * stats,const git_buf * buf,bool skip_bom)1310 bool git_buf_gather_text_stats(
1311 	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
1312 {
1313 	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
1314 	int skip;
1315 
1316 	memset(stats, 0, sizeof(*stats));
1317 
1318 	/* BOM detection */
1319 	skip = git_buf_detect_bom(&stats->bom, buf);
1320 	if (skip_bom)
1321 		scan += skip;
1322 
1323 	/* Ignore EOF character */
1324 	if (buf->size > 0 && end[-1] == '\032')
1325 		end--;
1326 
1327 	/* Counting loop */
1328 	while (scan < end) {
1329 		unsigned char c = *scan++;
1330 
1331 		if (c > 0x1F && c != 0x7F)
1332 			stats->printable++;
1333 		else switch (c) {
1334 			case '\0':
1335 				stats->nul++;
1336 				stats->nonprintable++;
1337 				break;
1338 			case '\n':
1339 				stats->lf++;
1340 				break;
1341 			case '\r':
1342 				stats->cr++;
1343 				if (scan < end && *scan == '\n')
1344 					stats->crlf++;
1345 				break;
1346 			case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/
1347 				stats->printable++;
1348 				break;
1349 			default:
1350 				stats->nonprintable++;
1351 				break;
1352 			}
1353 	}
1354 
1355 	/* Treat files with a bare CR as binary */
1356 	return (stats->cr != stats->crlf || stats->nul > 0 ||
1357 		((stats->printable >> 7) < stats->nonprintable));
1358 }
1359