/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
7 
8 #include "common.h"
9 
10 #include "git2/attr.h"
11 #include "git2/blob.h"
12 #include "git2/index.h"
13 #include "git2/sys/filter.h"
14 
15 #include "futils.h"
16 #include "hash.h"
17 #include "filter.h"
18 #include "repository.h"
19 
/*
 * The line-ending treatment chosen for a path.
 *
 * `check_crlf` produces UNDEFINED, BINARY, TEXT, TEXT_INPUT or AUTO from
 * an attribute value; `convert_attrs` then refines the result into one of
 * the other values using the `eol` attribute and the repository
 * configuration.
 */
typedef enum {
	GIT_CRLF_UNDEFINED = 0, /* nothing decided (also the zeroed default) */
	GIT_CRLF_BINARY,        /* never convert; the filter passes through */
	GIT_CRLF_TEXT,          /* text; eol still to be resolved from config */
	GIT_CRLF_TEXT_INPUT,    /* text; LF line endings on output */
	GIT_CRLF_TEXT_CRLF,     /* text; CRLF line endings on output */
	GIT_CRLF_AUTO,          /* content-detected; eol resolved from config */
	GIT_CRLF_AUTO_INPUT,    /* content-detected; LF line endings on output */
	GIT_CRLF_AUTO_CRLF,     /* content-detected; CRLF line endings on output */
} git_crlf_t;
30 
/*
 * Per-file filter state: the resolved conversion action together with the
 * configuration values it was derived from.  Filled in by `convert_attrs`
 * and carried between filter callbacks as the filter payload.
 */
struct crlf_attrs {
	/* action derived from the attribute values alone (a git_crlf_t) */
	int attr_action;
	/* effective action after configuration defaults (a git_crlf_t) */
	int crlf_action;

	/* configuration values looked up from the repository's configmap */
	int auto_crlf; /* GIT_CONFIGMAP_AUTO_CRLF */
	int safe_crlf; /* GIT_CONFIGMAP_SAFE_CRLF */
	int core_eol;  /* GIT_CONFIGMAP_EOL */
};
39 
40 struct crlf_filter {
41 	git_filter f;
42 };
43 
check_crlf(const char * value)44 static git_crlf_t check_crlf(const char *value)
45 {
46 	if (GIT_ATTR_IS_TRUE(value))
47 		return GIT_CRLF_TEXT;
48 	else if (GIT_ATTR_IS_FALSE(value))
49 		return GIT_CRLF_BINARY;
50 	else if (GIT_ATTR_IS_UNSPECIFIED(value))
51 		;
52 	else if (strcmp(value, "input") == 0)
53 		return GIT_CRLF_TEXT_INPUT;
54 	else if (strcmp(value, "auto") == 0)
55 		return GIT_CRLF_AUTO;
56 
57 	return GIT_CRLF_UNDEFINED;
58 }
59 
check_eol(const char * value)60 static git_configmap_value check_eol(const char *value)
61 {
62 	if (GIT_ATTR_IS_UNSPECIFIED(value))
63 		;
64 	else if (strcmp(value, "lf") == 0)
65 		return GIT_EOL_LF;
66 	else if (strcmp(value, "crlf") == 0)
67 		return GIT_EOL_CRLF;
68 
69 	return GIT_EOL_UNSET;
70 }
71 
has_cr_in_index(const git_filter_source * src)72 static int has_cr_in_index(const git_filter_source *src)
73 {
74 	git_repository *repo = git_filter_source_repo(src);
75 	const char *path = git_filter_source_path(src);
76 	git_index *index;
77 	const git_index_entry *entry;
78 	git_blob *blob;
79 	const void *blobcontent;
80 	git_object_size_t blobsize;
81 	bool found_cr;
82 
83 	if (!path)
84 		return false;
85 
86 	if (git_repository_index__weakptr(&index, repo) < 0) {
87 		git_error_clear();
88 		return false;
89 	}
90 
91 	if (!(entry = git_index_get_bypath(index, path, 0)) &&
92 		!(entry = git_index_get_bypath(index, path, 1)))
93 		return false;
94 
95 	if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
96 		return true;
97 
98 	if (git_blob_lookup(&blob, repo, &entry->id) < 0)
99 		return false;
100 
101 	blobcontent = git_blob_rawcontent(blob);
102 	blobsize    = git_blob_rawsize(blob);
103 	if (!git__is_sizet(blobsize))
104 		blobsize = (size_t)-1;
105 
106 	found_cr = (blobcontent != NULL &&
107 		blobsize > 0 &&
108 		memchr(blobcontent, '\r', (size_t)blobsize) != NULL);
109 
110 	git_blob_free(blob);
111 	return found_cr;
112 }
113 
text_eol_is_crlf(struct crlf_attrs * ca)114 static int text_eol_is_crlf(struct crlf_attrs *ca)
115 {
116 	if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
117 		return 1;
118 	else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
119 		return 0;
120 
121 	if (ca->core_eol == GIT_EOL_CRLF)
122 		return 1;
123 	if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF)
124 		return 1;
125 
126 	return 0;
127 }
128 
output_eol(struct crlf_attrs * ca)129 static git_configmap_value output_eol(struct crlf_attrs *ca)
130 {
131 	switch (ca->crlf_action) {
132 	case GIT_CRLF_BINARY:
133 		return GIT_EOL_UNSET;
134 	case GIT_CRLF_TEXT_CRLF:
135 		return GIT_EOL_CRLF;
136 	case GIT_CRLF_TEXT_INPUT:
137 		return GIT_EOL_LF;
138 	case GIT_CRLF_UNDEFINED:
139 	case GIT_CRLF_AUTO_CRLF:
140 		return GIT_EOL_CRLF;
141 	case GIT_CRLF_AUTO_INPUT:
142 		return GIT_EOL_LF;
143 	case GIT_CRLF_TEXT:
144 	case GIT_CRLF_AUTO:
145 		return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF;
146 	}
147 
148 	/* TODO: warn when available */
149 	return ca->core_eol;
150 }
151 
check_safecrlf(struct crlf_attrs * ca,const git_filter_source * src,git_buf_text_stats * stats)152 GIT_INLINE(int) check_safecrlf(
153 	struct crlf_attrs *ca,
154 	const git_filter_source *src,
155 	git_buf_text_stats *stats)
156 {
157 	const char *filename = git_filter_source_path(src);
158 
159 	if (!ca->safe_crlf)
160 		return 0;
161 
162 	if (output_eol(ca) == GIT_EOL_LF) {
163 		/*
164 		 * CRLFs would not be restored by checkout:
165 		 * check if we'd remove CRLFs
166 		 */
167 		if (stats->crlf) {
168 			if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
169 				/* TODO: issue a warning when available */
170 			} else {
171 				if (filename && *filename)
172 					git_error_set(
173 						GIT_ERROR_FILTER, "CRLF would be replaced by LF in '%s'",
174 						filename);
175 				else
176 					git_error_set(
177 						GIT_ERROR_FILTER, "CRLF would be replaced by LF");
178 
179 				return -1;
180 			}
181 		}
182 	} else if (output_eol(ca) == GIT_EOL_CRLF) {
183 		/*
184 		 * CRLFs would be added by checkout:
185 		 * check if we have "naked" LFs
186 		 */
187 		if (stats->crlf != stats->lf) {
188 			if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
189 				/* TODO: issue a warning when available */
190 			} else {
191 				if (filename && *filename)
192 					git_error_set(
193 						GIT_ERROR_FILTER, "LF would be replaced by CRLF in '%s'",
194 						filename);
195 				else
196 					git_error_set(
197 						GIT_ERROR_FILTER, "LF would be replaced by CRLF");
198 
199 				return -1;
200 			}
201 		}
202 	}
203 
204 	return 0;
205 }
206 
/*
 * Convert working-directory content for storage in the object database
 * by turning CRLF into LF.
 *
 * Returns GIT_PASSTHROUGH when nothing needs to change, a negative error
 * code when the `safe_crlf` check rejects the conversion, and otherwise
 * the result of `git_buf_crlf_to_lf`.
 */
static int crlf_apply_to_odb(
	struct crlf_attrs *ca,
	git_buf *to,
	const git_buf *from,
	const git_filter_source *src)
{
	git_buf_text_stats stats;
	bool is_binary, is_auto;
	int error;

	/* a binary attribute or an empty file leaves nothing to do */
	if (ca->crlf_action == GIT_CRLF_BINARY || git_buf_len(from) == 0)
		return GIT_PASSTHROUGH;

	is_binary = git_buf_gather_text_stats(&stats, from, false);

	is_auto = (ca->crlf_action == GIT_CRLF_AUTO ||
		ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
		ca->crlf_action == GIT_CRLF_AUTO_CRLF);

	/*
	 * In the auto modes, leave binary content alone, and leave files
	 * alone whose indexed version already contains a CR.  The index is
	 * only consulted when the content is not binary.
	 */
	if (is_auto && (is_binary || has_cr_in_index(src)))
		return GIT_PASSTHROUGH;

	error = check_safecrlf(ca, src, &stats);
	if (error < 0)
		return error;

	/* without any CRLF pairs there is nothing to filter out */
	if (stats.crlf == 0)
		return GIT_PASSTHROUGH;

	/* actually drop the carriage returns */
	return git_buf_crlf_to_lf(to, from);
}
251 
/*
 * Convert object database content for the working directory by turning
 * LF into CRLF, when the resolved output eol asks for CRLF.
 *
 * Returns GIT_PASSTHROUGH when nothing needs to change, otherwise the
 * result of `git_buf_lf_to_crlf`.
 */
static int crlf_apply_to_workdir(
	struct crlf_attrs *ca,
	git_buf *to,
	const git_buf *from)
{
	git_buf_text_stats stats;
	bool is_binary, is_auto;

	/* an empty file has nothing to convert */
	if (git_buf_len(from) == 0)
		return GIT_PASSTHROUGH;

	/* only a CRLF output style requires any work */
	if (output_eol(ca) != GIT_EOL_CRLF)
		return GIT_PASSTHROUGH;

	is_binary = git_buf_gather_text_stats(&stats, from, false);

	/* no LFs at all, or every LF already belongs to a CRLF */
	if (stats.lf == 0 || stats.lf == stats.crlf)
		return GIT_PASSTHROUGH;

	is_auto = (ca->crlf_action == GIT_CRLF_AUTO ||
		ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
		ca->crlf_action == GIT_CRLF_AUTO_CRLF);

	/*
	 * In the auto modes, do nothing for content that already has CR or
	 * CRLF line endings, and do not filter binary files.
	 */
	if (is_auto && (stats.cr > 0 || is_binary))
		return GIT_PASSTHROUGH;

	return git_buf_lf_to_crlf(to, from);
}
285 
convert_attrs(struct crlf_attrs * ca,const char ** attr_values,const git_filter_source * src)286 static int convert_attrs(
287 	struct crlf_attrs *ca,
288 	const char **attr_values,
289 	const git_filter_source *src)
290 {
291 	int error;
292 
293 	memset(ca, 0, sizeof(struct crlf_attrs));
294 
295 	if ((error = git_repository__configmap_lookup(&ca->auto_crlf,
296 		 git_filter_source_repo(src), GIT_CONFIGMAP_AUTO_CRLF)) < 0 ||
297 		(error = git_repository__configmap_lookup(&ca->safe_crlf,
298 		 git_filter_source_repo(src), GIT_CONFIGMAP_SAFE_CRLF)) < 0 ||
299 		(error = git_repository__configmap_lookup(&ca->core_eol,
300 		 git_filter_source_repo(src), GIT_CONFIGMAP_EOL)) < 0)
301 		return error;
302 
303 	/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
304 	if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
305 		ca->safe_crlf == GIT_SAFE_CRLF_FAIL)
306 		ca->safe_crlf = GIT_SAFE_CRLF_WARN;
307 
308 	if (attr_values) {
309 		/* load the text attribute */
310 		ca->crlf_action = check_crlf(attr_values[2]); /* text */
311 
312 		if (ca->crlf_action == GIT_CRLF_UNDEFINED)
313 			ca->crlf_action = check_crlf(attr_values[0]); /* crlf */
314 
315 		if (ca->crlf_action != GIT_CRLF_BINARY) {
316 			/* load the eol attribute */
317 			int eol_attr = check_eol(attr_values[1]);
318 
319 			if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF)
320 				ca->crlf_action = GIT_CRLF_AUTO_INPUT;
321 			else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF)
322 				ca->crlf_action = GIT_CRLF_AUTO_CRLF;
323 			else if (eol_attr == GIT_EOL_LF)
324 				ca->crlf_action = GIT_CRLF_TEXT_INPUT;
325 			else if (eol_attr == GIT_EOL_CRLF)
326 				ca->crlf_action = GIT_CRLF_TEXT_CRLF;
327 		}
328 
329 		ca->attr_action = ca->crlf_action;
330 	} else {
331 		ca->crlf_action = GIT_CRLF_UNDEFINED;
332 	}
333 
334 	if (ca->crlf_action == GIT_CRLF_TEXT)
335 		ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT;
336 	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
337 		ca->crlf_action = GIT_CRLF_BINARY;
338 	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
339 		ca->crlf_action = GIT_CRLF_AUTO_CRLF;
340 	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
341 		ca->crlf_action = GIT_CRLF_AUTO_INPUT;
342 
343 	return 0;
344 }
345 
/*
 * Filter `check` callback: decide whether the CRLF filter applies to a
 * file and, if it does, store the resolved settings as the payload.
 *
 * `payload` points to a NULL pointer on entry; on success it is set to a
 * heap-allocated copy of the resolved `struct crlf_attrs`, which
 * `crlf_cleanup` releases.  `attr_values` may be NULL.
 *
 * Returns 0 when the filter should run, GIT_PASSTHROUGH when the file is
 * treated as binary, or a negative error code when the attributes cannot
 * be resolved or the payload cannot be allocated.
 */
static int crlf_check(
	git_filter *self,
	void **payload, /* points to NULL ptr on entry, may be set */
	const git_filter_source *src,
	const char **attr_values)
{
	struct crlf_attrs ca;
	int error;

	GIT_UNUSED(self);

	/*
	 * Do not continue with half-resolved settings: after a failed
	 * configuration lookup the action is still GIT_CRLF_UNDEFINED, which
	 * `output_eol` maps to CRLF and which is not an auto action, so even
	 * binary content would be converted on checkout.
	 */
	if ((error = convert_attrs(&ca, attr_values, src)) < 0)
		return error;

	if (ca.crlf_action == GIT_CRLF_BINARY)
		return GIT_PASSTHROUGH;

	*payload = git__malloc(sizeof(ca));
	GIT_ERROR_CHECK_ALLOC(*payload);
	memcpy(*payload, &ca, sizeof(ca));

	return 0;
}
367 
/*
 * Apply the CRLF filter to a whole buffer, in the direction given by the
 * filter source's mode: smudge converts for the working directory, any
 * other mode converts for the object database.
 *
 * Returns the result of the direction-specific conversion, or the
 * negative result of `crlf_check` (GIT_PASSTHROUGH included) when the
 * payload had to be set up here and that did not succeed.
 */
static int crlf_apply(
	git_filter *self,
	void **payload, /* may be read and/or set */
	git_buf *to,
	const git_buf *from,
	const git_filter_source *src)
{
	struct crlf_attrs *ca;

	/* initialize payload in case `check` was bypassed */
	if (*payload == NULL) {
		int rc = crlf_check(self, payload, src, NULL);

		if (rc < 0)
			return rc;
	}

	ca = *payload;

	if (git_filter_source_mode(src) != GIT_FILTER_SMUDGE)
		return crlf_apply_to_odb(ca, to, from, src);

	return crlf_apply_to_workdir(ca, to, from);
}
388 
/*
 * Filter `stream` callback: create a buffered write stream that runs
 * `crlf_apply` and sends its output to `next`.  The NULL argument is
 * handed to `git_filter_buffered_stream_new` unchanged.
 *
 * Returns the result of `git_filter_buffered_stream_new`.
 */
static int crlf_stream(
	git_writestream **out,
	git_filter *self,
	void **payload,
	const git_filter_source *src,
	git_writestream *next)
{
	return git_filter_buffered_stream_new(
		out, self, crlf_apply, NULL, payload, src, next);
}
399 
/*
 * Filter `cleanup` callback: free the payload that `crlf_check`
 * allocated.
 */
static void crlf_cleanup(git_filter *self, void *payload)
{
	GIT_UNUSED(self);

	git__free(payload);
}
407 
git_crlf_filter_new(void)408 git_filter *git_crlf_filter_new(void)
409 {
410 	struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
411 	if (f == NULL)
412 		return NULL;
413 
414 	f->f.version = GIT_FILTER_VERSION;
415 	f->f.attributes = "crlf eol text";
416 	f->f.initialize = NULL;
417 	f->f.shutdown = git_filter_free;
418 	f->f.check    = crlf_check;
419 	f->f.stream   = crlf_stream;
420 	f->f.cleanup  = crlf_cleanup;
421 
422 	return (git_filter *)f;
423 }
424