1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "common.h"
9 
10 #include "git2/attr.h"
11 #include "git2/blob.h"
12 #include "git2/index.h"
13 #include "git2/sys/filter.h"
14 
15 #include "futils.h"
16 #include "hash.h"
17 #include "filter.h"
18 #include "buf_text.h"
19 #include "repository.h"
20 
/*
 * Line-ending conversion actions used by the CRLF filter.
 *
 * check_crlf() produces the UNDEFINED, BINARY, TEXT, TEXT_INPUT and AUTO
 * values from an attribute; convert_attrs() then refines them into the
 * _INPUT / _CRLF variants using the `eol` attribute and the auto-CRLF
 * configuration. The numeric values are spelled out because a zeroed
 * `struct crlf_attrs` must read as GIT_CRLF_UNDEFINED.
 */
typedef enum {
	GIT_CRLF_UNDEFINED  = 0, /* nothing decided yet */
	GIT_CRLF_BINARY     = 1, /* never convert line endings */
	GIT_CRLF_TEXT       = 2, /* text; output EOL still to be resolved */
	GIT_CRLF_TEXT_INPUT = 3, /* text; LF output */
	GIT_CRLF_TEXT_CRLF  = 4, /* text; CRLF output */
	GIT_CRLF_AUTO       = 5, /* convert unless content looks binary */
	GIT_CRLF_AUTO_INPUT = 6, /* auto-detect; LF output */
	GIT_CRLF_AUTO_CRLF  = 7  /* auto-detect; CRLF output */
} git_crlf_t;
31 
/*
 * Everything the filter needs to decide how to treat one file. It is
 * filled in by convert_attrs() and copied into the filter payload by
 * crlf_check().
 */
struct crlf_attrs {
	/*
	 * Action derived from the text/crlf/eol attributes alone; left at
	 * zero (GIT_CRLF_UNDEFINED) when no attribute values were supplied.
	 */
	int attr_action;

	/*
	 * Effective action: the attribute action, with the auto-CRLF
	 * configuration used as a fallback when the attributes decide nothing.
	 */
	int crlf_action;

	/* Values looked up from the repository configuration map. */
	int auto_crlf; /* GIT_CONFIGMAP_AUTO_CRLF */
	int safe_crlf; /* GIT_CONFIGMAP_SAFE_CRLF */
	int core_eol;  /* GIT_CONFIGMAP_EOL */
};
40 
41 struct crlf_filter {
42 	git_filter f;
43 };
44 
check_crlf(const char * value)45 static git_crlf_t check_crlf(const char *value)
46 {
47 	if (GIT_ATTR_IS_TRUE(value))
48 		return GIT_CRLF_TEXT;
49 	else if (GIT_ATTR_IS_FALSE(value))
50 		return GIT_CRLF_BINARY;
51 	else if (GIT_ATTR_IS_UNSPECIFIED(value))
52 		;
53 	else if (strcmp(value, "input") == 0)
54 		return GIT_CRLF_TEXT_INPUT;
55 	else if (strcmp(value, "auto") == 0)
56 		return GIT_CRLF_AUTO;
57 
58 	return GIT_CRLF_UNDEFINED;
59 }
60 
check_eol(const char * value)61 static git_configmap_value check_eol(const char *value)
62 {
63 	if (GIT_ATTR_IS_UNSPECIFIED(value))
64 		;
65 	else if (strcmp(value, "lf") == 0)
66 		return GIT_EOL_LF;
67 	else if (strcmp(value, "crlf") == 0)
68 		return GIT_EOL_CRLF;
69 
70 	return GIT_EOL_UNSET;
71 }
72 
has_cr_in_index(const git_filter_source * src)73 static int has_cr_in_index(const git_filter_source *src)
74 {
75 	git_repository *repo = git_filter_source_repo(src);
76 	const char *path = git_filter_source_path(src);
77 	git_index *index;
78 	const git_index_entry *entry;
79 	git_blob *blob;
80 	const void *blobcontent;
81 	git_object_size_t blobsize;
82 	bool found_cr;
83 
84 	if (!path)
85 		return false;
86 
87 	if (git_repository_index__weakptr(&index, repo) < 0) {
88 		git_error_clear();
89 		return false;
90 	}
91 
92 	if (!(entry = git_index_get_bypath(index, path, 0)) &&
93 		!(entry = git_index_get_bypath(index, path, 1)))
94 		return false;
95 
96 	if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
97 		return true;
98 
99 	if (git_blob_lookup(&blob, repo, &entry->id) < 0)
100 		return false;
101 
102 	blobcontent = git_blob_rawcontent(blob);
103 	blobsize    = git_blob_rawsize(blob);
104 	if (!git__is_sizet(blobsize))
105 		blobsize = (size_t)-1;
106 
107 	found_cr = (blobcontent != NULL &&
108 		blobsize > 0 &&
109 		memchr(blobcontent, '\r', (size_t)blobsize) != NULL);
110 
111 	git_blob_free(blob);
112 	return found_cr;
113 }
114 
text_eol_is_crlf(struct crlf_attrs * ca)115 static int text_eol_is_crlf(struct crlf_attrs *ca)
116 {
117 	if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
118 		return 1;
119 	else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
120 		return 0;
121 
122 	if (ca->core_eol == GIT_EOL_CRLF)
123 		return 1;
124 	if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF)
125 		return 1;
126 
127 	return 0;
128 }
129 
output_eol(struct crlf_attrs * ca)130 static git_configmap_value output_eol(struct crlf_attrs *ca)
131 {
132 	switch (ca->crlf_action) {
133 	case GIT_CRLF_BINARY:
134 		return GIT_EOL_UNSET;
135 	case GIT_CRLF_TEXT_CRLF:
136 		return GIT_EOL_CRLF;
137 	case GIT_CRLF_TEXT_INPUT:
138 		return GIT_EOL_LF;
139 	case GIT_CRLF_UNDEFINED:
140 	case GIT_CRLF_AUTO_CRLF:
141 		return GIT_EOL_CRLF;
142 	case GIT_CRLF_AUTO_INPUT:
143 		return GIT_EOL_LF;
144 	case GIT_CRLF_TEXT:
145 	case GIT_CRLF_AUTO:
146 		return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF;
147 	}
148 
149 	/* TODO: warn when available */
150 	return ca->core_eol;
151 }
152 
check_safecrlf(struct crlf_attrs * ca,const git_filter_source * src,git_buf_text_stats * stats)153 GIT_INLINE(int) check_safecrlf(
154 	struct crlf_attrs *ca,
155 	const git_filter_source *src,
156 	git_buf_text_stats *stats)
157 {
158 	const char *filename = git_filter_source_path(src);
159 
160 	if (!ca->safe_crlf)
161 		return 0;
162 
163 	if (output_eol(ca) == GIT_EOL_LF) {
164 		/*
165 		 * CRLFs would not be restored by checkout:
166 		 * check if we'd remove CRLFs
167 		 */
168 		if (stats->crlf) {
169 			if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
170 				/* TODO: issue a warning when available */
171 			} else {
172 				if (filename && *filename)
173 					git_error_set(
174 						GIT_ERROR_FILTER, "CRLF would be replaced by LF in '%s'",
175 						filename);
176 				else
177 					git_error_set(
178 						GIT_ERROR_FILTER, "CRLF would be replaced by LF");
179 
180 				return -1;
181 			}
182 		}
183 	} else if (output_eol(ca) == GIT_EOL_CRLF) {
184 		/*
185 		 * CRLFs would be added by checkout:
186 		 * check if we have "naked" LFs
187 		 */
188 		if (stats->crlf != stats->lf) {
189 			if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
190 				/* TODO: issue a warning when available */
191 			} else {
192 				if (filename && *filename)
193 					git_error_set(
194 						GIT_ERROR_FILTER, "LF would be replaced by CRLF in '%s'",
195 						filename);
196 				else
197 					git_error_set(
198 						GIT_ERROR_FILTER, "LF would be replaced by CRLF");
199 
200 				return -1;
201 			}
202 		}
203 	}
204 
205 	return 0;
206 }
207 
/*
 * Convert working-directory content for storage (CRLF -> LF).
 *
 * Returns GIT_PASSTHROUGH when nothing has to change: binary action,
 * empty input, an auto action on binary-looking content or on a file
 * whose indexed version already contains CRs, or no CRLF pairs at all.
 * Returns a negative error when the safe-CRLF check rejects the
 * conversion; otherwise returns the result of stripping the CRs into `to`.
 */
static int crlf_apply_to_odb(
	struct crlf_attrs *ca,
	git_buf *to,
	const git_buf *from,
	const git_filter_source *src)
{
	git_buf_text_stats stats;
	bool looks_binary;
	int error;

	/* Binary attribute? Nothing to do */
	if (ca->crlf_action == GIT_CRLF_BINARY)
		return GIT_PASSTHROUGH;

	/* Empty file? Nothing to do */
	if (git_buf_len(from) == 0)
		return GIT_PASSTHROUGH;

	looks_binary = git_buf_text_gather_stats(&stats, from, false);

	/* Heuristics to see if we can skip the conversion, from Core Git. */
	switch (ca->crlf_action) {
	case GIT_CRLF_AUTO:
	case GIT_CRLF_AUTO_INPUT:
	case GIT_CRLF_AUTO_CRLF:
		/*
		 * Skip binary content, and skip files whose indexed version
		 * has any CR in it (the newer, safer autocrlf handling).
		 */
		if (looks_binary || has_cr_in_index(src))
			return GIT_PASSTHROUGH;
		break;
	default:
		break;
	}

	error = check_safecrlf(ca, src, &stats);
	if (error < 0)
		return error;

	/* No CRLF pairs means there is nothing to strip */
	if (stats.crlf == 0)
		return GIT_PASSTHROUGH;

	/* Actually drop the carriage returns */
	return git_buf_text_crlf_to_lf(to, from);
}
252 
/*
 * Convert stored content for the working directory (LF -> CRLF).
 *
 * Returns GIT_PASSTHROUGH when the input is empty, when the output EOL is
 * not CRLF, when there are no bare LFs to convert, or, for the auto
 * actions, when the content already has CRs or looks binary. Otherwise
 * returns the result of writing the CRLF-converted content into `to`.
 */
static int crlf_apply_to_workdir(
	struct crlf_attrs *ca,
	git_buf *to,
	const git_buf *from)
{
	git_buf_text_stats stats;
	bool looks_binary, auto_action;

	/* Empty file, or no CRLF wanted on output? Nothing to do. */
	if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF)
		return GIT_PASSTHROUGH;

	looks_binary = git_buf_text_gather_stats(&stats, from, false);

	/* No LFs at all, or every LF already belongs to a CRLF pair */
	if (stats.lf == 0 || stats.lf == stats.crlf)
		return GIT_PASSTHROUGH;

	auto_action = (ca->crlf_action == GIT_CRLF_AUTO ||
		ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
		ca->crlf_action == GIT_CRLF_AUTO_CRLF);

	/*
	 * In auto mode, leave content alone if it already has CR or CRLF
	 * line endings, and don't filter binary files.
	 */
	if (auto_action && (stats.cr > 0 || looks_binary))
		return GIT_PASSTHROUGH;

	return git_buf_text_lf_to_crlf(to, from);
}
286 
convert_attrs(struct crlf_attrs * ca,const char ** attr_values,const git_filter_source * src)287 static int convert_attrs(
288 	struct crlf_attrs *ca,
289 	const char **attr_values,
290 	const git_filter_source *src)
291 {
292 	int error;
293 
294 	memset(ca, 0, sizeof(struct crlf_attrs));
295 
296 	if ((error = git_repository__configmap_lookup(&ca->auto_crlf,
297 		 git_filter_source_repo(src), GIT_CONFIGMAP_AUTO_CRLF)) < 0 ||
298 		(error = git_repository__configmap_lookup(&ca->safe_crlf,
299 		 git_filter_source_repo(src), GIT_CONFIGMAP_SAFE_CRLF)) < 0 ||
300 		(error = git_repository__configmap_lookup(&ca->core_eol,
301 		 git_filter_source_repo(src), GIT_CONFIGMAP_EOL)) < 0)
302 		return error;
303 
304 	/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
305 	if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
306 		ca->safe_crlf == GIT_SAFE_CRLF_FAIL)
307 		ca->safe_crlf = GIT_SAFE_CRLF_WARN;
308 
309 	if (attr_values) {
310 		/* load the text attribute */
311 		ca->crlf_action = check_crlf(attr_values[2]); /* text */
312 
313 		if (ca->crlf_action == GIT_CRLF_UNDEFINED)
314 			ca->crlf_action = check_crlf(attr_values[0]); /* crlf */
315 
316 		if (ca->crlf_action != GIT_CRLF_BINARY) {
317 			/* load the eol attribute */
318 			int eol_attr = check_eol(attr_values[1]);
319 
320 			if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF)
321 				ca->crlf_action = GIT_CRLF_AUTO_INPUT;
322 			else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF)
323 				ca->crlf_action = GIT_CRLF_AUTO_CRLF;
324 			else if (eol_attr == GIT_EOL_LF)
325 				ca->crlf_action = GIT_CRLF_TEXT_INPUT;
326 			else if (eol_attr == GIT_EOL_CRLF)
327 				ca->crlf_action = GIT_CRLF_TEXT_CRLF;
328 		}
329 
330 		ca->attr_action = ca->crlf_action;
331 	} else {
332 		ca->crlf_action = GIT_CRLF_UNDEFINED;
333 	}
334 
335 	if (ca->crlf_action == GIT_CRLF_TEXT)
336 		ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT;
337 	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
338 		ca->crlf_action = GIT_CRLF_BINARY;
339 	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
340 		ca->crlf_action = GIT_CRLF_AUTO_CRLF;
341 	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
342 		ca->crlf_action = GIT_CRLF_AUTO_INPUT;
343 
344 	return 0;
345 }
346 
/*
 * Filter `check` callback: decide whether the CRLF filter applies to the
 * source and, if so, store the resolved settings in the payload.
 *
 * `*payload` points to NULL on entry; on success it is set to a heap
 * copy of the resolved `struct crlf_attrs`, later released by
 * crlf_cleanup().
 *
 * Returns 0 when the filter should run, GIT_PASSTHROUGH when the content
 * is to be treated as binary, or a negative error when the settings
 * cannot be loaded or the payload cannot be allocated.
 */
static int crlf_check(
	git_filter *self,
	void **payload, /* points to NULL ptr on entry, may be set */
	const git_filter_source *src,
	const char **attr_values)
{
	struct crlf_attrs ca;
	int error;

	GIT_UNUSED(self);

	/*
	 * Do not continue with half-initialized settings: if a configuration
	 * lookup fails, `ca` would otherwise be used with an undefined
	 * action, which output_eol() treats as CRLF output.
	 */
	if ((error = convert_attrs(&ca, attr_values, src)) < 0)
		return error;

	if (ca.crlf_action == GIT_CRLF_BINARY)
		return GIT_PASSTHROUGH;

	*payload = git__malloc(sizeof(ca));
	GIT_ERROR_CHECK_ALLOC(*payload);
	memcpy(*payload, &ca, sizeof(ca));

	return 0;
}
368 
/*
 * Filter `apply` callback: run the conversion for the source's direction.
 *
 * If no payload exists yet (the `check` callback was bypassed), the
 * settings are resolved here without attribute values. A negative result
 * from that step, including GIT_PASSTHROUGH, is returned as is.
 *
 * Smudge mode converts towards the working directory; any other mode
 * converts towards the object database.
 */
static int crlf_apply(
	git_filter *self,
	void **payload, /* may be read and/or set */
	git_buf *to,
	const git_buf *from,
	const git_filter_source *src)
{
	int error;

	/* initialize payload in case `check` was bypassed */
	if (*payload == NULL) {
		error = crlf_check(self, payload, src, NULL);
		if (error < 0)
			return error;
	}

	return (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) ?
		crlf_apply_to_workdir(*payload, to, from) :
		crlf_apply_to_odb(*payload, to, from, src);
}
389 
/*
 * Filter `cleanup` callback: release the payload allocated by
 * crlf_check(). The filter object itself is not used.
 */
static void crlf_cleanup(git_filter *self, void *payload)
{
	GIT_UNUSED(self);

	git__free(payload);
}
397 
git_crlf_filter_new(void)398 git_filter *git_crlf_filter_new(void)
399 {
400 	struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
401 	if (f == NULL)
402 		return NULL;
403 
404 	f->f.version = GIT_FILTER_VERSION;
405 	f->f.attributes = "crlf eol text";
406 	f->f.initialize = NULL;
407 	f->f.shutdown = git_filter_free;
408 	f->f.check    = crlf_check;
409 	f->f.apply    = crlf_apply;
410 	f->f.cleanup  = crlf_cleanup;
411 
412 	return (git_filter *)f;
413 }
414