1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9
10 #include "git2/attr.h"
11 #include "git2/blob.h"
12 #include "git2/index.h"
13 #include "git2/sys/filter.h"
14
15 #include "futils.h"
16 #include "hash.h"
17 #include "filter.h"
18 #include "repository.h"
19
20 typedef enum {
21 GIT_CRLF_UNDEFINED,
22 GIT_CRLF_BINARY,
23 GIT_CRLF_TEXT,
24 GIT_CRLF_TEXT_INPUT,
25 GIT_CRLF_TEXT_CRLF,
26 GIT_CRLF_AUTO,
27 GIT_CRLF_AUTO_INPUT,
28 GIT_CRLF_AUTO_CRLF,
29 } git_crlf_t;
30
31 struct crlf_attrs {
32 int attr_action; /* the .gitattributes setting */
33 int crlf_action; /* the core.autocrlf setting */
34
35 int auto_crlf;
36 int safe_crlf;
37 int core_eol;
38 };
39
40 struct crlf_filter {
41 git_filter f;
42 };
43
check_crlf(const char * value)44 static git_crlf_t check_crlf(const char *value)
45 {
46 if (GIT_ATTR_IS_TRUE(value))
47 return GIT_CRLF_TEXT;
48 else if (GIT_ATTR_IS_FALSE(value))
49 return GIT_CRLF_BINARY;
50 else if (GIT_ATTR_IS_UNSPECIFIED(value))
51 ;
52 else if (strcmp(value, "input") == 0)
53 return GIT_CRLF_TEXT_INPUT;
54 else if (strcmp(value, "auto") == 0)
55 return GIT_CRLF_AUTO;
56
57 return GIT_CRLF_UNDEFINED;
58 }
59
check_eol(const char * value)60 static git_configmap_value check_eol(const char *value)
61 {
62 if (GIT_ATTR_IS_UNSPECIFIED(value))
63 ;
64 else if (strcmp(value, "lf") == 0)
65 return GIT_EOL_LF;
66 else if (strcmp(value, "crlf") == 0)
67 return GIT_EOL_CRLF;
68
69 return GIT_EOL_UNSET;
70 }
71
has_cr_in_index(const git_filter_source * src)72 static int has_cr_in_index(const git_filter_source *src)
73 {
74 git_repository *repo = git_filter_source_repo(src);
75 const char *path = git_filter_source_path(src);
76 git_index *index;
77 const git_index_entry *entry;
78 git_blob *blob;
79 const void *blobcontent;
80 git_object_size_t blobsize;
81 bool found_cr;
82
83 if (!path)
84 return false;
85
86 if (git_repository_index__weakptr(&index, repo) < 0) {
87 git_error_clear();
88 return false;
89 }
90
91 if (!(entry = git_index_get_bypath(index, path, 0)) &&
92 !(entry = git_index_get_bypath(index, path, 1)))
93 return false;
94
95 if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
96 return true;
97
98 if (git_blob_lookup(&blob, repo, &entry->id) < 0)
99 return false;
100
101 blobcontent = git_blob_rawcontent(blob);
102 blobsize = git_blob_rawsize(blob);
103 if (!git__is_sizet(blobsize))
104 blobsize = (size_t)-1;
105
106 found_cr = (blobcontent != NULL &&
107 blobsize > 0 &&
108 memchr(blobcontent, '\r', (size_t)blobsize) != NULL);
109
110 git_blob_free(blob);
111 return found_cr;
112 }
113
text_eol_is_crlf(struct crlf_attrs * ca)114 static int text_eol_is_crlf(struct crlf_attrs *ca)
115 {
116 if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
117 return 1;
118 else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
119 return 0;
120
121 if (ca->core_eol == GIT_EOL_CRLF)
122 return 1;
123 if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF)
124 return 1;
125
126 return 0;
127 }
128
output_eol(struct crlf_attrs * ca)129 static git_configmap_value output_eol(struct crlf_attrs *ca)
130 {
131 switch (ca->crlf_action) {
132 case GIT_CRLF_BINARY:
133 return GIT_EOL_UNSET;
134 case GIT_CRLF_TEXT_CRLF:
135 return GIT_EOL_CRLF;
136 case GIT_CRLF_TEXT_INPUT:
137 return GIT_EOL_LF;
138 case GIT_CRLF_UNDEFINED:
139 case GIT_CRLF_AUTO_CRLF:
140 return GIT_EOL_CRLF;
141 case GIT_CRLF_AUTO_INPUT:
142 return GIT_EOL_LF;
143 case GIT_CRLF_TEXT:
144 case GIT_CRLF_AUTO:
145 return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF;
146 }
147
148 /* TODO: warn when available */
149 return ca->core_eol;
150 }
151
check_safecrlf(struct crlf_attrs * ca,const git_filter_source * src,git_buf_text_stats * stats)152 GIT_INLINE(int) check_safecrlf(
153 struct crlf_attrs *ca,
154 const git_filter_source *src,
155 git_buf_text_stats *stats)
156 {
157 const char *filename = git_filter_source_path(src);
158
159 if (!ca->safe_crlf)
160 return 0;
161
162 if (output_eol(ca) == GIT_EOL_LF) {
163 /*
164 * CRLFs would not be restored by checkout:
165 * check if we'd remove CRLFs
166 */
167 if (stats->crlf) {
168 if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
169 /* TODO: issue a warning when available */
170 } else {
171 if (filename && *filename)
172 git_error_set(
173 GIT_ERROR_FILTER, "CRLF would be replaced by LF in '%s'",
174 filename);
175 else
176 git_error_set(
177 GIT_ERROR_FILTER, "CRLF would be replaced by LF");
178
179 return -1;
180 }
181 }
182 } else if (output_eol(ca) == GIT_EOL_CRLF) {
183 /*
184 * CRLFs would be added by checkout:
185 * check if we have "naked" LFs
186 */
187 if (stats->crlf != stats->lf) {
188 if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
189 /* TODO: issue a warning when available */
190 } else {
191 if (filename && *filename)
192 git_error_set(
193 GIT_ERROR_FILTER, "LF would be replaced by CRLF in '%s'",
194 filename);
195 else
196 git_error_set(
197 GIT_ERROR_FILTER, "LF would be replaced by CRLF");
198
199 return -1;
200 }
201 }
202 }
203
204 return 0;
205 }
206
crlf_apply_to_odb(struct crlf_attrs * ca,git_buf * to,const git_buf * from,const git_filter_source * src)207 static int crlf_apply_to_odb(
208 struct crlf_attrs *ca,
209 git_buf *to,
210 const git_buf *from,
211 const git_filter_source *src)
212 {
213 git_buf_text_stats stats;
214 bool is_binary;
215 int error;
216
217 /* Binary attribute? Empty file? Nothing to do */
218 if (ca->crlf_action == GIT_CRLF_BINARY || !git_buf_len(from))
219 return GIT_PASSTHROUGH;
220
221 is_binary = git_buf_gather_text_stats(&stats, from, false);
222
223 /* Heuristics to see if we can skip the conversion.
224 * Straight from Core Git.
225 */
226 if (ca->crlf_action == GIT_CRLF_AUTO ||
227 ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
228 ca->crlf_action == GIT_CRLF_AUTO_CRLF) {
229
230 if (is_binary)
231 return GIT_PASSTHROUGH;
232
233 /*
234 * If the file in the index has any CR in it, do not convert.
235 * This is the new safer autocrlf handling.
236 */
237 if (has_cr_in_index(src))
238 return GIT_PASSTHROUGH;
239 }
240
241 if ((error = check_safecrlf(ca, src, &stats)) < 0)
242 return error;
243
244 /* If there are no CR characters to filter out, then just pass */
245 if (!stats.crlf)
246 return GIT_PASSTHROUGH;
247
248 /* Actually drop the carriage returns */
249 return git_buf_crlf_to_lf(to, from);
250 }
251
crlf_apply_to_workdir(struct crlf_attrs * ca,git_buf * to,const git_buf * from)252 static int crlf_apply_to_workdir(
253 struct crlf_attrs *ca,
254 git_buf *to,
255 const git_buf *from)
256 {
257 git_buf_text_stats stats;
258 bool is_binary;
259
260 /* Empty file? Nothing to do. */
261 if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF)
262 return GIT_PASSTHROUGH;
263
264 is_binary = git_buf_gather_text_stats(&stats, from, false);
265
266 /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
267 if (stats.lf == 0 || stats.lf == stats.crlf)
268 return GIT_PASSTHROUGH;
269
270 if (ca->crlf_action == GIT_CRLF_AUTO ||
271 ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
272 ca->crlf_action == GIT_CRLF_AUTO_CRLF) {
273
274 /* If we have any existing CR or CRLF line endings, do nothing */
275 if (stats.cr > 0)
276 return GIT_PASSTHROUGH;
277
278 /* Don't filter binary files */
279 if (is_binary)
280 return GIT_PASSTHROUGH;
281 }
282
283 return git_buf_lf_to_crlf(to, from);
284 }
285
convert_attrs(struct crlf_attrs * ca,const char ** attr_values,const git_filter_source * src)286 static int convert_attrs(
287 struct crlf_attrs *ca,
288 const char **attr_values,
289 const git_filter_source *src)
290 {
291 int error;
292
293 memset(ca, 0, sizeof(struct crlf_attrs));
294
295 if ((error = git_repository__configmap_lookup(&ca->auto_crlf,
296 git_filter_source_repo(src), GIT_CONFIGMAP_AUTO_CRLF)) < 0 ||
297 (error = git_repository__configmap_lookup(&ca->safe_crlf,
298 git_filter_source_repo(src), GIT_CONFIGMAP_SAFE_CRLF)) < 0 ||
299 (error = git_repository__configmap_lookup(&ca->core_eol,
300 git_filter_source_repo(src), GIT_CONFIGMAP_EOL)) < 0)
301 return error;
302
303 /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
304 if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
305 ca->safe_crlf == GIT_SAFE_CRLF_FAIL)
306 ca->safe_crlf = GIT_SAFE_CRLF_WARN;
307
308 if (attr_values) {
309 /* load the text attribute */
310 ca->crlf_action = check_crlf(attr_values[2]); /* text */
311
312 if (ca->crlf_action == GIT_CRLF_UNDEFINED)
313 ca->crlf_action = check_crlf(attr_values[0]); /* crlf */
314
315 if (ca->crlf_action != GIT_CRLF_BINARY) {
316 /* load the eol attribute */
317 int eol_attr = check_eol(attr_values[1]);
318
319 if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF)
320 ca->crlf_action = GIT_CRLF_AUTO_INPUT;
321 else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF)
322 ca->crlf_action = GIT_CRLF_AUTO_CRLF;
323 else if (eol_attr == GIT_EOL_LF)
324 ca->crlf_action = GIT_CRLF_TEXT_INPUT;
325 else if (eol_attr == GIT_EOL_CRLF)
326 ca->crlf_action = GIT_CRLF_TEXT_CRLF;
327 }
328
329 ca->attr_action = ca->crlf_action;
330 } else {
331 ca->crlf_action = GIT_CRLF_UNDEFINED;
332 }
333
334 if (ca->crlf_action == GIT_CRLF_TEXT)
335 ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT;
336 if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
337 ca->crlf_action = GIT_CRLF_BINARY;
338 if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
339 ca->crlf_action = GIT_CRLF_AUTO_CRLF;
340 if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
341 ca->crlf_action = GIT_CRLF_AUTO_INPUT;
342
343 return 0;
344 }
345
crlf_check(git_filter * self,void ** payload,const git_filter_source * src,const char ** attr_values)346 static int crlf_check(
347 git_filter *self,
348 void **payload, /* points to NULL ptr on entry, may be set */
349 const git_filter_source *src,
350 const char **attr_values)
351 {
352 struct crlf_attrs ca;
353
354 GIT_UNUSED(self);
355
356 convert_attrs(&ca, attr_values, src);
357
358 if (ca.crlf_action == GIT_CRLF_BINARY)
359 return GIT_PASSTHROUGH;
360
361 *payload = git__malloc(sizeof(ca));
362 GIT_ERROR_CHECK_ALLOC(*payload);
363 memcpy(*payload, &ca, sizeof(ca));
364
365 return 0;
366 }
367
crlf_apply(git_filter * self,void ** payload,git_buf * to,const git_buf * from,const git_filter_source * src)368 static int crlf_apply(
369 git_filter *self,
370 void **payload, /* may be read and/or set */
371 git_buf *to,
372 const git_buf *from,
373 const git_filter_source *src)
374 {
375 /* initialize payload in case `check` was bypassed */
376 if (!*payload) {
377 int error = crlf_check(self, payload, src, NULL);
378
379 if (error < 0)
380 return error;
381 }
382
383 if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
384 return crlf_apply_to_workdir(*payload, to, from);
385 else
386 return crlf_apply_to_odb(*payload, to, from, src);
387 }
388
crlf_stream(git_writestream ** out,git_filter * self,void ** payload,const git_filter_source * src,git_writestream * next)389 static int crlf_stream(
390 git_writestream **out,
391 git_filter *self,
392 void **payload,
393 const git_filter_source *src,
394 git_writestream *next)
395 {
396 return git_filter_buffered_stream_new(out,
397 self, crlf_apply, NULL, payload, src, next);
398 }
399
crlf_cleanup(git_filter * self,void * payload)400 static void crlf_cleanup(
401 git_filter *self,
402 void *payload)
403 {
404 GIT_UNUSED(self);
405 git__free(payload);
406 }
407
git_crlf_filter_new(void)408 git_filter *git_crlf_filter_new(void)
409 {
410 struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
411 if (f == NULL)
412 return NULL;
413
414 f->f.version = GIT_FILTER_VERSION;
415 f->f.attributes = "crlf eol text";
416 f->f.initialize = NULL;
417 f->f.shutdown = git_filter_free;
418 f->f.check = crlf_check;
419 f->f.stream = crlf_stream;
420 f->f.cleanup = crlf_cleanup;
421
422 return (git_filter *)f;
423 }
424