1 /*
2 * parse-diff.c: functions for parsing diff files
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24 #include <stdlib.h>
25 #include <stddef.h>
26 #include <string.h>
27
28 #include "svn_hash.h"
29 #include "svn_types.h"
30 #include "svn_error.h"
31 #include "svn_io.h"
32 #include "svn_pools.h"
33 #include "svn_props.h"
34 #include "svn_string.h"
35 #include "svn_utf.h"
36 #include "svn_dirent_uri.h"
37 #include "svn_diff.h"
38 #include "svn_ctype.h"
39 #include "svn_mergeinfo.h"
40
41 #include "private/svn_eol_private.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_diff_private.h"
44 #include "private/svn_sorts_private.h"
45
46 #include "diff.h"
47
48 #include "svn_private_config.h"
49
/* Evaluate to non-zero iff the C string STR begins with the C string
 * START.  START appears twice in the expansion, so it must be free of
 * side effects. */
#define starts_with(str, start)  \
  (0 == strncmp((str), (start), strlen(start)))

/* Compile-time strlen() for a string literal STR: the size of the
 * literal minus its terminating NUL. */
#define STRLEN_LITERAL(str) (sizeof(str) - 1)
56
57 /* This struct describes a range within a file, as well as the
58 * current cursor position within the range. All numbers are in bytes. */
59 struct svn_diff__hunk_range {
60 apr_off_t start;
61 apr_off_t end;
62 apr_off_t current;
63 };
64
65 struct svn_diff_hunk_t {
66 /* The patch this hunk belongs to. */
67 const svn_patch_t *patch;
68
69 /* APR file handle to the patch file this hunk came from. */
70 apr_file_t *apr_file;
71
72 /* Whether the hunk was interpreted as pretty-print mergeinfo. If so,
73 the hunk content is in PATCH and the rest of this hunk object is
74 mostly uninitialized. */
75 svn_boolean_t is_pretty_print_mergeinfo;
76
77 /* Ranges used to keep track of this hunk's texts positions within
78 * the patch file. */
79 struct svn_diff__hunk_range diff_text_range;
80 struct svn_diff__hunk_range original_text_range;
81 struct svn_diff__hunk_range modified_text_range;
82
83 /* Hunk ranges as they appeared in the patch file.
84 * All numbers are lines, not bytes. */
85 svn_linenum_t original_start;
86 svn_linenum_t original_length;
87 svn_linenum_t modified_start;
88 svn_linenum_t modified_length;
89
90 /* Number of lines of leading and trailing hunk context. */
91 svn_linenum_t leading_context;
92 svn_linenum_t trailing_context;
93
94 /* Did we see a 'file does not end with eol' marker in this hunk? */
95 svn_boolean_t original_no_final_eol;
96 svn_boolean_t modified_no_final_eol;
97
98 /* Fuzz penalty, triggered by bad patch targets */
99 svn_linenum_t original_fuzz;
100 svn_linenum_t modified_fuzz;
101 };
102
103 struct svn_diff_binary_patch_t {
104 /* The patch this hunk belongs to. */
105 const svn_patch_t *patch;
106
107 /* APR file handle to the patch file this hunk came from. */
108 apr_file_t *apr_file;
109
110 /* Offsets inside APR_FILE representing the location of the patch */
111 apr_off_t src_start;
112 apr_off_t src_end;
113 svn_filesize_t src_filesize; /* Expanded/final size */
114
115 /* Offsets inside APR_FILE representing the location of the patch */
116 apr_off_t dst_start;
117 apr_off_t dst_end;
118 svn_filesize_t dst_filesize; /* Expanded/final size */
119 };
120
121 /* Common guts of svn_diff_hunk__create_adds_single_line() and
122 * svn_diff_hunk__create_deletes_single_line().
123 *
124 * ADD is TRUE if adding and FALSE if deleting.
125 */
126 static svn_error_t *
add_or_delete_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,svn_boolean_t add,apr_pool_t * result_pool,apr_pool_t * scratch_pool)127 add_or_delete_single_line(svn_diff_hunk_t **hunk_out,
128 const char *line,
129 const svn_patch_t *patch,
130 svn_boolean_t add,
131 apr_pool_t *result_pool,
132 apr_pool_t *scratch_pool)
133 {
134 svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk));
135 static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" };
136 const apr_size_t header_len = strlen(hunk_header[add]);
137 const apr_size_t len = strlen(line);
138 const apr_size_t end = header_len + (1 + len); /* The +1 is for the \n. */
139 svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool);
140
141 hunk->patch = patch;
142
143 /* hunk->apr_file is created below. */
144
145 hunk->diff_text_range.start = header_len;
146 hunk->diff_text_range.current = header_len;
147
148 if (add)
149 {
150 hunk->original_text_range.start = 0; /* There's no "original" text. */
151 hunk->original_text_range.current = 0;
152 hunk->original_text_range.end = 0;
153 hunk->original_no_final_eol = FALSE;
154
155 hunk->modified_text_range.start = header_len;
156 hunk->modified_text_range.current = header_len;
157 hunk->modified_text_range.end = end;
158 hunk->modified_no_final_eol = TRUE;
159
160 hunk->original_start = 0;
161 hunk->original_length = 0;
162
163 hunk->modified_start = 1;
164 hunk->modified_length = 1;
165 }
166 else /* delete */
167 {
168 hunk->original_text_range.start = header_len;
169 hunk->original_text_range.current = header_len;
170 hunk->original_text_range.end = end;
171 hunk->original_no_final_eol = TRUE;
172
173 hunk->modified_text_range.start = 0; /* There's no "original" text. */
174 hunk->modified_text_range.current = 0;
175 hunk->modified_text_range.end = 0;
176 hunk->modified_no_final_eol = FALSE;
177
178 hunk->original_start = 1;
179 hunk->original_length = 1;
180
181 hunk->modified_start = 0;
182 hunk->modified_length = 0; /* setting to '1' works too */
183 }
184
185 hunk->leading_context = 0;
186 hunk->trailing_context = 0;
187
188 /* Create APR_FILE and put just a hunk in it (without a diff header).
189 * Save the offset of the last byte of the diff line. */
190 svn_stringbuf_appendbytes(buf, hunk_header[add], header_len);
191 svn_stringbuf_appendbyte(buf, add ? '+' : '-');
192 svn_stringbuf_appendbytes(buf, line, len);
193 svn_stringbuf_appendbyte(buf, '\n');
194 svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n");
195
196 hunk->diff_text_range.end = buf->len;
197
198 SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */,
199 NULL /* system tempdir */,
200 svn_io_file_del_on_pool_cleanup,
201 result_pool, scratch_pool));
202 SVN_ERR(svn_io_file_write_full(hunk->apr_file,
203 buf->data, buf->len,
204 NULL, scratch_pool));
205 /* No need to seek. */
206
207 *hunk_out = hunk;
208 return SVN_NO_ERROR;
209 }
210
211 svn_error_t *
svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)212 svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out,
213 const char *line,
214 const svn_patch_t *patch,
215 apr_pool_t *result_pool,
216 apr_pool_t *scratch_pool)
217 {
218 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
219 (!patch->reverse),
220 result_pool, scratch_pool));
221 return SVN_NO_ERROR;
222 }
223
224 svn_error_t *
svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)225 svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out,
226 const char *line,
227 const svn_patch_t *patch,
228 apr_pool_t *result_pool,
229 apr_pool_t *scratch_pool)
230 {
231 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
232 patch->reverse,
233 result_pool, scratch_pool));
234 return SVN_NO_ERROR;
235 }
236
237 void
svn_diff_hunk_reset_diff_text(svn_diff_hunk_t * hunk)238 svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
239 {
240 hunk->diff_text_range.current = hunk->diff_text_range.start;
241 }
242
243 void
svn_diff_hunk_reset_original_text(svn_diff_hunk_t * hunk)244 svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
245 {
246 if (hunk->patch->reverse)
247 hunk->modified_text_range.current = hunk->modified_text_range.start;
248 else
249 hunk->original_text_range.current = hunk->original_text_range.start;
250 }
251
252 void
svn_diff_hunk_reset_modified_text(svn_diff_hunk_t * hunk)253 svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
254 {
255 if (hunk->patch->reverse)
256 hunk->original_text_range.current = hunk->original_text_range.start;
257 else
258 hunk->modified_text_range.current = hunk->modified_text_range.start;
259 }
260
261 svn_linenum_t
svn_diff_hunk_get_original_start(const svn_diff_hunk_t * hunk)262 svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
263 {
264 return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
265 }
266
267 svn_linenum_t
svn_diff_hunk_get_original_length(const svn_diff_hunk_t * hunk)268 svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
269 {
270 return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
271 }
272
273 svn_linenum_t
svn_diff_hunk_get_modified_start(const svn_diff_hunk_t * hunk)274 svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
275 {
276 return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
277 }
278
279 svn_linenum_t
svn_diff_hunk_get_modified_length(const svn_diff_hunk_t * hunk)280 svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
281 {
282 return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
283 }
284
285 svn_linenum_t
svn_diff_hunk_get_leading_context(const svn_diff_hunk_t * hunk)286 svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
287 {
288 return hunk->leading_context;
289 }
290
291 svn_linenum_t
svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t * hunk)292 svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
293 {
294 return hunk->trailing_context;
295 }
296
297 svn_linenum_t
svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t * hunk)298 svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk)
299 {
300 return hunk->patch->reverse ? hunk->original_fuzz : hunk->modified_fuzz;
301 }
302
303 /* Baton for the base85 stream implementation */
304 struct base85_baton_t
305 {
306 apr_file_t *file;
307 apr_pool_t *iterpool;
308 char buffer[52]; /* Bytes on current line */
309 apr_off_t next_pos; /* Start position of next line */
310 apr_off_t end_pos; /* Position after last line */
311 apr_size_t buf_size; /* Bytes available (52 unless at eof) */
312 apr_size_t buf_pos; /* Bytes in linebuffer */
313 svn_boolean_t done; /* At eof? */
314 };
315
316 /* Implements svn_read_fn_t for the base85 read stream */
317 static svn_error_t *
read_handler_base85(void * baton,char * buffer,apr_size_t * len)318 read_handler_base85(void *baton, char *buffer, apr_size_t *len)
319 {
320 struct base85_baton_t *b85b = baton;
321 apr_pool_t *iterpool = b85b->iterpool;
322 apr_size_t remaining = *len;
323 char *dest = buffer;
324
325 svn_pool_clear(iterpool);
326
327 if (b85b->done)
328 {
329 *len = 0;
330 return SVN_NO_ERROR;
331 }
332
333 while (remaining && (b85b->buf_size > b85b->buf_pos
334 || b85b->next_pos < b85b->end_pos))
335 {
336 svn_stringbuf_t *line;
337 svn_boolean_t at_eof;
338
339 apr_size_t available = b85b->buf_size - b85b->buf_pos;
340 if (available)
341 {
342 apr_size_t n = (remaining < available) ? remaining : available;
343
344 memcpy(dest, b85b->buffer + b85b->buf_pos, n);
345 dest += n;
346 remaining -= n;
347 b85b->buf_pos += n;
348
349 if (!remaining)
350 return SVN_NO_ERROR; /* *len = OK */
351 }
352
353 if (b85b->next_pos >= b85b->end_pos)
354 break; /* At EOF */
355 SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos,
356 iterpool));
357 SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof,
358 APR_SIZE_MAX, iterpool, iterpool));
359 if (at_eof)
360 b85b->next_pos = b85b->end_pos;
361 else
362 {
363 SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file,
364 iterpool));
365 }
366
367 if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z')
368 b85b->buf_size = line->data[0] - 'A' + 1;
369 else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z')
370 b85b->buf_size = line->data[0] - 'a' + 26 + 1;
371 else
372 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
373 _("Unexpected data in base85 section"));
374
375 if (b85b->buf_size < 52)
376 b85b->next_pos = b85b->end_pos; /* Handle as EOF */
377
378 SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size,
379 line->data + 1, line->len - 1,
380 iterpool));
381 b85b->buf_pos = 0;
382 }
383
384 *len -= remaining;
385 b85b->done = TRUE;
386
387 return SVN_NO_ERROR;
388 }
389
390 /* Implements svn_close_fn_t for the base85 read stream */
391 static svn_error_t *
close_handler_base85(void * baton)392 close_handler_base85(void *baton)
393 {
394 struct base85_baton_t *b85b = baton;
395
396 svn_pool_destroy(b85b->iterpool);
397
398 return SVN_NO_ERROR;
399 }
400
401 /* Gets a stream that reads decoded base85 data from a segment of a file.
402 The current implementation might assume that both start_pos and end_pos
403 are located at line boundaries. */
404 static svn_stream_t *
get_base85_data_stream(apr_file_t * file,apr_off_t start_pos,apr_off_t end_pos,apr_pool_t * result_pool)405 get_base85_data_stream(apr_file_t *file,
406 apr_off_t start_pos,
407 apr_off_t end_pos,
408 apr_pool_t *result_pool)
409 {
410 struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b));
411 svn_stream_t *base85s = svn_stream_create(b85b, result_pool);
412
413 b85b->file = file;
414 b85b->iterpool = svn_pool_create(result_pool);
415 b85b->next_pos = start_pos;
416 b85b->end_pos = end_pos;
417
418 svn_stream_set_read2(base85s, NULL /* only full read support */,
419 read_handler_base85);
420 svn_stream_set_close(base85s, close_handler_base85);
421 return base85s;
422 }
423
424 /* Baton for the length verification stream functions */
425 struct length_verify_baton_t
426 {
427 svn_stream_t *inner;
428 svn_filesize_t remaining;
429 };
430
431 /* Implements svn_read_fn_t for the length verification stream */
432 static svn_error_t *
read_handler_length_verify(void * baton,char * buffer,apr_size_t * len)433 read_handler_length_verify(void *baton, char *buffer, apr_size_t *len)
434 {
435 struct length_verify_baton_t *lvb = baton;
436 apr_size_t requested_len = *len;
437
438 SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len));
439
440 if (*len > lvb->remaining)
441 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
442 _("Base85 data expands to longer than declared "
443 "filesize"));
444 else if (requested_len > *len && *len != lvb->remaining)
445 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
446 _("Base85 data expands to smaller than declared "
447 "filesize"));
448
449 lvb->remaining -= *len;
450
451 return SVN_NO_ERROR;
452 }
453
454 /* Implements svn_close_fn_t for the length verification stream */
455 static svn_error_t *
close_handler_length_verify(void * baton)456 close_handler_length_verify(void *baton)
457 {
458 struct length_verify_baton_t *lvb = baton;
459
460 return svn_error_trace(svn_stream_close(lvb->inner));
461 }
462
463 /* Gets a stream that verifies on reads that the inner stream is exactly
464 of the specified length */
465 static svn_stream_t *
get_verify_length_stream(svn_stream_t * inner,svn_filesize_t expected_size,apr_pool_t * result_pool)466 get_verify_length_stream(svn_stream_t *inner,
467 svn_filesize_t expected_size,
468 apr_pool_t *result_pool)
469 {
470 struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb));
471 svn_stream_t *len_stream = svn_stream_create(lvb, result_pool);
472
473 lvb->inner = inner;
474 lvb->remaining = expected_size;
475
476 svn_stream_set_read2(len_stream, NULL /* only full read support */,
477 read_handler_length_verify);
478 svn_stream_set_close(len_stream, close_handler_length_verify);
479
480 return len_stream;
481 }
482
483 svn_stream_t *
svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)484 svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
485 apr_pool_t *result_pool)
486 {
487 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start,
488 bpatch->src_end, result_pool);
489
490 s = svn_stream_compressed(s, result_pool);
491
492 /* ### If we (ever) want to support the DELTA format, then we should hook the
493 undelta handling here */
494
495 return get_verify_length_stream(s, bpatch->src_filesize, result_pool);
496 }
497
498 svn_stream_t *
svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)499 svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
500 apr_pool_t *result_pool)
501 {
502 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start,
503 bpatch->dst_end, result_pool);
504
505 s = svn_stream_compressed(s, result_pool);
506
507 /* ### If we (ever) want to support the DELTA format, then we should hook the
508 undelta handling here */
509
510 return get_verify_length_stream(s, bpatch->dst_filesize, result_pool);
511 }
512
513 /* Try to parse a positive number from a decimal number encoded
514 * in the string NUMBER. Return parsed number in OFFSET, and return
515 * TRUE if parsing was successful. */
516 static svn_boolean_t
parse_offset(svn_linenum_t * offset,const char * number)517 parse_offset(svn_linenum_t *offset, const char *number)
518 {
519 svn_error_t *err;
520 apr_uint64_t val;
521
522 err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
523 if (err)
524 {
525 svn_error_clear(err);
526 return FALSE;
527 }
528
529 *offset = (svn_linenum_t)val;
530
531 return TRUE;
532 }
533
534 /* Try to parse a hunk range specification from the string RANGE.
535 * Return parsed information in *START and *LENGTH, and return TRUE
536 * if the range parsed correctly. Note: This function may modify the
537 * input value RANGE. */
538 static svn_boolean_t
parse_range(svn_linenum_t * start,svn_linenum_t * length,char * range)539 parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
540 {
541 char *comma;
542
543 if (*range == 0)
544 return FALSE;
545
546 comma = strstr(range, ",");
547 if (comma)
548 {
549 if (strlen(comma + 1) > 0)
550 {
551 /* Try to parse the length. */
552 if (! parse_offset(length, comma + 1))
553 return FALSE;
554
555 /* Snip off the end of the string,
556 * so we can comfortably parse the line
557 * number the hunk starts at. */
558 *comma = '\0';
559 }
560 else
561 /* A comma but no length? */
562 return FALSE;
563 }
564 else
565 {
566 *length = 1;
567 }
568
569 /* Try to parse the line number the hunk starts at. */
570 return parse_offset(start, range);
571 }
572
573 /* Try to parse a hunk header in string HEADER, putting parsed information
574 * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
575 * character string used to delimit the hunk header.
576 * Do all allocations in POOL. */
577 static svn_boolean_t
parse_hunk_header(const char * header,svn_diff_hunk_t * hunk,const char * atat,apr_pool_t * pool)578 parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
579 const char *atat, apr_pool_t *pool)
580 {
581 const char *p;
582 const char *start;
583 svn_stringbuf_t *range;
584
585 p = header + strlen(atat);
586 if (*p != ' ')
587 /* No. */
588 return FALSE;
589 p++;
590 if (*p != '-')
591 /* Nah... */
592 return FALSE;
593 /* OK, this may be worth allocating some memory for... */
594 range = svn_stringbuf_create_ensure(31, pool);
595 start = ++p;
596 while (*p && *p != ' ')
597 {
598 p++;
599 }
600
601 if (*p != ' ')
602 /* No no no... */
603 return FALSE;
604
605 svn_stringbuf_appendbytes(range, start, p - start);
606
607 /* Try to parse the first range. */
608 if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
609 return FALSE;
610
611 /* Clear the stringbuf so we can reuse it for the second range. */
612 svn_stringbuf_setempty(range);
613 p++;
614 if (*p != '+')
615 /* Eeek! */
616 return FALSE;
617 /* OK, this may be worth copying... */
618 start = ++p;
619 while (*p && *p != ' ')
620 {
621 p++;
622 }
623 if (*p != ' ')
624 /* No no no... */
625 return FALSE;
626
627 svn_stringbuf_appendbytes(range, start, p - start);
628
629 /* Check for trailing @@ */
630 p++;
631 if (! starts_with(p, atat))
632 return FALSE;
633
634 /* There may be stuff like C-function names after the trailing @@,
635 * but we ignore that. */
636
637 /* Try to parse the second range. */
638 if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
639 return FALSE;
640
641 /* Hunk header is good. */
642 return TRUE;
643 }
644
645 /* Read a line of original or modified hunk text from the specified
646 * RANGE within FILE. FILE is expected to contain unidiff text.
647 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
648 * Any lines commencing with the VERBOTEN character are discarded.
649 * VERBOTEN should be '+' or '-', depending on which form of hunk text
650 * is being read. NO_FINAL_EOL declares if the hunk contains a no final
651 * EOL marker.
652 *
653 * All other parameters are as in svn_diff_hunk_readline_original_text()
654 * and svn_diff_hunk_readline_modified_text().
655 */
656 static svn_error_t *
hunk_readline_original_or_modified(apr_file_t * file,struct svn_diff__hunk_range * range,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,char verboten,svn_boolean_t no_final_eol,apr_pool_t * result_pool,apr_pool_t * scratch_pool)657 hunk_readline_original_or_modified(apr_file_t *file,
658 struct svn_diff__hunk_range *range,
659 svn_stringbuf_t **stringbuf,
660 const char **eol,
661 svn_boolean_t *eof,
662 char verboten,
663 svn_boolean_t no_final_eol,
664 apr_pool_t *result_pool,
665 apr_pool_t *scratch_pool)
666 {
667 apr_size_t max_len;
668 svn_boolean_t filtered;
669 apr_off_t pos;
670 svn_stringbuf_t *str;
671 const char *eol_p;
672 apr_pool_t *last_pool;
673
674 if (!eol)
675 eol = &eol_p;
676
677 if (range->current >= range->end)
678 {
679 /* We're past the range. Indicate that no bytes can be read. */
680 *eof = TRUE;
681 *eol = NULL;
682 *stringbuf = svn_stringbuf_create_empty(result_pool);
683 return SVN_NO_ERROR;
684 }
685
686 SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool));
687 SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
688
689 /* It's not ITERPOOL because we use data allocated in LAST_POOL out
690 of the loop. */
691 last_pool = svn_pool_create(scratch_pool);
692 do
693 {
694 svn_pool_clear(last_pool);
695
696 max_len = range->end - range->current;
697 SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
698 last_pool, last_pool));
699 SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool));
700 filtered = (str->data[0] == verboten || str->data[0] == '\\');
701 }
702 while (filtered && ! *eof);
703
704 if (filtered)
705 {
706 /* EOF, return an empty string. */
707 *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
708 *eol = NULL;
709 }
710 else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
711 {
712 /* Shave off leading unidiff symbols. */
713 *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
714 }
715 else
716 {
717 /* Return the line as-is. Handle as a chopped leading spaces */
718 *stringbuf = svn_stringbuf_dup(str, result_pool);
719 }
720
721 if (!filtered && *eof && !*eol && *str->data)
722 {
723 /* Ok, we miss a final EOL in the patch file, but didn't see a
724 no eol marker line.
725
726 We should report that we had an EOL or the patch code will
727 misbehave (and it knows nothing about no eol markers) */
728
729 if (!no_final_eol && eol != &eol_p)
730 {
731 apr_off_t start = 0;
732
733 SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
734
735 SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX,
736 scratch_pool, scratch_pool));
737
738 /* Every patch file that has hunks has at least one EOL*/
739 SVN_ERR_ASSERT(*eol != NULL);
740 }
741
742 *eof = FALSE;
743 /* Fall through to seek back to the right location */
744 }
745 SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
746
747 svn_pool_destroy(last_pool);
748 return SVN_NO_ERROR;
749 }
750
751 svn_error_t *
svn_diff_hunk_readline_original_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)752 svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
753 svn_stringbuf_t **stringbuf,
754 const char **eol,
755 svn_boolean_t *eof,
756 apr_pool_t *result_pool,
757 apr_pool_t *scratch_pool)
758 {
759 return svn_error_trace(
760 hunk_readline_original_or_modified(hunk->apr_file,
761 hunk->patch->reverse ?
762 &hunk->modified_text_range :
763 &hunk->original_text_range,
764 stringbuf, eol, eof,
765 hunk->patch->reverse ? '-' : '+',
766 hunk->patch->reverse
767 ? hunk->modified_no_final_eol
768 : hunk->original_no_final_eol,
769 result_pool, scratch_pool));
770 }
771
772 svn_error_t *
svn_diff_hunk_readline_modified_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)773 svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
774 svn_stringbuf_t **stringbuf,
775 const char **eol,
776 svn_boolean_t *eof,
777 apr_pool_t *result_pool,
778 apr_pool_t *scratch_pool)
779 {
780 return svn_error_trace(
781 hunk_readline_original_or_modified(hunk->apr_file,
782 hunk->patch->reverse ?
783 &hunk->original_text_range :
784 &hunk->modified_text_range,
785 stringbuf, eol, eof,
786 hunk->patch->reverse ? '+' : '-',
787 hunk->patch->reverse
788 ? hunk->original_no_final_eol
789 : hunk->modified_no_final_eol,
790 result_pool, scratch_pool));
791 }
792
793 svn_error_t *
svn_diff_hunk_readline_diff_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)794 svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
795 svn_stringbuf_t **stringbuf,
796 const char **eol,
797 svn_boolean_t *eof,
798 apr_pool_t *result_pool,
799 apr_pool_t *scratch_pool)
800 {
801 svn_stringbuf_t *line;
802 apr_size_t max_len;
803 apr_off_t pos;
804 const char *eol_p;
805
806 if (!eol)
807 eol = &eol_p;
808
809 if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
810 {
811 /* We're past the range. Indicate that no bytes can be read. */
812 *eof = TRUE;
813 *eol = NULL;
814 *stringbuf = svn_stringbuf_create_empty(result_pool);
815 return SVN_NO_ERROR;
816 }
817
818 SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool));
819 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
820 &hunk->diff_text_range.current, scratch_pool));
821 max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
822 SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
823 result_pool,
824 scratch_pool));
825 SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current,
826 hunk->apr_file, scratch_pool));
827
828 if (*eof && !*eol && *line->data)
829 {
830 /* Ok, we miss a final EOL in the patch file, but didn't see a
831 no eol marker line.
832
833 We should report that we had an EOL or the patch code will
834 misbehave (and it knows nothing about no eol markers) */
835
836 if (eol != &eol_p)
837 {
838 /* Lets pick the first eol we find in our patch file */
839 apr_off_t start = 0;
840 svn_stringbuf_t *str;
841
842 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start,
843 scratch_pool));
844
845 SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL,
846 APR_SIZE_MAX,
847 scratch_pool, scratch_pool));
848
849 /* Every patch file that has hunks has at least one EOL*/
850 SVN_ERR_ASSERT(*eol != NULL);
851 }
852
853 *eof = FALSE;
854
855 /* Fall through to seek back to the right location */
856 }
857
858 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
859
860 if (hunk->patch->reverse)
861 {
862 if (line->data[0] == '+')
863 line->data[0] = '-';
864 else if (line->data[0] == '-')
865 line->data[0] = '+';
866 }
867
868 *stringbuf = line;
869
870 return SVN_NO_ERROR;
871 }
872
873 /* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
874 * Allocate *PROP_NAME in RESULT_POOL.
875 * Set *PROP_NAME to NULL if no valid property name was found. */
876 static svn_error_t *
parse_prop_name(const char ** prop_name,const char * header,const char * indicator,apr_pool_t * result_pool)877 parse_prop_name(const char **prop_name, const char *header,
878 const char *indicator, apr_pool_t *result_pool)
879 {
880 SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
881 header + strlen(indicator),
882 result_pool));
883 if (**prop_name == '\0')
884 *prop_name = NULL;
885 else if (! svn_prop_name_is_valid(*prop_name))
886 {
887 svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
888 svn_stringbuf_strip_whitespace(buf);
889 *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
890 }
891
892 return SVN_NO_ERROR;
893 }
894
895
896 /* A helper function to parse svn:mergeinfo diffs.
897 *
898 * These diffs use a special pretty-print format, for instance:
899 *
900 * Added: svn:mergeinfo
901 * ## -0,0 +0,1 ##
902 * Merged /trunk:r2-3
903 *
904 * The hunk header has the following format:
905 * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
906 *
907 * The header is followed by a list of mergeinfo, one path per line.
908 * This function parses such lines. Lines describing reverse merges
909 * appear first, and then all lines describing forward merges appear.
910 *
911 * Parts of the line are affected by i18n. The words 'Merged'
912 * and 'Reverse-merged' can appear in any language and at any
913 * position within the line. We can only assume that a leading
914 * '/' starts the merge source path, the path is followed by
915 * ":r", which in turn is followed by a mergeinfo revision range,
916 * which is terminated by whitespace or end-of-string.
917 *
918 * *NUMBER_OF_REVERSE_MERGES and *NUMBER_OF_FORWARD_MERGES are the
919 * numbers of reverse and forward merges remaining to be read. This
920 * function decrements *NUMBER_OF_REVERSE_MERGES for each LINE
921 * parsed until that is zero, then *NUMBER_OF_FORWARD_MERGES for
922 * each LINE parsed until that is zero. If both are zero, it parses
923 * and discards LINE.
924 *
925 * If LINE is successfully parsed, *FOUND_MERGEINFO is set to TRUE,
926 * otherwise to FALSE.
927 *
928 * If LINE is successfully parsed and counted, the resulting mergeinfo
929 * is added to PATCH->mergeinfo or PATCH->reverse_mergeinfo.
930 */
931 static svn_error_t *
parse_pretty_mergeinfo_line(svn_boolean_t * found_mergeinfo,svn_linenum_t * number_of_reverse_merges,svn_linenum_t * number_of_forward_merges,svn_stringbuf_t * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)932 parse_pretty_mergeinfo_line(svn_boolean_t *found_mergeinfo,
933 svn_linenum_t *number_of_reverse_merges,
934 svn_linenum_t *number_of_forward_merges,
935 svn_stringbuf_t *line,
936 svn_patch_t *patch,
937 apr_pool_t *result_pool,
938 apr_pool_t *scratch_pool)
939 {
940 char *slash = strchr(line->data, '/');
941 char *colon = strrchr(line->data, ':');
942
943 *found_mergeinfo = FALSE;
944
945 if (slash && colon && colon[1] == 'r' && slash < colon)
946 {
947 svn_stringbuf_t *input;
948 svn_mergeinfo_t mergeinfo = NULL;
949 char *s;
950 svn_error_t *err;
951
952 input = svn_stringbuf_create_ensure(line->len, scratch_pool);
953
954 /* Copy the merge source path + colon */
955 s = slash;
956 while (s <= colon)
957 {
958 svn_stringbuf_appendbyte(input, *s);
959 s++;
960 }
961
962 /* skip 'r' after colon */
963 s++;
964
965 /* Copy the revision range. */
966 while (s < line->data + line->len)
967 {
968 if (svn_ctype_isspace(*s))
969 break;
970 svn_stringbuf_appendbyte(input, *s);
971 s++;
972 }
973
974 err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
975 if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
976 {
977 svn_error_clear(err);
978 mergeinfo = NULL;
979 }
980 else
981 SVN_ERR(err);
982
983 if (mergeinfo)
984 {
985 if (*number_of_reverse_merges > 0) /* reverse merges */
986 {
987 if (patch->reverse)
988 {
989 if (patch->mergeinfo == NULL)
990 patch->mergeinfo = mergeinfo;
991 else
992 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
993 mergeinfo,
994 result_pool,
995 scratch_pool));
996 }
997 else
998 {
999 if (patch->reverse_mergeinfo == NULL)
1000 patch->reverse_mergeinfo = mergeinfo;
1001 else
1002 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1003 mergeinfo,
1004 result_pool,
1005 scratch_pool));
1006 }
1007 (*number_of_reverse_merges)--;
1008 }
1009 else if (number_of_forward_merges > 0) /* forward merges */
1010 {
1011 if (patch->reverse)
1012 {
1013 if (patch->reverse_mergeinfo == NULL)
1014 patch->reverse_mergeinfo = mergeinfo;
1015 else
1016 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1017 mergeinfo,
1018 result_pool,
1019 scratch_pool));
1020 }
1021 else
1022 {
1023 if (patch->mergeinfo == NULL)
1024 patch->mergeinfo = mergeinfo;
1025 else
1026 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
1027 mergeinfo,
1028 result_pool,
1029 scratch_pool));
1030 }
1031 (*number_of_forward_merges)--;
1032 }
1033
1034 *found_mergeinfo = TRUE;
1035 }
1036 }
1037
1038 return SVN_NO_ERROR;
1039 }
1040
1041 /* Return the next *HUNK from a PATCH in APR_FILE.
1042 * If no hunk can be found, set *HUNK to NULL.
1043 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
1044 * is the first belonging to a certain property, then PROP_NAME and
1045 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
1046 * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
1047 * treated as context lines. Allocate results in RESULT_POOL.
1048 * Use SCRATCH_POOL for all other allocations. */
1049 static svn_error_t *
parse_next_hunk(svn_diff_hunk_t ** hunk,svn_boolean_t * is_property,const char ** prop_name,svn_diff_operation_kind_t * prop_operation,svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1050 parse_next_hunk(svn_diff_hunk_t **hunk,
1051 svn_boolean_t *is_property,
1052 const char **prop_name,
1053 svn_diff_operation_kind_t *prop_operation,
1054 svn_patch_t *patch,
1055 apr_file_t *apr_file,
1056 svn_boolean_t ignore_whitespace,
1057 apr_pool_t *result_pool,
1058 apr_pool_t *scratch_pool)
1059 {
1060 static const char * const minus = "--- ";
1061 static const char * const text_atat = "@@";
1062 static const char * const prop_atat = "##";
1063 svn_stringbuf_t *line;
1064 svn_boolean_t eof, in_hunk, hunk_seen;
1065 apr_off_t pos, last_line;
1066 apr_off_t start, end;
1067 apr_off_t original_end;
1068 apr_off_t modified_end;
1069 svn_boolean_t original_no_final_eol = FALSE;
1070 svn_boolean_t modified_no_final_eol = FALSE;
1071 svn_linenum_t original_lines;
1072 svn_linenum_t modified_lines;
1073 svn_linenum_t leading_context;
1074 svn_linenum_t trailing_context;
1075 svn_boolean_t changed_line_seen;
1076 enum {
1077 noise_line,
1078 original_line,
1079 modified_line,
1080 context_line
1081 } last_line_type;
1082 apr_pool_t *iterpool;
1083
1084 *prop_operation = svn_diff_op_unchanged;
1085
1086 /* We only set this if we have a property hunk header. */
1087 *prop_name = NULL;
1088 *is_property = FALSE;
1089
1090 if (apr_file_eof(apr_file) == APR_EOF)
1091 {
1092 /* No more hunks here. */
1093 *hunk = NULL;
1094 return SVN_NO_ERROR;
1095 }
1096
1097 in_hunk = FALSE;
1098 hunk_seen = FALSE;
1099 leading_context = 0;
1100 trailing_context = 0;
1101 changed_line_seen = FALSE;
1102 original_end = 0;
1103 modified_end = 0;
1104 *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
1105
1106 /* Get current seek position. */
1107 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
1108
1109 /* Start out assuming noise. */
1110 last_line_type = noise_line;
1111
1112 iterpool = svn_pool_create(scratch_pool);
1113 do
1114 {
1115
1116 svn_pool_clear(iterpool);
1117
1118 /* Remember the current line's offset, and read the line. */
1119 last_line = pos;
1120 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
1121 iterpool, iterpool));
1122
1123 /* Update line offset for next iteration. */
1124 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
1125
1126 /* Lines starting with a backslash indicate a missing EOL:
1127 * "\ No newline at end of file" or "end of property". */
1128 if (line->data[0] == '\\')
1129 {
1130 if (in_hunk)
1131 {
1132 char eolbuf[2];
1133 apr_size_t len;
1134 apr_off_t off;
1135 apr_off_t hunk_text_end;
1136
1137 /* Comment terminates the hunk text and says the hunk text
1138 * has no trailing EOL. Snip off trailing EOL which is part
1139 * of the patch file but not part of the hunk text. */
1140 off = last_line - 2;
1141 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
1142 len = sizeof(eolbuf);
1143 SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
1144 &eof, iterpool));
1145 if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
1146 hunk_text_end = last_line - 2;
1147 else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
1148 hunk_text_end = last_line - 1;
1149 else
1150 hunk_text_end = last_line;
1151
1152 if (last_line_type == original_line && original_end == 0)
1153 original_end = hunk_text_end;
1154 else if (last_line_type == modified_line && modified_end == 0)
1155 modified_end = hunk_text_end;
1156 else if (last_line_type == context_line)
1157 {
1158 if (original_end == 0)
1159 original_end = hunk_text_end;
1160 if (modified_end == 0)
1161 modified_end = hunk_text_end;
1162 }
1163
1164 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
1165 /* Set for the type and context by using != the other type */
1166 if (last_line_type != modified_line)
1167 original_no_final_eol = TRUE;
1168 if (last_line_type != original_line)
1169 modified_no_final_eol = TRUE;
1170 }
1171
1172 continue;
1173 }
1174
1175 if (in_hunk && *is_property && *prop_name &&
1176 strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
1177 {
1178 svn_boolean_t found_pretty_mergeinfo_line;
1179
1180 if (! hunk_seen)
1181 {
1182 /* We're reading the first line of the hunk, so the start
1183 * of the line just read is the hunk text's byte offset. */
1184 start = last_line;
1185 }
1186
1187 SVN_ERR(parse_pretty_mergeinfo_line(&found_pretty_mergeinfo_line,
1188 &original_lines, &modified_lines,
1189 line, patch,
1190 result_pool, iterpool));
1191 if (found_pretty_mergeinfo_line)
1192 {
1193 hunk_seen = TRUE;
1194 (*hunk)->is_pretty_print_mergeinfo = TRUE;
1195 continue; /* Proceed to the next line in the svn:mergeinfo hunk. */
1196 }
1197
1198 if ((*hunk)->is_pretty_print_mergeinfo)
1199 {
1200 /* We have reached the end of the pretty-print-mergeinfo hunk.
1201 (This format uses only one hunk.) */
1202 if (eof)
1203 {
1204 /* The hunk ends at EOF. */
1205 end = pos;
1206 }
1207 else
1208 {
1209 /* The start of the current line marks the first byte
1210 * after the hunk text. */
1211 end = last_line;
1212 }
1213 original_end = end;
1214 modified_end = end;
1215 break;
1216 }
1217
1218 /* Otherwise, this is a property diff in the
1219 regular format so fall through to normal processing. */
1220 }
1221
1222 if (in_hunk)
1223 {
1224 char c;
1225 static const char add = '+';
1226 static const char del = '-';
1227
1228 if (! hunk_seen)
1229 {
1230 /* We're reading the first line of the hunk, so the start
1231 * of the line just read is the hunk text's byte offset. */
1232 start = last_line;
1233 }
1234
1235 c = line->data[0];
1236 if (c == ' '
1237 || ((original_lines > 0 && modified_lines > 0)
1238 && (
1239 /* Tolerate chopped leading spaces on empty lines. */
1240 (! eof && line->len == 0)
1241 /* Maybe tolerate chopped leading spaces on non-empty lines. */
1242 || (ignore_whitespace && c != del && c != add))))
1243 {
1244 /* It's a "context" line in the hunk. */
1245 hunk_seen = TRUE;
1246 if (original_lines > 0)
1247 original_lines--;
1248 else
1249 {
1250 (*hunk)->original_length++;
1251 (*hunk)->original_fuzz++;
1252 }
1253 if (modified_lines > 0)
1254 modified_lines--;
1255 else
1256 {
1257 (*hunk)->modified_length++;
1258 (*hunk)->modified_fuzz++;
1259 }
1260 if (changed_line_seen)
1261 trailing_context++;
1262 else
1263 leading_context++;
1264 last_line_type = context_line;
1265 }
1266 else if (c == del
1267 && (original_lines > 0 || line->data[1] != del))
1268 {
1269 /* It's a "deleted" line in the hunk. */
1270 hunk_seen = TRUE;
1271 changed_line_seen = TRUE;
1272
1273 /* A hunk may have context in the middle. We only want
1274 trailing lines of context. */
1275 if (trailing_context > 0)
1276 trailing_context = 0;
1277
1278 if (original_lines > 0)
1279 original_lines--;
1280 else
1281 {
1282 (*hunk)->original_length++;
1283 (*hunk)->original_fuzz++;
1284 }
1285 last_line_type = original_line;
1286 }
1287 else if (c == add
1288 && (modified_lines > 0 || line->data[1] != add))
1289 {
1290 /* It's an "added" line in the hunk. */
1291 hunk_seen = TRUE;
1292 changed_line_seen = TRUE;
1293
1294 /* A hunk may have context in the middle. We only want
1295 trailing lines of context. */
1296 if (trailing_context > 0)
1297 trailing_context = 0;
1298
1299 if (modified_lines > 0)
1300 modified_lines--;
1301 else
1302 {
1303 (*hunk)->modified_length++;
1304 (*hunk)->modified_fuzz++;
1305 }
1306 last_line_type = modified_line;
1307 }
1308 else
1309 {
1310 if (eof)
1311 {
1312 /* The hunk ends at EOF. */
1313 end = pos;
1314 }
1315 else
1316 {
1317 /* The start of the current line marks the first byte
1318 * after the hunk text. */
1319 end = last_line;
1320 }
1321 if (original_end == 0)
1322 original_end = end;
1323 if (modified_end == 0)
1324 modified_end = end;
1325 break; /* Hunk was empty or has been read. */
1326 }
1327 }
1328 else
1329 {
1330 if (starts_with(line->data, text_atat))
1331 {
1332 /* Looks like we have a hunk header, try to rip it apart. */
1333 in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
1334 iterpool);
1335 if (in_hunk)
1336 {
1337 original_lines = (*hunk)->original_length;
1338 modified_lines = (*hunk)->modified_length;
1339 *is_property = FALSE;
1340 }
1341 }
1342 else if (starts_with(line->data, prop_atat))
1343 {
1344 /* Looks like we have a property hunk header, try to rip it
1345 * apart. */
1346 in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
1347 iterpool);
1348 if (in_hunk)
1349 {
1350 original_lines = (*hunk)->original_length;
1351 modified_lines = (*hunk)->modified_length;
1352 *is_property = TRUE;
1353 }
1354 }
1355 else if (starts_with(line->data, "Added: "))
1356 {
1357 SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
1358 result_pool));
1359 if (*prop_name)
1360 *prop_operation = (patch->reverse ? svn_diff_op_deleted
1361 : svn_diff_op_added);
1362 }
1363 else if (starts_with(line->data, "Deleted: "))
1364 {
1365 SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
1366 result_pool));
1367 if (*prop_name)
1368 *prop_operation = (patch->reverse ? svn_diff_op_added
1369 : svn_diff_op_deleted);
1370 }
1371 else if (starts_with(line->data, "Modified: "))
1372 {
1373 SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
1374 result_pool));
1375 if (*prop_name)
1376 *prop_operation = svn_diff_op_modified;
1377 }
1378 else if (starts_with(line->data, minus)
1379 || starts_with(line->data, "diff --git "))
1380 /* This could be a header of another patch. Bail out. */
1381 break;
1382 }
1383 }
1384 /* Check for the line length since a file may not have a newline at the
1385 * end and we depend upon the last line to be an empty one. */
1386 while (! eof || line->len > 0);
1387 svn_pool_destroy(iterpool);
1388
1389 if (! eof)
1390 /* Rewind to the start of the line just read, so subsequent calls
1391 * to this function or svn_diff_parse_next_patch() don't end
1392 * up skipping the line -- it may contain a patch or hunk header. */
1393 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
1394
1395 if (hunk_seen && start < end)
1396 {
1397 /* Did we get the number of context lines announced in the header?
1398
1399 If not... let's limit the number from the header to what we
1400 actually have, and apply a fuzz penalty */
1401 if (original_lines)
1402 {
1403 (*hunk)->original_length -= original_lines;
1404 (*hunk)->original_fuzz += original_lines;
1405 }
1406 if (modified_lines)
1407 {
1408 (*hunk)->modified_length -= modified_lines;
1409 (*hunk)->modified_fuzz += modified_lines;
1410 }
1411
1412 (*hunk)->patch = patch;
1413 (*hunk)->apr_file = apr_file;
1414 (*hunk)->leading_context = leading_context;
1415 (*hunk)->trailing_context = trailing_context;
1416 (*hunk)->diff_text_range.start = start;
1417 (*hunk)->diff_text_range.current = start;
1418 (*hunk)->diff_text_range.end = end;
1419 (*hunk)->original_text_range.start = start;
1420 (*hunk)->original_text_range.current = start;
1421 (*hunk)->original_text_range.end = original_end;
1422 (*hunk)->modified_text_range.start = start;
1423 (*hunk)->modified_text_range.current = start;
1424 (*hunk)->modified_text_range.end = modified_end;
1425 (*hunk)->original_no_final_eol = original_no_final_eol;
1426 (*hunk)->modified_no_final_eol = modified_no_final_eol;
1427 }
1428 else
1429 /* Something went wrong, just discard the result. */
1430 *hunk = NULL;
1431
1432 return SVN_NO_ERROR;
1433 }
1434
1435 /* Compare function for sorting hunks after parsing.
1436 * We sort hunks by their original line offset. */
1437 static int
compare_hunks(const void * a,const void * b)1438 compare_hunks(const void *a, const void *b)
1439 {
1440 const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
1441 const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
1442
1443 if (ha->original_start < hb->original_start)
1444 return -1;
1445 if (ha->original_start > hb->original_start)
1446 return 1;
1447 return 0;
1448 }
1449
/* Possible states of the diff header parser.
 * The trailing comment on each state shows an example of the header
 * line that leads to it. */
enum parse_state
{
   state_start,             /* initial */
   state_git_diff_seen,     /* diff --git */
   state_git_tree_seen,     /* a tree operation, rather than content change */
   state_git_minus_seen,    /* --- /dev/null; or --- a/ */
   state_git_plus_seen,     /* +++ /dev/null; or +++ b/ */
   state_old_mode_seen,     /* old mode 100644 */
   state_git_mode_seen,     /* new mode 100644 */
   state_move_from_seen,    /* rename from foo.c */
   state_copy_from_seen,    /* copy from foo.c */
   state_minus_seen,        /* --- foo.c */
   state_unidiff_found,     /* valid start of a regular unidiff header */
   state_git_header_found,  /* valid start of a --git diff header */
   state_binary_patch_found /* valid start of binary patch */
};
1467
/* Data type describing a valid state transition of the parser. */
struct transition
{
  /* The input this transition expects.
   * NOTE(review): presumably compared against the start of the current
   * line; the matching code is not in this part of the file -- confirm. */
  const char *expected_input;

  /* The parser state this transition requires.
   * NOTE(review): presumably the state the parser must be in for the
   * transition to apply -- confirm against the matching code. */
  enum parse_state required_state;

  /* A callback called upon each parser state transition.
   * It stores the resulting state in *NEW_STATE.  INPUT is non-const:
   * functions with this signature in this file (e.g. diff_minus())
   * truncate the line in place and record what they parse in PATCH. */
  svn_error_t *(*fn)(enum parse_state *new_state, char *input,
                     svn_patch_t *patch, apr_pool_t *result_pool,
                     apr_pool_t *scratch_pool);
};
1479
1480 /* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
1481 static svn_error_t *
grab_filename(const char ** file_name,const char * line,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1482 grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
1483 apr_pool_t *scratch_pool)
1484 {
1485 const char *utf8_path;
1486 const char *canon_path;
1487
1488 /* Grab the filename and encode it in UTF-8. */
1489 /* TODO: Allow specifying the patch file's encoding.
1490 * For now, we assume its encoding is native. */
1491 /* ### This can fail if the filename cannot be represented in the current
1492 * ### locale's encoding. */
1493 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
1494 line,
1495 scratch_pool));
1496
1497 /* Canonicalize the path name. */
1498 canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
1499
1500 *file_name = apr_pstrdup(result_pool, canon_path);
1501
1502 return SVN_NO_ERROR;
1503 }
1504
1505 /* Parse the '--- ' line of a regular unidiff. */
1506 static svn_error_t *
diff_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1507 diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1508 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1509 {
1510 /* If we can find a tab, it separates the filename from
1511 * the rest of the line which we can discard. */
1512 char *tab = strchr(line, '\t');
1513 if (tab)
1514 *tab = '\0';
1515
1516 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
1517 result_pool, scratch_pool));
1518
1519 *new_state = state_minus_seen;
1520
1521 return SVN_NO_ERROR;
1522 }
1523
1524 /* Parse the '+++ ' line of a regular unidiff. */
1525 static svn_error_t *
diff_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1526 diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1527 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1528 {
1529 /* If we can find a tab, it separates the filename from
1530 * the rest of the line which we can discard. */
1531 char *tab = strchr(line, '\t');
1532 if (tab)
1533 *tab = '\0';
1534
1535 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1536 result_pool, scratch_pool));
1537
1538 *new_state = state_unidiff_found;
1539
1540 return SVN_NO_ERROR;
1541 }
1542
1543 /* Parse the first line of a git extended unidiff. */
1544 static svn_error_t *
git_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1545 git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1546 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1547 {
1548 const char *old_path_start;
1549 char *old_path_end;
1550 const char *new_path_start;
1551 const char *new_path_end;
1552 char *new_path_marker;
1553 const char *old_path_marker;
1554
1555 /* ### Add handling of escaped paths
1556 * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1557 *
1558 * TAB, LF, double quote and backslash characters in pathnames are
1559 * represented as \t, \n, \" and \\, respectively. If there is need for
1560 * such substitution then the whole pathname is put in double quotes.
1561 */
1562
1563 /* Our line should look like this: 'diff --git a/path b/path'.
1564 *
1565 * If we find any deviations from that format, we return with state reset
1566 * to start.
1567 */
1568 old_path_marker = strstr(line, " a/");
1569
1570 if (! old_path_marker)
1571 {
1572 *new_state = state_start;
1573 return SVN_NO_ERROR;
1574 }
1575
1576 if (! *(old_path_marker + 3))
1577 {
1578 *new_state = state_start;
1579 return SVN_NO_ERROR;
1580 }
1581
1582 new_path_marker = strstr(old_path_marker, " b/");
1583
1584 if (! new_path_marker)
1585 {
1586 *new_state = state_start;
1587 return SVN_NO_ERROR;
1588 }
1589
1590 if (! *(new_path_marker + 3))
1591 {
1592 *new_state = state_start;
1593 return SVN_NO_ERROR;
1594 }
1595
1596 /* By now, we know that we have a line on the form '--git diff a/.+ b/.+'
1597 * We only need the filenames when we have deleted or added empty
1598 * files. In those cases the old_path and new_path is identical on the
1599 * 'diff --git' line. For all other cases we fetch the filenames from
1600 * other header lines. */
1601 old_path_start = line + STRLEN_LITERAL("diff --git a/");
1602 new_path_end = line + strlen(line);
1603 new_path_start = old_path_start;
1604
1605 while (TRUE)
1606 {
1607 ptrdiff_t len_old;
1608 ptrdiff_t len_new;
1609
1610 new_path_marker = strstr(new_path_start, " b/");
1611
1612 /* No new path marker, bail out. */
1613 if (! new_path_marker)
1614 break;
1615
1616 old_path_end = new_path_marker;
1617 new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1618
1619 /* No path after the marker. */
1620 if (! *new_path_start)
1621 break;
1622
1623 len_old = old_path_end - old_path_start;
1624 len_new = new_path_end - new_path_start;
1625
1626 /* Are the paths before and after the " b/" marker the same? */
1627 if (len_old == len_new
1628 && ! strncmp(old_path_start, new_path_start, len_old))
1629 {
1630 *old_path_end = '\0';
1631 SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1632 result_pool, scratch_pool));
1633
1634 SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1635 result_pool, scratch_pool));
1636 break;
1637 }
1638 }
1639
1640 /* We assume that the path is only modified until we've found a 'tree'
1641 * header */
1642 patch->operation = svn_diff_op_modified;
1643
1644 *new_state = state_git_diff_seen;
1645 return SVN_NO_ERROR;
1646 }
1647
1648 /* Parse the '--- ' line of a git extended unidiff. */
1649 static svn_error_t *
git_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1650 git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1651 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1652 {
1653 /* If we can find a tab, it separates the filename from
1654 * the rest of the line which we can discard. */
1655 char *tab = strchr(line, '\t');
1656 if (tab)
1657 *tab = '\0';
1658
1659 if (starts_with(line, "--- /dev/null"))
1660 SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1661 result_pool, scratch_pool));
1662 else
1663 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1664 result_pool, scratch_pool));
1665
1666 *new_state = state_git_minus_seen;
1667 return SVN_NO_ERROR;
1668 }
1669
1670 /* Parse the '+++ ' line of a git extended unidiff. */
1671 static svn_error_t *
git_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1672 git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1673 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1674 {
1675 /* If we can find a tab, it separates the filename from
1676 * the rest of the line which we can discard. */
1677 char *tab = strchr(line, '\t');
1678 if (tab)
1679 *tab = '\0';
1680
1681 if (starts_with(line, "+++ /dev/null"))
1682 SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1683 result_pool, scratch_pool));
1684 else
1685 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1686 result_pool, scratch_pool));
1687
1688 *new_state = state_git_header_found;
1689 return SVN_NO_ERROR;
1690 }
1691
1692 /* Helper for git_old_mode() and git_new_mode(). Translate the git
1693 * file mode MODE_STR into a binary "executable?" and "symlink?" state. */
1694 static svn_error_t *
parse_git_mode_bits(svn_tristate_t * executable_p,svn_tristate_t * symlink_p,const char * mode_str)1695 parse_git_mode_bits(svn_tristate_t *executable_p,
1696 svn_tristate_t *symlink_p,
1697 const char *mode_str)
1698 {
1699 apr_uint64_t mode;
1700 SVN_ERR(svn_cstring_strtoui64(&mode, mode_str,
1701 0 /* min */,
1702 0777777 /* max: six octal digits */,
1703 010 /* radix (octal) */));
1704
1705 /* Note: 0644 and 0755 are the only modes that can occur for plain files.
1706 * We deliberately choose to parse only those values: we are strict in what
1707 * we accept _and_ in what we produce.
1708 *
1709 * (Having said that, though, we could consider relaxing the parser to also
1710 * map
1711 * (mode & 0111) == 0000 -> svn_tristate_false
1712 * (mode & 0111) == 0111 -> svn_tristate_true
1713 * [anything else] -> svn_tristate_unknown
1714 * .)
1715 */
1716
1717 switch (mode & 0777)
1718 {
1719 case 0644:
1720 *executable_p = svn_tristate_false;
1721 break;
1722
1723 case 0755:
1724 *executable_p = svn_tristate_true;
1725 break;
1726
1727 default:
1728 /* Ignore unknown values. */
1729 *executable_p = svn_tristate_unknown;
1730 break;
1731 }
1732
1733 switch (mode & 0170000 /* S_IFMT */)
1734 {
1735 case 0120000: /* S_IFLNK */
1736 *symlink_p = svn_tristate_true;
1737 break;
1738
1739 case 0100000: /* S_IFREG */
1740 case 0040000: /* S_IFDIR */
1741 *symlink_p = svn_tristate_false;
1742 break;
1743
1744 default:
1745 /* Ignore unknown values.
1746 (Including those generated by Subversion <= 1.9) */
1747 *symlink_p = svn_tristate_unknown;
1748 break;
1749 }
1750
1751 return SVN_NO_ERROR;
1752 }
1753
1754 /* Parse the 'old mode ' line of a git extended unidiff. */
1755 static svn_error_t *
git_old_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1756 git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1757 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1758 {
1759 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1760 &patch->old_symlink_bit,
1761 line + STRLEN_LITERAL("old mode ")));
1762
1763 #ifdef SVN_DEBUG
1764 /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1765 SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown);
1766 #endif
1767
1768 *new_state = state_old_mode_seen;
1769 return SVN_NO_ERROR;
1770 }
1771
1772 /* Parse the 'new mode ' line of a git extended unidiff. */
1773 static svn_error_t *
git_new_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1774 git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1775 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1776 {
1777 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1778 &patch->new_symlink_bit,
1779 line + STRLEN_LITERAL("new mode ")));
1780
1781 #ifdef SVN_DEBUG
1782 /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1783 SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown);
1784 #endif
1785
1786 /* Don't touch patch->operation. */
1787
1788 *new_state = state_git_mode_seen;
1789 return SVN_NO_ERROR;
1790 }
1791
1792 static svn_error_t *
git_index(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1793 git_index(enum parse_state *new_state, char *line, svn_patch_t *patch,
1794 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1795 {
1796 /* We either have something like "index 33e5b38..0000000" (which we just
1797 ignore as we are not interested in git specific shas) or something like
1798 "index 33e5b38..0000000 120000" which tells us the mode, that isn't
1799 changed by applying this patch.
1800
1801 If the mode would have changed then we would see 'old mode' and 'new mode'
1802 lines.
1803 */
1804 line = strchr(line + STRLEN_LITERAL("index "), ' ');
1805
1806 if (line && patch->new_executable_bit == svn_tristate_unknown
1807 && patch->new_symlink_bit == svn_tristate_unknown
1808 && patch->operation != svn_diff_op_added
1809 && patch->operation != svn_diff_op_deleted)
1810 {
1811 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1812 &patch->new_symlink_bit,
1813 line + 1));
1814
1815 /* There is no change.. so set the old values to the new values */
1816 patch->old_executable_bit = patch->new_executable_bit;
1817 patch->old_symlink_bit = patch->new_symlink_bit;
1818 }
1819
1820 /* This function doesn't change the state! */
1821 /* *new_state = *new_state */
1822 return SVN_NO_ERROR;
1823 }
1824
1825 /* Parse the 'rename from ' line of a git extended unidiff. */
1826 static svn_error_t *
git_move_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1827 git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1828 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1829 {
1830 SVN_ERR(grab_filename(&patch->old_filename,
1831 line + STRLEN_LITERAL("rename from "),
1832 result_pool, scratch_pool));
1833
1834 *new_state = state_move_from_seen;
1835 return SVN_NO_ERROR;
1836 }
1837
1838 /* Parse the 'rename to ' line of a git extended unidiff. */
1839 static svn_error_t *
git_move_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1840 git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1841 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1842 {
1843 SVN_ERR(grab_filename(&patch->new_filename,
1844 line + STRLEN_LITERAL("rename to "),
1845 result_pool, scratch_pool));
1846
1847 patch->operation = svn_diff_op_moved;
1848
1849 *new_state = state_git_tree_seen;
1850 return SVN_NO_ERROR;
1851 }
1852
1853 /* Parse the 'copy from ' line of a git extended unidiff. */
1854 static svn_error_t *
git_copy_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1855 git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1856 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1857 {
1858 SVN_ERR(grab_filename(&patch->old_filename,
1859 line + STRLEN_LITERAL("copy from "),
1860 result_pool, scratch_pool));
1861
1862 *new_state = state_copy_from_seen;
1863 return SVN_NO_ERROR;
1864 }
1865
1866 /* Parse the 'copy to ' line of a git extended unidiff. */
1867 static svn_error_t *
git_copy_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1868 git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1869 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1870 {
1871 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1872 result_pool, scratch_pool));
1873
1874 patch->operation = svn_diff_op_copied;
1875
1876 *new_state = state_git_tree_seen;
1877 return SVN_NO_ERROR;
1878 }
1879
1880 /* Parse the 'new file ' line of a git extended unidiff. */
1881 static svn_error_t *
git_new_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1882 git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1883 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1884 {
1885 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1886 &patch->new_symlink_bit,
1887 line + STRLEN_LITERAL("new file mode ")));
1888
1889 patch->operation = svn_diff_op_added;
1890
1891 /* Filename already retrieved from diff --git header. */
1892
1893 *new_state = state_git_tree_seen;
1894 return SVN_NO_ERROR;
1895 }
1896
1897 /* Parse the 'deleted file ' line of a git extended unidiff. */
1898 static svn_error_t *
git_deleted_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1899 git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1900 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1901 {
1902 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1903 &patch->old_symlink_bit,
1904 line + STRLEN_LITERAL("deleted file mode ")));
1905
1906 patch->operation = svn_diff_op_deleted;
1907
1908 /* Filename already retrieved from diff --git header. */
1909
1910 *new_state = state_git_tree_seen;
1911 return SVN_NO_ERROR;
1912 }
1913
1914 /* Parse the 'GIT binary patch' header */
1915 static svn_error_t *
binary_patch_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1916 binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1917 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1918 {
1919 *new_state = state_binary_patch_found;
1920 return SVN_NO_ERROR;
1921 }
1922
1923
1924 /* Add a HUNK associated with the property PROP_NAME to PATCH. */
1925 static svn_error_t *
add_property_hunk(svn_patch_t * patch,const char * prop_name,svn_diff_hunk_t * hunk,svn_diff_operation_kind_t operation,apr_pool_t * result_pool)1926 add_property_hunk(svn_patch_t *patch, const char *prop_name,
1927 svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1928 apr_pool_t *result_pool)
1929 {
1930 svn_prop_patch_t *prop_patch;
1931
1932 prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1933
1934 if (! prop_patch)
1935 {
1936 prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1937 prop_patch->name = prop_name;
1938 prop_patch->operation = operation;
1939 prop_patch->hunks = apr_array_make(result_pool, 1,
1940 sizeof(svn_diff_hunk_t *));
1941
1942 svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1943 }
1944
1945 APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1946
1947 return SVN_NO_ERROR;
1948 }
1949
/* An open patch file together with the position at which parsing of the
 * next patch will resume. */
struct svn_patch_file_t
{
  /* The APR file handle to the patch file.  Opened by
   * svn_diff_open_patch_file() and closed by svn_diff_close_patch_file(). */
  apr_file_t *apr_file;

  /* The file offset at which the next patch is expected.  Starts at 0;
   * svn_diff_parse_next_patch() seeks here before parsing and stores the
   * file position it stopped at when it is done. */
  apr_off_t next_patch_offset;
};
1958
1959 svn_error_t *
svn_diff_open_patch_file(svn_patch_file_t ** patch_file,const char * local_abspath,apr_pool_t * result_pool)1960 svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1961 const char *local_abspath,
1962 apr_pool_t *result_pool)
1963 {
1964 svn_patch_file_t *p;
1965
1966 p = apr_palloc(result_pool, sizeof(*p));
1967 SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1968 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1969 result_pool));
1970 p->next_patch_offset = 0;
1971 *patch_file = p;
1972
1973 return SVN_NO_ERROR;
1974 }
1975
1976 /* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1977 * Parsing stops if no valid next hunk can be found.
1978 * If IGNORE_WHITESPACE is TRUE, lines without
1979 * leading spaces will be treated as context lines.
1980 * Allocate results in RESULT_POOL.
1981 * Use SCRATCH_POOL for temporary allocations. */
1982 static svn_error_t *
parse_hunks(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1983 parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1984 svn_boolean_t ignore_whitespace,
1985 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1986 {
1987 svn_diff_hunk_t *hunk;
1988 svn_boolean_t is_property;
1989 const char *last_prop_name;
1990 const char *prop_name;
1991 svn_diff_operation_kind_t prop_operation;
1992 apr_pool_t *iterpool;
1993
1994 last_prop_name = NULL;
1995
1996 patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1997 patch->prop_patches = apr_hash_make(result_pool);
1998 iterpool = svn_pool_create(scratch_pool);
1999 do
2000 {
2001 svn_pool_clear(iterpool);
2002
2003 SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
2004 patch, apr_file, ignore_whitespace, result_pool,
2005 iterpool));
2006
2007 if (hunk && is_property)
2008 {
2009 if (! prop_name)
2010 prop_name = last_prop_name;
2011 else
2012 last_prop_name = prop_name;
2013
2014 /* Skip pretty-printed svn:mergeinfo property hunks.
2015 * Pretty-printed mergeinfo data cannot be represented as a hunk and
2016 * is therefore stored in PATCH itself. */
2017 if (hunk->is_pretty_print_mergeinfo)
2018 continue;
2019
2020 SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
2021 result_pool));
2022 }
2023 else if (hunk)
2024 {
2025 APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
2026 last_prop_name = NULL;
2027 }
2028
2029 }
2030 while (hunk);
2031 svn_pool_destroy(iterpool);
2032
2033 return SVN_NO_ERROR;
2034 }
2035
2036 static svn_error_t *
parse_binary_patch(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t reverse,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2037 parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file,
2038 svn_boolean_t reverse,
2039 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
2040 {
2041 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2042 apr_off_t pos, last_line;
2043 svn_stringbuf_t *line;
2044 svn_boolean_t eof = FALSE;
2045 svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch));
2046 svn_boolean_t in_blob = FALSE;
2047 svn_boolean_t in_src = FALSE;
2048
2049 bpatch->apr_file = apr_file;
2050
2051 patch->prop_patches = apr_hash_make(result_pool);
2052
2053 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
2054
2055 while (!eof)
2056 {
2057 last_line = pos;
2058 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
2059 iterpool, iterpool));
2060
2061 /* Update line offset for next iteration. */
2062 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
2063
2064 if (in_blob)
2065 {
2066 char c = line->data[0];
2067
2068 /* 66 = len byte + (52/4*5) chars */
2069 if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
2070 && line->len <= 66
2071 && !strchr(line->data, ':')
2072 && !strchr(line->data, ' '))
2073 {
2074 /* One more blop line */
2075 if (in_src)
2076 bpatch->src_end = pos;
2077 else
2078 bpatch->dst_end = pos;
2079 }
2080 else if (svn_stringbuf_first_non_whitespace(line) < line->len
2081 && !(in_src && bpatch->src_start < last_line))
2082 {
2083 break; /* Bad patch */
2084 }
2085 else if (in_src)
2086 {
2087 patch->binary_patch = bpatch; /* SUCCESS! */
2088 break;
2089 }
2090 else
2091 {
2092 in_blob = FALSE;
2093 in_src = TRUE;
2094 }
2095 }
2096 else if (starts_with(line->data, "literal "))
2097 {
2098 apr_uint64_t expanded_size;
2099 svn_error_t *err = svn_cstring_strtoui64(&expanded_size,
2100 &line->data[8],
2101 0, APR_UINT64_MAX, 10);
2102
2103 if (err)
2104 {
2105 svn_error_clear(err);
2106 break;
2107 }
2108
2109 if (in_src)
2110 {
2111 bpatch->src_start = pos;
2112 bpatch->src_filesize = expanded_size;
2113 }
2114 else
2115 {
2116 bpatch->dst_start = pos;
2117 bpatch->dst_filesize = expanded_size;
2118 }
2119 in_blob = TRUE;
2120 }
2121 else
2122 break; /* We don't support GIT deltas (yet) */
2123 }
2124 svn_pool_destroy(iterpool);
2125
2126 if (!eof)
2127 /* Rewind to the start of the line just read, so subsequent calls
2128 * don't end up skipping the line. It may contain a patch or hunk header.*/
2129 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
2130 else if (in_src
2131 && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize))
2132 {
2133 patch->binary_patch = bpatch; /* SUCCESS */
2134 }
2135
2136 /* Reverse patch if requested */
2137 if (reverse && patch->binary_patch)
2138 {
2139 apr_off_t tmp_start = bpatch->src_start;
2140 apr_off_t tmp_end = bpatch->src_end;
2141 svn_filesize_t tmp_filesize = bpatch->src_filesize;
2142
2143 bpatch->src_start = bpatch->dst_start;
2144 bpatch->src_end = bpatch->dst_end;
2145 bpatch->src_filesize = bpatch->dst_filesize;
2146
2147 bpatch->dst_start = tmp_start;
2148 bpatch->dst_end = tmp_end;
2149 bpatch->dst_filesize = tmp_filesize;
2150 }
2151
2152 return SVN_NO_ERROR;
2153 }
2154
/* State machine for the diff header parser.
 *
 * svn_diff_parse_next_patch() walks this table for every line it reads.
 * The first entry whose expected input is a prefix of the line and whose
 * required state equals the current parser state is used: its function is
 * called with the line and may change the state.
 *
 * Expected Input        Required state          Function to call */
static struct transition transitions[] =
{
  {"--- ",              state_start,            diff_minus},
  {"+++ ",              state_minus_seen,       diff_plus},

  {"diff --git",        state_start,            git_start},
  {"--- a/",            state_git_diff_seen,    git_minus},
  {"--- a/",            state_git_mode_seen,    git_minus},
  {"--- a/",            state_git_tree_seen,    git_minus},
  {"--- /dev/null",     state_git_mode_seen,    git_minus},
  {"--- /dev/null",     state_git_tree_seen,    git_minus},
  {"+++ b/",            state_git_minus_seen,   git_plus},
  {"+++ /dev/null",     state_git_minus_seen,   git_plus},

  {"old mode ",         state_git_diff_seen,    git_old_mode},
  {"new mode ",         state_old_mode_seen,    git_new_mode},

  {"rename from ",      state_git_diff_seen,    git_move_from},
  {"rename from ",      state_git_mode_seen,    git_move_from},
  {"rename to ",        state_move_from_seen,   git_move_to},

  {"copy from ",        state_git_diff_seen,    git_copy_from},
  {"copy from ",        state_git_mode_seen,    git_copy_from},
  {"copy to ",          state_copy_from_seen,   git_copy_to},

  {"new file ",         state_git_diff_seen,    git_new_file},

  {"deleted file ",     state_git_diff_seen,    git_deleted_file},

  {"index ",            state_git_diff_seen,    git_index},
  {"index ",            state_git_tree_seen,    git_index},
  {"index ",            state_git_mode_seen,    git_index},

  {"GIT binary patch",  state_git_diff_seen,    binary_patch_start},
  {"GIT binary patch",  state_git_tree_seen,    binary_patch_start},
  {"GIT binary patch",  state_git_mode_seen,    binary_patch_start},
};
2194
2195 svn_error_t *
svn_diff_parse_next_patch(svn_patch_t ** patch_p,svn_patch_file_t * patch_file,svn_boolean_t reverse,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2196 svn_diff_parse_next_patch(svn_patch_t **patch_p,
2197 svn_patch_file_t *patch_file,
2198 svn_boolean_t reverse,
2199 svn_boolean_t ignore_whitespace,
2200 apr_pool_t *result_pool,
2201 apr_pool_t *scratch_pool)
2202 {
2203 apr_off_t pos, last_line;
2204 svn_boolean_t eof;
2205 svn_boolean_t line_after_tree_header_read = FALSE;
2206 apr_pool_t *iterpool;
2207 svn_patch_t *patch;
2208 enum parse_state state = state_start;
2209
2210 if (apr_file_eof(patch_file->apr_file) == APR_EOF)
2211 {
2212 /* No more patches here. */
2213 *patch_p = NULL;
2214 return SVN_NO_ERROR;
2215 }
2216
2217 patch = apr_pcalloc(result_pool, sizeof(*patch));
2218 patch->old_executable_bit = svn_tristate_unknown;
2219 patch->new_executable_bit = svn_tristate_unknown;
2220 patch->old_symlink_bit = svn_tristate_unknown;
2221 patch->new_symlink_bit = svn_tristate_unknown;
2222
2223 pos = patch_file->next_patch_offset;
2224 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
2225
2226 iterpool = svn_pool_create(scratch_pool);
2227 do
2228 {
2229 svn_stringbuf_t *line;
2230 svn_boolean_t valid_header_line = FALSE;
2231 int i;
2232
2233 svn_pool_clear(iterpool);
2234
2235 /* Remember the current line's offset, and read the line. */
2236 last_line = pos;
2237 SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
2238 APR_SIZE_MAX, iterpool, iterpool));
2239
2240 if (! eof)
2241 {
2242 /* Update line offset for next iteration. */
2243 SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file,
2244 iterpool));
2245 }
2246
2247 /* Run the state machine. */
2248 for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
2249 {
2250 if (starts_with(line->data, transitions[i].expected_input)
2251 && state == transitions[i].required_state)
2252 {
2253 SVN_ERR(transitions[i].fn(&state, line->data, patch,
2254 result_pool, iterpool));
2255 valid_header_line = TRUE;
2256 break;
2257 }
2258 }
2259
2260 if (state == state_unidiff_found
2261 || state == state_git_header_found
2262 || state == state_binary_patch_found)
2263 {
2264 /* We have a valid diff header, yay! */
2265 break;
2266 }
2267 else if ((state == state_git_tree_seen || state == state_git_mode_seen)
2268 && line_after_tree_header_read
2269 && !valid_header_line)
2270 {
2271 /* We have a valid diff header for a patch with only tree changes.
2272 * Rewind to the start of the line just read, so subsequent calls
2273 * to this function don't end up skipping the line -- it may
2274 * contain a patch. */
2275 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2276 scratch_pool));
2277 break;
2278 }
2279 else if (state == state_git_tree_seen
2280 || state == state_git_mode_seen)
2281 {
2282 line_after_tree_header_read = TRUE;
2283 }
2284 else if (! valid_header_line && state != state_start
2285 && state != state_git_diff_seen)
2286 {
2287 /* We've encountered an invalid diff header.
2288 *
2289 * Rewind to the start of the line just read - it may be a new
2290 * header that begins there. */
2291 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2292 scratch_pool));
2293 state = state_start;
2294 }
2295
2296 }
2297 while (! eof);
2298
2299 patch->reverse = reverse;
2300 if (reverse)
2301 {
2302 const char *temp;
2303 svn_tristate_t ts_tmp;
2304
2305 temp = patch->old_filename;
2306 patch->old_filename = patch->new_filename;
2307 patch->new_filename = temp;
2308
2309 switch (patch->operation)
2310 {
2311 case svn_diff_op_added:
2312 patch->operation = svn_diff_op_deleted;
2313 break;
2314 case svn_diff_op_deleted:
2315 patch->operation = svn_diff_op_added;
2316 break;
2317
2318 case svn_diff_op_modified:
2319 break; /* Stays modified. */
2320
2321 case svn_diff_op_copied:
2322 case svn_diff_op_moved:
2323 break; /* Stays copied or moved, just in the other direction. */
2324 case svn_diff_op_unchanged:
2325 break; /* Stays unchanged, of course. */
2326 }
2327
2328 ts_tmp = patch->old_executable_bit;
2329 patch->old_executable_bit = patch->new_executable_bit;
2330 patch->new_executable_bit = ts_tmp;
2331
2332 ts_tmp = patch->old_symlink_bit;
2333 patch->old_symlink_bit = patch->new_symlink_bit;
2334 patch->new_symlink_bit = ts_tmp;
2335 }
2336
2337 if (patch->old_filename == NULL || patch->new_filename == NULL)
2338 {
2339 /* Something went wrong, just discard the result. */
2340 patch = NULL;
2341 }
2342 else
2343 {
2344 if (state == state_binary_patch_found)
2345 {
2346 SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse,
2347 result_pool, iterpool));
2348 /* And fall through in property parsing */
2349 }
2350
2351 SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
2352 result_pool, iterpool));
2353 }
2354
2355 svn_pool_destroy(iterpool);
2356
2357 SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset,
2358 patch_file->apr_file, scratch_pool));
2359
2360 if (patch && patch->hunks)
2361 {
2362 /* Usually, hunks appear in the patch sorted by their original line
2363 * offset. But just in case they weren't parsed in this order for
2364 * some reason, we sort them so that our caller can assume that hunks
2365 * are sorted as if parsed from a usual patch. */
2366 svn_sort__array(patch->hunks, compare_hunks);
2367 }
2368
2369 *patch_p = patch;
2370 return SVN_NO_ERROR;
2371 }
2372
2373 svn_error_t *
svn_diff_close_patch_file(svn_patch_file_t * patch_file,apr_pool_t * scratch_pool)2374 svn_diff_close_patch_file(svn_patch_file_t *patch_file,
2375 apr_pool_t *scratch_pool)
2376 {
2377 return svn_error_trace(svn_io_file_close(patch_file->apr_file,
2378 scratch_pool));
2379 }
2380