1 /*
2  * parse-diff.c: functions for parsing diff files
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 #include <stdlib.h>
25 #include <stddef.h>
26 #include <string.h>
27 
28 #include "svn_hash.h"
29 #include "svn_types.h"
30 #include "svn_error.h"
31 #include "svn_io.h"
32 #include "svn_pools.h"
33 #include "svn_props.h"
34 #include "svn_string.h"
35 #include "svn_utf.h"
36 #include "svn_dirent_uri.h"
37 #include "svn_diff.h"
38 #include "svn_ctype.h"
39 #include "svn_mergeinfo.h"
40 
41 #include "private/svn_eol_private.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_diff_private.h"
44 #include "private/svn_sorts_private.h"
45 
46 #include "diff.h"
47 
48 #include "svn_private_config.h"
49 
/* Helper macro for readability: evaluates to non-zero when the C string
 * STR begins with the C string START.
 *
 * START is expanded twice (once as the strncmp() argument and once inside
 * strlen()), so it must not be an expression with side effects. */
#define starts_with(str, start)  \
  (strncmp((str), (start), strlen(start)) == 0)
53 
/* Like strlen() but for string literals: the length is computed with
 * sizeof, minus one for the terminating NUL.  Pass only a literal (or a
 * char array); with a pointer argument the result would be
 * sizeof(pointer) - 1 rather than the string length. */
#define STRLEN_LITERAL(str) (sizeof(str) - 1)
56 
/* This struct describes a range within a file, as well as the
 * current cursor position within the range. All numbers are in bytes.
 *
 * The readline helpers in this file treat the range as exhausted once
 * CURRENT >= END, and the reset helpers rewind CURRENT back to START. */
struct svn_diff__hunk_range {
  apr_off_t start;    /* Offset at which the range begins. */
  apr_off_t end;      /* Offset at which reading stops. */
  apr_off_t current;  /* Offset of the next read within the range. */
};
64 
/* A single hunk of a patch.
 *
 * The members below always describe the hunk as it appears in the patch
 * file.  The accessor functions in this file swap the "original" and
 * "modified" members when PATCH->reverse is set. */
struct svn_diff_hunk_t {
  /* The patch this hunk belongs to. */
  const svn_patch_t *patch;

  /* APR file handle to the patch file this hunk came from. */
  apr_file_t *apr_file;

  /* Whether the hunk was interpreted as pretty-print mergeinfo. If so,
     the hunk content is in PATCH and the rest of this hunk object is
     mostly uninitialized. */
  svn_boolean_t is_pretty_print_mergeinfo;

  /* Ranges used to keep track of this hunk's texts positions within
   * the patch file. */
  struct svn_diff__hunk_range diff_text_range;
  struct svn_diff__hunk_range original_text_range;
  struct svn_diff__hunk_range modified_text_range;

  /* Hunk ranges as they appeared in the patch file.
   * All numbers are lines, not bytes. */
  svn_linenum_t original_start;
  svn_linenum_t original_length;
  svn_linenum_t modified_start;
  svn_linenum_t modified_length;

  /* Number of lines of leading and trailing hunk context. */
  svn_linenum_t leading_context;
  svn_linenum_t trailing_context;

  /* Did we see a 'file does not end with eol' marker in this hunk? */
  svn_boolean_t original_no_final_eol;
  svn_boolean_t modified_no_final_eol;

  /* Fuzz penalty, triggered by bad patch targets.
   * svn_diff_hunk__get_fuzz_penalty() reports ORIGINAL_FUZZ for a
   * reversed patch and MODIFIED_FUZZ otherwise. */
  svn_linenum_t original_fuzz;
  svn_linenum_t modified_fuzz;
};
102 
/* A binary patch: two base85-encoded sections ("src" and "dst") located
 * inside a patch file. */
struct svn_diff_binary_patch_t {
  /* The patch this binary patch belongs to. */
  const svn_patch_t *patch;

  /* APR file handle to the patch file this binary patch came from. */
  apr_file_t *apr_file;

  /* Offsets inside APR_FILE delimiting the encoded "src" data, which
   * svn_diff_get_binary_diff_original_stream() decodes. */
  apr_off_t src_start;
  apr_off_t src_end;
  svn_filesize_t src_filesize; /* Expanded/final size */

  /* Offsets inside APR_FILE delimiting the encoded "dst" data, which
   * svn_diff_get_binary_diff_result_stream() decodes. */
  apr_off_t dst_start;
  apr_off_t dst_end;
  svn_filesize_t dst_filesize; /* Expanded/final size */
};
120 
121 /* Common guts of svn_diff_hunk__create_adds_single_line() and
122  * svn_diff_hunk__create_deletes_single_line().
123  *
124  * ADD is TRUE if adding and FALSE if deleting.
125  */
126 static svn_error_t *
add_or_delete_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,svn_boolean_t add,apr_pool_t * result_pool,apr_pool_t * scratch_pool)127 add_or_delete_single_line(svn_diff_hunk_t **hunk_out,
128                           const char *line,
129                           const svn_patch_t *patch,
130                           svn_boolean_t add,
131                           apr_pool_t *result_pool,
132                           apr_pool_t *scratch_pool)
133 {
134   svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk));
135   static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" };
136   const apr_size_t header_len = strlen(hunk_header[add]);
137   const apr_size_t len = strlen(line);
138   const apr_size_t end = header_len + (1 + len); /* The +1 is for the \n. */
139   svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool);
140 
141   hunk->patch = patch;
142 
143   /* hunk->apr_file is created below. */
144 
145   hunk->diff_text_range.start = header_len;
146   hunk->diff_text_range.current = header_len;
147 
148   if (add)
149     {
150       hunk->original_text_range.start = 0; /* There's no "original" text. */
151       hunk->original_text_range.current = 0;
152       hunk->original_text_range.end = 0;
153       hunk->original_no_final_eol = FALSE;
154 
155       hunk->modified_text_range.start = header_len;
156       hunk->modified_text_range.current = header_len;
157       hunk->modified_text_range.end = end;
158       hunk->modified_no_final_eol = TRUE;
159 
160       hunk->original_start = 0;
161       hunk->original_length = 0;
162 
163       hunk->modified_start = 1;
164       hunk->modified_length = 1;
165     }
166   else /* delete */
167     {
168       hunk->original_text_range.start = header_len;
169       hunk->original_text_range.current = header_len;
170       hunk->original_text_range.end = end;
171       hunk->original_no_final_eol = TRUE;
172 
173       hunk->modified_text_range.start = 0; /* There's no "original" text. */
174       hunk->modified_text_range.current = 0;
175       hunk->modified_text_range.end = 0;
176       hunk->modified_no_final_eol = FALSE;
177 
178       hunk->original_start = 1;
179       hunk->original_length = 1;
180 
181       hunk->modified_start = 0;
182       hunk->modified_length = 0; /* setting to '1' works too */
183     }
184 
185   hunk->leading_context = 0;
186   hunk->trailing_context = 0;
187 
188   /* Create APR_FILE and put just a hunk in it (without a diff header).
189    * Save the offset of the last byte of the diff line. */
190   svn_stringbuf_appendbytes(buf, hunk_header[add], header_len);
191   svn_stringbuf_appendbyte(buf, add ? '+' : '-');
192   svn_stringbuf_appendbytes(buf, line, len);
193   svn_stringbuf_appendbyte(buf, '\n');
194   svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n");
195 
196   hunk->diff_text_range.end = buf->len;
197 
198   SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */,
199                                    NULL /* system tempdir */,
200                                    svn_io_file_del_on_pool_cleanup,
201                                    result_pool, scratch_pool));
202   SVN_ERR(svn_io_file_write_full(hunk->apr_file,
203                                  buf->data, buf->len,
204                                  NULL, scratch_pool));
205   /* No need to seek. */
206 
207   *hunk_out = hunk;
208   return SVN_NO_ERROR;
209 }
210 
211 svn_error_t *
svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)212 svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out,
213                                        const char *line,
214                                        const svn_patch_t *patch,
215                                        apr_pool_t *result_pool,
216                                        apr_pool_t *scratch_pool)
217 {
218   SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
219                                     (!patch->reverse),
220                                     result_pool, scratch_pool));
221   return SVN_NO_ERROR;
222 }
223 
224 svn_error_t *
svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)225 svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out,
226                                           const char *line,
227                                           const svn_patch_t *patch,
228                                           apr_pool_t *result_pool,
229                                           apr_pool_t *scratch_pool)
230 {
231   SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
232                                     patch->reverse,
233                                     result_pool, scratch_pool));
234   return SVN_NO_ERROR;
235 }
236 
237 void
svn_diff_hunk_reset_diff_text(svn_diff_hunk_t * hunk)238 svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
239 {
240   hunk->diff_text_range.current = hunk->diff_text_range.start;
241 }
242 
243 void
svn_diff_hunk_reset_original_text(svn_diff_hunk_t * hunk)244 svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
245 {
246   if (hunk->patch->reverse)
247     hunk->modified_text_range.current = hunk->modified_text_range.start;
248   else
249     hunk->original_text_range.current = hunk->original_text_range.start;
250 }
251 
252 void
svn_diff_hunk_reset_modified_text(svn_diff_hunk_t * hunk)253 svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
254 {
255   if (hunk->patch->reverse)
256     hunk->original_text_range.current = hunk->original_text_range.start;
257   else
258     hunk->modified_text_range.current = hunk->modified_text_range.start;
259 }
260 
261 svn_linenum_t
svn_diff_hunk_get_original_start(const svn_diff_hunk_t * hunk)262 svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
263 {
264   return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
265 }
266 
267 svn_linenum_t
svn_diff_hunk_get_original_length(const svn_diff_hunk_t * hunk)268 svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
269 {
270   return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
271 }
272 
273 svn_linenum_t
svn_diff_hunk_get_modified_start(const svn_diff_hunk_t * hunk)274 svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
275 {
276   return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
277 }
278 
279 svn_linenum_t
svn_diff_hunk_get_modified_length(const svn_diff_hunk_t * hunk)280 svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
281 {
282   return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
283 }
284 
285 svn_linenum_t
svn_diff_hunk_get_leading_context(const svn_diff_hunk_t * hunk)286 svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
287 {
288   return hunk->leading_context;
289 }
290 
291 svn_linenum_t
svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t * hunk)292 svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
293 {
294   return hunk->trailing_context;
295 }
296 
297 svn_linenum_t
svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t * hunk)298 svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk)
299 {
300   return hunk->patch->reverse ? hunk->original_fuzz : hunk->modified_fuzz;
301 }
302 
/* Baton for the base85 stream implementation */
struct base85_baton_t
{
  apr_file_t *file;       /* File the encoded lines are read from */
  apr_pool_t *iterpool;   /* Cleared at the start of each read call and
                             destroyed when the stream is closed */
  char buffer[52];        /* Decoded bytes of the current line */
  apr_off_t next_pos;     /* Start position of next line */
  apr_off_t end_pos;      /* Position after last line */
  apr_size_t buf_size;    /* Decoded bytes held in BUFFER (52 unless this
                             is the last line) */
  apr_size_t buf_pos;     /* Number of bytes of BUFFER already handed out
                             to the reader */
  svn_boolean_t done;     /* Set once a read could not be fully satisfied;
                             later reads return zero bytes */
};
315 
316 /* Implements svn_read_fn_t for the base85 read stream */
317 static svn_error_t *
read_handler_base85(void * baton,char * buffer,apr_size_t * len)318 read_handler_base85(void *baton, char *buffer, apr_size_t *len)
319 {
320   struct base85_baton_t *b85b = baton;
321   apr_pool_t *iterpool = b85b->iterpool;
322   apr_size_t remaining = *len;
323   char *dest = buffer;
324 
325   svn_pool_clear(iterpool);
326 
327   if (b85b->done)
328     {
329       *len = 0;
330       return SVN_NO_ERROR;
331     }
332 
333   while (remaining && (b85b->buf_size > b85b->buf_pos
334                        || b85b->next_pos < b85b->end_pos))
335     {
336       svn_stringbuf_t *line;
337       svn_boolean_t at_eof;
338 
339       apr_size_t available = b85b->buf_size - b85b->buf_pos;
340       if (available)
341         {
342           apr_size_t n = (remaining < available) ? remaining : available;
343 
344           memcpy(dest, b85b->buffer + b85b->buf_pos, n);
345           dest += n;
346           remaining -= n;
347           b85b->buf_pos += n;
348 
349           if (!remaining)
350             return SVN_NO_ERROR; /* *len = OK */
351         }
352 
353       if (b85b->next_pos >= b85b->end_pos)
354         break; /* At EOF */
355       SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos,
356                                iterpool));
357       SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof,
358                                    APR_SIZE_MAX, iterpool, iterpool));
359       if (at_eof)
360         b85b->next_pos = b85b->end_pos;
361       else
362         {
363           SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file,
364                                          iterpool));
365         }
366 
367       if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z')
368         b85b->buf_size = line->data[0] - 'A' + 1;
369       else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z')
370         b85b->buf_size = line->data[0] - 'a' + 26 + 1;
371       else
372         return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
373                                 _("Unexpected data in base85 section"));
374 
375       if (b85b->buf_size < 52)
376         b85b->next_pos = b85b->end_pos; /* Handle as EOF */
377 
378       SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size,
379                                            line->data + 1, line->len - 1,
380                                            iterpool));
381       b85b->buf_pos = 0;
382     }
383 
384   *len -= remaining;
385   b85b->done = TRUE;
386 
387   return SVN_NO_ERROR;
388 }
389 
390 /* Implements svn_close_fn_t for the base85 read stream */
391 static svn_error_t *
close_handler_base85(void * baton)392 close_handler_base85(void *baton)
393 {
394   struct base85_baton_t *b85b = baton;
395 
396   svn_pool_destroy(b85b->iterpool);
397 
398   return SVN_NO_ERROR;
399 }
400 
401 /* Gets a stream that reads decoded base85 data from a segment of a file.
402    The current implementation might assume that both start_pos and end_pos
403    are located at line boundaries. */
404 static svn_stream_t *
get_base85_data_stream(apr_file_t * file,apr_off_t start_pos,apr_off_t end_pos,apr_pool_t * result_pool)405 get_base85_data_stream(apr_file_t *file,
406                        apr_off_t start_pos,
407                        apr_off_t end_pos,
408                        apr_pool_t *result_pool)
409 {
410   struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b));
411   svn_stream_t *base85s = svn_stream_create(b85b, result_pool);
412 
413   b85b->file = file;
414   b85b->iterpool = svn_pool_create(result_pool);
415   b85b->next_pos = start_pos;
416   b85b->end_pos = end_pos;
417 
418   svn_stream_set_read2(base85s, NULL /* only full read support */,
419                        read_handler_base85);
420   svn_stream_set_close(base85s, close_handler_base85);
421   return base85s;
422 }
423 
/* Baton for the length verification stream functions */
struct length_verify_baton_t
{
  svn_stream_t *inner;       /* Stream whose length is being verified;
                                closed together with the verifying stream */
  svn_filesize_t remaining;  /* Bytes still expected from INNER; starts at
                                the expected size and is decreased by every
                                successful read */
};
430 
431 /* Implements svn_read_fn_t for the length verification stream */
432 static svn_error_t *
read_handler_length_verify(void * baton,char * buffer,apr_size_t * len)433 read_handler_length_verify(void *baton, char *buffer, apr_size_t *len)
434 {
435   struct length_verify_baton_t *lvb = baton;
436   apr_size_t requested_len = *len;
437 
438   SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len));
439 
440   if (*len > lvb->remaining)
441     return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
442                             _("Base85 data expands to longer than declared "
443                               "filesize"));
444   else if (requested_len > *len && *len != lvb->remaining)
445     return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
446                             _("Base85 data expands to smaller than declared "
447                               "filesize"));
448 
449   lvb->remaining -= *len;
450 
451   return SVN_NO_ERROR;
452 }
453 
454 /* Implements svn_close_fn_t for the length verification stream */
455 static svn_error_t *
close_handler_length_verify(void * baton)456 close_handler_length_verify(void *baton)
457 {
458   struct length_verify_baton_t *lvb = baton;
459 
460   return svn_error_trace(svn_stream_close(lvb->inner));
461 }
462 
463 /* Gets a stream that verifies on reads that the inner stream is exactly
464    of the specified length */
465 static svn_stream_t *
get_verify_length_stream(svn_stream_t * inner,svn_filesize_t expected_size,apr_pool_t * result_pool)466 get_verify_length_stream(svn_stream_t *inner,
467                          svn_filesize_t expected_size,
468                          apr_pool_t *result_pool)
469 {
470   struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb));
471   svn_stream_t *len_stream = svn_stream_create(lvb, result_pool);
472 
473   lvb->inner = inner;
474   lvb->remaining = expected_size;
475 
476   svn_stream_set_read2(len_stream, NULL /* only full read support */,
477                        read_handler_length_verify);
478   svn_stream_set_close(len_stream, close_handler_length_verify);
479 
480   return len_stream;
481 }
482 
483 svn_stream_t *
svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)484 svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
485                                          apr_pool_t *result_pool)
486 {
487   svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start,
488                                            bpatch->src_end, result_pool);
489 
490   s = svn_stream_compressed(s, result_pool);
491 
492   /* ### If we (ever) want to support the DELTA format, then we should hook the
493          undelta handling here */
494 
495   return get_verify_length_stream(s, bpatch->src_filesize, result_pool);
496 }
497 
498 svn_stream_t *
svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)499 svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
500                                        apr_pool_t *result_pool)
501 {
502   svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start,
503                                            bpatch->dst_end, result_pool);
504 
505   s = svn_stream_compressed(s, result_pool);
506 
507   /* ### If we (ever) want to support the DELTA format, then we should hook the
508   undelta handling here */
509 
510   return get_verify_length_stream(s, bpatch->dst_filesize, result_pool);
511 }
512 
513 /* Try to parse a positive number from a decimal number encoded
514  * in the string NUMBER. Return parsed number in OFFSET, and return
515  * TRUE if parsing was successful. */
516 static svn_boolean_t
parse_offset(svn_linenum_t * offset,const char * number)517 parse_offset(svn_linenum_t *offset, const char *number)
518 {
519   svn_error_t *err;
520   apr_uint64_t val;
521 
522   err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
523   if (err)
524     {
525       svn_error_clear(err);
526       return FALSE;
527     }
528 
529   *offset = (svn_linenum_t)val;
530 
531   return TRUE;
532 }
533 
534 /* Try to parse a hunk range specification from the string RANGE.
535  * Return parsed information in *START and *LENGTH, and return TRUE
536  * if the range parsed correctly. Note: This function may modify the
537  * input value RANGE. */
538 static svn_boolean_t
parse_range(svn_linenum_t * start,svn_linenum_t * length,char * range)539 parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
540 {
541   char *comma;
542 
543   if (*range == 0)
544     return FALSE;
545 
546   comma = strstr(range, ",");
547   if (comma)
548     {
549       if (strlen(comma + 1) > 0)
550         {
551           /* Try to parse the length. */
552           if (! parse_offset(length, comma + 1))
553             return FALSE;
554 
555           /* Snip off the end of the string,
556            * so we can comfortably parse the line
557            * number the hunk starts at. */
558           *comma = '\0';
559         }
560        else
561          /* A comma but no length? */
562          return FALSE;
563     }
564   else
565     {
566       *length = 1;
567     }
568 
569   /* Try to parse the line number the hunk starts at. */
570   return parse_offset(start, range);
571 }
572 
573 /* Try to parse a hunk header in string HEADER, putting parsed information
574  * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
575  * character string used to delimit the hunk header.
576  * Do all allocations in POOL. */
577 static svn_boolean_t
parse_hunk_header(const char * header,svn_diff_hunk_t * hunk,const char * atat,apr_pool_t * pool)578 parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
579                   const char *atat, apr_pool_t *pool)
580 {
581   const char *p;
582   const char *start;
583   svn_stringbuf_t *range;
584 
585   p = header + strlen(atat);
586   if (*p != ' ')
587     /* No. */
588     return FALSE;
589   p++;
590   if (*p != '-')
591     /* Nah... */
592     return FALSE;
593   /* OK, this may be worth allocating some memory for... */
594   range = svn_stringbuf_create_ensure(31, pool);
595   start = ++p;
596   while (*p && *p != ' ')
597     {
598       p++;
599     }
600 
601   if (*p != ' ')
602     /* No no no... */
603     return FALSE;
604 
605   svn_stringbuf_appendbytes(range, start, p - start);
606 
607   /* Try to parse the first range. */
608   if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
609     return FALSE;
610 
611   /* Clear the stringbuf so we can reuse it for the second range. */
612   svn_stringbuf_setempty(range);
613   p++;
614   if (*p != '+')
615     /* Eeek! */
616     return FALSE;
617   /* OK, this may be worth copying... */
618   start = ++p;
619   while (*p && *p != ' ')
620     {
621       p++;
622     }
623   if (*p != ' ')
624     /* No no no... */
625     return FALSE;
626 
627   svn_stringbuf_appendbytes(range, start, p - start);
628 
629   /* Check for trailing @@ */
630   p++;
631   if (! starts_with(p, atat))
632     return FALSE;
633 
634   /* There may be stuff like C-function names after the trailing @@,
635    * but we ignore that. */
636 
637   /* Try to parse the second range. */
638   if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
639     return FALSE;
640 
641   /* Hunk header is good. */
642   return TRUE;
643 }
644 
645 /* Read a line of original or modified hunk text from the specified
646  * RANGE within FILE. FILE is expected to contain unidiff text.
647  * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
648  * Any lines commencing with the VERBOTEN character are discarded.
649  * VERBOTEN should be '+' or '-', depending on which form of hunk text
650  * is being read. NO_FINAL_EOL declares if the hunk contains a no final
651  * EOL marker.
652  *
653  * All other parameters are as in svn_diff_hunk_readline_original_text()
654  * and svn_diff_hunk_readline_modified_text().
655  */
656 static svn_error_t *
hunk_readline_original_or_modified(apr_file_t * file,struct svn_diff__hunk_range * range,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,char verboten,svn_boolean_t no_final_eol,apr_pool_t * result_pool,apr_pool_t * scratch_pool)657 hunk_readline_original_or_modified(apr_file_t *file,
658                                    struct svn_diff__hunk_range *range,
659                                    svn_stringbuf_t **stringbuf,
660                                    const char **eol,
661                                    svn_boolean_t *eof,
662                                    char verboten,
663                                    svn_boolean_t no_final_eol,
664                                    apr_pool_t *result_pool,
665                                    apr_pool_t *scratch_pool)
666 {
667   apr_size_t max_len;
668   svn_boolean_t filtered;
669   apr_off_t pos;
670   svn_stringbuf_t *str;
671   const char *eol_p;
672   apr_pool_t *last_pool;
673 
674   if (!eol)
675     eol = &eol_p;
676 
677   if (range->current >= range->end)
678     {
679       /* We're past the range. Indicate that no bytes can be read. */
680       *eof = TRUE;
681       *eol = NULL;
682       *stringbuf = svn_stringbuf_create_empty(result_pool);
683       return SVN_NO_ERROR;
684     }
685 
686   SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool));
687   SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
688 
689   /* It's not ITERPOOL because we use data allocated in LAST_POOL out
690      of the loop. */
691   last_pool = svn_pool_create(scratch_pool);
692   do
693     {
694       svn_pool_clear(last_pool);
695 
696       max_len = range->end - range->current;
697       SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
698                                    last_pool, last_pool));
699       SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool));
700       filtered = (str->data[0] == verboten || str->data[0] == '\\');
701     }
702   while (filtered && ! *eof);
703 
704   if (filtered)
705     {
706       /* EOF, return an empty string. */
707       *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
708       *eol = NULL;
709     }
710   else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
711     {
712       /* Shave off leading unidiff symbols. */
713       *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
714     }
715   else
716     {
717       /* Return the line as-is. Handle as a chopped leading spaces */
718       *stringbuf = svn_stringbuf_dup(str, result_pool);
719     }
720 
721   if (!filtered && *eof && !*eol && *str->data)
722     {
723       /* Ok, we miss a final EOL in the patch file, but didn't see a
724          no eol marker line.
725 
726          We should report that we had an EOL or the patch code will
727          misbehave (and it knows nothing about no eol markers) */
728 
729       if (!no_final_eol && eol != &eol_p)
730         {
731           apr_off_t start = 0;
732 
733           SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
734 
735           SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX,
736                                        scratch_pool, scratch_pool));
737 
738           /* Every patch file that has hunks has at least one EOL*/
739           SVN_ERR_ASSERT(*eol != NULL);
740         }
741 
742       *eof = FALSE;
743       /* Fall through to seek back to the right location */
744     }
745   SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
746 
747   svn_pool_destroy(last_pool);
748   return SVN_NO_ERROR;
749 }
750 
751 svn_error_t *
svn_diff_hunk_readline_original_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)752 svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
753                                      svn_stringbuf_t **stringbuf,
754                                      const char **eol,
755                                      svn_boolean_t *eof,
756                                      apr_pool_t *result_pool,
757                                      apr_pool_t *scratch_pool)
758 {
759   return svn_error_trace(
760     hunk_readline_original_or_modified(hunk->apr_file,
761                                        hunk->patch->reverse ?
762                                          &hunk->modified_text_range :
763                                          &hunk->original_text_range,
764                                        stringbuf, eol, eof,
765                                        hunk->patch->reverse ? '-' : '+',
766                                        hunk->patch->reverse
767                                           ? hunk->modified_no_final_eol
768                                           : hunk->original_no_final_eol,
769                                        result_pool, scratch_pool));
770 }
771 
772 svn_error_t *
svn_diff_hunk_readline_modified_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)773 svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
774                                      svn_stringbuf_t **stringbuf,
775                                      const char **eol,
776                                      svn_boolean_t *eof,
777                                      apr_pool_t *result_pool,
778                                      apr_pool_t *scratch_pool)
779 {
780   return svn_error_trace(
781     hunk_readline_original_or_modified(hunk->apr_file,
782                                        hunk->patch->reverse ?
783                                          &hunk->original_text_range :
784                                          &hunk->modified_text_range,
785                                        stringbuf, eol, eof,
786                                        hunk->patch->reverse ? '+' : '-',
787                                        hunk->patch->reverse
788                                           ? hunk->original_no_final_eol
789                                           : hunk->modified_no_final_eol,
790                                        result_pool, scratch_pool));
791 }
792 
793 svn_error_t *
svn_diff_hunk_readline_diff_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)794 svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
795                                  svn_stringbuf_t **stringbuf,
796                                  const char **eol,
797                                  svn_boolean_t *eof,
798                                  apr_pool_t *result_pool,
799                                  apr_pool_t *scratch_pool)
800 {
801   svn_stringbuf_t *line;
802   apr_size_t max_len;
803   apr_off_t pos;
804   const char *eol_p;
805 
806   if (!eol)
807     eol = &eol_p;
808 
809   if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
810     {
811       /* We're past the range. Indicate that no bytes can be read. */
812       *eof = TRUE;
813       *eol = NULL;
814       *stringbuf = svn_stringbuf_create_empty(result_pool);
815       return SVN_NO_ERROR;
816     }
817 
818   SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool));
819   SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
820                            &hunk->diff_text_range.current, scratch_pool));
821   max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
822   SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
823                                result_pool,
824                    scratch_pool));
825   SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current,
826                                  hunk->apr_file, scratch_pool));
827 
828   if (*eof && !*eol && *line->data)
829     {
830       /* Ok, we miss a final EOL in the patch file, but didn't see a
831           no eol marker line.
832 
833           We should report that we had an EOL or the patch code will
834           misbehave (and it knows nothing about no eol markers) */
835 
836       if (eol != &eol_p)
837         {
838           /* Lets pick the first eol we find in our patch file */
839           apr_off_t start = 0;
840           svn_stringbuf_t *str;
841 
842           SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start,
843                                    scratch_pool));
844 
845           SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL,
846                                        APR_SIZE_MAX,
847                                        scratch_pool, scratch_pool));
848 
849           /* Every patch file that has hunks has at least one EOL*/
850           SVN_ERR_ASSERT(*eol != NULL);
851         }
852 
853       *eof = FALSE;
854 
855       /* Fall through to seek back to the right location */
856     }
857 
858   SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
859 
860   if (hunk->patch->reverse)
861     {
862       if (line->data[0] == '+')
863         line->data[0] = '-';
864       else if (line->data[0] == '-')
865         line->data[0] = '+';
866     }
867 
868   *stringbuf = line;
869 
870   return SVN_NO_ERROR;
871 }
872 
873 /* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
874  * Allocate *PROP_NAME in RESULT_POOL.
875  * Set *PROP_NAME to NULL if no valid property name was found. */
876 static svn_error_t *
parse_prop_name(const char ** prop_name,const char * header,const char * indicator,apr_pool_t * result_pool)877 parse_prop_name(const char **prop_name, const char *header,
878                 const char *indicator, apr_pool_t *result_pool)
879 {
880   SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
881                                   header + strlen(indicator),
882                                   result_pool));
883   if (**prop_name == '\0')
884     *prop_name = NULL;
885   else if (! svn_prop_name_is_valid(*prop_name))
886     {
887       svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
888       svn_stringbuf_strip_whitespace(buf);
889       *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
890     }
891 
892   return SVN_NO_ERROR;
893 }
894 
895 
896 /* A helper function to parse svn:mergeinfo diffs.
897  *
898  * These diffs use a special pretty-print format, for instance:
899  *
900  * Added: svn:mergeinfo
901  * ## -0,0 +0,1 ##
902  *   Merged /trunk:r2-3
903  *
904  * The hunk header has the following format:
905  * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
906  *
907  * The header is followed by a list of mergeinfo, one path per line.
908  * This function parses such lines. Lines describing reverse merges
909  * appear first, and then all lines describing forward merges appear.
910  *
911  * Parts of the line are affected by i18n. The words 'Merged'
912  * and 'Reverse-merged' can appear in any language and at any
913  * position within the line. We can only assume that a leading
914  * '/' starts the merge source path, the path is followed by
915  * ":r", which in turn is followed by a mergeinfo revision range,
916  *  which is terminated by whitespace or end-of-string.
917  *
918  * *NUMBER_OF_REVERSE_MERGES and *NUMBER_OF_FORWARD_MERGES are the
919  * numbers of reverse and forward merges remaining to be read. This
920  * function decrements *NUMBER_OF_REVERSE_MERGES for each LINE
921  * parsed until that is zero, then *NUMBER_OF_FORWARD_MERGES for
922  * each LINE parsed until that is zero. If both are zero, it parses
923  * and discards LINE.
924  *
925  * If LINE is successfully parsed, *FOUND_MERGEINFO is set to TRUE,
926  * otherwise to FALSE.
927  *
928  * If LINE is successfully parsed and counted, the resulting mergeinfo
929  * is added to PATCH->mergeinfo or PATCH->reverse_mergeinfo.
930  */
931 static svn_error_t *
parse_pretty_mergeinfo_line(svn_boolean_t * found_mergeinfo,svn_linenum_t * number_of_reverse_merges,svn_linenum_t * number_of_forward_merges,svn_stringbuf_t * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)932 parse_pretty_mergeinfo_line(svn_boolean_t *found_mergeinfo,
933                             svn_linenum_t *number_of_reverse_merges,
934                             svn_linenum_t *number_of_forward_merges,
935                             svn_stringbuf_t *line,
936                             svn_patch_t *patch,
937                             apr_pool_t *result_pool,
938                             apr_pool_t *scratch_pool)
939 {
940   char *slash = strchr(line->data, '/');
941   char *colon = strrchr(line->data, ':');
942 
943   *found_mergeinfo = FALSE;
944 
945   if (slash && colon && colon[1] == 'r' && slash < colon)
946     {
947       svn_stringbuf_t *input;
948       svn_mergeinfo_t mergeinfo = NULL;
949       char *s;
950       svn_error_t *err;
951 
952       input = svn_stringbuf_create_ensure(line->len, scratch_pool);
953 
954       /* Copy the merge source path + colon */
955       s = slash;
956       while (s <= colon)
957         {
958           svn_stringbuf_appendbyte(input, *s);
959           s++;
960         }
961 
962       /* skip 'r' after colon */
963       s++;
964 
965       /* Copy the revision range. */
966       while (s < line->data + line->len)
967         {
968           if (svn_ctype_isspace(*s))
969             break;
970           svn_stringbuf_appendbyte(input, *s);
971           s++;
972         }
973 
974       err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
975       if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
976         {
977           svn_error_clear(err);
978           mergeinfo = NULL;
979         }
980       else
981         SVN_ERR(err);
982 
983       if (mergeinfo)
984         {
985           if (*number_of_reverse_merges > 0) /* reverse merges */
986             {
987               if (patch->reverse)
988                 {
989                   if (patch->mergeinfo == NULL)
990                     patch->mergeinfo = mergeinfo;
991                   else
992                     SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
993                                                  mergeinfo,
994                                                  result_pool,
995                                                  scratch_pool));
996                 }
997               else
998                 {
999                   if (patch->reverse_mergeinfo == NULL)
1000                     patch->reverse_mergeinfo = mergeinfo;
1001                   else
1002                     SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1003                                                  mergeinfo,
1004                                                  result_pool,
1005                                                  scratch_pool));
1006                 }
1007               (*number_of_reverse_merges)--;
1008             }
1009           else if (number_of_forward_merges > 0) /* forward merges */
1010             {
1011               if (patch->reverse)
1012                 {
1013                   if (patch->reverse_mergeinfo == NULL)
1014                     patch->reverse_mergeinfo = mergeinfo;
1015                   else
1016                     SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1017                                                  mergeinfo,
1018                                                  result_pool,
1019                                                  scratch_pool));
1020                 }
1021               else
1022                 {
1023                   if (patch->mergeinfo == NULL)
1024                     patch->mergeinfo = mergeinfo;
1025                   else
1026                     SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
1027                                                  mergeinfo,
1028                                                  result_pool,
1029                                                  scratch_pool));
1030                 }
1031               (*number_of_forward_merges)--;
1032             }
1033 
1034           *found_mergeinfo = TRUE;
1035         }
1036     }
1037 
1038   return SVN_NO_ERROR;
1039 }
1040 
1041 /* Return the next *HUNK from a PATCH in APR_FILE.
1042  * If no hunk can be found, set *HUNK to NULL.
1043  * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
1044  * is the first belonging to a certain property, then PROP_NAME and
1045  * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
1046  * NULL.  If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
1047  * treated as context lines.  Allocate results in RESULT_POOL.
1048  * Use SCRATCH_POOL for all other allocations. */
1049 static svn_error_t *
parse_next_hunk(svn_diff_hunk_t ** hunk,svn_boolean_t * is_property,const char ** prop_name,svn_diff_operation_kind_t * prop_operation,svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1050 parse_next_hunk(svn_diff_hunk_t **hunk,
1051                 svn_boolean_t *is_property,
1052                 const char **prop_name,
1053                 svn_diff_operation_kind_t *prop_operation,
1054                 svn_patch_t *patch,
1055                 apr_file_t *apr_file,
1056                 svn_boolean_t ignore_whitespace,
1057                 apr_pool_t *result_pool,
1058                 apr_pool_t *scratch_pool)
1059 {
1060   static const char * const minus = "--- ";
1061   static const char * const text_atat = "@@";
1062   static const char * const prop_atat = "##";
1063   svn_stringbuf_t *line;
1064   svn_boolean_t eof, in_hunk, hunk_seen;
1065   apr_off_t pos, last_line;
1066   apr_off_t start, end;
1067   apr_off_t original_end;
1068   apr_off_t modified_end;
1069   svn_boolean_t original_no_final_eol = FALSE;
1070   svn_boolean_t modified_no_final_eol = FALSE;
1071   svn_linenum_t original_lines;
1072   svn_linenum_t modified_lines;
1073   svn_linenum_t leading_context;
1074   svn_linenum_t trailing_context;
1075   svn_boolean_t changed_line_seen;
1076   enum {
1077     noise_line,
1078     original_line,
1079     modified_line,
1080     context_line
1081   } last_line_type;
1082   apr_pool_t *iterpool;
1083 
1084   *prop_operation = svn_diff_op_unchanged;
1085 
1086   /* We only set this if we have a property hunk header. */
1087   *prop_name = NULL;
1088   *is_property = FALSE;
1089 
1090   if (apr_file_eof(apr_file) == APR_EOF)
1091     {
1092       /* No more hunks here. */
1093       *hunk = NULL;
1094       return SVN_NO_ERROR;
1095     }
1096 
1097   in_hunk = FALSE;
1098   hunk_seen = FALSE;
1099   leading_context = 0;
1100   trailing_context = 0;
1101   changed_line_seen = FALSE;
1102   original_end = 0;
1103   modified_end = 0;
1104   *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
1105 
1106   /* Get current seek position. */
1107   SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
1108 
1109   /* Start out assuming noise. */
1110   last_line_type = noise_line;
1111 
1112   iterpool = svn_pool_create(scratch_pool);
1113   do
1114     {
1115 
1116       svn_pool_clear(iterpool);
1117 
1118       /* Remember the current line's offset, and read the line. */
1119       last_line = pos;
1120       SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
1121                                    iterpool, iterpool));
1122 
1123       /* Update line offset for next iteration. */
1124       SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
1125 
1126       /* Lines starting with a backslash indicate a missing EOL:
1127        * "\ No newline at end of file" or "end of property". */
1128       if (line->data[0] == '\\')
1129         {
1130           if (in_hunk)
1131             {
1132               char eolbuf[2];
1133               apr_size_t len;
1134               apr_off_t off;
1135               apr_off_t hunk_text_end;
1136 
1137               /* Comment terminates the hunk text and says the hunk text
1138                * has no trailing EOL. Snip off trailing EOL which is part
1139                * of the patch file but not part of the hunk text. */
1140               off = last_line - 2;
1141               SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
1142               len = sizeof(eolbuf);
1143               SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
1144                                              &eof, iterpool));
1145               if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
1146                 hunk_text_end = last_line - 2;
1147               else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
1148                 hunk_text_end = last_line - 1;
1149               else
1150                 hunk_text_end = last_line;
1151 
1152               if (last_line_type == original_line && original_end == 0)
1153                 original_end = hunk_text_end;
1154               else if (last_line_type == modified_line && modified_end == 0)
1155                 modified_end = hunk_text_end;
1156               else if (last_line_type == context_line)
1157                 {
1158                   if (original_end == 0)
1159                     original_end = hunk_text_end;
1160                   if (modified_end == 0)
1161                     modified_end = hunk_text_end;
1162                 }
1163 
1164               SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
1165               /* Set for the type and context by using != the other type */
1166               if (last_line_type != modified_line)
1167                 original_no_final_eol = TRUE;
1168               if (last_line_type != original_line)
1169                 modified_no_final_eol = TRUE;
1170             }
1171 
1172           continue;
1173         }
1174 
1175       if (in_hunk && *is_property && *prop_name &&
1176           strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
1177         {
1178           svn_boolean_t found_pretty_mergeinfo_line;
1179 
1180           if (! hunk_seen)
1181             {
1182               /* We're reading the first line of the hunk, so the start
1183                * of the line just read is the hunk text's byte offset. */
1184               start = last_line;
1185             }
1186 
1187           SVN_ERR(parse_pretty_mergeinfo_line(&found_pretty_mergeinfo_line,
1188                                               &original_lines, &modified_lines,
1189                                               line, patch,
1190                                               result_pool, iterpool));
1191           if (found_pretty_mergeinfo_line)
1192             {
1193               hunk_seen = TRUE;
1194               (*hunk)->is_pretty_print_mergeinfo = TRUE;
1195               continue; /* Proceed to the next line in the svn:mergeinfo hunk. */
1196             }
1197 
1198           if ((*hunk)->is_pretty_print_mergeinfo)
1199             {
1200               /* We have reached the end of the pretty-print-mergeinfo hunk.
1201                  (This format uses only one hunk.) */
1202               if (eof)
1203                 {
1204                   /* The hunk ends at EOF. */
1205                   end = pos;
1206                 }
1207               else
1208                 {
1209                   /* The start of the current line marks the first byte
1210                    * after the hunk text. */
1211                   end = last_line;
1212                 }
1213               original_end = end;
1214               modified_end = end;
1215               break;
1216             }
1217 
1218           /* Otherwise, this is a property diff in the
1219              regular format so fall through to normal processing. */
1220         }
1221 
1222       if (in_hunk)
1223         {
1224           char c;
1225           static const char add = '+';
1226           static const char del = '-';
1227 
1228           if (! hunk_seen)
1229             {
1230               /* We're reading the first line of the hunk, so the start
1231                * of the line just read is the hunk text's byte offset. */
1232               start = last_line;
1233             }
1234 
1235           c = line->data[0];
1236           if (c == ' '
1237               || ((original_lines > 0 && modified_lines > 0)
1238                   && (
1239                /* Tolerate chopped leading spaces on empty lines. */
1240                       (! eof && line->len == 0)
1241                /* Maybe tolerate chopped leading spaces on non-empty lines. */
1242                       || (ignore_whitespace && c != del && c != add))))
1243             {
1244               /* It's a "context" line in the hunk. */
1245               hunk_seen = TRUE;
1246               if (original_lines > 0)
1247                 original_lines--;
1248               else
1249                 {
1250                   (*hunk)->original_length++;
1251                   (*hunk)->original_fuzz++;
1252                 }
1253               if (modified_lines > 0)
1254                 modified_lines--;
1255               else
1256                 {
1257                   (*hunk)->modified_length++;
1258                   (*hunk)->modified_fuzz++;
1259                 }
1260               if (changed_line_seen)
1261                 trailing_context++;
1262               else
1263                 leading_context++;
1264               last_line_type = context_line;
1265             }
1266           else if (c == del
1267                    && (original_lines > 0 || line->data[1] != del))
1268             {
1269               /* It's a "deleted" line in the hunk. */
1270               hunk_seen = TRUE;
1271               changed_line_seen = TRUE;
1272 
1273               /* A hunk may have context in the middle. We only want
1274                  trailing lines of context. */
1275               if (trailing_context > 0)
1276                 trailing_context = 0;
1277 
1278               if (original_lines > 0)
1279                 original_lines--;
1280               else
1281                 {
1282                   (*hunk)->original_length++;
1283                   (*hunk)->original_fuzz++;
1284                 }
1285               last_line_type = original_line;
1286             }
1287           else if (c == add
1288                    && (modified_lines > 0 || line->data[1] != add))
1289             {
1290               /* It's an "added" line in the hunk. */
1291               hunk_seen = TRUE;
1292               changed_line_seen = TRUE;
1293 
1294               /* A hunk may have context in the middle. We only want
1295                  trailing lines of context. */
1296               if (trailing_context > 0)
1297                 trailing_context = 0;
1298 
1299               if (modified_lines > 0)
1300                 modified_lines--;
1301               else
1302                 {
1303                   (*hunk)->modified_length++;
1304                   (*hunk)->modified_fuzz++;
1305                 }
1306               last_line_type = modified_line;
1307             }
1308           else
1309             {
1310               if (eof)
1311                 {
1312                   /* The hunk ends at EOF. */
1313                   end = pos;
1314                 }
1315               else
1316                 {
1317                   /* The start of the current line marks the first byte
1318                    * after the hunk text. */
1319                   end = last_line;
1320                 }
1321               if (original_end == 0)
1322                 original_end = end;
1323               if (modified_end == 0)
1324                 modified_end = end;
1325               break; /* Hunk was empty or has been read. */
1326             }
1327         }
1328       else
1329         {
1330           if (starts_with(line->data, text_atat))
1331             {
1332               /* Looks like we have a hunk header, try to rip it apart. */
1333               in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
1334                                           iterpool);
1335               if (in_hunk)
1336                 {
1337                   original_lines = (*hunk)->original_length;
1338                   modified_lines = (*hunk)->modified_length;
1339                   *is_property = FALSE;
1340                 }
1341               }
1342           else if (starts_with(line->data, prop_atat))
1343             {
1344               /* Looks like we have a property hunk header, try to rip it
1345                * apart. */
1346               in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
1347                                           iterpool);
1348               if (in_hunk)
1349                 {
1350                   original_lines = (*hunk)->original_length;
1351                   modified_lines = (*hunk)->modified_length;
1352                   *is_property = TRUE;
1353                 }
1354             }
1355           else if (starts_with(line->data, "Added: "))
1356             {
1357               SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
1358                                       result_pool));
1359               if (*prop_name)
1360                 *prop_operation = (patch->reverse ? svn_diff_op_deleted
1361                                                   : svn_diff_op_added);
1362             }
1363           else if (starts_with(line->data, "Deleted: "))
1364             {
1365               SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
1366                                       result_pool));
1367               if (*prop_name)
1368                 *prop_operation = (patch->reverse ? svn_diff_op_added
1369                                                   : svn_diff_op_deleted);
1370             }
1371           else if (starts_with(line->data, "Modified: "))
1372             {
1373               SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
1374                                       result_pool));
1375               if (*prop_name)
1376                 *prop_operation = svn_diff_op_modified;
1377             }
1378           else if (starts_with(line->data, minus)
1379                    || starts_with(line->data, "diff --git "))
1380             /* This could be a header of another patch. Bail out. */
1381             break;
1382         }
1383     }
1384   /* Check for the line length since a file may not have a newline at the
1385    * end and we depend upon the last line to be an empty one. */
1386   while (! eof || line->len > 0);
1387   svn_pool_destroy(iterpool);
1388 
1389   if (! eof)
1390     /* Rewind to the start of the line just read, so subsequent calls
1391      * to this function or svn_diff_parse_next_patch() don't end
1392      * up skipping the line -- it may contain a patch or hunk header. */
1393     SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
1394 
1395   if (hunk_seen && start < end)
1396     {
1397       /* Did we get the number of context lines announced in the header?
1398 
1399          If not... let's limit the number from the header to what we
1400          actually have, and apply a fuzz penalty */
1401       if (original_lines)
1402         {
1403           (*hunk)->original_length -= original_lines;
1404           (*hunk)->original_fuzz += original_lines;
1405         }
1406       if (modified_lines)
1407         {
1408           (*hunk)->modified_length -= modified_lines;
1409           (*hunk)->modified_fuzz += modified_lines;
1410         }
1411 
1412       (*hunk)->patch = patch;
1413       (*hunk)->apr_file = apr_file;
1414       (*hunk)->leading_context = leading_context;
1415       (*hunk)->trailing_context = trailing_context;
1416       (*hunk)->diff_text_range.start = start;
1417       (*hunk)->diff_text_range.current = start;
1418       (*hunk)->diff_text_range.end = end;
1419       (*hunk)->original_text_range.start = start;
1420       (*hunk)->original_text_range.current = start;
1421       (*hunk)->original_text_range.end = original_end;
1422       (*hunk)->modified_text_range.start = start;
1423       (*hunk)->modified_text_range.current = start;
1424       (*hunk)->modified_text_range.end = modified_end;
1425       (*hunk)->original_no_final_eol = original_no_final_eol;
1426       (*hunk)->modified_no_final_eol = modified_no_final_eol;
1427     }
1428   else
1429     /* Something went wrong, just discard the result. */
1430     *hunk = NULL;
1431 
1432   return SVN_NO_ERROR;
1433 }
1434 
1435 /* Compare function for sorting hunks after parsing.
1436  * We sort hunks by their original line offset. */
1437 static int
compare_hunks(const void * a,const void * b)1438 compare_hunks(const void *a, const void *b)
1439 {
1440   const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
1441   const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
1442 
1443   if (ha->original_start < hb->original_start)
1444     return -1;
1445   if (ha->original_start > hb->original_start)
1446     return 1;
1447   return 0;
1448 }
1449 
/* Possible states of the diff header parser.
 * The trailing comment on each state shows the kind of input line that
 * is associated with it. */
enum parse_state
{
  state_start,              /* initial */
  state_git_diff_seen,      /* diff --git */
  state_git_tree_seen,      /* a tree operation, rather than content change */
  state_git_minus_seen,     /* --- /dev/null; or --- a/ */
  state_git_plus_seen,      /* +++ /dev/null; or +++ a/ */
  state_old_mode_seen,      /* old mode 100644 */
  state_git_mode_seen,      /* new mode 100644 */
  state_move_from_seen,     /* rename from foo.c */
  state_copy_from_seen,     /* copy from foo.c */
  state_minus_seen,         /* --- foo.c */
  state_unidiff_found,      /* valid start of a regular unidiff header */
  state_git_header_found,   /* valid start of a --git diff header */
  state_binary_patch_found  /* valid start of binary patch */
};
1467 
1468 /* Data type describing a valid state transition of the parser. */
1469 struct transition
1470 {
1471   const char *expected_input;
1472   enum parse_state required_state;
1473 
1474   /* A callback called upon each parser state transition. */
1475   svn_error_t *(*fn)(enum parse_state *new_state, char *input,
1476                      svn_patch_t *patch, apr_pool_t *result_pool,
1477                      apr_pool_t *scratch_pool);
1478 };
1479 
1480 /* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
1481 static svn_error_t *
grab_filename(const char ** file_name,const char * line,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1482 grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
1483               apr_pool_t *scratch_pool)
1484 {
1485   const char *utf8_path;
1486   const char *canon_path;
1487 
1488   /* Grab the filename and encode it in UTF-8. */
1489   /* TODO: Allow specifying the patch file's encoding.
1490    *       For now, we assume its encoding is native. */
1491   /* ### This can fail if the filename cannot be represented in the current
1492    * ### locale's encoding. */
1493   SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
1494                                   line,
1495                                   scratch_pool));
1496 
1497   /* Canonicalize the path name. */
1498   canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
1499 
1500   *file_name = apr_pstrdup(result_pool, canon_path);
1501 
1502   return SVN_NO_ERROR;
1503 }
1504 
1505 /* Parse the '--- ' line of a regular unidiff. */
1506 static svn_error_t *
diff_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1507 diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1508            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1509 {
1510   /* If we can find a tab, it separates the filename from
1511    * the rest of the line which we can discard. */
1512   char *tab = strchr(line, '\t');
1513   if (tab)
1514     *tab = '\0';
1515 
1516   SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
1517                         result_pool, scratch_pool));
1518 
1519   *new_state = state_minus_seen;
1520 
1521   return SVN_NO_ERROR;
1522 }
1523 
1524 /* Parse the '+++ ' line of a regular unidiff. */
1525 static svn_error_t *
diff_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1526 diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1527            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1528 {
1529   /* If we can find a tab, it separates the filename from
1530    * the rest of the line which we can discard. */
1531   char *tab = strchr(line, '\t');
1532   if (tab)
1533     *tab = '\0';
1534 
1535   SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1536                         result_pool, scratch_pool));
1537 
1538   *new_state = state_unidiff_found;
1539 
1540   return SVN_NO_ERROR;
1541 }
1542 
1543 /* Parse the first line of a git extended unidiff. */
1544 static svn_error_t *
git_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1545 git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1546           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1547 {
1548   const char *old_path_start;
1549   char *old_path_end;
1550   const char *new_path_start;
1551   const char *new_path_end;
1552   char *new_path_marker;
1553   const char *old_path_marker;
1554 
1555   /* ### Add handling of escaped paths
1556    * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1557    *
1558    * TAB, LF, double quote and backslash characters in pathnames are
1559    * represented as \t, \n, \" and \\, respectively. If there is need for
1560    * such substitution then the whole pathname is put in double quotes.
1561    */
1562 
1563   /* Our line should look like this: 'diff --git a/path b/path'.
1564    *
1565    * If we find any deviations from that format, we return with state reset
1566    * to start.
1567    */
1568   old_path_marker = strstr(line, " a/");
1569 
1570   if (! old_path_marker)
1571     {
1572       *new_state = state_start;
1573       return SVN_NO_ERROR;
1574     }
1575 
1576   if (! *(old_path_marker + 3))
1577     {
1578       *new_state = state_start;
1579       return SVN_NO_ERROR;
1580     }
1581 
1582   new_path_marker = strstr(old_path_marker, " b/");
1583 
1584   if (! new_path_marker)
1585     {
1586       *new_state = state_start;
1587       return SVN_NO_ERROR;
1588     }
1589 
1590   if (! *(new_path_marker + 3))
1591     {
1592       *new_state = state_start;
1593       return SVN_NO_ERROR;
1594     }
1595 
1596   /* By now, we know that we have a line on the form '--git diff a/.+ b/.+'
1597    * We only need the filenames when we have deleted or added empty
1598    * files. In those cases the old_path and new_path is identical on the
1599    * 'diff --git' line.  For all other cases we fetch the filenames from
1600    * other header lines. */
1601   old_path_start = line + STRLEN_LITERAL("diff --git a/");
1602   new_path_end = line + strlen(line);
1603   new_path_start = old_path_start;
1604 
1605   while (TRUE)
1606     {
1607       ptrdiff_t len_old;
1608       ptrdiff_t len_new;
1609 
1610       new_path_marker = strstr(new_path_start, " b/");
1611 
1612       /* No new path marker, bail out. */
1613       if (! new_path_marker)
1614         break;
1615 
1616       old_path_end = new_path_marker;
1617       new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1618 
1619       /* No path after the marker. */
1620       if (! *new_path_start)
1621         break;
1622 
1623       len_old = old_path_end - old_path_start;
1624       len_new = new_path_end - new_path_start;
1625 
1626       /* Are the paths before and after the " b/" marker the same? */
1627       if (len_old == len_new
1628           && ! strncmp(old_path_start, new_path_start, len_old))
1629         {
1630           *old_path_end = '\0';
1631           SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1632                                 result_pool, scratch_pool));
1633 
1634           SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1635                                 result_pool, scratch_pool));
1636           break;
1637         }
1638     }
1639 
1640   /* We assume that the path is only modified until we've found a 'tree'
1641    * header */
1642   patch->operation = svn_diff_op_modified;
1643 
1644   *new_state = state_git_diff_seen;
1645   return SVN_NO_ERROR;
1646 }
1647 
1648 /* Parse the '--- ' line of a git extended unidiff. */
1649 static svn_error_t *
git_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1650 git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1651           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1652 {
1653   /* If we can find a tab, it separates the filename from
1654    * the rest of the line which we can discard. */
1655   char *tab = strchr(line, '\t');
1656   if (tab)
1657     *tab = '\0';
1658 
1659   if (starts_with(line, "--- /dev/null"))
1660     SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1661                           result_pool, scratch_pool));
1662   else
1663     SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1664                           result_pool, scratch_pool));
1665 
1666   *new_state = state_git_minus_seen;
1667   return SVN_NO_ERROR;
1668 }
1669 
1670 /* Parse the '+++ ' line of a git extended unidiff. */
1671 static svn_error_t *
git_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1672 git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1673           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1674 {
1675   /* If we can find a tab, it separates the filename from
1676    * the rest of the line which we can discard. */
1677   char *tab = strchr(line, '\t');
1678   if (tab)
1679     *tab = '\0';
1680 
1681   if (starts_with(line, "+++ /dev/null"))
1682     SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1683                           result_pool, scratch_pool));
1684   else
1685     SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1686                           result_pool, scratch_pool));
1687 
1688   *new_state = state_git_header_found;
1689   return SVN_NO_ERROR;
1690 }
1691 
1692 /* Helper for git_old_mode() and git_new_mode().  Translate the git
1693  * file mode MODE_STR into a binary "executable?" and "symlink?" state. */
1694 static svn_error_t *
parse_git_mode_bits(svn_tristate_t * executable_p,svn_tristate_t * symlink_p,const char * mode_str)1695 parse_git_mode_bits(svn_tristate_t *executable_p,
1696                     svn_tristate_t *symlink_p,
1697                     const char *mode_str)
1698 {
1699   apr_uint64_t mode;
1700   SVN_ERR(svn_cstring_strtoui64(&mode, mode_str,
1701                                 0 /* min */,
1702                                 0777777 /* max: six octal digits */,
1703                                 010 /* radix (octal) */));
1704 
1705   /* Note: 0644 and 0755 are the only modes that can occur for plain files.
1706    * We deliberately choose to parse only those values: we are strict in what
1707    * we accept _and_ in what we produce.
1708    *
1709    * (Having said that, though, we could consider relaxing the parser to also
1710    * map
1711    *     (mode & 0111) == 0000 -> svn_tristate_false
1712    *     (mode & 0111) == 0111 -> svn_tristate_true
1713    *        [anything else]    -> svn_tristate_unknown
1714    * .)
1715    */
1716 
1717   switch (mode & 0777)
1718     {
1719       case 0644:
1720         *executable_p = svn_tristate_false;
1721         break;
1722 
1723       case 0755:
1724         *executable_p = svn_tristate_true;
1725         break;
1726 
1727       default:
1728         /* Ignore unknown values. */
1729         *executable_p = svn_tristate_unknown;
1730         break;
1731     }
1732 
1733   switch (mode & 0170000 /* S_IFMT */)
1734     {
1735       case 0120000: /* S_IFLNK */
1736         *symlink_p = svn_tristate_true;
1737         break;
1738 
1739       case 0100000: /* S_IFREG */
1740       case 0040000: /* S_IFDIR */
1741         *symlink_p = svn_tristate_false;
1742         break;
1743 
1744       default:
1745         /* Ignore unknown values.
1746            (Including those generated by Subversion <= 1.9) */
1747         *symlink_p = svn_tristate_unknown;
1748         break;
1749     }
1750 
1751   return SVN_NO_ERROR;
1752 }
1753 
1754 /* Parse the 'old mode ' line of a git extended unidiff. */
1755 static svn_error_t *
git_old_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1756 git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1757              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1758 {
1759   SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1760                               &patch->old_symlink_bit,
1761                               line + STRLEN_LITERAL("old mode ")));
1762 
1763 #ifdef SVN_DEBUG
1764   /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1765   SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown);
1766 #endif
1767 
1768   *new_state = state_old_mode_seen;
1769   return SVN_NO_ERROR;
1770 }
1771 
1772 /* Parse the 'new mode ' line of a git extended unidiff. */
1773 static svn_error_t *
git_new_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1774 git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1775              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1776 {
1777   SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1778                               &patch->new_symlink_bit,
1779                               line + STRLEN_LITERAL("new mode ")));
1780 
1781 #ifdef SVN_DEBUG
1782   /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1783   SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown);
1784 #endif
1785 
1786   /* Don't touch patch->operation. */
1787 
1788   *new_state = state_git_mode_seen;
1789   return SVN_NO_ERROR;
1790 }
1791 
1792 static svn_error_t *
git_index(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1793 git_index(enum parse_state *new_state, char *line, svn_patch_t *patch,
1794           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1795 {
1796   /* We either have something like "index 33e5b38..0000000" (which we just
1797      ignore as we are not interested in git specific shas) or something like
1798      "index 33e5b38..0000000 120000" which tells us the mode, that isn't
1799      changed by applying this patch.
1800 
1801      If the mode would have changed then we would see 'old mode' and 'new mode'
1802      lines.
1803   */
1804   line = strchr(line + STRLEN_LITERAL("index "), ' ');
1805 
1806   if (line && patch->new_executable_bit == svn_tristate_unknown
1807            && patch->new_symlink_bit == svn_tristate_unknown
1808            && patch->operation != svn_diff_op_added
1809            && patch->operation != svn_diff_op_deleted)
1810     {
1811       SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1812                                   &patch->new_symlink_bit,
1813                                   line + 1));
1814 
1815       /* There is no change.. so set the old values to the new values */
1816       patch->old_executable_bit = patch->new_executable_bit;
1817       patch->old_symlink_bit = patch->new_symlink_bit;
1818     }
1819 
1820   /* This function doesn't change the state! */
1821   /* *new_state = *new_state */
1822   return SVN_NO_ERROR;
1823 }
1824 
1825 /* Parse the 'rename from ' line of a git extended unidiff. */
1826 static svn_error_t *
git_move_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1827 git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1828               apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1829 {
1830   SVN_ERR(grab_filename(&patch->old_filename,
1831                         line + STRLEN_LITERAL("rename from "),
1832                         result_pool, scratch_pool));
1833 
1834   *new_state = state_move_from_seen;
1835   return SVN_NO_ERROR;
1836 }
1837 
1838 /* Parse the 'rename to ' line of a git extended unidiff. */
1839 static svn_error_t *
git_move_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1840 git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1841             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1842 {
1843   SVN_ERR(grab_filename(&patch->new_filename,
1844                         line + STRLEN_LITERAL("rename to "),
1845                         result_pool, scratch_pool));
1846 
1847   patch->operation = svn_diff_op_moved;
1848 
1849   *new_state = state_git_tree_seen;
1850   return SVN_NO_ERROR;
1851 }
1852 
1853 /* Parse the 'copy from ' line of a git extended unidiff. */
1854 static svn_error_t *
git_copy_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1855 git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1856               apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1857 {
1858   SVN_ERR(grab_filename(&patch->old_filename,
1859                         line + STRLEN_LITERAL("copy from "),
1860                         result_pool, scratch_pool));
1861 
1862   *new_state = state_copy_from_seen;
1863   return SVN_NO_ERROR;
1864 }
1865 
1866 /* Parse the 'copy to ' line of a git extended unidiff. */
1867 static svn_error_t *
git_copy_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1868 git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1869             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1870 {
1871   SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1872                         result_pool, scratch_pool));
1873 
1874   patch->operation = svn_diff_op_copied;
1875 
1876   *new_state = state_git_tree_seen;
1877   return SVN_NO_ERROR;
1878 }
1879 
1880 /* Parse the 'new file ' line of a git extended unidiff. */
1881 static svn_error_t *
git_new_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1882 git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1883              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1884 {
1885   SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1886                               &patch->new_symlink_bit,
1887                               line + STRLEN_LITERAL("new file mode ")));
1888 
1889   patch->operation = svn_diff_op_added;
1890 
1891   /* Filename already retrieved from diff --git header. */
1892 
1893   *new_state = state_git_tree_seen;
1894   return SVN_NO_ERROR;
1895 }
1896 
1897 /* Parse the 'deleted file ' line of a git extended unidiff. */
1898 static svn_error_t *
git_deleted_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1899 git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1900                  apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1901 {
1902   SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1903                               &patch->old_symlink_bit,
1904                               line + STRLEN_LITERAL("deleted file mode ")));
1905 
1906   patch->operation = svn_diff_op_deleted;
1907 
1908   /* Filename already retrieved from diff --git header. */
1909 
1910   *new_state = state_git_tree_seen;
1911   return SVN_NO_ERROR;
1912 }
1913 
1914 /* Parse the 'GIT binary patch' header */
1915 static svn_error_t *
binary_patch_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1916 binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1917              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1918 {
1919   *new_state = state_binary_patch_found;
1920   return SVN_NO_ERROR;
1921 }
1922 
1923 
1924 /* Add a HUNK associated with the property PROP_NAME to PATCH. */
1925 static svn_error_t *
add_property_hunk(svn_patch_t * patch,const char * prop_name,svn_diff_hunk_t * hunk,svn_diff_operation_kind_t operation,apr_pool_t * result_pool)1926 add_property_hunk(svn_patch_t *patch, const char *prop_name,
1927                   svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1928                   apr_pool_t *result_pool)
1929 {
1930   svn_prop_patch_t *prop_patch;
1931 
1932   prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1933 
1934   if (! prop_patch)
1935     {
1936       prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1937       prop_patch->name = prop_name;
1938       prop_patch->operation = operation;
1939       prop_patch->hunks = apr_array_make(result_pool, 1,
1940                                          sizeof(svn_diff_hunk_t *));
1941 
1942       svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1943     }
1944 
1945   APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1946 
1947   return SVN_NO_ERROR;
1948 }
1949 
1950 struct svn_patch_file_t
1951 {
1952   /* The APR file handle to the patch file. */
1953   apr_file_t *apr_file;
1954 
1955   /* The file offset at which the next patch is expected. */
1956   apr_off_t next_patch_offset;
1957 };
1958 
1959 svn_error_t *
svn_diff_open_patch_file(svn_patch_file_t ** patch_file,const char * local_abspath,apr_pool_t * result_pool)1960 svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1961                          const char *local_abspath,
1962                          apr_pool_t *result_pool)
1963 {
1964   svn_patch_file_t *p;
1965 
1966   p = apr_palloc(result_pool, sizeof(*p));
1967   SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1968                            APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1969                            result_pool));
1970   p->next_patch_offset = 0;
1971   *patch_file = p;
1972 
1973   return SVN_NO_ERROR;
1974 }
1975 
1976 /* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1977  * Parsing stops if no valid next hunk can be found.
1978  * If IGNORE_WHITESPACE is TRUE, lines without
1979  * leading spaces will be treated as context lines.
1980  * Allocate results in RESULT_POOL.
1981  * Use SCRATCH_POOL for temporary allocations. */
1982 static svn_error_t *
parse_hunks(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1983 parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1984             svn_boolean_t ignore_whitespace,
1985             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1986 {
1987   svn_diff_hunk_t *hunk;
1988   svn_boolean_t is_property;
1989   const char *last_prop_name;
1990   const char *prop_name;
1991   svn_diff_operation_kind_t prop_operation;
1992   apr_pool_t *iterpool;
1993 
1994   last_prop_name = NULL;
1995 
1996   patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1997   patch->prop_patches = apr_hash_make(result_pool);
1998   iterpool = svn_pool_create(scratch_pool);
1999   do
2000     {
2001       svn_pool_clear(iterpool);
2002 
2003       SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
2004                               patch, apr_file, ignore_whitespace, result_pool,
2005                               iterpool));
2006 
2007       if (hunk && is_property)
2008         {
2009           if (! prop_name)
2010             prop_name = last_prop_name;
2011           else
2012             last_prop_name = prop_name;
2013 
2014           /* Skip pretty-printed svn:mergeinfo property hunks.
2015            * Pretty-printed mergeinfo data cannot be represented as a hunk and
2016            * is therefore stored in PATCH itself. */
2017           if (hunk->is_pretty_print_mergeinfo)
2018             continue;
2019 
2020           SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
2021                                     result_pool));
2022         }
2023       else if (hunk)
2024         {
2025           APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
2026           last_prop_name = NULL;
2027         }
2028 
2029     }
2030   while (hunk);
2031   svn_pool_destroy(iterpool);
2032 
2033   return SVN_NO_ERROR;
2034 }
2035 
2036 static svn_error_t *
parse_binary_patch(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t reverse,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2037 parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file,
2038                    svn_boolean_t reverse,
2039                    apr_pool_t *result_pool, apr_pool_t *scratch_pool)
2040 {
2041   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2042   apr_off_t pos, last_line;
2043   svn_stringbuf_t *line;
2044   svn_boolean_t eof = FALSE;
2045   svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch));
2046   svn_boolean_t in_blob = FALSE;
2047   svn_boolean_t in_src = FALSE;
2048 
2049   bpatch->apr_file = apr_file;
2050 
2051   patch->prop_patches = apr_hash_make(result_pool);
2052 
2053   SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
2054 
2055   while (!eof)
2056     {
2057       last_line = pos;
2058       SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
2059                                iterpool, iterpool));
2060 
2061       /* Update line offset for next iteration. */
2062       SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
2063 
2064       if (in_blob)
2065         {
2066           char c = line->data[0];
2067 
2068           /* 66 = len byte + (52/4*5) chars */
2069           if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
2070               && line->len <= 66
2071               && !strchr(line->data, ':')
2072               && !strchr(line->data, ' '))
2073             {
2074               /* One more blop line */
2075               if (in_src)
2076                 bpatch->src_end = pos;
2077               else
2078                 bpatch->dst_end = pos;
2079             }
2080           else if (svn_stringbuf_first_non_whitespace(line) < line->len
2081                    && !(in_src && bpatch->src_start < last_line))
2082             {
2083               break; /* Bad patch */
2084             }
2085           else if (in_src)
2086             {
2087               patch->binary_patch = bpatch; /* SUCCESS! */
2088               break;
2089             }
2090           else
2091             {
2092               in_blob = FALSE;
2093               in_src = TRUE;
2094             }
2095         }
2096       else if (starts_with(line->data, "literal "))
2097         {
2098           apr_uint64_t expanded_size;
2099           svn_error_t *err = svn_cstring_strtoui64(&expanded_size,
2100                                                    &line->data[8],
2101                                                    0, APR_UINT64_MAX, 10);
2102 
2103           if (err)
2104             {
2105               svn_error_clear(err);
2106               break;
2107             }
2108 
2109           if (in_src)
2110             {
2111               bpatch->src_start = pos;
2112               bpatch->src_filesize = expanded_size;
2113             }
2114           else
2115             {
2116               bpatch->dst_start = pos;
2117               bpatch->dst_filesize = expanded_size;
2118             }
2119           in_blob = TRUE;
2120         }
2121       else
2122         break; /* We don't support GIT deltas (yet) */
2123     }
2124   svn_pool_destroy(iterpool);
2125 
2126   if (!eof)
2127     /* Rewind to the start of the line just read, so subsequent calls
2128      * don't end up skipping the line. It may contain a patch or hunk header.*/
2129     SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
2130   else if (in_src
2131            && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize))
2132     {
2133       patch->binary_patch = bpatch; /* SUCCESS */
2134     }
2135 
2136   /* Reverse patch if requested */
2137   if (reverse && patch->binary_patch)
2138     {
2139       apr_off_t tmp_start = bpatch->src_start;
2140       apr_off_t tmp_end = bpatch->src_end;
2141       svn_filesize_t tmp_filesize = bpatch->src_filesize;
2142 
2143       bpatch->src_start = bpatch->dst_start;
2144       bpatch->src_end = bpatch->dst_end;
2145       bpatch->src_filesize = bpatch->dst_filesize;
2146 
2147       bpatch->dst_start = tmp_start;
2148       bpatch->dst_end = tmp_end;
2149       bpatch->dst_filesize = tmp_filesize;
2150     }
2151 
2152   return SVN_NO_ERROR;
2153 }
2154 
2155 /* State machine for the diff header parser.
2156  * Expected Input   Required state          Function to call */
2157 static struct transition transitions[] =
2158 {
2159   {"--- ",              state_start,            diff_minus},
2160   {"+++ ",              state_minus_seen,       diff_plus},
2161 
2162   {"diff --git",        state_start,            git_start},
2163   {"--- a/",            state_git_diff_seen,    git_minus},
2164   {"--- a/",            state_git_mode_seen,    git_minus},
2165   {"--- a/",            state_git_tree_seen,    git_minus},
2166   {"--- /dev/null",     state_git_mode_seen,    git_minus},
2167   {"--- /dev/null",     state_git_tree_seen,    git_minus},
2168   {"+++ b/",            state_git_minus_seen,   git_plus},
2169   {"+++ /dev/null",     state_git_minus_seen,   git_plus},
2170 
2171   {"old mode ",         state_git_diff_seen,    git_old_mode},
2172   {"new mode ",         state_old_mode_seen,    git_new_mode},
2173 
2174   {"rename from ",      state_git_diff_seen,    git_move_from},
2175   {"rename from ",      state_git_mode_seen,    git_move_from},
2176   {"rename to ",        state_move_from_seen,   git_move_to},
2177 
2178   {"copy from ",        state_git_diff_seen,    git_copy_from},
2179   {"copy from ",        state_git_mode_seen,    git_copy_from},
2180   {"copy to ",          state_copy_from_seen,   git_copy_to},
2181 
2182   {"new file ",         state_git_diff_seen,    git_new_file},
2183 
2184   {"deleted file ",     state_git_diff_seen,    git_deleted_file},
2185 
2186   {"index ",            state_git_diff_seen,    git_index},
2187   {"index ",            state_git_tree_seen,    git_index},
2188   {"index ",            state_git_mode_seen,    git_index},
2189 
2190   {"GIT binary patch",  state_git_diff_seen,    binary_patch_start},
2191   {"GIT binary patch",  state_git_tree_seen,    binary_patch_start},
2192   {"GIT binary patch",  state_git_mode_seen,    binary_patch_start},
2193 };
2194 
2195 svn_error_t *
svn_diff_parse_next_patch(svn_patch_t ** patch_p,svn_patch_file_t * patch_file,svn_boolean_t reverse,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2196 svn_diff_parse_next_patch(svn_patch_t **patch_p,
2197                           svn_patch_file_t *patch_file,
2198                           svn_boolean_t reverse,
2199                           svn_boolean_t ignore_whitespace,
2200                           apr_pool_t *result_pool,
2201                           apr_pool_t *scratch_pool)
2202 {
2203   apr_off_t pos, last_line;
2204   svn_boolean_t eof;
2205   svn_boolean_t line_after_tree_header_read = FALSE;
2206   apr_pool_t *iterpool;
2207   svn_patch_t *patch;
2208   enum parse_state state = state_start;
2209 
2210   if (apr_file_eof(patch_file->apr_file) == APR_EOF)
2211     {
2212       /* No more patches here. */
2213       *patch_p = NULL;
2214       return SVN_NO_ERROR;
2215     }
2216 
2217   patch = apr_pcalloc(result_pool, sizeof(*patch));
2218   patch->old_executable_bit = svn_tristate_unknown;
2219   patch->new_executable_bit = svn_tristate_unknown;
2220   patch->old_symlink_bit = svn_tristate_unknown;
2221   patch->new_symlink_bit = svn_tristate_unknown;
2222 
2223   pos = patch_file->next_patch_offset;
2224   SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
2225 
2226   iterpool = svn_pool_create(scratch_pool);
2227   do
2228     {
2229       svn_stringbuf_t *line;
2230       svn_boolean_t valid_header_line = FALSE;
2231       int i;
2232 
2233       svn_pool_clear(iterpool);
2234 
2235       /* Remember the current line's offset, and read the line. */
2236       last_line = pos;
2237       SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
2238                                    APR_SIZE_MAX, iterpool, iterpool));
2239 
2240       if (! eof)
2241         {
2242           /* Update line offset for next iteration. */
2243           SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file,
2244                                          iterpool));
2245         }
2246 
2247       /* Run the state machine. */
2248       for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
2249         {
2250           if (starts_with(line->data, transitions[i].expected_input)
2251               && state == transitions[i].required_state)
2252             {
2253               SVN_ERR(transitions[i].fn(&state, line->data, patch,
2254                                         result_pool, iterpool));
2255               valid_header_line = TRUE;
2256               break;
2257             }
2258         }
2259 
2260       if (state == state_unidiff_found
2261           || state == state_git_header_found
2262           || state == state_binary_patch_found)
2263         {
2264           /* We have a valid diff header, yay! */
2265           break;
2266         }
2267       else if ((state == state_git_tree_seen || state == state_git_mode_seen)
2268                && line_after_tree_header_read
2269                && !valid_header_line)
2270         {
2271           /* We have a valid diff header for a patch with only tree changes.
2272            * Rewind to the start of the line just read, so subsequent calls
2273            * to this function don't end up skipping the line -- it may
2274            * contain a patch. */
2275           SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2276                                    scratch_pool));
2277           break;
2278         }
2279       else if (state == state_git_tree_seen
2280                || state == state_git_mode_seen)
2281         {
2282           line_after_tree_header_read = TRUE;
2283         }
2284       else if (! valid_header_line && state != state_start
2285                && state != state_git_diff_seen)
2286         {
2287           /* We've encountered an invalid diff header.
2288            *
2289            * Rewind to the start of the line just read - it may be a new
2290            * header that begins there. */
2291           SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2292                                    scratch_pool));
2293           state = state_start;
2294         }
2295 
2296     }
2297   while (! eof);
2298 
2299   patch->reverse = reverse;
2300   if (reverse)
2301     {
2302       const char *temp;
2303       svn_tristate_t ts_tmp;
2304 
2305       temp = patch->old_filename;
2306       patch->old_filename = patch->new_filename;
2307       patch->new_filename = temp;
2308 
2309       switch (patch->operation)
2310         {
2311           case svn_diff_op_added:
2312             patch->operation = svn_diff_op_deleted;
2313             break;
2314           case svn_diff_op_deleted:
2315             patch->operation = svn_diff_op_added;
2316             break;
2317 
2318           case svn_diff_op_modified:
2319             break; /* Stays modified. */
2320 
2321           case svn_diff_op_copied:
2322           case svn_diff_op_moved:
2323             break; /* Stays copied or moved, just in the other direction. */
2324           case svn_diff_op_unchanged:
2325             break; /* Stays unchanged, of course. */
2326         }
2327 
2328       ts_tmp = patch->old_executable_bit;
2329       patch->old_executable_bit = patch->new_executable_bit;
2330       patch->new_executable_bit = ts_tmp;
2331 
2332       ts_tmp = patch->old_symlink_bit;
2333       patch->old_symlink_bit = patch->new_symlink_bit;
2334       patch->new_symlink_bit = ts_tmp;
2335     }
2336 
2337   if (patch->old_filename == NULL || patch->new_filename == NULL)
2338     {
2339       /* Something went wrong, just discard the result. */
2340       patch = NULL;
2341     }
2342   else
2343     {
2344       if (state == state_binary_patch_found)
2345         {
2346           SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse,
2347                                      result_pool, iterpool));
2348           /* And fall through in property parsing */
2349         }
2350 
2351       SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
2352                           result_pool, iterpool));
2353     }
2354 
2355   svn_pool_destroy(iterpool);
2356 
2357   SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset,
2358                                  patch_file->apr_file, scratch_pool));
2359 
2360   if (patch && patch->hunks)
2361     {
2362       /* Usually, hunks appear in the patch sorted by their original line
2363        * offset. But just in case they weren't parsed in this order for
2364        * some reason, we sort them so that our caller can assume that hunks
2365        * are sorted as if parsed from a usual patch. */
2366       svn_sort__array(patch->hunks, compare_hunks);
2367     }
2368 
2369   *patch_p = patch;
2370   return SVN_NO_ERROR;
2371 }
2372 
2373 svn_error_t *
svn_diff_close_patch_file(svn_patch_file_t * patch_file,apr_pool_t * scratch_pool)2374 svn_diff_close_patch_file(svn_patch_file_t *patch_file,
2375                           apr_pool_t *scratch_pool)
2376 {
2377   return svn_error_trace(svn_io_file_close(patch_file->apr_file,
2378                                            scratch_pool));
2379 }
2380