1 /* Produce a unidiff output from a diff_result. */
2 /*
3  * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <errno.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <assert.h>
25 
26 #include <arraylist.h>
27 #include <diff_main.h>
28 #include <diff_output.h>
29 
30 #include "diff_internal.h"
31 #include "diff_debug.h"
32 
33 off_t
34 diff_chunk_get_left_start_pos(const struct diff_chunk *c)
35 {
36 	return c->left_start->pos;
37 }
38 
39 off_t
40 diff_chunk_get_right_start_pos(const struct diff_chunk *c)
41 {
42 	return c->right_start->pos;
43 }
44 
45 bool
46 diff_chunk_context_empty(const struct diff_chunk_context *cc)
47 {
48 	return diff_range_empty(&cc->chunk);
49 }
50 
51 int
52 diff_chunk_get_left_start(const struct diff_chunk *c,
53     const struct diff_result *r, int context_lines)
54 {
55 	int left_start = diff_atom_root_idx(r->left, c->left_start);
56 	return MAX(0, left_start - context_lines);
57 }
58 
59 int
60 diff_chunk_get_left_end(const struct diff_chunk *c,
61     const struct diff_result *r, int context_lines)
62 {
63 	int left_start = diff_chunk_get_left_start(c, r, 0);
64 	return MIN(r->left->atoms.len,
65 	    left_start + c->left_count + context_lines);
66 }
67 
68 int
69 diff_chunk_get_right_start(const struct diff_chunk *c,
70     const struct diff_result *r, int context_lines)
71 {
72 	int right_start = diff_atom_root_idx(r->right, c->right_start);
73 	return MAX(0, right_start - context_lines);
74 }
75 
76 int
77 diff_chunk_get_right_end(const struct diff_chunk *c,
78     const struct diff_result *r, int context_lines)
79 {
80 	int right_start = diff_chunk_get_right_start(c, r, 0);
81 	return MIN(r->right->atoms.len,
82 	    right_start + c->right_count + context_lines);
83 }
84 
85 struct diff_chunk *
86 diff_chunk_get(const struct diff_result *r, int chunk_idx)
87 {
88 	return &r->chunks.head[chunk_idx];
89 }
90 
91 int
92 diff_chunk_get_left_count(struct diff_chunk *c)
93 {
94 	return c->left_count;
95 }
96 
97 int
98 diff_chunk_get_right_count(struct diff_chunk *c)
99 {
100 	return c->right_count;
101 }
102 
103 void
104 diff_chunk_context_get(struct diff_chunk_context *cc, const struct diff_result *r,
105 		  int chunk_idx, int context_lines)
106 {
107 	const struct diff_chunk *c = &r->chunks.head[chunk_idx];
108 	int left_start = diff_chunk_get_left_start(c, r, context_lines);
109 	int left_end = diff_chunk_get_left_end(c, r, context_lines);
110 	int right_start = diff_chunk_get_right_start(c, r, context_lines);
111 	int right_end = diff_chunk_get_right_end(c, r,  context_lines);
112 
113 	*cc = (struct diff_chunk_context){
114 		.chunk = {
115 			.start = chunk_idx,
116 			.end = chunk_idx + 1,
117 		},
118 		.left = {
119 			.start = left_start,
120 			.end = left_end,
121 		},
122 		.right = {
123 			.start = right_start,
124 			.end = right_end,
125 		},
126 	};
127 }
128 
129 bool
130 diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
131 			  const struct diff_chunk_context *other)
132 {
133 	return diff_ranges_touch(&cc->chunk, &other->chunk)
134 		|| diff_ranges_touch(&cc->left, &other->left)
135 		|| diff_ranges_touch(&cc->right, &other->right);
136 }
137 
138 void
139 diff_chunk_contexts_merge(struct diff_chunk_context *cc,
140 			  const struct diff_chunk_context *other)
141 {
142 	diff_ranges_merge(&cc->chunk, &other->chunk);
143 	diff_ranges_merge(&cc->left, &other->left);
144 	diff_ranges_merge(&cc->right, &other->right);
145 }
146 
147 void
148 diff_chunk_context_load_change(struct diff_chunk_context *cc,
149 			       int *nchunks_used,
150 			       struct diff_result *result,
151 			       int start_chunk_idx,
152 			       int context_lines)
153 {
154 	int i;
155 	int seen_minus = 0, seen_plus = 0;
156 
157 	if (nchunks_used)
158 		*nchunks_used = 0;
159 
160 	for (i = start_chunk_idx; i < result->chunks.len; i++) {
161 		struct diff_chunk *chunk = &result->chunks.head[i];
162 		enum diff_chunk_type t = diff_chunk_type(chunk);
163 		struct diff_chunk_context next;
164 
165 		if (t != CHUNK_MINUS && t != CHUNK_PLUS) {
166 			if (nchunks_used)
167 				(*nchunks_used)++;
168 			if (seen_minus || seen_plus)
169 				break;
170 			else
171 				continue;
172 		} else if (t == CHUNK_MINUS)
173 			seen_minus = 1;
174 		else if (t == CHUNK_PLUS)
175 			seen_plus = 1;
176 
177 		if (diff_chunk_context_empty(cc)) {
178 			/* Note down the start point, any number of subsequent
179 			 * chunks may be joined up to this chunk by being
180 			 * directly adjacent. */
181 			diff_chunk_context_get(cc, result, i, context_lines);
182 			if (nchunks_used)
183 				(*nchunks_used)++;
184 			continue;
185 		}
186 
187 		/* There already is a previous chunk noted down for being
188 		 * printed. Does it join up with this one? */
189 		diff_chunk_context_get(&next, result, i, context_lines);
190 
191 		if (diff_chunk_contexts_touch(cc, &next)) {
192 			/* This next context touches or overlaps the previous
193 			 * one, join. */
194 			diff_chunk_contexts_merge(cc, &next);
195 			if (nchunks_used)
196 				(*nchunks_used)++;
197 			continue;
198 		} else
199 			break;
200 	}
201 }
202 
203 struct diff_output_unidiff_state {
204 	bool header_printed;
205 	char prototype[DIFF_FUNCTION_CONTEXT_SIZE];
206 	int last_prototype_idx;
207 };
208 
209 struct diff_output_unidiff_state *
210 diff_output_unidiff_state_alloc(void)
211 {
212 	struct diff_output_unidiff_state *state;
213 
214 	state = calloc(1, sizeof(struct diff_output_unidiff_state));
215 	if (state != NULL)
216 		diff_output_unidiff_state_reset(state);
217 	return state;
218 }
219 
220 void
221 diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state)
222 {
223 	state->header_printed = false;
224 	memset(state->prototype, 0, sizeof(state->prototype));
225 	state->last_prototype_idx = 0;
226 }
227 
228 void
229 diff_output_unidiff_state_free(struct diff_output_unidiff_state *state)
230 {
231 	free(state);
232 }
233 
234 static int
235 output_unidiff_chunk(struct diff_output_info *outinfo, FILE *dest,
236 		     struct diff_output_unidiff_state *state,
237 		     const struct diff_input_info *info,
238 		     const struct diff_result *result,
239 		     bool print_header, bool show_function_prototypes,
240 		     const struct diff_chunk_context *cc)
241 {
242 	int rc, left_start, left_len, right_start, right_len;
243 	off_t outoff = 0, *offp;
244 	uint8_t *typep;
245 
246 	if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right))
247 		return DIFF_RC_OK;
248 
249 	if (outinfo && outinfo->line_offsets.len > 0) {
250 		unsigned int idx = outinfo->line_offsets.len - 1;
251 		outoff = outinfo->line_offsets.head[idx];
252 	}
253 
254 	if (print_header && !(state->header_printed)) {
255 		rc = fprintf(dest, "--- %s\n",
256 		    diff_output_get_label_left(info));
257 		if (rc < 0)
258 			return errno;
259 		if (outinfo) {
260 			ARRAYLIST_ADD(offp, outinfo->line_offsets);
261 			if (offp == NULL)
262 				return ENOMEM;
263 			outoff += rc;
264 			*offp = outoff;
265 			ARRAYLIST_ADD(typep, outinfo->line_types);
266 			if (typep == NULL)
267 				return ENOMEM;
268 			*typep = DIFF_LINE_MINUS;
269 		}
270 		rc = fprintf(dest, "+++ %s\n",
271 		    diff_output_get_label_right(info));
272 		if (rc < 0)
273 			return errno;
274 		if (outinfo) {
275 			ARRAYLIST_ADD(offp, outinfo->line_offsets);
276 			if (offp == NULL)
277 				return ENOMEM;
278 			outoff += rc;
279 			*offp = outoff;
280 			ARRAYLIST_ADD(typep, outinfo->line_types);
281 			if (typep == NULL)
282 				return ENOMEM;
283 			*typep = DIFF_LINE_PLUS;
284 		}
285 		state->header_printed = true;
286 	}
287 
288 	left_len = cc->left.end - cc->left.start;
289 	if (result->left->atoms.len == 0)
290 		left_start = 0;
291 	else if (left_len == 0 && cc->left.start > 0)
292 		left_start = cc->left.start;
293 	else
294 		left_start = cc->left.start + 1;
295 
296 	right_len = cc->right.end - cc->right.start;
297 	if (result->right->atoms.len == 0)
298 		right_start = 0;
299 	else if (right_len == 0 && cc->right.start > 0)
300 		right_start = cc->right.start;
301 	else
302 		right_start = cc->right.start + 1;
303 
304 	if (show_function_prototypes) {
305 		rc = diff_output_match_function_prototype(state->prototype,
306 		    sizeof(state->prototype), &state->last_prototype_idx,
307 		    result, cc);
308 		if (rc)
309 			return rc;
310 	}
311 
312 	if (left_len == 1 && right_len == 1) {
313 		rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
314 			left_start, right_start,
315 			state->prototype[0] ? " " : "",
316 			state->prototype[0] ? state->prototype : "");
317 	} else if (left_len == 1 && right_len != 1) {
318 		rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n",
319 			left_start, right_start, right_len,
320 			state->prototype[0] ? " " : "",
321 			state->prototype[0] ? state->prototype : "");
322 	} else if (left_len != 1 && right_len == 1) {
323 		rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n",
324 			left_start, left_len, right_start,
325 			state->prototype[0] ? " " : "",
326 			state->prototype[0] ? state->prototype : "");
327 	} else {
328 		rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n",
329 			left_start, left_len, right_start, right_len,
330 			state->prototype[0] ? " " : "",
331 			state->prototype[0] ? state->prototype : "");
332 	}
333 	if (rc < 0)
334 		return errno;
335 	if (outinfo) {
336 		ARRAYLIST_ADD(offp, outinfo->line_offsets);
337 		if (offp == NULL)
338 			return ENOMEM;
339 		outoff += rc;
340 		*offp = outoff;
341 		ARRAYLIST_ADD(typep, outinfo->line_types);
342 		if (typep == NULL)
343 			return ENOMEM;
344 		*typep = DIFF_LINE_HUNK;
345 	}
346 
347 	/* Got the absolute line numbers where to start printing, and the index
348 	 * of the interesting (non-context) chunk.
349 	 * To print context lines above the interesting chunk, nipping on the
350 	 * previous chunk index may be necessary.
351 	 * It is guaranteed to be only context lines where left == right, so it
352 	 * suffices to look on the left. */
353 	const struct diff_chunk *first_chunk;
354 	int chunk_start_line;
355 	first_chunk = &result->chunks.head[cc->chunk.start];
356 	chunk_start_line = diff_atom_root_idx(result->left,
357 					      first_chunk->left_start);
358 	if (cc->left.start < chunk_start_line) {
359 		rc = diff_output_lines(outinfo, dest, " ",
360 				  &result->left->atoms.head[cc->left.start],
361 				  chunk_start_line - cc->left.start);
362 		if (rc)
363 			return rc;
364 	}
365 
366 	/* Now write out all the joined chunks and contexts between them */
367 	int c_idx;
368 	for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
369 		const struct diff_chunk *c = &result->chunks.head[c_idx];
370 
371 		if (c->left_count && c->right_count)
372 			rc = diff_output_lines(outinfo, dest,
373 					  c->solved ? " " : "?",
374 					  c->left_start, c->left_count);
375 		else if (c->left_count && !c->right_count)
376 			rc = diff_output_lines(outinfo, dest,
377 					  c->solved ? "-" : "?",
378 					  c->left_start, c->left_count);
379 		else if (c->right_count && !c->left_count)
380 			rc = diff_output_lines(outinfo, dest,
381 					  c->solved ? "+" : "?",
382 					  c->right_start, c->right_count);
383 		if (rc)
384 			return rc;
385 
386 		if (cc->chunk.end == result->chunks.len) {
387 			rc = diff_output_trailing_newline_msg(outinfo, dest, c);
388 			if (rc != DIFF_RC_OK)
389 				return rc;
390 		}
391 	}
392 
393 	/* Trailing context? */
394 	const struct diff_chunk *last_chunk;
395 	int chunk_end_line;
396 	last_chunk = &result->chunks.head[cc->chunk.end - 1];
397 	chunk_end_line = diff_atom_root_idx(result->left,
398 					    last_chunk->left_start
399 					    + last_chunk->left_count);
400 	if (cc->left.end > chunk_end_line) {
401 		rc = diff_output_lines(outinfo, dest, " ",
402 				  &result->left->atoms.head[chunk_end_line],
403 				  cc->left.end - chunk_end_line);
404 		if (rc)
405 			return rc;
406 
407 		if (cc->left.end == result->left->atoms.len) {
408 			rc = diff_output_trailing_newline_msg(outinfo, dest,
409 			    &result->chunks.head[result->chunks.len - 1]);
410 			if (rc != DIFF_RC_OK)
411 				return rc;
412 		}
413 	}
414 
415 	return DIFF_RC_OK;
416 }
417 
418 int
419 diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
420 			  struct diff_output_unidiff_state *state,
421 			  const struct diff_input_info *info,
422 			  const struct diff_result *result,
423 			  const struct diff_chunk_context *cc)
424 {
425 	struct diff_output_info *outinfo = NULL;
426 	int flags = (result->left->root->diff_flags |
427 	    result->right->root->diff_flags);
428 	bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
429 
430 	if (output_info) {
431 		*output_info = diff_output_info_alloc();
432 		if (*output_info == NULL)
433 			return ENOMEM;
434 		outinfo = *output_info;
435 	}
436 
437 	return output_unidiff_chunk(outinfo, dest, state, info,
438 	    result, false, show_function_prototypes, cc);
439 }
440 
441 int
442 diff_output_unidiff(struct diff_output_info **output_info,
443 		    FILE *dest, const struct diff_input_info *info,
444 		    const struct diff_result *result,
445 		    unsigned int context_lines)
446 {
447 	struct diff_output_unidiff_state *state;
448 	struct diff_chunk_context cc = {};
449 	struct diff_output_info *outinfo = NULL;
450 	int atomizer_flags = (result->left->atomizer_flags|
451 	    result->right->atomizer_flags);
452 	int flags = (result->left->root->diff_flags |
453 	    result->right->root->diff_flags);
454 	bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
455 	bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
456 	bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
457 	off_t outoff = 0, *offp;
458 	uint8_t *typep;
459 	int rc, i;
460 
461 	if (!result)
462 		return EINVAL;
463 	if (result->rc != DIFF_RC_OK)
464 		return result->rc;
465 
466 	if (output_info) {
467 		*output_info = diff_output_info_alloc();
468 		if (*output_info == NULL)
469 			return ENOMEM;
470 		outinfo = *output_info;
471 	}
472 
473 	if (have_binary && !force_text) {
474 		for (i = 0; i < result->chunks.len; i++) {
475 			struct diff_chunk *c = &result->chunks.head[i];
476 			enum diff_chunk_type t = diff_chunk_type(c);
477 
478 			if (t != CHUNK_MINUS && t != CHUNK_PLUS)
479 				continue;
480 
481 			if (outinfo && outinfo->line_offsets.len > 0) {
482 				unsigned int idx =
483 				    outinfo->line_offsets.len - 1;
484 				outoff = outinfo->line_offsets.head[idx];
485 			}
486 
487 			rc = fprintf(dest, "Binary files %s and %s differ\n",
488 			    diff_output_get_label_left(info),
489 			    diff_output_get_label_right(info));
490 			if (outinfo) {
491 				ARRAYLIST_ADD(offp, outinfo->line_offsets);
492 				if (offp == NULL)
493 					return ENOMEM;
494 				outoff += rc;
495 				*offp = outoff;
496 				ARRAYLIST_ADD(typep, outinfo->line_types);
497 				if (typep == NULL)
498 					return ENOMEM;
499 				*typep = DIFF_LINE_NONE;
500 			}
501 			break;
502 		}
503 
504 		return DIFF_RC_OK;
505 	}
506 
507 	state = diff_output_unidiff_state_alloc();
508 	if (state == NULL) {
509 		if (output_info) {
510 			diff_output_info_free(*output_info);
511 			*output_info = NULL;
512 		}
513 		return ENOMEM;
514 	}
515 
516 #if DEBUG
517 	unsigned int check_left_pos, check_right_pos;
518 	check_left_pos = 0;
519 	check_right_pos = 0;
520 	for (i = 0; i < result->chunks.len; i++) {
521 		struct diff_chunk *c = &result->chunks.head[i];
522 		enum diff_chunk_type t = diff_chunk_type(c);
523 
524 		debug("[%d] %s lines L%d R%d @L %d @R %d\n",
525 		      i, (t == CHUNK_MINUS ? "minus" :
526 			  (t == CHUNK_PLUS ? "plus" :
527 			   (t == CHUNK_SAME ? "same" : "?"))),
528 		      c->left_count,
529 		      c->right_count,
530 		      c->left_start ? diff_atom_root_idx(result->left, c->left_start) : -1,
531 		      c->right_start ? diff_atom_root_idx(result->right, c->right_start) : -1);
532 		assert(check_left_pos == diff_atom_root_idx(result->left, c->left_start));
533 		assert(check_right_pos == diff_atom_root_idx(result->right, c->right_start));
534 		check_left_pos += c->left_count;
535 		check_right_pos += c->right_count;
536 
537 	}
538 	assert(check_left_pos == result->left->atoms.len);
539 	assert(check_right_pos == result->right->atoms.len);
540 #endif
541 
542 	for (i = 0; i < result->chunks.len; i++) {
543 		struct diff_chunk *c = &result->chunks.head[i];
544 		enum diff_chunk_type t = diff_chunk_type(c);
545 		struct diff_chunk_context next;
546 
547 		if (t != CHUNK_MINUS && t != CHUNK_PLUS)
548 			continue;
549 
550 		if (diff_chunk_context_empty(&cc)) {
551 			/* These are the first lines being printed.
552 			 * Note down the start point, any number of subsequent
553 			 * chunks may be joined up to this unidiff chunk by
554 			 * context lines or by being directly adjacent. */
555 			diff_chunk_context_get(&cc, result, i, context_lines);
556 			debug("new chunk to be printed:"
557 			      " chunk %d-%d left %d-%d right %d-%d\n",
558 			      cc.chunk.start, cc.chunk.end,
559 			      cc.left.start, cc.left.end,
560 			      cc.right.start, cc.right.end);
561 			continue;
562 		}
563 
564 		/* There already is a previous chunk noted down for being
565 		 * printed. Does it join up with this one? */
566 		diff_chunk_context_get(&next, result, i, context_lines);
567 		debug("new chunk to be printed:"
568 		      " chunk %d-%d left %d-%d right %d-%d\n",
569 		      next.chunk.start, next.chunk.end,
570 		      next.left.start, next.left.end,
571 		      next.right.start, next.right.end);
572 
573 		if (diff_chunk_contexts_touch(&cc, &next)) {
574 			/* This next context touches or overlaps the previous
575 			 * one, join. */
576 			diff_chunk_contexts_merge(&cc, &next);
577 			debug("new chunk to be printed touches previous chunk,"
578 			      " now: left %d-%d right %d-%d\n",
579 			      cc.left.start, cc.left.end,
580 			      cc.right.start, cc.right.end);
581 			continue;
582 		}
583 
584 		/* No touching, so the previous context is complete with a gap
585 		 * between it and this next one. Print the previous one and
586 		 * start fresh here. */
587 		debug("new chunk to be printed does not touch previous chunk;"
588 		      " print left %d-%d right %d-%d\n",
589 		      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
590 		output_unidiff_chunk(outinfo, dest, state, info, result,
591 		    true, show_function_prototypes, &cc);
592 		cc = next;
593 		debug("new unprinted chunk is left %d-%d right %d-%d\n",
594 		      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
595 	}
596 
597 	if (!diff_chunk_context_empty(&cc))
598 		output_unidiff_chunk(outinfo, dest, state, info, result,
599 		    true, show_function_prototypes, &cc);
600 	diff_output_unidiff_state_free(state);
601 	return DIFF_RC_OK;
602 }
603