1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
#include "diff_xdiff.h"
#include "util.h"

#include <limits.h>

#include "git2/errors.h"
#include "diff.h"
#include "diff_driver.h"
#include "patch_generate.h"
15 
/*
 * Skip forward in `*str` to the next run of decimal digits and parse it.
 *
 * Every non-digit character in front of the number is skipped, so a
 * leading '-' or '+' acts as a separator and is never treated as a sign;
 * the parsed value is therefore never negative.
 *
 * On return `*str` points just past the last digit consumed (or at the
 * position where scanning stopped if no digit was found) and `*value`
 * holds the parsed number.  If the number does not fit in an `int`, all
 * of its digits are still consumed and `*value` holds the part that was
 * accumulated before the overflow was detected.
 *
 * Returns 0 when a number was parsed, or -1 when no digits were found or
 * the number is too large for an `int`.
 */
static int git_xdiff_scan_int(const char **str, int *value)
{
	const char *scan = *str;
	int v = 0, digits = 0, overflow = 0;

	/* find next digit */
	while (*scan && !git__isdigit(*scan))
		scan++;

	/* parse next number */
	for (; git__isdigit(*scan); scan++, digits++) {
		int digit = *scan - '0';

		/* keep consuming digits so *str ends up past the whole number */
		if (overflow)
			continue;

		/* v * 10 + digit would exceed INT_MAX: signed overflow is UB */
		if (v > (INT_MAX - digit) / 10) {
			overflow = 1;
			continue;
		}

		v = (v * 10) + digit;
	}

	*str = scan;
	*value = v;

	return (digits > 0 && !overflow) ? 0 : -1;
}
29 
/*
 * Extract the line ranges from a hunk header produced by xdiff.
 *
 * The header is expected to look like "@@ -%d[,%d] +%d[,%d] @@".  When a
 * ",%d" line count is absent, the corresponding count defaults to 1.
 *
 * Returns 0 and fills in the old/new start and line counts of `hunk` on
 * success; otherwise sets a GIT_ERROR_INVALID error and returns -1.
 */
static int git_xdiff_parse_hunk(git_diff_hunk *hunk, const char *header)
{
	int result = -1;

	/* single-pass block: any `break` means the header is malformed */
	do {
		if (header[0] != '@')
			break;

		/* old side: start, then optional ",count" */
		if (git_xdiff_scan_int(&header, &hunk->old_start) < 0)
			break;

		if (header[0] != ',')
			hunk->old_lines = 1;
		else if (git_xdiff_scan_int(&header, &hunk->old_lines) < 0)
			break;

		/* new side: start, then optional ",count" */
		if (git_xdiff_scan_int(&header, &hunk->new_start) < 0)
			break;

		if (header[0] != ',')
			hunk->new_lines = 1;
		else if (git_xdiff_scan_int(&header, &hunk->new_lines) < 0)
			break;

		if (hunk->old_start >= 0 && hunk->new_start >= 0)
			result = 0;
	} while (0);

	if (result < 0)
		git_error_set(GIT_ERROR_INVALID, "malformed hunk header from xdiff");

	return result;
}
58 
59 typedef struct {
60 	git_xdiff_output *xo;
61 	git_patch_generated *patch;
62 	git_diff_hunk hunk;
63 	int old_lineno, new_lineno;
64 	mmfile_t xd_old_data, xd_new_data;
65 } git_xdiff_info;
66 
/*
 * Fill in the content and line-number fields of `line` and advance the
 * running line counters kept in `info`.
 *
 * `line->origin` must already be set; it decides which side(s) of the
 * diff the line belongs to.  A side the line does not belong to gets a
 * line number of -1, and its running counter is left untouched.
 *
 * Returns 0 on success, or -1 (with GIT_ERROR_INVALID set) when the
 * origin is not one of the known line types.  The content fields and
 * `num_lines` are filled in even in the error case.
 */
static int diff_update_lines(
	git_xdiff_info *info,
	git_diff_line *line,
	const char *content,
	size_t content_len)
{
	const char *pos = content;
	const char *end = content + content_len;
	int on_old_side, on_new_side;

	/* count the newlines contained in this piece of content */
	line->num_lines = 0;
	while (pos < end) {
		if (*pos++ == '\n')
			++line->num_lines;
	}

	line->content_len = content_len;
	line->content     = content;

	/* work out which side(s) of the diff this line lives on */
	switch (line->origin) {
	case GIT_DIFF_LINE_ADDITION:
	case GIT_DIFF_LINE_DEL_EOFNL:
		on_old_side = 0;
		on_new_side = 1;
		break;
	case GIT_DIFF_LINE_DELETION:
	case GIT_DIFF_LINE_ADD_EOFNL:
		on_old_side = 1;
		on_new_side = 0;
		break;
	case GIT_DIFF_LINE_CONTEXT:
	case GIT_DIFF_LINE_CONTEXT_EOFNL:
		on_old_side = 1;
		on_new_side = 1;
		break;
	default:
		git_error_set(GIT_ERROR_INVALID, "unknown diff line origin %02x",
			(unsigned int)line->origin);
		return -1;
	}

	if (on_old_side) {
		line->old_lineno = info->old_lineno;
		info->old_lineno += (int)line->num_lines;
	} else {
		line->old_lineno = -1;
	}

	if (on_new_side) {
		line->new_lineno = info->new_lineno;
		info->new_lineno += (int)line->num_lines;
	} else {
		line->new_lineno = -1;
	}

	return 0;
}
111 
/*
 * Output callback given to xdiff (installed as `callback.outf`).
 *
 * The number of buffers tells what is being reported:
 *   1 - a hunk header in bufs[0]
 *   2 - a content line: bufs[0] begins with the " "/"-"/"+" origin
 *       marker and bufs[1] is the line data
 *   3 - as for 2, and bufs[2] is additionally reported as a separate
 *       *_EOFNL line
 * Any other count is ignored.
 *
 * The result of each step is stored in `output->error`, which is also
 * the return value.
 */
static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len)
{
	git_xdiff_info *info = priv;
	git_patch_generated_output *output = &info->xo->output;
	const git_diff_delta *delta = info->patch->base.delta;
	git_diff_line line;
	size_t valid_len;
	char marker;

	if (len == 1) {
		git_diff_hunk *hunk = &info->hunk;

		output->error = git_xdiff_parse_hunk(hunk, bufs[0].ptr);
		if (output->error < 0)
			return output->error;

		/* clamp the header so that it fits, leaving room for the NUL */
		hunk->header_len = bufs[0].size;
		if (hunk->header_len >= sizeof(hunk->header))
			hunk->header_len = sizeof(hunk->header) - 1;

		/*
		 * Keep only the leading valid UTF-8 portion of the header.  If
		 * that cut anything off, the newline may have gone with it, so
		 * put one back in the space that was freed.
		 */
		valid_len = git__utf8_valid_buf_length(
			(const uint8_t *) bufs[0].ptr, hunk->header_len);
		if (valid_len < hunk->header_len) {
			bufs[0].ptr[valid_len++] = '\n';
			hunk->header_len = valid_len;
		}

		memcpy(hunk->header, bufs[0].ptr, hunk->header_len);
		hunk->header[hunk->header_len] = '\0';

		if (output->hunk_cb != NULL) {
			output->error = output->hunk_cb(delta, hunk, output->payload);
			if (output->error)
				return output->error;
		}

		/* line numbering restarts at the beginning of every hunk */
		info->old_lineno = hunk->old_start;
		info->new_lineno = hunk->new_start;

		return output->error;
	}

	if (len != 2 && len != 3)
		return output->error;

	/* expect " "/"-"/"+", then data */
	marker = *bufs[0].ptr;

	switch (marker) {
	case '+':
		line.origin = GIT_DIFF_LINE_ADDITION;
		line.content_offset = bufs[1].ptr - info->xd_new_data.ptr;
		break;
	case '-':
		line.origin = GIT_DIFF_LINE_DELETION;
		line.content_offset = bufs[1].ptr - info->xd_old_data.ptr;
		break;
	default:
		line.origin = GIT_DIFF_LINE_CONTEXT;
		line.content_offset = -1;
		break;
	}

	output->error = diff_update_lines(info, &line, bufs[1].ptr, bufs[1].size);

	if (!output->error && output->data_cb != NULL)
		output->error = output->data_cb(
			delta, &info->hunk, &line, output->payload);

	if (len != 3 || output->error)
		return output->error;

	/*
	 * A third buffer accompanies a line that has no trailing newline:
	 *  '+' - the added line lacks a newline the old content had, so
	 *        report DEL_EOFNL
	 *  '-' - the removed line lacked a newline that the new content
	 *        has, so report ADD_EOFNL
	 *  otherwise the line is context, so report CONTEXT_EOFNL
	 */
	switch (marker) {
	case '+':
		line.origin = GIT_DIFF_LINE_DEL_EOFNL;
		break;
	case '-':
		line.origin = GIT_DIFF_LINE_ADD_EOFNL;
		break;
	default:
		line.origin = GIT_DIFF_LINE_CONTEXT_EOFNL;
		break;
	}

	line.content_offset = -1;

	output->error = diff_update_lines(info, &line, bufs[2].ptr, bufs[2].size);

	if (!output->error && output->data_cb != NULL)
		output->error = output->data_cb(
			delta, &info->hunk, &line, output->payload);

	return output->error;
}
196 
/*
 * Run xdiff over the old and new content of `patch`, reporting hunks and
 * lines through git_xdiff_cb().  Installed as `output.diff_cb` by
 * git_xdiff_init(); `output` must therefore be the `output` member at the
 * start of a git_xdiff_output.
 *
 * Returns the error recorded by the callbacks in `xo->output.error`, or
 * -1 (with GIT_ERROR_INVALID set) when either file exceeds
 * GIT_XDIFF_MAX_SIZE or when xdiff itself fails without a callback
 * having recorded an error.
 */
static int git_xdiff(git_patch_generated_output *output, git_patch_generated *patch)
{
	git_xdiff_output *xo = (git_xdiff_output *)output;
	git_xdiff_info info;
	git_diff_find_context_payload findctxt;
	int error, xdl_result;

	memset(&info, 0, sizeof(info));
	info.patch = patch;
	info.xo    = xo;

	xo->callback.priv = &info;

	git_diff_find_context_init(
		&xo->config.find_func, &findctxt, git_patch_generated_driver(patch));
	xo->config.find_func_priv = &findctxt;

	/* only ask xdiff for function names when there is a finder to supply them */
	if (xo->config.find_func != NULL)
		xo->config.flags |= XDL_EMIT_FUNCNAMES;
	else
		xo->config.flags &= ~XDL_EMIT_FUNCNAMES;

	/* TODO: check ofile.opts_flags to see if driver-specific per-file
	 * updates are needed to xo->params.flags
	 */

	git_patch_generated_old_data(&info.xd_old_data.ptr, &info.xd_old_data.size, patch);
	git_patch_generated_new_data(&info.xd_new_data.ptr, &info.xd_new_data.size, patch);

	if (info.xd_old_data.size > GIT_XDIFF_MAX_SIZE ||
		info.xd_new_data.size > GIT_XDIFF_MAX_SIZE) {
		git_error_set(GIT_ERROR_INVALID, "files too large for diff");
		error = -1;
		goto done;
	}

	xdl_result = xdl_diff(&info.xd_old_data, &info.xd_new_data,
		&xo->params, &xo->config, &xo->callback);

	error = xo->output.error;

	/*
	 * xdiff can fail on its own without the callback ever recording an
	 * error; do not report such a failure as success.
	 */
	if (xdl_result < 0 && !error) {
		git_error_set(GIT_ERROR_INVALID, "xdiff failed to generate diff");
		error = -1;
	}

done:
	/* always pair the context-finder init above with its clear */
	git_diff_find_context_clear(&findctxt);

	return error;
}
238 
/*
 * Prepare `xo` for producing diffs with xdiff.
 *
 * Hooks up the diff driver and the xdiff output callback, and translates
 * the relevant settings of `opts` into xdiff configuration.  With a NULL
 * `opts`, 3 context lines, 0 inter-hunk lines and no flags are used.
 *
 * xdiff parameter flags are only ever OR-ed in here, never cleared, so
 * any flags already present in `xo->params.flags` are kept.
 */
void git_xdiff_init(git_xdiff_output *xo, const git_diff_options *opts)
{
	uint32_t flags;

	if (opts != NULL) {
		flags = opts->flags;
		xo->config.ctxlen = opts->context_lines;
		xo->config.interhunkctxlen = opts->interhunk_lines;
	} else {
		flags = 0;
		xo->config.ctxlen = 3;
		xo->config.interhunkctxlen = 0;
	}

	/* whitespace handling */
	if ((flags & GIT_DIFF_IGNORE_WHITESPACE) != 0)
		xo->params.flags |= XDF_WHITESPACE_FLAGS;
	if ((flags & GIT_DIFF_IGNORE_WHITESPACE_CHANGE) != 0)
		xo->params.flags |= XDF_IGNORE_WHITESPACE_CHANGE;
	if ((flags & GIT_DIFF_IGNORE_WHITESPACE_EOL) != 0)
		xo->params.flags |= XDF_IGNORE_WHITESPACE_AT_EOL;

	/* diff heuristics and algorithm selection */
	if ((flags & GIT_DIFF_INDENT_HEURISTIC) != 0)
		xo->params.flags |= XDF_INDENT_HEURISTIC;
	if ((flags & GIT_DIFF_PATIENCE) != 0)
		xo->params.flags |= XDF_PATIENCE_DIFF;
	if ((flags & GIT_DIFF_MINIMAL) != 0)
		xo->params.flags |= XDF_NEED_MINIMAL;

	/* entry points: the diff driver and xdiff's output callback */
	xo->output.diff_cb = git_xdiff;
	xo->callback.outf = git_xdiff_cb;
}
264