1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
#include "diff_xdiff.h"

#include <limits.h>

#include "git2/errors.h"
#include "diff.h"
#include "diff_driver.h"
#include "patch_generate.h"
14 
/*
 * Scan forward in `*str` to the next run of decimal digits and parse it.
 *
 * @param str   in/out cursor into a NUL-terminated string; on return it
 *              points just past the digits that were consumed
 * @param value receives the parsed, non-negative number
 * @return 0 if at least one digit was parsed; -1 if the end of the string
 *         was reached without finding a digit (`*str` is then left at the
 *         terminator and `*value` is 0), or if the number does not fit in
 *         an int (`*str` and `*value` are then left untouched)
 */
static int git_xdiff_scan_int(const char **str, int *value)
{
	const char *scan = *str;
	int v = 0, digits = 0;

	/* find next digit */
	while (*scan && !git__isdigit(*scan))
		scan++;

	/* parse next number */
	for (; git__isdigit(*scan); scan++, digits++) {
		int digit = *scan - '0';

		/*
		 * Signed overflow is undefined behavior, so reject the input
		 * before `v * 10 + digit` could exceed INT_MAX.
		 */
		if (v > (INT_MAX - digit) / 10)
			return -1;

		v = (v * 10) + digit;
	}

	*str = scan;
	*value = v;
	return (digits > 0) ? 0 : -1;
}
28 
/*
 * Extract the old/new start and line counts from a hunk header of the
 * form "@@ -%d[,%d] +%d[,%d] @@" into `hunk`.
 *
 * A line count that is not present in the header defaults to 1.
 *
 * @return 0 on success; -1 (with the error message set) if the header is
 *         malformed
 */
static int git_xdiff_parse_hunk(git_diff_hunk *hunk, const char *header)
{
	const char *cursor = header;

	if (*cursor != '@')
		goto malformed;

	/* old side: start line, then an optional ",count" */
	if (git_xdiff_scan_int(&cursor, &hunk->old_start) < 0)
		goto malformed;

	if (*cursor != ',')
		hunk->old_lines = 1;
	else if (git_xdiff_scan_int(&cursor, &hunk->old_lines) < 0)
		goto malformed;

	/* new side: start line, then an optional ",count" */
	if (git_xdiff_scan_int(&cursor, &hunk->new_start) < 0)
		goto malformed;

	if (*cursor != ',')
		hunk->new_lines = 1;
	else if (git_xdiff_scan_int(&cursor, &hunk->new_lines) < 0)
		goto malformed;

	/* start lines must never be negative */
	if (hunk->old_start < 0 || hunk->new_start < 0)
		goto malformed;

	return 0;

malformed:
	git_error_set(GIT_ERROR_INVALID, "malformed hunk header from xdiff");
	return -1;
}
57 
/*
 * State shared between git_xdiff and git_xdiff_cb for a single diff run;
 * it reaches the callback through the xdiff callback's `priv` pointer.
 */
typedef struct {
	/* output wrapper whose callbacks, payload and error field are used */
	git_xdiff_output *xo;
	/* patch being generated; supplies the delta handed to the callbacks */
	git_patch_generated *patch;
	/* hunk filled in from the most recent hunk header */
	git_diff_hunk hunk;
	/* running line numbers, reset from the start values of each new hunk */
	int old_lineno, new_lineno;
	/* old and new file contents given to xdl_diff */
	mmfile_t xd_old_data, xd_new_data;
} git_xdiff_info;
65 
/*
 * Point `line` at `content`, count the newlines it contains and assign
 * its old/new line numbers from the running counters in `info`, which
 * are then advanced by that newline count.
 *
 * `line->origin` must already be set: it selects which side(s) of the
 * diff the line belongs to. The side a line does not belong to gets a
 * line number of -1.
 *
 * @return 0 on success; -1 (with the error message set) if the origin is
 *         not recognized
 */
static int diff_update_lines(
	git_xdiff_info *info,
	git_diff_line *line,
	const char *content,
	size_t content_len)
{
	const char *cur = content;
	const char *const end = content + content_len;
	int in_old, in_new;

	line->num_lines = 0;
	while (cur < end) {
		if (*cur++ == '\n')
			++line->num_lines;
	}

	line->content = content;
	line->content_len = content_len;

	/* work out which side(s) of the diff this line is counted on */
	switch (line->origin) {
	case GIT_DIFF_LINE_ADDITION:
	case GIT_DIFF_LINE_DEL_EOFNL:
		in_old = 0;
		in_new = 1;
		break;
	case GIT_DIFF_LINE_DELETION:
	case GIT_DIFF_LINE_ADD_EOFNL:
		in_old = 1;
		in_new = 0;
		break;
	case GIT_DIFF_LINE_CONTEXT:
	case GIT_DIFF_LINE_CONTEXT_EOFNL:
		in_old = 1;
		in_new = 1;
		break;
	default:
		git_error_set(GIT_ERROR_INVALID, "unknown diff line origin %02x",
			(unsigned int)line->origin);
		return -1;
	}

	line->old_lineno = in_old ? info->old_lineno : -1;
	line->new_lineno = in_new ? info->new_lineno : -1;

	if (in_old)
		info->old_lineno += (int)line->num_lines;
	if (in_new)
		info->new_lineno += (int)line->num_lines;

	return 0;
}
110 
git_xdiff_cb(void * priv,mmbuffer_t * bufs,int len)111 static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len)
112 {
113 	git_xdiff_info *info = priv;
114 	git_patch_generated *patch = info->patch;
115 	const git_diff_delta *delta = patch->base.delta;
116 	git_patch_generated_output *output = &info->xo->output;
117 	git_diff_line line;
118 	size_t buffer_len;
119 
120 	if (len == 1) {
121 		output->error = git_xdiff_parse_hunk(&info->hunk, bufs[0].ptr);
122 		if (output->error < 0)
123 			return output->error;
124 
125 		info->hunk.header_len = bufs[0].size;
126 		if (info->hunk.header_len >= sizeof(info->hunk.header))
127 			info->hunk.header_len = sizeof(info->hunk.header) - 1;
128 
129 		/* Sanitize the hunk header in case there is invalid Unicode */
130 		buffer_len = git_utf8_valid_buf_length(bufs[0].ptr, info->hunk.header_len);
131 		/* Sanitizing the hunk header may delete the newline, so add it back again if there is room */
132 		if (buffer_len < info->hunk.header_len) {
133 			bufs[0].ptr[buffer_len] = '\n';
134 			buffer_len += 1;
135 			info->hunk.header_len = buffer_len;
136 		}
137 
138 		memcpy(info->hunk.header, bufs[0].ptr, info->hunk.header_len);
139 		info->hunk.header[info->hunk.header_len] = '\0';
140 
141 		if (output->hunk_cb != NULL &&
142 			(output->error = output->hunk_cb(
143 				delta, &info->hunk, output->payload)))
144 			return output->error;
145 
146 		info->old_lineno = info->hunk.old_start;
147 		info->new_lineno = info->hunk.new_start;
148 	}
149 
150 	if (len == 2 || len == 3) {
151 		/* expect " "/"-"/"+", then data */
152 		line.origin =
153 			(*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION :
154 			(*bufs[0].ptr == '-') ? GIT_DIFF_LINE_DELETION :
155 			GIT_DIFF_LINE_CONTEXT;
156 
157 		if (line.origin == GIT_DIFF_LINE_ADDITION)
158 			line.content_offset = bufs[1].ptr - info->xd_new_data.ptr;
159 		else if (line.origin == GIT_DIFF_LINE_DELETION)
160 			line.content_offset = bufs[1].ptr - info->xd_old_data.ptr;
161 		else
162 			line.content_offset = -1;
163 
164 		output->error = diff_update_lines(
165 			info, &line, bufs[1].ptr, bufs[1].size);
166 
167 		if (!output->error && output->data_cb != NULL)
168 			output->error = output->data_cb(
169 				delta, &info->hunk, &line, output->payload);
170 	}
171 
172 	if (len == 3 && !output->error) {
173 		/* If we have a '+' and a third buf, then we have added a line
174 		 * without a newline and the old code had one, so DEL_EOFNL.
175 		 * If we have a '-' and a third buf, then we have removed a line
176 		 * with out a newline but added a blank line, so ADD_EOFNL.
177 		 */
178 		line.origin =
179 			(*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL :
180 			(*bufs[0].ptr == '-') ? GIT_DIFF_LINE_ADD_EOFNL :
181 			GIT_DIFF_LINE_CONTEXT_EOFNL;
182 
183 		line.content_offset = -1;
184 
185 		output->error = diff_update_lines(
186 			info, &line, bufs[2].ptr, bufs[2].size);
187 
188 		if (!output->error && output->data_cb != NULL)
189 			output->error = output->data_cb(
190 				delta, &info->hunk, &line, output->payload);
191 	}
192 
193 	return output->error;
194 }
195 
/*
 * Run xdiff over the old and new contents of `patch`, reporting hunks and
 * lines through the callbacks held in `output`.
 *
 * @param output assumed to be the `output` member of a git_xdiff_output,
 *               since it is cast back to that type here
 * @param patch  patch whose old and new data are compared
 * @return -1 (with the error message set) if either side is larger than
 *         GIT_XDIFF_MAX_SIZE; otherwise the output's error field as left
 *         by the callbacks
 */
static int git_xdiff(git_patch_generated_output *output, git_patch_generated *patch)
{
	git_xdiff_output *xo = (git_xdiff_output *)output;
	git_xdiff_info info;
	git_diff_find_context_payload findctxt;

	memset(&info, 0, sizeof(info));
	info.patch = patch;
	info.xo    = xo;

	/* `info` lives on this stack frame; it is only valid during xdl_diff */
	xo->callback.priv = &info;

	git_diff_find_context_init(
		&xo->config.find_func, &findctxt, git_patch_generated_driver(patch));
	xo->config.find_func_priv = &findctxt;

	/* only ask for function names when there is a function to find them */
	if (xo->config.find_func != NULL)
		xo->config.flags |= XDL_EMIT_FUNCNAMES;
	else
		xo->config.flags &= ~XDL_EMIT_FUNCNAMES;

	/* TODO: check ofile.opts_flags to see if driver-specific per-file
	 * updates are needed to xo->params.flags
	 */

	git_patch_generated_old_data(&info.xd_old_data.ptr, &info.xd_old_data.size, patch);
	git_patch_generated_new_data(&info.xd_new_data.ptr, &info.xd_new_data.size, patch);

	if (info.xd_old_data.size > GIT_XDIFF_MAX_SIZE ||
		info.xd_new_data.size > GIT_XDIFF_MAX_SIZE) {
		/*
		 * The find-context payload was already initialized above, so
		 * it must be released on this early exit as well.
		 */
		git_diff_find_context_clear(&findctxt);

		git_error_set(GIT_ERROR_INVALID, "files too large for diff");
		return -1;
	}

	/*
	 * NOTE(review): the return value of xdl_diff is not inspected; failures
	 * are reported only through xo->output.error, which git_xdiff_cb sets.
	 */
	xdl_diff(&info.xd_old_data, &info.xd_new_data,
		&xo->params, &xo->config, &xo->callback);

	git_diff_find_context_clear(&findctxt);

	return xo->output.error;
}
237 
/*
 * Prepare `xo` for generating diffs with xdiff: install the diff and
 * output callbacks, set the context sizes and translate the diff option
 * flags into xdiff parameter flags.
 *
 * Flags are only ever OR'ed into `xo->params.flags`; bits already set
 * there are left alone.
 *
 * @param xo   output structure to configure
 * @param opts diff options, or NULL for the defaults (3 lines of context,
 *             0 inter-hunk lines and no flags)
 */
void git_xdiff_init(git_xdiff_output *xo, const git_diff_options *opts)
{
	/* each diff option flag and the xdiff flag(s) it turns on */
	static const struct {
		uint32_t diff_flag;
		unsigned long xdf_flags;
	} flag_map[] = {
		{ GIT_DIFF_IGNORE_WHITESPACE,        XDF_WHITESPACE_FLAGS },
		{ GIT_DIFF_IGNORE_WHITESPACE_CHANGE, XDF_IGNORE_WHITESPACE_CHANGE },
		{ GIT_DIFF_IGNORE_WHITESPACE_EOL,    XDF_IGNORE_WHITESPACE_AT_EOL },
		{ GIT_DIFF_INDENT_HEURISTIC,         XDF_INDENT_HEURISTIC },
		{ GIT_DIFF_PATIENCE,                 XDF_PATIENCE_DIFF },
		{ GIT_DIFF_MINIMAL,                  XDF_NEED_MINIMAL },
		{ GIT_DIFF_IGNORE_BLANK_LINES,       XDF_IGNORE_BLANK_LINES }
	};
	uint32_t flags = 0;
	size_t i;

	xo->output.diff_cb = git_xdiff;

	if (opts != NULL) {
		flags = opts->flags;
		xo->config.ctxlen = opts->context_lines;
		xo->config.interhunkctxlen = opts->interhunk_lines;
	} else {
		xo->config.ctxlen = 3;
		xo->config.interhunkctxlen = 0;
	}

	for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++) {
		if (flags & flag_map[i].diff_flag)
			xo->params.flags |= flag_map[i].xdf_flags;
	}

	xo->callback.outf = git_xdiff_cb;
}
266