1 /*
2  * Copyright (C) 2005 Junio C Hamano
3  * Copyright (C) 2010 Google Inc.
4  */
5 #include "cache.h"
6 #include "diff.h"
7 #include "diffcore.h"
8 #include "xdiff-interface.h"
9 #include "kwset.h"
10 #include "commit.h"
11 #include "quote.h"
12 
/*
 * Signature shared by the per-filepair matchers in this file (diff_grep()
 * and has_changes()).  pickaxe_match() invokes the selected matcher with
 * the contents of both sides, the diff options, and whichever of the
 * compiled regexp / keyword set diffcore_pickaxe() prepared (the other
 * one is NULL).  pickaxe() treats a non-zero result as "this pair
 * matches".
 */
typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
			  struct diff_options *o,
			  regex_t *regexp, kwset_t kws);
16 
/* State shared between diff_grep() and its per-line callback. */
struct diffgrep_cb {
	regex_t *regexp;	/* pattern tested against each '+'/'-' line */
	int hit;		/* set to 1 by diffgrep_consume() on the first match */
};
21 
diffgrep_consume(void * priv,char * line,unsigned long len)22 static int diffgrep_consume(void *priv, char *line, unsigned long len)
23 {
24 	struct diffgrep_cb *data = priv;
25 	regmatch_t regmatch;
26 
27 	if (line[0] != '+' && line[0] != '-')
28 		return 0;
29 	if (data->hit)
30 		BUG("Already matched in diffgrep_consume! Broken xdiff_emit_line_fn?");
31 	if (!regexec_buf(data->regexp, line + 1, len - 1, 1,
32 			 &regmatch, 0)) {
33 		data->hit = 1;
34 		return 1;
35 	}
36 	return 0;
37 }
38 
/*
 * -G matcher: run a textual diff between "one" and "two" and report
 * whether the regexp matches any added or deleted line.
 *
 * Returns 1 on a match.  Otherwise the return value of xdi_diff_outf()
 * is passed through (0 when it reported nothing).  "kws" is not used
 * here; it is only present to satisfy the pickaxe_fn signature.
 */
static int diff_grep(mmfile_t *one, mmfile_t *two,
		     struct diff_options *o,
		     regex_t *regexp, kwset_t kws)
{
	struct diffgrep_cb cb;
	xpparam_t params;
	xdemitconf_t emit_cfg;
	int status;

	cb.regexp = regexp;
	cb.hit = 0;

	memset(&params, 0, sizeof(params));

	memset(&emit_cfg, 0, sizeof(emit_cfg));
	emit_cfg.ctxlen = o->context;
	emit_cfg.interhunkctxlen = o->interhunkcontext;
	emit_cfg.flags = XDL_EMIT_NO_HUNK_HDR;

	status = xdi_diff_outf(one, two, NULL, diffgrep_consume,
			       &cb, &params, &emit_cfg);

	/*
	 * Look at the hit flag before the status: the callback returns 1
	 * on a match, and that may surface as an "error" from
	 * xdi_diff_outf() (see the comment for xdiff_emit_line_fn in
	 * xdiff-interface.h).
	 */
	return cb.hit ? 1 : status;
}
72 
/*
 * Count occurrences of the needle in the buffer described by "mf".
 *
 * When "regexp" is non-NULL it is used for matching; otherwise the
 * keyword set "kws" is searched for.  A non-zero "limit" stops the scan
 * as soon as that many occurrences have been seen; 0 means count them
 * all.  Returns the number of occurrences counted.
 */
static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws,
			     unsigned int limit)
{
	const char *cur = mf->ptr;
	unsigned long left = mf->size;
	unsigned int hits = 0;

	if (!regexp) {
		/* Fixed-string search through the keyword set. */
		while (left) {
			struct kwsmatch found;
			size_t at = kwsexec(kws, cur, left, &found);
			size_t step;

			if (at == (size_t)-1)
				break;

			/* Resume right behind the end of this occurrence. */
			step = at + found.size[0];
			cur += step;
			left -= step;
			hits++;

			if (limit && hits == limit)
				break;
		}
		return hits;
	}

	{
		regmatch_t m;
		int eflags = 0;

		/*
		 * NOTE(review): REG_NOTBOL stays set for every scan after
		 * the first match, even when the previous match ended right
		 * behind a newline -- confirm that this is intended for
		 * patterns anchored with '^'.
		 */
		for (;;) {
			if (!left)
				break;
			if (regexec_buf(regexp, cur, left, 1, &m, eflags))
				break;

			eflags |= REG_NOTBOL;
			cur += m.rm_eo;
			left -= m.rm_eo;

			/* Step over one byte after an empty match to make progress. */
			if (left && m.rm_so == m.rm_eo) {
				cur++;
				left--;
			}
			hits++;

			if (limit && hits == limit)
				break;
		}
	}
	return hits;
}
115 
/*
 * -S matcher: report whether the number of needle occurrences differs
 * between "one" and "two".  A NULL side counts as zero occurrences.
 * "o" is not used here; it is only present to satisfy the pickaxe_fn
 * signature.
 */
static int has_changes(mmfile_t *one, mmfile_t *two,
		       struct diff_options *o,
		       regex_t *regexp, kwset_t kws)
{
	unsigned int before = 0;
	unsigned int after = 0;

	if (one)
		before = contains(one, regexp, kws, 0);

	/*
	 * Counting up to one more than "before" is enough to tell whether
	 * the two counts differ.
	 */
	if (two)
		after = contains(two, regexp, kws, before + 1);

	return before != after;
}
124 
/*
 * Decide whether a single filepair matches the pickaxe criteria.
 *
 * With o->objfind set, the pair matches when either valid side's object
 * id is in that set; "fn" is not consulted.  Otherwise both sides are
 * loaded (through textconv when allowed and configured) and handed to
 * "fn", whose result is returned.  Returns 0 for pairs that are skipped
 * without looking at content.
 */
static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
			 regex_t *regexp, kwset_t kws, pickaxe_fn fn)
{
	struct userdiff_driver *conv_old = NULL;
	struct userdiff_driver *conv_new = NULL;
	mmfile_t old_mf, new_mf;
	int matched;

	/* ignore unmerged: neither side is valid */
	if (!(DIFF_FILE_VALID(p->one) || DIFF_FILE_VALID(p->two)))
		return 0;

	if (o->objfind) {
		if (DIFF_FILE_VALID(p->one) &&
		    oidset_contains(o->objfind, &p->one->oid))
			return 1;
		return DIFF_FILE_VALID(p->two) &&
		       oidset_contains(o->objfind, &p->two->oid);
	}

	if (o->flags.allow_textconv) {
		conv_old = get_textconv(o->repo, p->one);
		conv_new = get_textconv(o->repo, p->two);
	}

	/*
	 * An unmodified pair has the same count on both sides, so the
	 * blobs need not be loaded at all.  The exception is a pair whose
	 * sides use two different textconv filters (e.g. an exact rename
	 * with different textconv attributes for each side), because the
	 * converted content might differ.
	 */
	if (conv_old == conv_new && diff_unmodified_pair(p))
		return 0;

	/* Under -G, binary content is skipped unless --text or textconv applies. */
	if ((o->pickaxe_opts & DIFF_PICKAXE_KIND_G) && !o->flags.text) {
		if (!conv_old && diff_filespec_is_binary(o->repo, p->one))
			return 0;
		if (!conv_new && diff_filespec_is_binary(o->repo, p->two))
			return 0;
	}

	old_mf.size = fill_textconv(o->repo, conv_old, p->one, &old_mf.ptr);
	new_mf.size = fill_textconv(o->repo, conv_new, p->two, &new_mf.ptr);

	matched = fn(&old_mf, &new_mf, o, regexp, kws);

	/*
	 * Only buffers produced with a textconv driver are freed here;
	 * presumably the others belong to the filespec and are released
	 * by diff_free_filespec_data() below.
	 */
	if (conv_old)
		free(old_mf.ptr);
	if (conv_new)
		free(new_mf.ptr);
	diff_free_filespec_data(p->one);
	diff_free_filespec_data(p->two);

	return matched;
}
179 
/*
 * Filter the queue "q" in place according to the pickaxe criteria.
 *
 * Without DIFF_PICKAXE_ALL, only the filepairs that match are kept and
 * the others are freed.  With DIFF_PICKAXE_ALL, the queue is left
 * untouched as soon as any filepair matches, and is emptied completely
 * when none does.
 */
static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
		    regex_t *regexp, kwset_t kws, pickaxe_fn fn)
{
	struct diff_queue_struct kept;
	int idx;

	DIFF_QUEUE_CLEAR(&kept);

	if (!(o->pickaxe_opts & DIFF_PICKAXE_ALL)) {
		/* Keep only the filepairs that have the needle. */
		for (idx = 0; idx < q->nr; idx++) {
			struct diff_filepair *pair = q->queue[idx];

			if (!pickaxe_match(pair, o, regexp, kws, fn))
				diff_free_filepair(pair);
			else
				diff_q(&kept, pair);
		}
	} else {
		/* Show the whole changeset if the needle exists anywhere. */
		for (idx = 0; idx < q->nr; idx++) {
			if (pickaxe_match(q->queue[idx], o, regexp, kws, fn))
				return; /* do not munge the queue */
		}

		/*
		 * Nothing matched: release every entry now; the still
		 * empty "kept" queue replaces the original one below.
		 */
		for (idx = 0; idx < q->nr; idx++)
			diff_free_filepair(q->queue[idx]);
	}

	free(q->queue);
	*q = kept;
}
217 
/*
 * Compile "needle" into "regex" with the given cflags.  If regcomp()
 * reports an error, die() with the message obtained from regerror().
 */
static void regcomp_or_die(regex_t *regex, const char *needle, int cflags)
{
	char msg[1024];
	int rc = regcomp(regex, needle, cflags);

	if (!rc)
		return;

	regerror(rc, regex, msg, sizeof(msg));
	die("invalid regex: %s", msg);
}
228 
diffcore_pickaxe(struct diff_options * o)229 void diffcore_pickaxe(struct diff_options *o)
230 {
231 	const char *needle = o->pickaxe;
232 	int opts = o->pickaxe_opts;
233 	regex_t regex, *regexp = NULL;
234 	kwset_t kws = NULL;
235 	pickaxe_fn fn;
236 
237 	if (opts & ~DIFF_PICKAXE_KIND_OBJFIND &&
238 	    (!needle || !*needle))
239 		BUG("should have needle under -G or -S");
240 	if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
241 		int cflags = REG_EXTENDED | REG_NEWLINE;
242 		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE)
243 			cflags |= REG_ICASE;
244 		regcomp_or_die(&regex, needle, cflags);
245 		regexp = &regex;
246 
247 		if (opts & DIFF_PICKAXE_KIND_G)
248 			fn = diff_grep;
249 		else if (opts & DIFF_PICKAXE_REGEX)
250 			fn = has_changes;
251 		else
252 			/*
253 			 * We don't need to check the combination of
254 			 * -G and --pickaxe-regex, by the time we get
255 			 * here diff.c has already died if they're
256 			 * combined. See the usage tests in
257 			 * t4209-log-pickaxe.sh.
258 			 */
259 			BUG("unreachable");
260 	} else if (opts & DIFF_PICKAXE_KIND_S) {
261 		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
262 		    has_non_ascii(needle)) {
263 			struct strbuf sb = STRBUF_INIT;
264 			int cflags = REG_NEWLINE | REG_ICASE;
265 
266 			basic_regex_quote_buf(&sb, needle);
267 			regcomp_or_die(&regex, sb.buf, cflags);
268 			strbuf_release(&sb);
269 			regexp = &regex;
270 		} else {
271 			kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE
272 				       ? tolower_trans_tbl : NULL);
273 			kwsincr(kws, needle, strlen(needle));
274 			kwsprep(kws);
275 		}
276 		fn = has_changes;
277 	} else if (opts & DIFF_PICKAXE_KIND_OBJFIND) {
278 		fn = NULL;
279 	} else {
280 		BUG("unknown pickaxe_opts flag");
281 	}
282 
283 	pickaxe(&diff_queued_diff, o, regexp, kws, fn);
284 
285 	if (regexp)
286 		regfree(regexp);
287 	if (kws)
288 		kwsfree(kws);
289 	return;
290 }
291