1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 #include "sna.h"
33 
34 #include <unistd.h>
35 #include <pthread.h>
36 #include <signal.h>
37 
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#endif

/*
 * Report whether the process is running under valgrind.
 *
 * Always false when the build was configured without HAVE_VALGRIND;
 * otherwise the answer comes from valgrind's RUNNING_ON_VALGRIND.
 */
static inline bool valgrind_active(void)
{
#ifdef HAVE_VALGRIND
	return RUNNING_ON_VALGRIND;
#else
	return false;
#endif
}
44 
/*
 * Number of slots in the pool, slot 0 being the thread that called
 * sna_threads_init(). Starts at -1 ("not initialised yet") and is set
 * to 0 whenever threading is disabled or the pool has been killed.
 */
static int max_threads = -1;

/* One slot of the pool. */
struct thread {
	pthread_t thread;
	pthread_mutex_t mutex;	/* held while func/arg are updated */
	pthread_cond_t cond;	/* signalled whenever func changes */

	void (*func)(void *arg);	/* pending job, NULL when idle */
	void *arg;			/* argument passed to func */
};

/* The pool itself: max_threads slots allocated by sna_threads_init(). */
static struct thread *threads;
55 
__run__(void * arg)56 static void *__run__(void *arg)
57 {
58 	struct thread *t = arg;
59 	sigset_t signals;
60 
61 	/* Disable all signals in the slave threads as X uses them for IO */
62 	sigfillset(&signals);
63 	sigdelset(&signals, SIGBUS);
64 	sigdelset(&signals, SIGSEGV);
65 	pthread_sigmask(SIG_SETMASK, &signals, NULL);
66 
67 	pthread_mutex_lock(&t->mutex);
68 	while (1) {
69 		while (t->func == NULL)
70 			pthread_cond_wait(&t->cond, &t->mutex);
71 		pthread_mutex_unlock(&t->mutex);
72 
73 		assert(t->func);
74 		t->func(t->arg);
75 
76 		pthread_mutex_lock(&t->mutex);
77 		t->arg = NULL;
78 		t->func = NULL;
79 		pthread_cond_signal(&t->cond);
80 	}
81 	pthread_mutex_unlock(&t->mutex);
82 
83 	return NULL;
84 }
85 
/*
 * popcount(x): number of bits set in the unsigned int x.
 *
 * GNU-compatible compilers use the builtin; everything else gets the
 * portable loop below.
 */
#if defined(__GNUC__)
#define popcount(x) __builtin_popcount(x)
#else
static int popcount(unsigned int x)
{
	int bits;

	/* every pass clears the lowest bit that is still set */
	for (bits = 0; x != 0; bits++)
		x &= x - 1;

	return bits;
}
#endif
101 
/*
 * Estimate the number of cores by parsing /proc/cpuinfo.
 *
 * Every distinct "physical id" and every distinct "core id" in the
 * range 0..31 is recorded as one bit of a 32-bit mask; the result is
 * the number of physical ids multiplied by the number of core ids.
 *
 * Returns 0 if the file cannot be opened or if either kind of line is
 * missing, which tells the caller to fall back to another method.
 */
static int
num_cores(void)
{
	uint32_t processors = 0, cores = 0;
	size_t len = 0;
	char *line = NULL;
	FILE *file;

	file = fopen("/proc/cpuinfo", "r");
	if (file == NULL)
		return 0;

	while (getline(&line, &len, file) != -1) {
		uint32_t *mask;
		int id;

		if (sscanf(line, "physical id : %d", &id) == 1)
			mask = &processors;
		else if (sscanf(line, "core id : %d", &id) == 1)
			mask = &cores;
		else
			continue;

		/* A negative or too large shift count is undefined
		 * behaviour, so ids outside the mask are ignored. */
		if (id < 0 || id >= 32)
			continue;

		/* shift an unsigned 1 so that bit 31 is well defined */
		*mask |= (uint32_t)1 << id;
	}
	free(line);
	fclose(file);

	DBG(("%s: processors=0x%08x, cores=0x%08x\n",
	     __FUNCTION__, processors, cores));

	return popcount(processors) * popcount(cores);
}
133 
sna_threads_init(void)134 void sna_threads_init(void)
135 {
136 	int n;
137 
138 	if (max_threads != -1)
139 		return;
140 
141 	if (valgrind_active())
142 		goto bail;
143 
144 	max_threads = num_cores();
145 	if (max_threads == 0)
146 		max_threads = sysconf(_SC_NPROCESSORS_ONLN) / 2;
147 	if (max_threads <= 1)
148 		goto bail;
149 
150 	DBG(("%s: creating a thread pool of %d threads\n",
151 	     __func__, max_threads));
152 
153 	threads = malloc (sizeof(threads[0])*max_threads);
154 	if (threads == NULL)
155 		goto bail;
156 
157 	for (n = 1; n < max_threads; n++) {
158 		pthread_mutex_init(&threads[n].mutex, NULL);
159 		pthread_cond_init(&threads[n].cond, NULL);
160 
161 		threads[n].func = NULL;
162 		threads[n].arg = NULL;
163 		if (pthread_create(&threads[n].thread, NULL,
164 				   __run__, &threads[n]))
165 			goto bail;
166 	}
167 
168 	threads[0].thread = pthread_self();
169 	return;
170 
171 bail:
172 	max_threads = 0;
173 }
174 
sna_threads_run(int id,void (* func)(void * arg),void * arg)175 void sna_threads_run(int id, void (*func)(void *arg), void *arg)
176 {
177 	assert(max_threads > 0);
178 	assert(pthread_self() == threads[0].thread);
179 	assert(id > 0 && id < max_threads);
180 
181 	assert(threads[id].func == NULL);
182 
183 	pthread_mutex_lock(&threads[id].mutex);
184 	threads[id].func = func;
185 	threads[id].arg = arg;
186 	pthread_cond_signal(&threads[id].cond);
187 	pthread_mutex_unlock(&threads[id].mutex);
188 }
189 
sna_threads_trap(int sig)190 void sna_threads_trap(int sig)
191 {
192 	pthread_t t = pthread_self();
193 	int n;
194 
195 	if (max_threads == 0)
196 		return;
197 
198 	if (t == threads[0].thread)
199 		return;
200 
201 	for (n = 1; threads[n].thread != t; n++)
202 		;
203 
204 	ERR(("%s: thread[%d] caught signal %d\n", __func__, n, sig));
205 
206 	pthread_mutex_lock(&threads[n].mutex);
207 	threads[n].arg = (void *)(intptr_t)sig;
208 	threads[n].func = NULL;
209 	pthread_cond_signal(&threads[n].cond);
210 	pthread_mutex_unlock(&threads[n].mutex);
211 
212 	pthread_exit(&sig);
213 }
214 
sna_threads_wait(void)215 void sna_threads_wait(void)
216 {
217 	int n;
218 
219 	assert(max_threads > 0);
220 	assert(pthread_self() == threads[0].thread);
221 
222 	for (n = 1; n < max_threads; n++) {
223 		if (threads[n].func != NULL) {
224 			pthread_mutex_lock(&threads[n].mutex);
225 			while (threads[n].func)
226 				pthread_cond_wait(&threads[n].cond, &threads[n].mutex);
227 			pthread_mutex_unlock(&threads[n].mutex);
228 		}
229 
230 		if (threads[n].arg != NULL) {
231 			DBG(("%s: thread[%d] died from signal %d\n", __func__, n, (int)(intptr_t)threads[n].arg));
232 			sna_threads_kill();
233 			return;
234 		}
235 	}
236 }
237 
sna_threads_kill(void)238 void sna_threads_kill(void)
239 {
240 	int n;
241 
242 	ERR(("%s: kill %d threads\n", __func__, max_threads));
243 	assert(max_threads > 0);
244 	assert(pthread_self() == threads[0].thread);
245 
246 	for (n = 1; n < max_threads; n++)
247 		pthread_cancel(threads[n].thread);
248 
249 	for (n = 1; n < max_threads; n++)
250 		pthread_join(threads[n].thread, NULL);
251 
252 	max_threads = 0;
253 }
254 
sna_use_threads(int width,int height,int threshold)255 int sna_use_threads(int width, int height, int threshold)
256 {
257 	int num_threads;
258 
259 	if (max_threads <= 0)
260 		return 1;
261 
262 	if (height <= 1)
263 		return 1;
264 
265 	if (width < 128)
266 		height /= 128/width;
267 
268 	num_threads = height * max_threads / threshold - 1;
269 	if (num_threads <= 0)
270 		return 1;
271 
272 	if (num_threads > max_threads)
273 		num_threads = max_threads;
274 	if (num_threads > height)
275 		num_threads = height;
276 
277 	return num_threads;
278 }
279 
280 struct thread_composite {
281 	pixman_image_t *src, *mask, *dst;
282 	pixman_op_t op;
283 	int16_t src_x, src_y;
284 	int16_t mask_x, mask_y;
285 	int16_t dst_x, dst_y;
286 	uint16_t width, height;
287 };
288 
thread_composite(void * arg)289 static void thread_composite(void *arg)
290 {
291 	struct thread_composite *t = arg;
292 	pixman_image_composite(t->op, t->src, t->mask, t->dst,
293 			       t->src_x, t->src_y,
294 			       t->mask_x, t->mask_y,
295 			       t->dst_x, t->dst_y,
296 			       t->width, t->height);
297 }
298 
/*
 * pixman_image_composite() with the same arguments, optionally spread
 * over the thread pool.
 *
 * sna_use_threads() (threshold 32) picks the thread count. With a
 * single thread the composite runs directly on the caller. Otherwise
 * the destination is cut into horizontal bands of dy rows: workers
 * 1..num_threads-1 take the bands from the top down, the caller does
 * the last (possibly shorter) band itself and then waits for the
 * workers.
 *
 * All work is bracketed by sigtrap_get()/sigtrap_put(), which are
 * defined outside this file. NOTE(review): a non-zero sigtrap_get()
 * presumably means a fault was trapped; the work is then skipped and,
 * in the threaded case, the pool is killed.
 */
void sna_image_composite(pixman_op_t        op,
			 pixman_image_t    *src,
			 pixman_image_t    *mask,
			 pixman_image_t    *dst,
			 int16_t            src_x,
			 int16_t            src_y,
			 int16_t            mask_x,
			 int16_t            mask_y,
			 int16_t            dst_x,
			 int16_t            dst_y,
			 uint16_t           width,
			 uint16_t           height)
{
	int num_threads = sna_use_threads(width, height, 32);

	if (num_threads > 1) {
		struct thread_composite data[num_threads];
		int y = dst_y;
		int dy = (height + num_threads - 1) / num_threads;
		int rows_left, n;

		DBG(("%s: using %d threads for compositing %dx%d\n",
		     __FUNCTION__, num_threads, width, height));

		/* rounding dy up may leave nothing for the last thread */
		if ((num_threads - 1) * dy >= height)
			num_threads--;

		/* data[0] doubles as the template for the worker jobs and,
		 * later on, as the caller's own job */
		data[0] = (struct thread_composite) {
			.src = src,
			.mask = mask,
			.dst = dst,
			.op = op,
			.src_x = src_x,
			.src_y = src_y,
			.mask_x = mask_x,
			.mask_y = mask_y,
			.dst_x = dst_x,
			.dst_y = y,
			.width = width,
			.height = dy,
		};

		if (sigtrap_get() != 0) {
			sna_threads_kill();
			return;
		}

		for (n = 1; n < num_threads; n++, y += dy) {
			int offset = y - dst_y;

			data[n] = data[0];
			data[n].src_y += offset;
			data[n].mask_y += offset;
			data[n].dst_y = y;

			sna_threads_run(n, thread_composite, &data[n]);
		}

		/* the caller takes whatever rows are left, at most dy */
		assert(y < dst_y + height);
		rows_left = dst_y + height - y;
		if (dy > rows_left)
			dy = rows_left;

		data[0].src_y += y - dst_y;
		data[0].mask_y += y - dst_y;
		data[0].dst_y = y;
		data[0].height = dy;

		thread_composite(&data[0]);

		sna_threads_wait();
		sigtrap_put();
		return;
	}

	if (sigtrap_get() == 0) {
		pixman_image_composite(op, src, mask, dst,
				       src_x, src_y,
				       mask_x, mask_y,
				       dst_x, dst_y,
				       width, height);
		sigtrap_put();
	}
}
376