1 /* conva ... approximate convolution
2  *
3  * This operation does an approximate convolution.
4  *
5  * Author: John Cupitt & Nicolas Robidoux
6  * Written on: 31/5/11
7  * Modified on:
8  * 31/5/11
9  *      - from im_aconvsep()
10  * 10/7/16
11  * 	- redone as a class
12  */
13 
14 /*
15 
16     This file is part of VIPS.
17 
18     VIPS is free software; you can redistribute it and/or modify
19     it under the terms of the GNU Lesser General Public License as published by
20     the Free Software Foundation; either version 2 of the License, or
21     (at your option) any later version.
22 
23     This program is distributed in the hope that it will be useful,
24     but WITHOUT ANY WARRANTY; without even the implied warranty of
25     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26     GNU Lesser General Public License for more details.
27 
28     You should have received a copy of the GNU Lesser General Public License
29     along with this program; if not, write to the Free Software
30     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31     02110-1301  USA
32 
33  */
34 
35 /*
36 
37     These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
38 
39  */
40 
41 /*
42 
43   See:
44 
45 	http://incubator.quasimondo.com/processing/stackblur.pde
46 
47   This thing is a little like stackblur, but generalised to any 2D
48   convolution.
49 
50  */
51 
52 /*
53 
54   TODO
55 
56 timing:
57 
58 $ time vips im_conv_f img_0075.jpg x2.v g2d201.con
59 real	5m3.359s
60 user	9m34.700s
61 sys	0m1.500s
62 
63 $ time vips im_aconv img_0075.jpg x.v g2d201.con 10 10
64 real	0m3.151s
65 user	0m5.640s
66 sys	0m0.100s
67 
68 $ vips im_subtract x.v x2.v diff.v
69 $ vips im_abs diff.v abs.v
70 $ vips im_max abs.v
71 2.70833
72 
73   	- are we handling mask offset correctly?
74 
75 	- could we do better with an h and a v cumulativization image? we might
76 	not need the huge intermediate we have now, since any line sum an be
77 	found with simple indexing
78 
79  */
80 
81 /*
82 #define DEBUG
83 #define VIPS_DEBUG
84  */
85 
86 #ifdef HAVE_CONFIG_H
87 #include <config.h>
88 #endif /*HAVE_CONFIG_H*/
89 #include <vips/intl.h>
90 
91 #include <stdio.h>
92 #include <string.h>
93 #include <stdlib.h>
94 #include <limits.h>
95 #include <math.h>
96 
97 #include <vips/vips.h>
98 #include <vips/vector.h>
99 #include <vips/debug.h>
100 #include <vips/internal.h>
101 
102 #include "pconvolution.h"
103 
104 /* Maximum number of boxes we can break the mask into. Don't have this too
105  * high, it'll make the instance huge, and gobject has a 64kb limit.
106  */
107 #define MAX_LINES (1000)
108 
109 /* The number of edges we consider at once in clustering. Higher values are
110  * faster, but risk pushing up average error in the result.
111  */
112 #define MAX_EDGES (1000)
113 
114 /* A horizontal line in the mask.
115  */
116 typedef struct _HLine {
117 	/* Start is the left-most pixel in the line, end is one beyond the
118 	 * right-most pixel.
119 	 */
120 	int start;
121 	int end;
122 
123 	/* The hlines have weights. weight 0 means this line is unused.
124 	 */
125 	int weight;
126 } HLine;
127 
128 /* For clustering. A pair of hlines and their distance. An edge in a graph.
129  */
130 typedef struct _Edge {
131 	/* The index into boxes->hline[].
132 	 */
133 	int a;
134 	int b;
135 
136 	/* The distance between them, see boxes_distance().
137 	 */
138 	int d;
139 } Edge;
140 
141 /* An element of a vline.
142  */
143 typedef struct _VElement {
144 	/* band is the index into hline[] we add, row is the row we take
145 	 * it from.
146 	 */
147 	int band;
148 	int row;
149 
150 	/* Negative lobes are made with factor -1, we also common-up repeated
151 	 * additions of the same line.
152 	 */
153 	int factor;
154 } VElement;
155 
156 /* A vline.
157  */
158 typedef struct _VLine {
159 	int band;
160 	int factor;
161 	int start;
162 	int end;
163 } VLine;
164 
165 /* A set of boxes.
166  */
167 typedef struct {
168 	VipsConvolution parent_instance;
169 
170 	VipsImage *iM;
171 
172 	int layers;
173 	int cluster;
174 
175 	int divisor;
176 	int rounding;
177 	int offset;
178 
179 	/* The horizontal lines we gather. hline[3] writes to band 3 in the
180 	 * intermediate image. max_line is the length of the longest hline:
181 	 * over 256 and we need to use an int intermediate for 8-bit images.
182 	 */
183 	int n_hline;
184 	HLine hline[MAX_LINES];
185 	int max_line;
186 
187 	/* During clustering, the top few edges we are considering.
188 	 */
189 	Edge edge[MAX_EDGES];
190 
191 	/* Scale and sum a set of hlines to make the final value.
192 	 */
193 	int n_velement;
194 	VElement velement[MAX_LINES];
195 
196 	/* And group those velements as vlines.
197 	 */
198 	int n_vline;
199 	VLine vline[MAX_LINES];
200 
201 } VipsConva;
202 
203 typedef VipsConvolutionClass VipsConvaClass;
204 
205 G_DEFINE_TYPE( VipsConva, vips_conva, VIPS_TYPE_CONVOLUTION );
206 
207 /* Euclid's algorithm. Use this to common up mults.
208  */
209 static int
gcd(int a,int b)210 gcd( int a, int b )
211 {
212 	if( b == 0 )
213 		return( abs( a ) );
214 	else
215 		return( gcd( b, a % b ) );
216 }
217 
218 static void
vips_conva_hline_start(VipsConva * conva,int x)219 vips_conva_hline_start( VipsConva *conva, int x )
220 {
221 	conva->hline[conva->n_hline].start = x;
222 	conva->hline[conva->n_hline].weight = 1;
223 }
224 
225 static int
vips_conva_hline_end(VipsConva * conva,int x,int y,int factor)226 vips_conva_hline_end( VipsConva *conva, int x, int y, int factor )
227 {
228 	VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
229 
230 	conva->hline[conva->n_hline].end = x;
231 
232 	conva->velement[conva->n_velement].row = y;
233 	conva->velement[conva->n_velement].band = conva->n_hline;
234 	conva->velement[conva->n_velement].factor = factor;
235 
236 	if( conva->n_hline >= MAX_LINES - 1 ) {
237 		vips_error( class->nickname, "%s", _( "mask too complex" ) );
238 		return( -1 );
239 	}
240 	conva->n_hline += 1;
241 
242 	if( conva->n_velement >= MAX_LINES - 1 ) {
243 		vips_error( class->nickname, "%s", _( "mask too complex" ) );
244 		return( -1 );
245 	}
246 	conva->n_velement += 1;
247 
248 	return( 0 );
249 }
250 
251 #ifdef DEBUG
252 static void
vips_conva_hprint(VipsConva * conva)253 vips_conva_hprint( VipsConva *conva )
254 {
255 	int x, y;
256 
257 	printf( "hlines:\n" );
258 	printf( "   n   b   r  f   w\n" );
259 	for( y = 0; y < conva->n_velement; y++ ) {
260 		int b = conva->velement[y].band;
261 
262 		printf( "%4d %3d %3d %2d %3d ",
263 			y, b,
264 			conva->velement[y].row,
265 			conva->velement[y].factor,
266 			conva->hline[b].weight );
267 		for( x = 0; x < 45; x++ ) {
268 			int rx = x * (conva->iM->Xsize + 1) / 45;
269 
270 			if( rx >= conva->hline[b].start &&
271 				rx < conva->hline[b].end )
272 				printf( "#" );
273 			else
274 				printf( " " );
275 		}
276 		printf( " %3d .. %3d\n",
277 			conva->hline[b].start, conva->hline[b].end );
278 	}
279 }
280 
281 static void
vips_conva_vprint(VipsConva * conva)282 vips_conva_vprint( VipsConva *conva )
283 {
284 	int y;
285 
286 	printf( "%d vlines:\n", conva->n_vline );
287 	printf( "   n  b  f      s      e\n" );
288 	for( y = 0; y < conva->n_vline; y++ )
289 		printf( "%4d %2d %2d == %3d .. %3d\n", y,
290 			conva->vline[y].band,
291 			conva->vline[y].factor,
292 			conva->vline[y].start,
293 			conva->vline[y].end );
294 
295 	printf( "divisor = %d\n", conva->divisor );
296 	printf( "rounding = %d\n", conva->rounding );
297 	printf( "offset = %d\n", conva->offset );
298 	printf( "max_line = %d\n", conva->max_line );
299 }
300 #endif /*DEBUG*/
301 
302 /* Break the mask into a set of hlines.
303  */
304 static int
vips_conva_decompose_hlines(VipsConva * conva)305 vips_conva_decompose_hlines( VipsConva *conva )
306 {
307 	VipsImage *iM = conva->iM;
308 	const int size = iM->Xsize * iM->Ysize;
309 	double *coeff = VIPS_MATRIX( iM, 0, 0 );
310 
311 	double max;
312 	double min;
313 	double depth;
314 	int layers_above;
315 	int layers_below;
316 	int z, n, x, y;
317 
318 	/* Find mask range. We must always include the zero axis in the mask.
319 	 */
320 	max = 0;
321 	min = 0;
322 	for( n = 0; n < size; n++ ) {
323 		max = VIPS_MAX( max, coeff[n] );
324 		min = VIPS_MIN( min, coeff[n] );
325 	}
326 
327 	VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: min = %g, max = %g\n",
328 		min, max );
329 
330 	/* The zero axis must fall on a layer boundary. Estimate the
331 	 * depth, find n-lines-above-zero, get exact depth, then calculate a
332 	 * fixed n-lines which includes any negative parts.
333 	 */
334 	depth = (max - min) / conva->layers;
335 	layers_above = VIPS_CEIL( max / depth );
336 	depth = max / layers_above;
337 	layers_below = VIPS_FLOOR( min / depth );
338 	conva->layers = layers_above - layers_below;
339 
340 	VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: depth = %g, layers = %d\n",
341 		depth, conva->layers );
342 
343 	/* For each layer, generate a set of lines which are inside the
344 	 * perimeter. Work down from the top.
345 	 */
346 	for( z = 0; z < conva->layers; z++ ) {
347 		/* How deep we are into the mask, as a double we can test
348 		 * against. Add half the layer depth so we can easily find >50%
349 		 * mask elements.
350 		 */
351 		double z_ph = max - (1 + z) * depth + depth / 2;
352 
353 		/* Odd, but we must avoid rounding errors that make us miss 0
354 		 * in the line above.
355 		 */
356 		int z_positive = z < layers_above;
357 
358 		for( y = 0; y < iM->Ysize; y++ ) {
359 			int inside;
360 
361 			/* Start outside the perimeter.
362 			 */
363 			inside = 0;
364 
365 			for( x = 0; x < iM->Xsize; x++ ) {
366 				double c = coeff[x + y * iM->Xsize];
367 
368 				/* The vertical line from mask[x, y] to 0 is
369 				 * inside. Is our current square (x, y) part
370 				 * of that line?
371 				 */
372 				if( (z_positive && c >= z_ph) ||
373 					(!z_positive && c <= z_ph) ) {
374 					if( !inside ) {
375 						vips_conva_hline_start( conva,
376 							x );
377 						inside = 1;
378 					}
379 				}
380 				else {
381 					if( inside ) {
382 						if( vips_conva_hline_end( conva,
383 							x, y,
384 							z_positive ? 1 : -1 ) )
385 							return( -1 );
386 						inside = 0;
387 					}
388 				}
389 			}
390 
391 			if( inside &&
392 				vips_conva_hline_end( conva,
393 					iM->Xsize, y, z_positive ? 1 : -1 ) )
394 				return( -1 );
395 		}
396 	}
397 
398 #ifdef DEBUG
399 	VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: generated %d hlines\n",
400 		conva->n_hline );
401 	vips_conva_hprint( conva );
402 #endif /*DEBUG*/
403 
404 	return( 0 );
405 }
406 
407 /* The 'distance' between a pair of hlines.
408  */
409 static int
vips_conva_distance(VipsConva * conva,int a,int b)410 vips_conva_distance( VipsConva *conva, int a, int b )
411 {
412 	g_assert( conva->hline[a].weight > 0 && conva->hline[b].weight > 0 );
413 
414 	return( abs( conva->hline[a].start - conva->hline[b].start ) +
415 		abs( conva->hline[a].end - conva->hline[b].end ) );
416 }
417 
418 /* Merge two hlines. Line b is deleted, and any refs to b in vlines updated to
419  * point at a.
420  */
421 static void
vips_conva_merge(VipsConva * conva,int a,int b)422 vips_conva_merge( VipsConva *conva, int a, int b )
423 {
424 	int i;
425 
426 	/* Scale weights.
427 	 */
428 	int fa = conva->hline[a].weight;
429 	int fb = conva->hline[b].weight;
430 	double w = (double) fb / (fa + fb);
431 
432 	/* New endpoints.
433 	 */
434 	conva->hline[a].start += w *
435 		(conva->hline[b].start - conva->hline[a].start);
436 	conva->hline[a].end += w *
437 		(conva->hline[b].end - conva->hline[a].end);
438 	conva->hline[a].weight += conva->hline[b].weight;
439 
440 	/* Update velement refs to b to refer to a instead.
441 	 */
442 	for( i = 0; i < conva->n_velement; i++ )
443 		if( conva->velement[i].band == b )
444 			conva->velement[i].band = a;
445 
446 	/* Mark b to be deleted.
447 	 */
448 	conva->hline[b].weight = 0;
449 }
450 
451 static int
edge_sortfn(const void * p1,const void * p2)452 edge_sortfn( const void *p1, const void *p2 )
453 {
454 	Edge *a = (Edge *) p1;
455 	Edge *b = (Edge *) p2;
456 
457 	return( a->d - b->d );
458 }
459 
460 /* Cluster in batches. Return non-zero if we merged some lines.
461  *
462  * This is not as accurate as rescanning the whole space on every merge, but
463  * it's far faster.
464  */
465 static int
vips_conva_cluster2(VipsConva * conva)466 vips_conva_cluster2( VipsConva *conva )
467 {
468 	int i, j, k;
469 	int worst;
470 	int worst_i;
471 	int merged;
472 
473 	for( i = 0; i < MAX_EDGES; i++ ) {
474 		conva->edge[i].a = -1;
475 		conva->edge[i].b = -1;
476 		conva->edge[i].d = 99999;
477 	}
478 	worst_i = 0;
479 	worst = conva->edge[worst_i].d;
480 
481 	for( i = 0; i < conva->n_hline; i++ ) {
482 		if( conva->hline[i].weight == 0 )
483 			continue;
484 
485 		for( j = i + 1; j < conva->n_hline; j++ ) {
486 			int distance;
487 
488 			if( conva->hline[j].weight == 0 )
489 				continue;
490 
491 			distance = vips_conva_distance( conva, i, j );
492 			if( distance < worst ) {
493 				conva->edge[worst_i].a = i;
494 				conva->edge[worst_i].b = j;
495 				conva->edge[worst_i].d = distance;
496 
497 				worst_i = 0;
498 				worst = conva->edge[worst_i].d;
499 				for( k = 0; k < MAX_EDGES; k++ )
500 					if( conva->edge[k].d > worst ) {
501 						worst = conva->edge[k].d;
502 						worst_i = k;
503 					}
504 			}
505 		}
506 	}
507 
508 	/* Sort to get closest first.
509 	 */
510 	qsort( conva->edge, MAX_EDGES, sizeof( Edge ), edge_sortfn );
511 
512 	/*
513 	printf( "edges:\n" );
514 	printf( "  n   a   b  d:\n" );
515 	for( i = 0; i < MAX_EDGES; i++ )
516 		printf( "%2i) %3d %3d %3d\n", i,
517 			conva->edge[i].a, conva->edge[i].b, conva->edge[i].d );
518 	 */
519 
520 	/* Merge from the top down.
521 	 */
522 	merged = 0;
523 	for( k = 0; k < MAX_EDGES; k++ ) {
524 		Edge *edge = &conva->edge[k];
525 
526 		if( edge->d > conva->cluster )
527 			break;
528 
529 		/* Has been removed, see loop below.
530 		 */
531 		if( edge->a == -1 )
532 			continue;
533 
534 		vips_conva_merge( conva, edge->a, edge->b );
535 		merged = 1;
536 
537 		/* Nodes a and b have vanished or been moved. Remove any edges
538 		 * which refer to them from the edge list,
539 		 */
540 		for( i = k; i < MAX_EDGES; i++ ) {
541 			Edge *edgei = &conva->edge[i];
542 
543 			if( edgei->a == edge->a ||
544 				edgei->b == edge->a ||
545 				edgei->a == edge->b ||
546 				edgei->b == edge->b )
547 				edgei->a = -1;
548 		}
549 	}
550 
551 	return( merged );
552 }
553 
554 /* Renumber after clustering. We will have removed a lot of hlines ... shuffle
555  * the rest down, adjust all the vline references.
556  */
557 static void
vips_conva_renumber(VipsConva * conva)558 vips_conva_renumber( VipsConva *conva )
559 {
560 	int i, j;
561 
562 	VIPS_DEBUG_MSG( "vips_conva_renumber: renumbering ...\n" );
563 
564 	/* Loop for all zero-weight hlines.
565 	 */
566 	for( i = 0; i < conva->n_hline; ) {
567 		if( conva->hline[i].weight > 0 ) {
568 			i++;
569 			continue;
570 		}
571 
572 		/* We move hlines i + 1 down, so we need to adjust all
573 		 * band[] refs to match.
574 		 */
575 		for( j = 0; j < conva->n_velement; j++ )
576 			if( conva->velement[j].band > i )
577 				conva->velement[j].band -= 1;
578 
579 		memmove( conva->hline + i, conva->hline + i + 1,
580 			sizeof( HLine ) * (conva->n_hline - i - 1) );
581 		conva->n_hline -= 1;
582 	}
583 
584 
585 	VIPS_DEBUG_MSG( "vips_conva_renumber: ... %d hlines remain\n",
586 		conva->n_hline );
587 }
588 
589 /* Sort by band, then factor, then row.
590  */
591 static int
velement_sortfn(const void * p1,const void * p2)592 velement_sortfn( const void *p1, const void *p2 )
593 {
594 	VElement *a = (VElement *) p1;
595 	VElement *b = (VElement *) p2;
596 
597 	if( a->band != b->band )
598 		return( a->band - b->band );
599 
600 	if( a->factor != b->factor )
601 		return( a->factor - b->factor );
602 
603 	return( a->row - b->row );
604 }
605 
606 static void
vips_conva_vline(VipsConva * conva)607 vips_conva_vline( VipsConva *conva )
608 {
609 	int y, z;
610 
611 	VIPS_DEBUG_MSG( "vips_conva_vline: forming vlines ...\n" );
612 
613 	/* Sort to get elements which could form a vline together.
614 	 */
615 	qsort( conva->velement, conva->n_velement, sizeof( VElement ),
616 		velement_sortfn );
617 
618 #ifdef DEBUG
619 	vips_conva_hprint( conva );
620 #endif /*DEBUG*/
621 
622 	/* If two lines have the same row and band, we can join them and knock
623 	 * up the factor instead.
624 	 */
625 	for( y = 0; y < conva->n_velement; y++ ) {
626 		for( z = y + 1; z < conva->n_velement; z++ )
627 			if( conva->velement[z].band !=
628 				conva->velement[y].band ||
629 				conva->velement[z].row !=
630 					conva->velement[y].row )
631 				break;
632 
633 		/* We need to keep the sign of the old factor.
634 		 */
635 		if( conva->velement[y].factor > 0 )
636 			conva->velement[y].factor = z - y;
637 		else
638 			conva->velement[y].factor = y - z;
639 		memmove( conva->velement + y + 1, conva->velement + z,
640 			sizeof( VElement ) * (conva->n_velement - z) );
641 		conva->n_velement -= z - y - 1;
642 	}
643 
644 #ifdef DEBUG
645 	printf( "after commoning up, %d velement remain\n", conva->n_velement );
646 	vips_conva_hprint( conva );
647 #endif /*DEBUG*/
648 
649 	conva->n_vline = 0;
650 	for( y = 0; y < conva->n_velement; ) {
651 		int n = conva->n_vline;
652 
653 		/* Start of a line.
654 		 */
655 		conva->vline[n].band = conva->velement[y].band;
656 		conva->vline[n].factor = conva->velement[y].factor;
657 		conva->vline[n].start = conva->velement[y].row;
658 
659 		/* Search for the end of this line.
660 		 */
661 		for( z = y + 1; z < conva->n_velement; z++ )
662 			if( conva->velement[z].band !=
663 					conva->vline[n].band ||
664 				conva->velement[z].factor !=
665 					conva->vline[n].factor ||
666 				conva->velement[z].row !=
667 					conva->vline[n].start + z - y )
668 				break;
669 
670 		/* So the line ends at the previously examined element. We
671 		 * want 'end' to be one beyond that (non-inclusive).
672 		 */
673 		conva->vline[n].end = conva->velement[z - 1].row + 1;
674 
675 		conva->n_vline += 1;
676 		y = z;
677 	}
678 
679 	VIPS_DEBUG_MSG( "vips_conva_vline: found %d vlines\n", conva->n_vline );
680 }
681 
682 /* Break a mask into boxes.
683  */
684 static int
vips_conva_decompose_boxes(VipsConva * conva)685 vips_conva_decompose_boxes( VipsConva *conva )
686 {
687 	VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
688 	VipsImage *iM = conva->iM;
689 	double *coeff = VIPS_MATRIX( iM, 0, 0 );
690 	const int size = iM->Xsize * iM->Ysize;
691 	double scale = vips_image_get_scale( iM );
692 	double offset = vips_image_get_offset( iM );
693 
694 	double sum;
695 	double area;
696 	int x, y, z;
697 
698 	if( vips_conva_decompose_hlines( conva ) )
699 		return( -1 );
700 
701 	/* Cluster to find groups of lines.
702 	 */
703 	VIPS_DEBUG_MSG( "vips_conva_decompose_boxes: "
704 		"clustering hlines with thresh %d ...\n", conva->cluster );
705 	while( vips_conva_cluster2( conva ) )
706 		;
707 
708 	/* Renumber to remove holes created by clustering.
709 	 */
710 	vips_conva_renumber( conva );
711 
712 	/* Find a set of vlines for the remaining hlines.
713 	 */
714 	vips_conva_vline( conva );
715 
716 	/* Find the area of the lines and the length of the longest hline. We
717 	 * find the absolute area, we don't want -ves to cancel.
718 	 */
719 	area = 0;
720 	conva->max_line = 0;
721 	for( y = 0; y < conva->n_velement; y++ ) {
722 		x = conva->velement[y].band;
723 		z = conva->hline[x].end - conva->hline[x].start;
724 
725 		area += abs( conva->velement[y].factor * z );
726 		if( z > conva->max_line )
727 			conva->max_line = z;
728 	}
729 
730 	/* Strength reduction: if all lines are divisible by n, we can move
731 	 * that n out into the area factor. The aim is to produce as many
732 	 * factor 1 lines as we can and to reduce the chance of overflow.
733 	 */
734 	x = conva->velement[0].factor;
735 	for( y = 1; y < conva->n_velement; y++ )
736 		x = gcd( x, conva->velement[y].factor );
737 	for( y = 0; y < conva->n_velement; y++ )
738 		conva->velement[y].factor /= x;
739 	area *= x;
740 
741 	/* Find the area of the original mask. Again, don't let -ves cancel.
742 	 */
743 	sum = 0;
744 	for( z = 0; z < size; z++ )
745 		sum += fabs( coeff[z] );
746 
747 	conva->divisor = VIPS_RINT( area * scale / sum );
748 	conva->rounding = (conva->divisor + 1) / 2;
749 	conva->offset = offset;
750 
751 #ifdef DEBUG
752 	vips_conva_hprint( conva );
753 	vips_conva_vprint( conva );
754 #endif /*DEBUG*/
755 
756 	/* With 512x512 tiles, each hline requires 3mb of intermediate per
757 	 * thread ... 300 lines is about a gb per thread, ouch.
758 	 */
759 	if( conva->n_hline > 150 ) {
760 		vips_error( class->nickname, "%s", _( "mask too complex" ) );
761 		return( -1 );
762 	}
763 
764 	return( 0 );
765 }
766 
767 /* Our sequence value.
768  */
769 typedef struct {
770 	VipsConva *conva;
771 
772 	VipsRegion *ir;		/* Input region */
773 
774 	/* Offsets for start and stop.
775 	 */
776 	int *start;
777 	int *end;
778 
779 	int last_stride;	/* Avoid recalcing offsets, if we can */
780 
781 	/* The rolling sums. int for integer types, double for floating point
782 	 * types.
783 	 */
784 	void *sum;
785 } VipsConvaSeq;
786 
787 /* Free a sequence value.
788  */
789 static int
vips_conva_stop(void * vseq,void * a,void * b)790 vips_conva_stop( void *vseq, void *a, void *b )
791 {
792 	VipsConvaSeq *seq = (VipsConvaSeq *) vseq;
793 
794 	VIPS_UNREF( seq->ir );
795 
796 	return( 0 );
797 }
798 
799 /* Convolution start function.
800  */
801 static void *
vips_conva_start(VipsImage * out,void * a,void * b)802 vips_conva_start( VipsImage *out, void *a, void *b )
803 {
804 	VipsImage *in = (VipsImage *) a;
805 	VipsConva *conva = (VipsConva *) b;
806 
807 	VipsConvaSeq *seq;
808 
809 	seq = VIPS_NEW( out, VipsConvaSeq );
810 	seq->conva = conva;
811 	seq->ir = vips_region_new( in );
812 
813 	/* n_velement should be the largest possible dimension.
814 	 */
815 	g_assert( conva->n_velement >= conva->n_hline );
816 	g_assert( conva->n_velement >= conva->n_vline );
817 
818 	seq->start = VIPS_ARRAY( out, conva->n_velement, int );
819 	seq->end = VIPS_ARRAY( out, conva->n_velement, int );
820 
821 	if( vips_band_format_isint( out->BandFmt ) )
822 		seq->sum = VIPS_ARRAY( out, conva->n_velement, int );
823 	else
824 		seq->sum = VIPS_ARRAY( out, conva->n_velement, double );
825 	seq->last_stride = -1;
826 
827 	return( seq );
828 }
829 
830 /* The h and v loops are very similar, but also annoyingly different. Keep
831  * them separate for easy debugging.
832  */
833 
834 #define HCONV( IN, OUT ) \
835 G_STMT_START { \
836 	for( i = 0; i < bands; i++ ) { \
837 		OUT *seq_sum = (OUT *) seq->sum; \
838 		\
839 		IN *p; \
840 		OUT *q; \
841 		\
842 		p = i + (IN *) VIPS_REGION_ADDR( ir, r->left, r->top + y ); \
843 		q = i * n_hline + \
844 			(OUT *) VIPS_REGION_ADDR( or, r->left, r->top + y ); \
845 		\
846 		for( z = 0; z < n_hline; z++ ) { \
847 			seq_sum[z] = 0; \
848 			for( x = conva->hline[z].start; \
849 				x < conva->hline[z].end; x++ ) \
850 				seq_sum[z] += p[x * istride]; \
851 			q[z] = seq_sum[z]; \
852 		} \
853 		q += ostride; \
854 		\
855 		for( x = 1; x < r->width; x++ ) {  \
856 			for( z = 0; z < n_hline; z++ ) { \
857 				seq_sum[z] += p[seq->end[z]]; \
858 				seq_sum[z] -= p[seq->start[z]]; \
859 				q[z] = seq_sum[z]; \
860 			} \
861 			p += istride; \
862 			q += ostride; \
863 		} \
864 	} \
865 } G_STMT_END
866 
867 /* Do horizontal masks ... we scan the mask along scanlines.
868  */
869 static int
vips_conva_hgenerate(VipsRegion * or,void * vseq,void * a,void * b,gboolean * stop)870 vips_conva_hgenerate( VipsRegion *or, void *vseq,
871 	void *a, void *b, gboolean *stop )
872 {
873 	VipsConvaSeq *seq = (VipsConvaSeq *) vseq;
874 	VipsImage *in = (VipsImage *) a;
875 	VipsConva *conva = (VipsConva *) b;
876 
877 	VipsRegion *ir = seq->ir;
878 	const int n_hline = conva->n_hline;
879 	VipsImage *iM = conva->iM;
880 	VipsRect *r = &or->valid;
881 
882 	/* Double the bands (notionally) for complex.
883 	 */
884 	int bands = vips_band_format_iscomplex( in->BandFmt ) ?
885 		2 * in->Bands : in->Bands;
886 
887 	VipsRect s;
888 	int x, y, z, i;
889 	int istride;
890 	int ostride;
891 
892 	/* Prepare the section of the input image we need. A little larger
893 	 * than the section of the output image we are producing.
894 	 */
895 	s = *r;
896 	s.width += iM->Xsize - 1;
897 	if( vips_region_prepare( ir, &s ) )
898 		return( -1 );
899 
900 	istride = VIPS_IMAGE_SIZEOF_PEL( in ) /
901 		VIPS_IMAGE_SIZEOF_ELEMENT( in );
902 	ostride = VIPS_IMAGE_SIZEOF_PEL( or->im ) /
903 		VIPS_IMAGE_SIZEOF_ELEMENT( or->im );
904 
905         /* Init offset array.
906 	 */
907 	if( seq->last_stride != istride ) {
908 		seq->last_stride = istride;
909 
910 		for( z = 0; z < n_hline; z++ ) {
911 			seq->start[z] = conva->hline[z].start * istride;
912 			seq->end[z] = conva->hline[z].end * istride;
913 		}
914 	}
915 
916 	for( y = 0; y < r->height; y++ ) {
917 		switch( in->BandFmt ) {
918 		case VIPS_FORMAT_UCHAR:
919 			if( conva->max_line < 256 )
920 				HCONV( unsigned char, unsigned short );
921 			else
922 				HCONV( unsigned char, unsigned int );
923 			break;
924 
925 		case VIPS_FORMAT_CHAR:
926 			if( conva->max_line < 256 )
927 				HCONV( signed char, signed short );
928 			else
929 				HCONV( signed char, signed int );
930 			break;
931 
932 		case VIPS_FORMAT_USHORT:
933 			HCONV( unsigned short, unsigned int );
934 			break;
935 
936 		case VIPS_FORMAT_SHORT:
937 			HCONV( signed short, signed int );
938 			break;
939 
940 		case VIPS_FORMAT_UINT:
941 			HCONV( unsigned int, unsigned int );
942 			break;
943 
944 		case VIPS_FORMAT_INT:
945 			HCONV( signed int, signed int );
946 			break;
947 
948 		case VIPS_FORMAT_FLOAT:
949 			HCONV( float, float );
950 			break;
951 
952 		case VIPS_FORMAT_DOUBLE:
953 			HCONV( double, double );
954 			break;
955 
956 		case VIPS_FORMAT_COMPLEX:
957 			HCONV( float, float );
958 			break;
959 
960 		case VIPS_FORMAT_DPCOMPLEX:
961 			HCONV( double, double );
962 			break;
963 
964 		default:
965 			g_assert_not_reached();
966 		}
967 	}
968 
969 	return( 0 );
970 }
971 
972 static int
vips_conva_horizontal(VipsConva * conva,VipsImage * in,VipsImage ** out)973 vips_conva_horizontal( VipsConva *conva, VipsImage *in, VipsImage **out )
974 {
975 	VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
976 
977 	/* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
978 	 * would be 1x1.
979 	 */
980 	*out = vips_image_new();
981 	if( vips_image_pipelinev( *out,
982 		VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) )
983 		return( -1 );
984 
985 	(*out)->Xsize -= conva->iM->Xsize - 1;
986 	if( (*out)->Xsize <= 0 ) {
987 		vips_error( class->nickname,
988 			"%s", _( "image too small for mask" ) );
989 		return( -1 );
990 	}
991 	(*out)->Bands *= conva->n_hline;
992 
993 	/* Short u?char lines can use u?short intermediate.
994 	 */
995 	if( vips_band_format_isuint( in->BandFmt ) )
996 		(*out)->BandFmt = conva->max_line < 256 ?
997 			VIPS_FORMAT_USHORT : VIPS_FORMAT_UINT;
998 	else if( vips_band_format_isint( in->BandFmt ) )
999 		(*out)->BandFmt = conva->max_line < 256 ?
1000 			VIPS_FORMAT_SHORT : VIPS_FORMAT_INT;
1001 
1002 	if( vips_image_generate( *out,
1003 		vips_conva_start, vips_conva_hgenerate, vips_conva_stop,
1004 		in, conva ) )
1005 		return( -1 );
1006 
1007 	return( 0 );
1008 }
1009 
1010 #define CLIP_UCHAR( V ) \
1011 G_STMT_START { \
1012 	if( (V) < 0 ) \
1013 		(V) = 0; \
1014 	else if( (V) > UCHAR_MAX ) \
1015 		(V) = UCHAR_MAX; \
1016 } G_STMT_END
1017 
1018 #define CLIP_CHAR( V ) \
1019 G_STMT_START { \
1020 	if( (V) < SCHAR_MIN ) \
1021 		(V) = SCHAR_MIN; \
1022 	else if( (V) > SCHAR_MAX ) \
1023 		(V) = SCHAR_MAX; \
1024 } G_STMT_END
1025 
1026 #define CLIP_USHORT( V ) \
1027 G_STMT_START { \
1028 	if( (V) < 0 ) \
1029 		(V) = 0; \
1030 	else if( (V) > USHRT_MAX ) \
1031 		(V) = USHRT_MAX; \
1032 } G_STMT_END
1033 
1034 #define CLIP_SHORT( V ) \
1035 G_STMT_START { \
1036 	if( (V) < SHRT_MIN ) \
1037 		(V) = SHRT_MIN; \
1038 	else if( (V) > SHRT_MAX ) \
1039 		(V) = SHRT_MAX; \
1040 } G_STMT_END
1041 
1042 #define CLIP_NONE( V ) {}
1043 
1044 #define VCONV( ACC, IN, OUT, CLIP ) \
1045 G_STMT_START { \
1046 	for( x = 0; x < sz; x++ ) { \
1047 		ACC *seq_sum = (ACC *) seq->sum; \
1048 		\
1049 		IN *p; \
1050 		OUT *q; \
1051 		ACC sum; \
1052 		\
1053 		p = x * conva->n_hline + \
1054 			(IN *) VIPS_REGION_ADDR( ir, r->left, r->top ); \
1055 		q = x + (OUT *) VIPS_REGION_ADDR( or, r->left, r->top ); \
1056 		\
1057 		sum = 0; \
1058 		for( z = 0; z < n_vline; z++ ) { \
1059 			seq_sum[z] = 0; \
1060 			for( k = conva->vline[z].start; \
1061 				k < conva->vline[z].end; k++ ) \
1062 				seq_sum[z] += p[k * istride + \
1063 					conva->vline[z].band]; \
1064 			sum += conva->vline[z].factor * seq_sum[z]; \
1065 		} \
1066 		sum = (sum + conva->rounding) / conva->divisor + conva->offset; \
1067 		CLIP( sum ); \
1068 		*q = sum; \
1069 		q += ostride; \
1070 		\
1071 		for( y = 1; y < r->height; y++ ) { \
1072 			sum = 0;\
1073 			for( z = 0; z < n_vline; z++ ) { \
1074 				seq_sum[z] += p[seq->end[z]]; \
1075 				seq_sum[z] -= p[seq->start[z]]; \
1076 				sum += conva->vline[z].factor * seq_sum[z]; \
1077 			} \
1078 			p += istride; \
1079 			sum = (sum + conva->rounding) / conva->divisor + \
1080 				conva->offset; \
1081 			CLIP( sum ); \
1082 			*q = sum; \
1083 			q += ostride; \
1084 		} \
1085 	} \
1086 } G_STMT_END
1087 
1088 /* Do vertical masks ... we scan the mask down columns of pixels.
1089  */
1090 static int
vips_conva_vgenerate(VipsRegion * or,void * vseq,void * a,void * b,gboolean * stop)1091 vips_conva_vgenerate( VipsRegion *or, void *vseq,
1092 	void *a, void *b, gboolean *stop )
1093 {
1094 	VipsConvaSeq *seq = (VipsConvaSeq *) vseq;
1095 	VipsImage *in = (VipsImage *) a;
1096 	VipsConva *conva = (VipsConva *) b;
1097 	VipsConvolution *convolution = (VipsConvolution *) conva;
1098 
1099 	VipsRegion *ir = seq->ir;
1100 	const int n_vline = conva->n_vline;
1101 	VipsImage *iM = conva->iM;
1102 	VipsRect *r = &or->valid;
1103 
1104 	/* Double the width (notionally) for complex.
1105 	 */
1106 	int sz = vips_band_format_iscomplex( in->BandFmt ) ?
1107 		2 * VIPS_REGION_N_ELEMENTS( or ) : VIPS_REGION_N_ELEMENTS( or );
1108 
1109 	VipsRect s;
1110 	int x, y, z, k;
1111 	int istride;
1112 	int ostride;
1113 
1114 	/* Prepare the section of the input image we need. A little larger
1115 	 * than the section of the output image we are producing.
1116 	 */
1117 	s = *r;
1118 	s.height += iM->Ysize - 1;
1119 	if( vips_region_prepare( ir, &s ) )
1120 		return( -1 );
1121 
1122 	istride = VIPS_REGION_LSKIP( ir ) /
1123 		VIPS_IMAGE_SIZEOF_ELEMENT( in );
1124 	ostride = VIPS_REGION_LSKIP( or ) /
1125 		VIPS_IMAGE_SIZEOF_ELEMENT( convolution->out );
1126 
1127         /* Init offset array.
1128 	 */
1129 	if( seq->last_stride != istride ) {
1130 		seq->last_stride = istride;
1131 
1132 		for( z = 0; z < n_vline; z++ ) {
1133 			seq->start[z] = conva->vline[z].band +
1134 				conva->vline[z].start * istride;
1135 			seq->end[z] = conva->vline[z].band +
1136 				conva->vline[z].end * istride;
1137 		}
1138 	}
1139 
1140 	switch( convolution->in->BandFmt ) {
1141 	case VIPS_FORMAT_UCHAR:
1142 		if( conva->max_line < 256 )
1143 			VCONV( unsigned int, \
1144 				unsigned short, unsigned char, CLIP_UCHAR );
1145 		else
1146 			VCONV( unsigned int, \
1147 				unsigned int, unsigned char, CLIP_UCHAR );
1148 		break;
1149 
1150 	case VIPS_FORMAT_CHAR:
1151 		if( conva->max_line < 256 )
1152 			VCONV( signed int, \
1153 				signed short, signed char, CLIP_CHAR );
1154 		else
1155 			VCONV( signed int, \
1156 				signed int, signed char, CLIP_CHAR );
1157 		break;
1158 
1159 	case VIPS_FORMAT_USHORT:
1160 		VCONV( unsigned int, \
1161 			unsigned int, unsigned short, CLIP_USHORT );
1162 		break;
1163 
1164 	case VIPS_FORMAT_SHORT:
1165 		VCONV( signed int, signed int, signed short, CLIP_SHORT );
1166 		break;
1167 
1168 	case VIPS_FORMAT_UINT:
1169 		VCONV( unsigned int, unsigned int, unsigned int, CLIP_NONE );
1170 		break;
1171 
1172 	case VIPS_FORMAT_INT:
1173 		VCONV( signed int, signed int, signed int, CLIP_NONE );
1174 		break;
1175 
1176 	case VIPS_FORMAT_FLOAT:
1177 		VCONV( float, float, float, CLIP_NONE );
1178 		break;
1179 
1180 	case VIPS_FORMAT_DOUBLE:
1181 		VCONV( double, double, double, CLIP_NONE );
1182 		break;
1183 
1184 	case VIPS_FORMAT_COMPLEX:
1185 		VCONV( float, float, float, CLIP_NONE );
1186 		break;
1187 
1188 	case VIPS_FORMAT_DPCOMPLEX:
1189 		VCONV( double, double, double, CLIP_NONE );
1190 		break;
1191 
1192 	default:
1193 		g_assert_not_reached();
1194 	}
1195 
1196 	return( 0 );
1197 }
1198 
1199 static int
vips_conva_vertical(VipsConva * conva,VipsImage * in,VipsImage ** out)1200 vips_conva_vertical( VipsConva *conva, VipsImage *in, VipsImage **out )
1201 {
1202 	VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
1203 	VipsConvolution *convolution = (VipsConvolution *) conva;
1204 
1205 	/* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
1206 	 * would be 1x1.
1207 	 */
1208 	*out = vips_image_new();
1209 	if( vips_image_pipelinev( *out,
1210 		VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) )
1211 		return( -1 );
1212 
1213 	(*out)->Ysize -= conva->iM->Ysize - 1;
1214 	if( (*out)->Ysize <= 0 ) {
1215 		vips_error( class->nickname,
1216 			"%s", _( "image too small for mask" ) );
1217 		return( -1 );
1218 	}
1219 	(*out)->Bands = convolution->in->Bands;
1220 	(*out)->BandFmt = convolution->in->BandFmt;
1221 
1222 	if( vips_image_generate( *out,
1223 		vips_conva_start, vips_conva_vgenerate, vips_conva_stop,
1224 		in, conva ) )
1225 		return( -1 );
1226 
1227 	return( 0 );
1228 }
1229 
1230 static int
vips_conva_build(VipsObject * object)1231 vips_conva_build( VipsObject *object )
1232 {
1233 	VipsConvolution *convolution = (VipsConvolution *) object;
1234 	VipsConva *conva = (VipsConva *) object;
1235 	VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 );
1236 
1237 	VipsImage *in;
1238 
1239 	if( VIPS_OBJECT_CLASS( vips_conva_parent_class )->build( object ) )
1240 		return( -1 );
1241 
1242 	/* An int version of our mask.
1243 	 */
1244 	if( vips__image_intize( convolution->M, &t[0] ) )
1245 		return( -1 );
1246 	conva->iM = t[0];
1247 
1248 #ifdef DEBUG
1249 	printf( "vips_conva_build: iM =\n" );
1250 	vips_matrixprint( conva->iM, NULL );
1251 #endif /*DEBUG*/
1252 
1253 	in = convolution->in;
1254 
1255 	if( vips_conva_decompose_boxes( conva ) )
1256 	       return( -1 );
1257 
1258 	g_object_set( conva, "out", vips_image_new(), NULL );
1259 	if(
1260 		vips_embed( in, &t[1],
1261 			t[0]->Xsize / 2,
1262 			t[0]->Ysize / 2,
1263 			in->Xsize + t[0]->Xsize - 1,
1264 			in->Ysize + t[0]->Ysize - 1,
1265 			"extend", VIPS_EXTEND_COPY,
1266 			NULL ) ||
1267 		vips_conva_horizontal( conva, t[1], &t[2] ) ||
1268 		vips_conva_vertical( conva, t[2], &t[3] ) ||
1269 		vips_image_write( t[3], convolution->out ) )
1270 		return( -1 );
1271 
1272 	convolution->out->Xoffset = 0;
1273 	convolution->out->Yoffset = 0;
1274 
1275 	return( 0 );
1276 }
1277 
1278 static void
vips_conva_class_init(VipsConvaClass * class)1279 vips_conva_class_init( VipsConvaClass *class )
1280 {
1281 	GObjectClass *gobject_class = G_OBJECT_CLASS( class );
1282 	VipsObjectClass *object_class = (VipsObjectClass *) class;
1283 
1284 	gobject_class->set_property = vips_object_set_property;
1285 	gobject_class->get_property = vips_object_get_property;
1286 
1287 	object_class->nickname = "conva";
1288 	object_class->description = _( "approximate integer convolution" );
1289 	object_class->build = vips_conva_build;
1290 
1291 	VIPS_ARG_INT( class, "layers", 104,
1292 		_( "Layers" ),
1293 		_( "Use this many layers in approximation" ),
1294 		VIPS_ARGUMENT_OPTIONAL_INPUT,
1295 		G_STRUCT_OFFSET( VipsConva, layers ),
1296 		1, 1000, 5 );
1297 
1298 	VIPS_ARG_INT( class, "cluster", 105,
1299 		_( "Cluster" ),
1300 		_( "Cluster lines closer than this in approximation" ),
1301 		VIPS_ARGUMENT_OPTIONAL_INPUT,
1302 		G_STRUCT_OFFSET( VipsConva, cluster ),
1303 		1, 100, 1 );
1304 
1305 }
1306 
1307 static void
vips_conva_init(VipsConva * conva)1308 vips_conva_init( VipsConva *conva )
1309 {
1310         conva->layers = 5;
1311         conva->cluster = 1;
1312 }
1313 
1314 /**
1315  * vips_conva: (method)
1316  * @in: input image
1317  * @out: (out): output image
1318  * @mask: convolution mask
1319  * @...: %NULL-terminated list of optional named arguments
1320  *
1321  * Optional arguments:
1322  *
1323  * * @layers: %gint, number of layers for approximation
1324  * * @cluster: %gint, cluster lines closer than this distance
1325  *
1326  * Perform an approximate integer convolution of @in with @mask.
1327  * This is a low-level operation, see
1328  * vips_conv() for something more convenient.
1329  *
1330  * The output image
1331  * always has the same #VipsBandFormat as the input image.
1332  * Elements of @mask are converted to
1333  * integers before convolution.
1334  *
1335  * Larger values for @layers give more accurate
1336  * results, but are slower. As @layers approaches the mask radius, the
1337  * accuracy will become close to exact convolution and the speed will drop to
1338  * match. For many large masks, such as Gaussian, @layers need be only 10% of
1339  * this value and accuracy will still be good.
1340  *
1341  * Smaller values of @cluster will give more accurate results, but be slower
1342  * and use more memory. 10% of the mask radius is a good rule of thumb.
1343  *
1344  * See also: vips_conv().
1345  *
1346  * Returns: 0 on success, -1 on error
1347  */
1348 int
vips_conva(VipsImage * in,VipsImage ** out,VipsImage * mask,...)1349 vips_conva( VipsImage *in, VipsImage **out, VipsImage *mask, ... )
1350 {
1351 	va_list ap;
1352 	int result;
1353 
1354 	va_start( ap, mask );
1355 	result = vips_call_split( "conva", ap, in, out, mask );
1356 	va_end( ap );
1357 
1358 	return( result );
1359 }
1360 
1361