1 /* conva ... approximate convolution
2 *
3 * This operation does an approximate convolution.
4 *
5 * Author: John Cupitt & Nicolas Robidoux
6 * Written on: 31/5/11
7 * Modified on:
8 * 31/5/11
9 * - from im_aconvsep()
10 * 10/7/16
11 * - redone as a class
12 */
13
14 /*
15
16 This file is part of VIPS.
17
18 VIPS is free software; you can redistribute it and/or modify
19 it under the terms of the GNU Lesser General Public License as published by
20 the Free Software Foundation; either version 2 of the License, or
21 (at your option) any later version.
22
23 This program is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 GNU Lesser General Public License for more details.
27
28 You should have received a copy of the GNU Lesser General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301 USA
32
33 */
34
35 /*
36
37 These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
38
39 */
40
41 /*
42
43 See:
44
45 http://incubator.quasimondo.com/processing/stackblur.pde
46
47 This thing is a little like stackblur, but generalised to any 2D
48 convolution.
49
50 */
51
52 /*
53
54 TODO
55
56 timing:
57
58 $ time vips im_conv_f img_0075.jpg x2.v g2d201.con
59 real 5m3.359s
60 user 9m34.700s
61 sys 0m1.500s
62
63 $ time vips im_aconv img_0075.jpg x.v g2d201.con 10 10
64 real 0m3.151s
65 user 0m5.640s
66 sys 0m0.100s
67
68 $ vips im_subtract x.v x2.v diff.v
69 $ vips im_abs diff.v abs.v
70 $ vips im_max abs.v
71 2.70833
72
73 - are we handling mask offset correctly?
74
75 - could we do better with an h and a v cumulativization image? we might
76 not need the huge intermediate we have now, since any line sum an be
77 found with simple indexing
78
79 */
80
81 /*
82 #define DEBUG
83 #define VIPS_DEBUG
84 */
85
86 #ifdef HAVE_CONFIG_H
87 #include <config.h>
88 #endif /*HAVE_CONFIG_H*/
89 #include <vips/intl.h>
90
91 #include <stdio.h>
92 #include <string.h>
93 #include <stdlib.h>
94 #include <limits.h>
95 #include <math.h>
96
97 #include <vips/vips.h>
98 #include <vips/vector.h>
99 #include <vips/debug.h>
100 #include <vips/internal.h>
101
102 #include "pconvolution.h"
103
104 /* Maximum number of boxes we can break the mask into. Don't have this too
105 * high, it'll make the instance huge, and gobject has a 64kb limit.
106 */
107 #define MAX_LINES (1000)
108
109 /* The number of edges we consider at once in clustering. Higher values are
110 * faster, but risk pushing up average error in the result.
111 */
112 #define MAX_EDGES (1000)
113
114 /* A horizontal line in the mask.
115 */
116 typedef struct _HLine {
117 /* Start is the left-most pixel in the line, end is one beyond the
118 * right-most pixel.
119 */
120 int start;
121 int end;
122
123 /* The hlines have weights. weight 0 means this line is unused.
124 */
125 int weight;
126 } HLine;
127
128 /* For clustering. A pair of hlines and their distance. An edge in a graph.
129 */
130 typedef struct _Edge {
131 /* The index into boxes->hline[].
132 */
133 int a;
134 int b;
135
136 /* The distance between them, see boxes_distance().
137 */
138 int d;
139 } Edge;
140
141 /* An element of a vline.
142 */
143 typedef struct _VElement {
144 /* band is the index into hline[] we add, row is the row we take
145 * it from.
146 */
147 int band;
148 int row;
149
150 /* Negative lobes are made with factor -1, we also common-up repeated
151 * additions of the same line.
152 */
153 int factor;
154 } VElement;
155
156 /* A vline.
157 */
158 typedef struct _VLine {
159 int band;
160 int factor;
161 int start;
162 int end;
163 } VLine;
164
165 /* A set of boxes.
166 */
167 typedef struct {
168 VipsConvolution parent_instance;
169
170 VipsImage *iM;
171
172 int layers;
173 int cluster;
174
175 int divisor;
176 int rounding;
177 int offset;
178
179 /* The horizontal lines we gather. hline[3] writes to band 3 in the
180 * intermediate image. max_line is the length of the longest hline:
181 * over 256 and we need to use an int intermediate for 8-bit images.
182 */
183 int n_hline;
184 HLine hline[MAX_LINES];
185 int max_line;
186
187 /* During clustering, the top few edges we are considering.
188 */
189 Edge edge[MAX_EDGES];
190
191 /* Scale and sum a set of hlines to make the final value.
192 */
193 int n_velement;
194 VElement velement[MAX_LINES];
195
196 /* And group those velements as vlines.
197 */
198 int n_vline;
199 VLine vline[MAX_LINES];
200
201 } VipsConva;
202
203 typedef VipsConvolutionClass VipsConvaClass;
204
205 G_DEFINE_TYPE( VipsConva, vips_conva, VIPS_TYPE_CONVOLUTION );
206
207 /* Euclid's algorithm. Use this to common up mults.
208 */
209 static int
gcd(int a,int b)210 gcd( int a, int b )
211 {
212 if( b == 0 )
213 return( abs( a ) );
214 else
215 return( gcd( b, a % b ) );
216 }
217
218 static void
vips_conva_hline_start(VipsConva * conva,int x)219 vips_conva_hline_start( VipsConva *conva, int x )
220 {
221 conva->hline[conva->n_hline].start = x;
222 conva->hline[conva->n_hline].weight = 1;
223 }
224
225 static int
vips_conva_hline_end(VipsConva * conva,int x,int y,int factor)226 vips_conva_hline_end( VipsConva *conva, int x, int y, int factor )
227 {
228 VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
229
230 conva->hline[conva->n_hline].end = x;
231
232 conva->velement[conva->n_velement].row = y;
233 conva->velement[conva->n_velement].band = conva->n_hline;
234 conva->velement[conva->n_velement].factor = factor;
235
236 if( conva->n_hline >= MAX_LINES - 1 ) {
237 vips_error( class->nickname, "%s", _( "mask too complex" ) );
238 return( -1 );
239 }
240 conva->n_hline += 1;
241
242 if( conva->n_velement >= MAX_LINES - 1 ) {
243 vips_error( class->nickname, "%s", _( "mask too complex" ) );
244 return( -1 );
245 }
246 conva->n_velement += 1;
247
248 return( 0 );
249 }
250
251 #ifdef DEBUG
252 static void
vips_conva_hprint(VipsConva * conva)253 vips_conva_hprint( VipsConva *conva )
254 {
255 int x, y;
256
257 printf( "hlines:\n" );
258 printf( " n b r f w\n" );
259 for( y = 0; y < conva->n_velement; y++ ) {
260 int b = conva->velement[y].band;
261
262 printf( "%4d %3d %3d %2d %3d ",
263 y, b,
264 conva->velement[y].row,
265 conva->velement[y].factor,
266 conva->hline[b].weight );
267 for( x = 0; x < 45; x++ ) {
268 int rx = x * (conva->iM->Xsize + 1) / 45;
269
270 if( rx >= conva->hline[b].start &&
271 rx < conva->hline[b].end )
272 printf( "#" );
273 else
274 printf( " " );
275 }
276 printf( " %3d .. %3d\n",
277 conva->hline[b].start, conva->hline[b].end );
278 }
279 }
280
281 static void
vips_conva_vprint(VipsConva * conva)282 vips_conva_vprint( VipsConva *conva )
283 {
284 int y;
285
286 printf( "%d vlines:\n", conva->n_vline );
287 printf( " n b f s e\n" );
288 for( y = 0; y < conva->n_vline; y++ )
289 printf( "%4d %2d %2d == %3d .. %3d\n", y,
290 conva->vline[y].band,
291 conva->vline[y].factor,
292 conva->vline[y].start,
293 conva->vline[y].end );
294
295 printf( "divisor = %d\n", conva->divisor );
296 printf( "rounding = %d\n", conva->rounding );
297 printf( "offset = %d\n", conva->offset );
298 printf( "max_line = %d\n", conva->max_line );
299 }
300 #endif /*DEBUG*/
301
302 /* Break the mask into a set of hlines.
303 */
304 static int
vips_conva_decompose_hlines(VipsConva * conva)305 vips_conva_decompose_hlines( VipsConva *conva )
306 {
307 VipsImage *iM = conva->iM;
308 const int size = iM->Xsize * iM->Ysize;
309 double *coeff = VIPS_MATRIX( iM, 0, 0 );
310
311 double max;
312 double min;
313 double depth;
314 int layers_above;
315 int layers_below;
316 int z, n, x, y;
317
318 /* Find mask range. We must always include the zero axis in the mask.
319 */
320 max = 0;
321 min = 0;
322 for( n = 0; n < size; n++ ) {
323 max = VIPS_MAX( max, coeff[n] );
324 min = VIPS_MIN( min, coeff[n] );
325 }
326
327 VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: min = %g, max = %g\n",
328 min, max );
329
330 /* The zero axis must fall on a layer boundary. Estimate the
331 * depth, find n-lines-above-zero, get exact depth, then calculate a
332 * fixed n-lines which includes any negative parts.
333 */
334 depth = (max - min) / conva->layers;
335 layers_above = VIPS_CEIL( max / depth );
336 depth = max / layers_above;
337 layers_below = VIPS_FLOOR( min / depth );
338 conva->layers = layers_above - layers_below;
339
340 VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: depth = %g, layers = %d\n",
341 depth, conva->layers );
342
343 /* For each layer, generate a set of lines which are inside the
344 * perimeter. Work down from the top.
345 */
346 for( z = 0; z < conva->layers; z++ ) {
347 /* How deep we are into the mask, as a double we can test
348 * against. Add half the layer depth so we can easily find >50%
349 * mask elements.
350 */
351 double z_ph = max - (1 + z) * depth + depth / 2;
352
353 /* Odd, but we must avoid rounding errors that make us miss 0
354 * in the line above.
355 */
356 int z_positive = z < layers_above;
357
358 for( y = 0; y < iM->Ysize; y++ ) {
359 int inside;
360
361 /* Start outside the perimeter.
362 */
363 inside = 0;
364
365 for( x = 0; x < iM->Xsize; x++ ) {
366 double c = coeff[x + y * iM->Xsize];
367
368 /* The vertical line from mask[x, y] to 0 is
369 * inside. Is our current square (x, y) part
370 * of that line?
371 */
372 if( (z_positive && c >= z_ph) ||
373 (!z_positive && c <= z_ph) ) {
374 if( !inside ) {
375 vips_conva_hline_start( conva,
376 x );
377 inside = 1;
378 }
379 }
380 else {
381 if( inside ) {
382 if( vips_conva_hline_end( conva,
383 x, y,
384 z_positive ? 1 : -1 ) )
385 return( -1 );
386 inside = 0;
387 }
388 }
389 }
390
391 if( inside &&
392 vips_conva_hline_end( conva,
393 iM->Xsize, y, z_positive ? 1 : -1 ) )
394 return( -1 );
395 }
396 }
397
398 #ifdef DEBUG
399 VIPS_DEBUG_MSG( "vips_conva_decompose_hlines: generated %d hlines\n",
400 conva->n_hline );
401 vips_conva_hprint( conva );
402 #endif /*DEBUG*/
403
404 return( 0 );
405 }
406
407 /* The 'distance' between a pair of hlines.
408 */
409 static int
vips_conva_distance(VipsConva * conva,int a,int b)410 vips_conva_distance( VipsConva *conva, int a, int b )
411 {
412 g_assert( conva->hline[a].weight > 0 && conva->hline[b].weight > 0 );
413
414 return( abs( conva->hline[a].start - conva->hline[b].start ) +
415 abs( conva->hline[a].end - conva->hline[b].end ) );
416 }
417
418 /* Merge two hlines. Line b is deleted, and any refs to b in vlines updated to
419 * point at a.
420 */
421 static void
vips_conva_merge(VipsConva * conva,int a,int b)422 vips_conva_merge( VipsConva *conva, int a, int b )
423 {
424 int i;
425
426 /* Scale weights.
427 */
428 int fa = conva->hline[a].weight;
429 int fb = conva->hline[b].weight;
430 double w = (double) fb / (fa + fb);
431
432 /* New endpoints.
433 */
434 conva->hline[a].start += w *
435 (conva->hline[b].start - conva->hline[a].start);
436 conva->hline[a].end += w *
437 (conva->hline[b].end - conva->hline[a].end);
438 conva->hline[a].weight += conva->hline[b].weight;
439
440 /* Update velement refs to b to refer to a instead.
441 */
442 for( i = 0; i < conva->n_velement; i++ )
443 if( conva->velement[i].band == b )
444 conva->velement[i].band = a;
445
446 /* Mark b to be deleted.
447 */
448 conva->hline[b].weight = 0;
449 }
450
451 static int
edge_sortfn(const void * p1,const void * p2)452 edge_sortfn( const void *p1, const void *p2 )
453 {
454 Edge *a = (Edge *) p1;
455 Edge *b = (Edge *) p2;
456
457 return( a->d - b->d );
458 }
459
460 /* Cluster in batches. Return non-zero if we merged some lines.
461 *
462 * This is not as accurate as rescanning the whole space on every merge, but
463 * it's far faster.
464 */
465 static int
vips_conva_cluster2(VipsConva * conva)466 vips_conva_cluster2( VipsConva *conva )
467 {
468 int i, j, k;
469 int worst;
470 int worst_i;
471 int merged;
472
473 for( i = 0; i < MAX_EDGES; i++ ) {
474 conva->edge[i].a = -1;
475 conva->edge[i].b = -1;
476 conva->edge[i].d = 99999;
477 }
478 worst_i = 0;
479 worst = conva->edge[worst_i].d;
480
481 for( i = 0; i < conva->n_hline; i++ ) {
482 if( conva->hline[i].weight == 0 )
483 continue;
484
485 for( j = i + 1; j < conva->n_hline; j++ ) {
486 int distance;
487
488 if( conva->hline[j].weight == 0 )
489 continue;
490
491 distance = vips_conva_distance( conva, i, j );
492 if( distance < worst ) {
493 conva->edge[worst_i].a = i;
494 conva->edge[worst_i].b = j;
495 conva->edge[worst_i].d = distance;
496
497 worst_i = 0;
498 worst = conva->edge[worst_i].d;
499 for( k = 0; k < MAX_EDGES; k++ )
500 if( conva->edge[k].d > worst ) {
501 worst = conva->edge[k].d;
502 worst_i = k;
503 }
504 }
505 }
506 }
507
508 /* Sort to get closest first.
509 */
510 qsort( conva->edge, MAX_EDGES, sizeof( Edge ), edge_sortfn );
511
512 /*
513 printf( "edges:\n" );
514 printf( " n a b d:\n" );
515 for( i = 0; i < MAX_EDGES; i++ )
516 printf( "%2i) %3d %3d %3d\n", i,
517 conva->edge[i].a, conva->edge[i].b, conva->edge[i].d );
518 */
519
520 /* Merge from the top down.
521 */
522 merged = 0;
523 for( k = 0; k < MAX_EDGES; k++ ) {
524 Edge *edge = &conva->edge[k];
525
526 if( edge->d > conva->cluster )
527 break;
528
529 /* Has been removed, see loop below.
530 */
531 if( edge->a == -1 )
532 continue;
533
534 vips_conva_merge( conva, edge->a, edge->b );
535 merged = 1;
536
537 /* Nodes a and b have vanished or been moved. Remove any edges
538 * which refer to them from the edge list,
539 */
540 for( i = k; i < MAX_EDGES; i++ ) {
541 Edge *edgei = &conva->edge[i];
542
543 if( edgei->a == edge->a ||
544 edgei->b == edge->a ||
545 edgei->a == edge->b ||
546 edgei->b == edge->b )
547 edgei->a = -1;
548 }
549 }
550
551 return( merged );
552 }
553
554 /* Renumber after clustering. We will have removed a lot of hlines ... shuffle
555 * the rest down, adjust all the vline references.
556 */
557 static void
vips_conva_renumber(VipsConva * conva)558 vips_conva_renumber( VipsConva *conva )
559 {
560 int i, j;
561
562 VIPS_DEBUG_MSG( "vips_conva_renumber: renumbering ...\n" );
563
564 /* Loop for all zero-weight hlines.
565 */
566 for( i = 0; i < conva->n_hline; ) {
567 if( conva->hline[i].weight > 0 ) {
568 i++;
569 continue;
570 }
571
572 /* We move hlines i + 1 down, so we need to adjust all
573 * band[] refs to match.
574 */
575 for( j = 0; j < conva->n_velement; j++ )
576 if( conva->velement[j].band > i )
577 conva->velement[j].band -= 1;
578
579 memmove( conva->hline + i, conva->hline + i + 1,
580 sizeof( HLine ) * (conva->n_hline - i - 1) );
581 conva->n_hline -= 1;
582 }
583
584
585 VIPS_DEBUG_MSG( "vips_conva_renumber: ... %d hlines remain\n",
586 conva->n_hline );
587 }
588
589 /* Sort by band, then factor, then row.
590 */
591 static int
velement_sortfn(const void * p1,const void * p2)592 velement_sortfn( const void *p1, const void *p2 )
593 {
594 VElement *a = (VElement *) p1;
595 VElement *b = (VElement *) p2;
596
597 if( a->band != b->band )
598 return( a->band - b->band );
599
600 if( a->factor != b->factor )
601 return( a->factor - b->factor );
602
603 return( a->row - b->row );
604 }
605
606 static void
vips_conva_vline(VipsConva * conva)607 vips_conva_vline( VipsConva *conva )
608 {
609 int y, z;
610
611 VIPS_DEBUG_MSG( "vips_conva_vline: forming vlines ...\n" );
612
613 /* Sort to get elements which could form a vline together.
614 */
615 qsort( conva->velement, conva->n_velement, sizeof( VElement ),
616 velement_sortfn );
617
618 #ifdef DEBUG
619 vips_conva_hprint( conva );
620 #endif /*DEBUG*/
621
622 /* If two lines have the same row and band, we can join them and knock
623 * up the factor instead.
624 */
625 for( y = 0; y < conva->n_velement; y++ ) {
626 for( z = y + 1; z < conva->n_velement; z++ )
627 if( conva->velement[z].band !=
628 conva->velement[y].band ||
629 conva->velement[z].row !=
630 conva->velement[y].row )
631 break;
632
633 /* We need to keep the sign of the old factor.
634 */
635 if( conva->velement[y].factor > 0 )
636 conva->velement[y].factor = z - y;
637 else
638 conva->velement[y].factor = y - z;
639 memmove( conva->velement + y + 1, conva->velement + z,
640 sizeof( VElement ) * (conva->n_velement - z) );
641 conva->n_velement -= z - y - 1;
642 }
643
644 #ifdef DEBUG
645 printf( "after commoning up, %d velement remain\n", conva->n_velement );
646 vips_conva_hprint( conva );
647 #endif /*DEBUG*/
648
649 conva->n_vline = 0;
650 for( y = 0; y < conva->n_velement; ) {
651 int n = conva->n_vline;
652
653 /* Start of a line.
654 */
655 conva->vline[n].band = conva->velement[y].band;
656 conva->vline[n].factor = conva->velement[y].factor;
657 conva->vline[n].start = conva->velement[y].row;
658
659 /* Search for the end of this line.
660 */
661 for( z = y + 1; z < conva->n_velement; z++ )
662 if( conva->velement[z].band !=
663 conva->vline[n].band ||
664 conva->velement[z].factor !=
665 conva->vline[n].factor ||
666 conva->velement[z].row !=
667 conva->vline[n].start + z - y )
668 break;
669
670 /* So the line ends at the previously examined element. We
671 * want 'end' to be one beyond that (non-inclusive).
672 */
673 conva->vline[n].end = conva->velement[z - 1].row + 1;
674
675 conva->n_vline += 1;
676 y = z;
677 }
678
679 VIPS_DEBUG_MSG( "vips_conva_vline: found %d vlines\n", conva->n_vline );
680 }
681
682 /* Break a mask into boxes.
683 */
684 static int
vips_conva_decompose_boxes(VipsConva * conva)685 vips_conva_decompose_boxes( VipsConva *conva )
686 {
687 VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
688 VipsImage *iM = conva->iM;
689 double *coeff = VIPS_MATRIX( iM, 0, 0 );
690 const int size = iM->Xsize * iM->Ysize;
691 double scale = vips_image_get_scale( iM );
692 double offset = vips_image_get_offset( iM );
693
694 double sum;
695 double area;
696 int x, y, z;
697
698 if( vips_conva_decompose_hlines( conva ) )
699 return( -1 );
700
701 /* Cluster to find groups of lines.
702 */
703 VIPS_DEBUG_MSG( "vips_conva_decompose_boxes: "
704 "clustering hlines with thresh %d ...\n", conva->cluster );
705 while( vips_conva_cluster2( conva ) )
706 ;
707
708 /* Renumber to remove holes created by clustering.
709 */
710 vips_conva_renumber( conva );
711
712 /* Find a set of vlines for the remaining hlines.
713 */
714 vips_conva_vline( conva );
715
716 /* Find the area of the lines and the length of the longest hline. We
717 * find the absolute area, we don't want -ves to cancel.
718 */
719 area = 0;
720 conva->max_line = 0;
721 for( y = 0; y < conva->n_velement; y++ ) {
722 x = conva->velement[y].band;
723 z = conva->hline[x].end - conva->hline[x].start;
724
725 area += abs( conva->velement[y].factor * z );
726 if( z > conva->max_line )
727 conva->max_line = z;
728 }
729
730 /* Strength reduction: if all lines are divisible by n, we can move
731 * that n out into the area factor. The aim is to produce as many
732 * factor 1 lines as we can and to reduce the chance of overflow.
733 */
734 x = conva->velement[0].factor;
735 for( y = 1; y < conva->n_velement; y++ )
736 x = gcd( x, conva->velement[y].factor );
737 for( y = 0; y < conva->n_velement; y++ )
738 conva->velement[y].factor /= x;
739 area *= x;
740
741 /* Find the area of the original mask. Again, don't let -ves cancel.
742 */
743 sum = 0;
744 for( z = 0; z < size; z++ )
745 sum += fabs( coeff[z] );
746
747 conva->divisor = VIPS_RINT( area * scale / sum );
748 conva->rounding = (conva->divisor + 1) / 2;
749 conva->offset = offset;
750
751 #ifdef DEBUG
752 vips_conva_hprint( conva );
753 vips_conva_vprint( conva );
754 #endif /*DEBUG*/
755
756 /* With 512x512 tiles, each hline requires 3mb of intermediate per
757 * thread ... 300 lines is about a gb per thread, ouch.
758 */
759 if( conva->n_hline > 150 ) {
760 vips_error( class->nickname, "%s", _( "mask too complex" ) );
761 return( -1 );
762 }
763
764 return( 0 );
765 }
766
767 /* Our sequence value.
768 */
769 typedef struct {
770 VipsConva *conva;
771
772 VipsRegion *ir; /* Input region */
773
774 /* Offsets for start and stop.
775 */
776 int *start;
777 int *end;
778
779 int last_stride; /* Avoid recalcing offsets, if we can */
780
781 /* The rolling sums. int for integer types, double for floating point
782 * types.
783 */
784 void *sum;
785 } VipsConvaSeq;
786
787 /* Free a sequence value.
788 */
789 static int
vips_conva_stop(void * vseq,void * a,void * b)790 vips_conva_stop( void *vseq, void *a, void *b )
791 {
792 VipsConvaSeq *seq = (VipsConvaSeq *) vseq;
793
794 VIPS_UNREF( seq->ir );
795
796 return( 0 );
797 }
798
799 /* Convolution start function.
800 */
801 static void *
vips_conva_start(VipsImage * out,void * a,void * b)802 vips_conva_start( VipsImage *out, void *a, void *b )
803 {
804 VipsImage *in = (VipsImage *) a;
805 VipsConva *conva = (VipsConva *) b;
806
807 VipsConvaSeq *seq;
808
809 seq = VIPS_NEW( out, VipsConvaSeq );
810 seq->conva = conva;
811 seq->ir = vips_region_new( in );
812
813 /* n_velement should be the largest possible dimension.
814 */
815 g_assert( conva->n_velement >= conva->n_hline );
816 g_assert( conva->n_velement >= conva->n_vline );
817
818 seq->start = VIPS_ARRAY( out, conva->n_velement, int );
819 seq->end = VIPS_ARRAY( out, conva->n_velement, int );
820
821 if( vips_band_format_isint( out->BandFmt ) )
822 seq->sum = VIPS_ARRAY( out, conva->n_velement, int );
823 else
824 seq->sum = VIPS_ARRAY( out, conva->n_velement, double );
825 seq->last_stride = -1;
826
827 return( seq );
828 }
829
830 /* The h and v loops are very similar, but also annoyingly different. Keep
831 * them separate for easy debugging.
832 */
833
834 #define HCONV( IN, OUT ) \
835 G_STMT_START { \
836 for( i = 0; i < bands; i++ ) { \
837 OUT *seq_sum = (OUT *) seq->sum; \
838 \
839 IN *p; \
840 OUT *q; \
841 \
842 p = i + (IN *) VIPS_REGION_ADDR( ir, r->left, r->top + y ); \
843 q = i * n_hline + \
844 (OUT *) VIPS_REGION_ADDR( or, r->left, r->top + y ); \
845 \
846 for( z = 0; z < n_hline; z++ ) { \
847 seq_sum[z] = 0; \
848 for( x = conva->hline[z].start; \
849 x < conva->hline[z].end; x++ ) \
850 seq_sum[z] += p[x * istride]; \
851 q[z] = seq_sum[z]; \
852 } \
853 q += ostride; \
854 \
855 for( x = 1; x < r->width; x++ ) { \
856 for( z = 0; z < n_hline; z++ ) { \
857 seq_sum[z] += p[seq->end[z]]; \
858 seq_sum[z] -= p[seq->start[z]]; \
859 q[z] = seq_sum[z]; \
860 } \
861 p += istride; \
862 q += ostride; \
863 } \
864 } \
865 } G_STMT_END
866
867 /* Do horizontal masks ... we scan the mask along scanlines.
868 */
869 static int
vips_conva_hgenerate(VipsRegion * or,void * vseq,void * a,void * b,gboolean * stop)870 vips_conva_hgenerate( VipsRegion *or, void *vseq,
871 void *a, void *b, gboolean *stop )
872 {
873 VipsConvaSeq *seq = (VipsConvaSeq *) vseq;
874 VipsImage *in = (VipsImage *) a;
875 VipsConva *conva = (VipsConva *) b;
876
877 VipsRegion *ir = seq->ir;
878 const int n_hline = conva->n_hline;
879 VipsImage *iM = conva->iM;
880 VipsRect *r = &or->valid;
881
882 /* Double the bands (notionally) for complex.
883 */
884 int bands = vips_band_format_iscomplex( in->BandFmt ) ?
885 2 * in->Bands : in->Bands;
886
887 VipsRect s;
888 int x, y, z, i;
889 int istride;
890 int ostride;
891
892 /* Prepare the section of the input image we need. A little larger
893 * than the section of the output image we are producing.
894 */
895 s = *r;
896 s.width += iM->Xsize - 1;
897 if( vips_region_prepare( ir, &s ) )
898 return( -1 );
899
900 istride = VIPS_IMAGE_SIZEOF_PEL( in ) /
901 VIPS_IMAGE_SIZEOF_ELEMENT( in );
902 ostride = VIPS_IMAGE_SIZEOF_PEL( or->im ) /
903 VIPS_IMAGE_SIZEOF_ELEMENT( or->im );
904
905 /* Init offset array.
906 */
907 if( seq->last_stride != istride ) {
908 seq->last_stride = istride;
909
910 for( z = 0; z < n_hline; z++ ) {
911 seq->start[z] = conva->hline[z].start * istride;
912 seq->end[z] = conva->hline[z].end * istride;
913 }
914 }
915
916 for( y = 0; y < r->height; y++ ) {
917 switch( in->BandFmt ) {
918 case VIPS_FORMAT_UCHAR:
919 if( conva->max_line < 256 )
920 HCONV( unsigned char, unsigned short );
921 else
922 HCONV( unsigned char, unsigned int );
923 break;
924
925 case VIPS_FORMAT_CHAR:
926 if( conva->max_line < 256 )
927 HCONV( signed char, signed short );
928 else
929 HCONV( signed char, signed int );
930 break;
931
932 case VIPS_FORMAT_USHORT:
933 HCONV( unsigned short, unsigned int );
934 break;
935
936 case VIPS_FORMAT_SHORT:
937 HCONV( signed short, signed int );
938 break;
939
940 case VIPS_FORMAT_UINT:
941 HCONV( unsigned int, unsigned int );
942 break;
943
944 case VIPS_FORMAT_INT:
945 HCONV( signed int, signed int );
946 break;
947
948 case VIPS_FORMAT_FLOAT:
949 HCONV( float, float );
950 break;
951
952 case VIPS_FORMAT_DOUBLE:
953 HCONV( double, double );
954 break;
955
956 case VIPS_FORMAT_COMPLEX:
957 HCONV( float, float );
958 break;
959
960 case VIPS_FORMAT_DPCOMPLEX:
961 HCONV( double, double );
962 break;
963
964 default:
965 g_assert_not_reached();
966 }
967 }
968
969 return( 0 );
970 }
971
972 static int
vips_conva_horizontal(VipsConva * conva,VipsImage * in,VipsImage ** out)973 vips_conva_horizontal( VipsConva *conva, VipsImage *in, VipsImage **out )
974 {
975 VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
976
977 /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
978 * would be 1x1.
979 */
980 *out = vips_image_new();
981 if( vips_image_pipelinev( *out,
982 VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) )
983 return( -1 );
984
985 (*out)->Xsize -= conva->iM->Xsize - 1;
986 if( (*out)->Xsize <= 0 ) {
987 vips_error( class->nickname,
988 "%s", _( "image too small for mask" ) );
989 return( -1 );
990 }
991 (*out)->Bands *= conva->n_hline;
992
993 /* Short u?char lines can use u?short intermediate.
994 */
995 if( vips_band_format_isuint( in->BandFmt ) )
996 (*out)->BandFmt = conva->max_line < 256 ?
997 VIPS_FORMAT_USHORT : VIPS_FORMAT_UINT;
998 else if( vips_band_format_isint( in->BandFmt ) )
999 (*out)->BandFmt = conva->max_line < 256 ?
1000 VIPS_FORMAT_SHORT : VIPS_FORMAT_INT;
1001
1002 if( vips_image_generate( *out,
1003 vips_conva_start, vips_conva_hgenerate, vips_conva_stop,
1004 in, conva ) )
1005 return( -1 );
1006
1007 return( 0 );
1008 }
1009
1010 #define CLIP_UCHAR( V ) \
1011 G_STMT_START { \
1012 if( (V) < 0 ) \
1013 (V) = 0; \
1014 else if( (V) > UCHAR_MAX ) \
1015 (V) = UCHAR_MAX; \
1016 } G_STMT_END
1017
1018 #define CLIP_CHAR( V ) \
1019 G_STMT_START { \
1020 if( (V) < SCHAR_MIN ) \
1021 (V) = SCHAR_MIN; \
1022 else if( (V) > SCHAR_MAX ) \
1023 (V) = SCHAR_MAX; \
1024 } G_STMT_END
1025
1026 #define CLIP_USHORT( V ) \
1027 G_STMT_START { \
1028 if( (V) < 0 ) \
1029 (V) = 0; \
1030 else if( (V) > USHRT_MAX ) \
1031 (V) = USHRT_MAX; \
1032 } G_STMT_END
1033
1034 #define CLIP_SHORT( V ) \
1035 G_STMT_START { \
1036 if( (V) < SHRT_MIN ) \
1037 (V) = SHRT_MIN; \
1038 else if( (V) > SHRT_MAX ) \
1039 (V) = SHRT_MAX; \
1040 } G_STMT_END
1041
1042 #define CLIP_NONE( V ) {}
1043
1044 #define VCONV( ACC, IN, OUT, CLIP ) \
1045 G_STMT_START { \
1046 for( x = 0; x < sz; x++ ) { \
1047 ACC *seq_sum = (ACC *) seq->sum; \
1048 \
1049 IN *p; \
1050 OUT *q; \
1051 ACC sum; \
1052 \
1053 p = x * conva->n_hline + \
1054 (IN *) VIPS_REGION_ADDR( ir, r->left, r->top ); \
1055 q = x + (OUT *) VIPS_REGION_ADDR( or, r->left, r->top ); \
1056 \
1057 sum = 0; \
1058 for( z = 0; z < n_vline; z++ ) { \
1059 seq_sum[z] = 0; \
1060 for( k = conva->vline[z].start; \
1061 k < conva->vline[z].end; k++ ) \
1062 seq_sum[z] += p[k * istride + \
1063 conva->vline[z].band]; \
1064 sum += conva->vline[z].factor * seq_sum[z]; \
1065 } \
1066 sum = (sum + conva->rounding) / conva->divisor + conva->offset; \
1067 CLIP( sum ); \
1068 *q = sum; \
1069 q += ostride; \
1070 \
1071 for( y = 1; y < r->height; y++ ) { \
1072 sum = 0;\
1073 for( z = 0; z < n_vline; z++ ) { \
1074 seq_sum[z] += p[seq->end[z]]; \
1075 seq_sum[z] -= p[seq->start[z]]; \
1076 sum += conva->vline[z].factor * seq_sum[z]; \
1077 } \
1078 p += istride; \
1079 sum = (sum + conva->rounding) / conva->divisor + \
1080 conva->offset; \
1081 CLIP( sum ); \
1082 *q = sum; \
1083 q += ostride; \
1084 } \
1085 } \
1086 } G_STMT_END
1087
1088 /* Do vertical masks ... we scan the mask down columns of pixels.
1089 */
1090 static int
vips_conva_vgenerate(VipsRegion * or,void * vseq,void * a,void * b,gboolean * stop)1091 vips_conva_vgenerate( VipsRegion *or, void *vseq,
1092 void *a, void *b, gboolean *stop )
1093 {
1094 VipsConvaSeq *seq = (VipsConvaSeq *) vseq;
1095 VipsImage *in = (VipsImage *) a;
1096 VipsConva *conva = (VipsConva *) b;
1097 VipsConvolution *convolution = (VipsConvolution *) conva;
1098
1099 VipsRegion *ir = seq->ir;
1100 const int n_vline = conva->n_vline;
1101 VipsImage *iM = conva->iM;
1102 VipsRect *r = &or->valid;
1103
1104 /* Double the width (notionally) for complex.
1105 */
1106 int sz = vips_band_format_iscomplex( in->BandFmt ) ?
1107 2 * VIPS_REGION_N_ELEMENTS( or ) : VIPS_REGION_N_ELEMENTS( or );
1108
1109 VipsRect s;
1110 int x, y, z, k;
1111 int istride;
1112 int ostride;
1113
1114 /* Prepare the section of the input image we need. A little larger
1115 * than the section of the output image we are producing.
1116 */
1117 s = *r;
1118 s.height += iM->Ysize - 1;
1119 if( vips_region_prepare( ir, &s ) )
1120 return( -1 );
1121
1122 istride = VIPS_REGION_LSKIP( ir ) /
1123 VIPS_IMAGE_SIZEOF_ELEMENT( in );
1124 ostride = VIPS_REGION_LSKIP( or ) /
1125 VIPS_IMAGE_SIZEOF_ELEMENT( convolution->out );
1126
1127 /* Init offset array.
1128 */
1129 if( seq->last_stride != istride ) {
1130 seq->last_stride = istride;
1131
1132 for( z = 0; z < n_vline; z++ ) {
1133 seq->start[z] = conva->vline[z].band +
1134 conva->vline[z].start * istride;
1135 seq->end[z] = conva->vline[z].band +
1136 conva->vline[z].end * istride;
1137 }
1138 }
1139
1140 switch( convolution->in->BandFmt ) {
1141 case VIPS_FORMAT_UCHAR:
1142 if( conva->max_line < 256 )
1143 VCONV( unsigned int, \
1144 unsigned short, unsigned char, CLIP_UCHAR );
1145 else
1146 VCONV( unsigned int, \
1147 unsigned int, unsigned char, CLIP_UCHAR );
1148 break;
1149
1150 case VIPS_FORMAT_CHAR:
1151 if( conva->max_line < 256 )
1152 VCONV( signed int, \
1153 signed short, signed char, CLIP_CHAR );
1154 else
1155 VCONV( signed int, \
1156 signed int, signed char, CLIP_CHAR );
1157 break;
1158
1159 case VIPS_FORMAT_USHORT:
1160 VCONV( unsigned int, \
1161 unsigned int, unsigned short, CLIP_USHORT );
1162 break;
1163
1164 case VIPS_FORMAT_SHORT:
1165 VCONV( signed int, signed int, signed short, CLIP_SHORT );
1166 break;
1167
1168 case VIPS_FORMAT_UINT:
1169 VCONV( unsigned int, unsigned int, unsigned int, CLIP_NONE );
1170 break;
1171
1172 case VIPS_FORMAT_INT:
1173 VCONV( signed int, signed int, signed int, CLIP_NONE );
1174 break;
1175
1176 case VIPS_FORMAT_FLOAT:
1177 VCONV( float, float, float, CLIP_NONE );
1178 break;
1179
1180 case VIPS_FORMAT_DOUBLE:
1181 VCONV( double, double, double, CLIP_NONE );
1182 break;
1183
1184 case VIPS_FORMAT_COMPLEX:
1185 VCONV( float, float, float, CLIP_NONE );
1186 break;
1187
1188 case VIPS_FORMAT_DPCOMPLEX:
1189 VCONV( double, double, double, CLIP_NONE );
1190 break;
1191
1192 default:
1193 g_assert_not_reached();
1194 }
1195
1196 return( 0 );
1197 }
1198
1199 static int
vips_conva_vertical(VipsConva * conva,VipsImage * in,VipsImage ** out)1200 vips_conva_vertical( VipsConva *conva, VipsImage *in, VipsImage **out )
1201 {
1202 VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( conva );
1203 VipsConvolution *convolution = (VipsConvolution *) conva;
1204
1205 /* Prepare output. Consider a 7x7 mask and a 7x7 image --- the output
1206 * would be 1x1.
1207 */
1208 *out = vips_image_new();
1209 if( vips_image_pipelinev( *out,
1210 VIPS_DEMAND_STYLE_SMALLTILE, in, NULL ) )
1211 return( -1 );
1212
1213 (*out)->Ysize -= conva->iM->Ysize - 1;
1214 if( (*out)->Ysize <= 0 ) {
1215 vips_error( class->nickname,
1216 "%s", _( "image too small for mask" ) );
1217 return( -1 );
1218 }
1219 (*out)->Bands = convolution->in->Bands;
1220 (*out)->BandFmt = convolution->in->BandFmt;
1221
1222 if( vips_image_generate( *out,
1223 vips_conva_start, vips_conva_vgenerate, vips_conva_stop,
1224 in, conva ) )
1225 return( -1 );
1226
1227 return( 0 );
1228 }
1229
1230 static int
vips_conva_build(VipsObject * object)1231 vips_conva_build( VipsObject *object )
1232 {
1233 VipsConvolution *convolution = (VipsConvolution *) object;
1234 VipsConva *conva = (VipsConva *) object;
1235 VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 );
1236
1237 VipsImage *in;
1238
1239 if( VIPS_OBJECT_CLASS( vips_conva_parent_class )->build( object ) )
1240 return( -1 );
1241
1242 /* An int version of our mask.
1243 */
1244 if( vips__image_intize( convolution->M, &t[0] ) )
1245 return( -1 );
1246 conva->iM = t[0];
1247
1248 #ifdef DEBUG
1249 printf( "vips_conva_build: iM =\n" );
1250 vips_matrixprint( conva->iM, NULL );
1251 #endif /*DEBUG*/
1252
1253 in = convolution->in;
1254
1255 if( vips_conva_decompose_boxes( conva ) )
1256 return( -1 );
1257
1258 g_object_set( conva, "out", vips_image_new(), NULL );
1259 if(
1260 vips_embed( in, &t[1],
1261 t[0]->Xsize / 2,
1262 t[0]->Ysize / 2,
1263 in->Xsize + t[0]->Xsize - 1,
1264 in->Ysize + t[0]->Ysize - 1,
1265 "extend", VIPS_EXTEND_COPY,
1266 NULL ) ||
1267 vips_conva_horizontal( conva, t[1], &t[2] ) ||
1268 vips_conva_vertical( conva, t[2], &t[3] ) ||
1269 vips_image_write( t[3], convolution->out ) )
1270 return( -1 );
1271
1272 convolution->out->Xoffset = 0;
1273 convolution->out->Yoffset = 0;
1274
1275 return( 0 );
1276 }
1277
1278 static void
vips_conva_class_init(VipsConvaClass * class)1279 vips_conva_class_init( VipsConvaClass *class )
1280 {
1281 GObjectClass *gobject_class = G_OBJECT_CLASS( class );
1282 VipsObjectClass *object_class = (VipsObjectClass *) class;
1283
1284 gobject_class->set_property = vips_object_set_property;
1285 gobject_class->get_property = vips_object_get_property;
1286
1287 object_class->nickname = "conva";
1288 object_class->description = _( "approximate integer convolution" );
1289 object_class->build = vips_conva_build;
1290
1291 VIPS_ARG_INT( class, "layers", 104,
1292 _( "Layers" ),
1293 _( "Use this many layers in approximation" ),
1294 VIPS_ARGUMENT_OPTIONAL_INPUT,
1295 G_STRUCT_OFFSET( VipsConva, layers ),
1296 1, 1000, 5 );
1297
1298 VIPS_ARG_INT( class, "cluster", 105,
1299 _( "Cluster" ),
1300 _( "Cluster lines closer than this in approximation" ),
1301 VIPS_ARGUMENT_OPTIONAL_INPUT,
1302 G_STRUCT_OFFSET( VipsConva, cluster ),
1303 1, 100, 1 );
1304
1305 }
1306
1307 static void
vips_conva_init(VipsConva * conva)1308 vips_conva_init( VipsConva *conva )
1309 {
1310 conva->layers = 5;
1311 conva->cluster = 1;
1312 }
1313
1314 /**
1315 * vips_conva: (method)
1316 * @in: input image
1317 * @out: (out): output image
1318 * @mask: convolution mask
1319 * @...: %NULL-terminated list of optional named arguments
1320 *
1321 * Optional arguments:
1322 *
1323 * * @layers: %gint, number of layers for approximation
1324 * * @cluster: %gint, cluster lines closer than this distance
1325 *
1326 * Perform an approximate integer convolution of @in with @mask.
1327 * This is a low-level operation, see
1328 * vips_conv() for something more convenient.
1329 *
1330 * The output image
1331 * always has the same #VipsBandFormat as the input image.
1332 * Elements of @mask are converted to
1333 * integers before convolution.
1334 *
1335 * Larger values for @layers give more accurate
1336 * results, but are slower. As @layers approaches the mask radius, the
1337 * accuracy will become close to exact convolution and the speed will drop to
1338 * match. For many large masks, such as Gaussian, @layers need be only 10% of
1339 * this value and accuracy will still be good.
1340 *
1341 * Smaller values of @cluster will give more accurate results, but be slower
1342 * and use more memory. 10% of the mask radius is a good rule of thumb.
1343 *
1344 * See also: vips_conv().
1345 *
1346 * Returns: 0 on success, -1 on error
1347 */
1348 int
vips_conva(VipsImage * in,VipsImage ** out,VipsImage * mask,...)1349 vips_conva( VipsImage *in, VipsImage **out, VipsImage *mask, ... )
1350 {
1351 va_list ap;
1352 int result;
1353
1354 va_start( ap, mask );
1355 result = vips_call_split( "conva", ap, in, out, mask );
1356 va_end( ap );
1357
1358 return( result );
1359 }
1360
1361