1 /* Copyright (C) 2005-2011 Fabio Riccardi */
2 
#include <string.h>

#include <new>
4 
5 typedef unsigned char byte;
6 typedef unsigned short ushort;
7 
/*
 * Absolute value of x, evaluating the argument exactly once.
 * Uses __typeof__ (accepted in every GCC/Clang language mode, unlike the
 * plain typeof spelling) and parenthesizes the argument in the initializer.
 */
#define abs(x) ({ __typeof__ (x) _x = (x); _x >= 0 ? _x : -_x; })
9 
10 #define max(a,b)				\
11   ({ typeof (a) _a = (a);			\
12     typeof (b) _b = (b);			\
13     _a > _b ? _a : _b; })
14 
15 #define min(a,b)				\
16   ({ typeof (a) _a = (a);			\
17     typeof (b) _b = (b);			\
18     _a < _b ? _a : _b; })
19 
20 template <typename T> struct Values {
21   static const T maxVal;
22 };
23 
24 template<> const byte           Values<byte>::maxVal         = 0xFF;
25 template<> const ushort         Values<ushort>::maxVal       = 0xFFFF;
26 template<> const signed char    Values<signed char>::maxVal  = 0x7F;
27 template<> const signed short   Values<signed short>::maxVal = 0x7FFF;
28 template<> const unsigned int   Values<unsigned int>::maxVal = 0xFFFFFFFF;
29 template<> const int            Values<int>::maxVal          = 0x7FFFFFFF;
30 template<> const float          Values<float>::maxVal        = 1.0;
31 template<> const double         Values<double>::maxVal       = 1.0;
32 
33 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
34 #include <altivec.h>
35 
TurnJavaModeOff(vector unsigned int * oldJavaMode)36 static void TurnJavaModeOff( vector unsigned int *oldJavaMode ) {
37     vector unsigned int javaOffMask = ( vector unsigned int ) { 0x00010000 };
38     vector unsigned int java;
39     *oldJavaMode = ( vector unsigned int ) vec_mfvscr ( );
40     java = vec_or ( *oldJavaMode, javaOffMask );
41     vec_mtvscr ( java );
42 }
43 
RestoreJavaMode(vector unsigned int * oldJavaMode)44 static void RestoreJavaMode( vector unsigned int *oldJavaMode ) {
45     vec_mtvscr ( *oldJavaMode );
46 }
47 
loadUnalignedChar(unsigned char * target)48 static inline vector unsigned char loadUnalignedChar( unsigned char *target )
49 {
50   vector unsigned char MSQ, LSQ;
51   vector unsigned char mask;
52 
53   MSQ = vec_ld(0, target);            // most significant quadword
54   LSQ = vec_ld(15, target);           // least significant quadword
55   mask = vec_lvsl(0, target);         // create the permute mask
56   return vec_perm(MSQ, LSQ, mask);    // align the data
57 }
58 
loadUnalignedShort(unsigned short * target)59 static inline vector unsigned short loadUnalignedShort( unsigned short *target )
60 {
61   vector unsigned short MSQ, LSQ;
62   vector unsigned char mask;
63 
64   MSQ = vec_ld(0, target);            // most significant quadword
65   LSQ = vec_ld(15, target);           // least significant quadword
66   mask = vec_lvsl(0, target);         // create the permute mask
67   return vec_perm(MSQ, LSQ, mask);    // align the data
68 }
69 
loadUnalignedInt(unsigned int * target)70 static inline vector unsigned int loadUnalignedInt( unsigned int *target )
71 {
72   vector unsigned int MSQ, LSQ;
73   vector unsigned char mask;
74 
75   MSQ = vec_ld(0, target);            // most significant quadword
76   LSQ = vec_ld(15, target);           // least significant quadword
77   mask = vec_lvsl(0, target);         // create the permute mask
78   return vec_perm(MSQ, LSQ, mask);    // align the data
79 }
80 
loadUnalignedFloat(float * target)81 static inline vector float loadUnalignedFloat( float *target )
82 {
83   vector float MSQ, LSQ;
84   vector unsigned char mask;
85 
86   MSQ = vec_ld(0, target);            // most significant quadword
87   LSQ = vec_ld(15, target);           // least significant quadword
88   mask = vec_lvsl(0, target);         // create the permute mask
89   return vec_perm(MSQ, LSQ, mask);    // align the data
90 }
91 
92 template< typename T >
93 static inline vector float loadFloatPixels(T* data) __attribute__ ((always_inline));
94 
95 template< typename T >
loadFloatPixels(T * data)96 static inline vector float loadFloatPixels(T* data) {
97   vector unsigned int ll;
98   if (sizeof(T) == 1) {
99     vector unsigned char x = loadUnalignedChar((unsigned char *) data);
100     vector short xx = vec_unpackh((vector char) x);
101     ll = vec_unpackh(xx);
102     ll = vec_and(ll, (vector unsigned int) {0xFF, 0xFF, 0xFF, 0xFF});
103   } else if (sizeof(T) == 2) {
104     vector unsigned short x = loadUnalignedShort((unsigned short *) data);
105     ll = vec_unpackh((vector short) x);
106     ll = vec_and(ll, (vector unsigned int) {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF});
107   } else if (sizeof(T) == 4) {
108     ll = loadUnalignedInt((unsigned int *) data);
109   }
110   return vec_ctf(ll, 0);
111 }
112 
113 template< typename T >
114 void storeFloatPixels(T* dest, int bands, vector float data) __attribute__ ((always_inline));
115 
116 template< typename T >
storeFloatPixels(T * dest,int bands,vector float data)117 void storeFloatPixels(T* dest, int bands, vector float data) {
118   vector int res = vec_cts(data, 0);
119 
120   if (sizeof(T) == 1) {
121     vector unsigned short sres = vec_packsu(res, res);
122     vector unsigned char bres = vec_packsu(sres, sres);
123 
124     bres = vec_perm( bres, bres, vec_lvsr( 0, dest ) );
125 
126     for (int b = 0; b < bands; b++)
127       vec_ste( bres, b, (unsigned char *) dest );
128   } else if (sizeof(T) == 2) {
129     vector unsigned short sres = vec_packsu(res, res);
130 
131     sres = vec_perm( sres, sres, vec_lvsr( 0, dest ) );
132 
133     for (int b = 0; b < bands; b++)
134       vec_ste( sres, 2 * b, (unsigned short *) dest );
135   } else if (sizeof(T) == 4) {
136     res = vec_perm( res, res, vec_lvsr( 0, (int *) dest ) );
137 
138     for (int b = 0; b < bands; b++)
139       vec_ste( res, 4 * b, (int *) dest );
140   }
141 }
142 
143 static inline vector float addAll(vector float t1) __attribute__ ((always_inline));
144 
addAll(vector float t1)145 static inline vector float addAll(vector float t1) {
146   t1 = vec_add(t1, vec_sld(t1, t1, 8));
147   return vec_add(t1, vec_sld(t1, t1, 4));
148 }
149 
// Byte-index groups for vec_perm masks: Wn expands to the four byte indices
// of 32-bit word n in the 32-byte concatenation of the two permute inputs
// (W0..W3 select from the first operand, W4..W7 from the second).
#define W0 0x00, 0x01, 0x02, 0x03
#define W1 0x04, 0x05, 0x06, 0x07
#define W2 0x08, 0x09, 0x0A, 0x0B
#define W3 0x0C, 0x0D, 0x0E, 0x0F
#define W4 0x10, 0x11, 0x12, 0x13
#define W5 0x14, 0x15, 0x16, 0x17
#define W6 0x18, 0x19, 0x1A, 0x1B
#define W7 0x1C, 0x1D, 0x1E, 0x1F
158 
loadKernelElem(int pos,float * kernel)159 vector float loadKernelElem(int pos, float *kernel) {
160   float *ptr = &kernel[pos];
161   vector float vv = vec_lde( 0, ptr );
162   vector unsigned char moveToStart = vec_lvsl( 0, ptr );
163   vv = vec_perm( vv, vv, moveToStart );
164   vv = vec_splat( vv, 0 );
165   return vv;
166 }
167 
168 template< typename T >
conv_line0(T * data,float * kernel,int kw)169 vector float conv_line0(T *data, float *kernel, int kw) {
170   vector float ff = (vector float) vec_splat_u32(0);
171 
172   for (int v = 0; v < kw; v++) {
173     vector float hvv = loadKernelElem(v, kernel);
174 
175     vector float ss = loadFloatPixels(&data[3 * v]);
176 
177     ff = vec_madd(hvv, ss, ff);
178   }
179   return ff;
180 }
181 
182 template< typename T >
conv_line3(T * data,float * kernel,int kw)183 vector float conv_line3(T *data, float *kernel, int kw) {
184   vector float ff;
185 
186   if (kw/4 > 0) {
187     vector float fr, fg, fb;
188 
189     fr = fg = fb = (vector float) vec_splat_u32(0);
190 
191     for (int v = 0; v < kw/4; v++) {
192       vector float hv = vec_ld(0, &kernel[4 * v]);
193 
194       vector float ss1 = loadFloatPixels(&data[3 * 4 * v]);
195       vector float ss2 = loadFloatPixels(&data[3 * 4 * v + 4]);
196       vector float ss3 = loadFloatPixels(&data[3 * 4 * v + 8]);
197 
198       vector float ssa, ssb, ssc;
199       ssa = vec_perm(ss1, ss2, (vector unsigned char) {W0, W3, W6, W2});
200       ssb = vec_perm(ss1, ss2, (vector unsigned char) {W1, W4, W7, W5});
201       ss1 = vec_perm(ssa, ss3, (vector unsigned char) {W0, W1, W2, W5});
202       fr = vec_madd(hv, ss1, fr);
203       ssc = vec_perm(ssa, ss3, (vector unsigned char) {W3, W6, W4, W7});
204       ss2 = vec_perm(ssb, ss3, (vector unsigned char) {W0, W1, W2, W6});
205       fg = vec_madd(hv, ss2, fg);
206       ss3 = vec_perm(ssc, ssb, (vector unsigned char) {W0, W7, W2, W3});
207       fb = vec_madd(hv, ss3, fb);
208     }
209 
210     fr = addAll(fr);
211     fg = addAll(fg);
212     fb = addAll(fb);
213 
214     ff = vec_perm(fr, fg, (vector unsigned char) {W0, W4, W1, W2});
215     ff = vec_perm(ff, fb, (vector unsigned char) {W0, W1, W4, W2});
216   } else
217     ff = (vector float) vec_splat_u32(0);
218 
219   // we have to make an extra step of the computation for the last element of the kernel...
220 
221   for (int v = 4*(kw/4); v < kw; v++) {
222     vector float hvv = loadKernelElem(v, kernel);
223 
224     vector float ss = loadFloatPixels(&data[3*v]);
225 
226     ff = vec_madd(hvv, ss, ff);
227   }
228 
229   return ff;
230 }
231 
232 template< typename T >
conv_line1(T * data,float * kernel,int kw)233 float conv_line1(T *data, float *kernel, int kw) {
234   float f __attribute__ ((aligned (16)));
235 
236   if (kw/4 > 0) {
237     vector float ff = (vector float) vec_splat_u32(0);
238 
239     for (int v = 0; v < kw/4; v++) {
240       vector float hv = vec_ld(0, &kernel[4 * v]);
241       vector float ss = loadFloatPixels(&data[4 * v]);
242       ff = vec_madd(hv, ss, ff);
243     }
244 
245     ff = addAll(ff);
246     vec_ste(ff, 0, &f);
247   } else
248     f = 0;
249 
250   // we have to make an extra step of the computation for the last element of the kernel...
251 
252   for (int v = 4*(kw/4); v < kw; v++) {
253     f += data[v] * kernel[v];
254   }
255 
256   return f;
257 }
258 
259 #endif
260 
261 
262 template< typename T, int bands >
263 static
convolveBandsLoop(T * srcData,T * dstData,int srcScanlineOffset,int dstScanlineOffset,int srcScanlineStride,int dstScanlineStride,int dheight,int dwidth,int kw,int kh,float * hValues,float * vValues)264 void convolveBandsLoop(T *srcData, T *dstData, int srcScanlineOffset, int dstScanlineOffset,
265 		       int srcScanlineStride, int dstScanlineStride,
266 		       int dheight, int dwidth, int kw, int kh,
267 		       float *hValues, float *vValues)
268 {
269 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
270   vector unsigned int oldJavaMode;
271   TurnJavaModeOff( &oldJavaMode );
272 #endif
273 
274 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
275   float *tmpBuffer = new float[kh*dwidth*4]; // waste some space but go faster...
276 #else
277   float *tmpBuffer = new float[kh*dwidth*bands];
278 #endif
279   if (tmpBuffer == NULL)
280     return; // Do no evil...
281   int tmpBufferSize = kh*dwidth;
282 
283   float hkernel[kw] __attribute__ ((aligned (16)));
284   float vkernel[kh] __attribute__ ((aligned (16)));
285 
286   memcpy(hkernel, hValues, kw * sizeof(float));
287   memcpy(vkernel, vValues, kh * sizeof(float));
288 
289   int revolver = 0;
290   int kvRevolver = 0;                 // to match kernel vkernel
291   for (int j = 0; j < kh-1; j++) {
292     int srcPixelOffset = srcScanlineOffset;
293 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
294     for (int i = 0; i < dwidth; i++) {
295       vector float ff = conv_line3(&srcData[srcPixelOffset], hkernel, kw);
296       vec_st(ff, 0, &tmpBuffer[4*(revolver+i)]);
297       srcPixelOffset += bands;
298     }
299 #else
300     for (int i = 0; i < dwidth; i++) {
301       float f[bands];
302       for (int b = 0; b < bands; b++)
303         f[b] = 0.0;
304 
305       for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset+=bands) {
306         float hv = hkernel[v];
307         for (int b= 0; b < bands; b++) {
308 	  f[b] += srcData[imageOffset+b] * hv;
309 	}
310       }
311 
312       for (int b= 0; b < bands; b++)
313         tmpBuffer[bands*(revolver+i) + b] = f[b];
314 
315       srcPixelOffset += bands;
316     }
317 #endif
318     revolver += dwidth;
319     srcScanlineOffset += srcScanlineStride;
320   }
321 
322   // srcScanlineStride already bumped by
323   // kh-1*scanlineStride
324   for (int j = 0; j < dheight; j++)  {
325     for (int i = 0, srcPixelOffset = srcScanlineOffset, dstPixelOffset = dstScanlineOffset;
326          i < dwidth;
327          i++, srcPixelOffset += bands, dstPixelOffset += bands) {
328 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
329       vector float ff = conv_line3(&srcData[srcPixelOffset], hkernel, kw);
330 
331       vec_st(ff, 0, (vector float *)(&tmpBuffer[4*(revolver+i)]));
332       ff = (vector float) {0.5f, 0.5f, 0.5f};
333 #else
334       float f[bands];
335       for (int b = 0; b < bands; b++)
336         f[b] = 0.0;
337 
338       for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset+=bands) {
339         float hv = hkernel[v];
340         for (int b= 0; b < bands; b++) {
341 	  f[b] += srcData[imageOffset+b] * hv;
342 	}
343       }
344 
345       for (int b = 0; b < bands; b++) {
346         tmpBuffer[bands*(revolver+i) + b] = f[b];
347         f[b] = 0.5;
348       }
349 #endif
350 
351       // The vertical kernel must revolve as well
352       int b = kvRevolver + i;
353       for (int a=0; a < kh; a++) {
354 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
355         vector float vv = loadKernelElem(a, vkernel);
356         // vector float ss = loadUnalignedFloat(&tmpBuffer[bands*b]);
357         vector float ss = vec_ld(0, (vector float *)(&tmpBuffer[4*b]));
358 
359         ff = vec_madd(vv, ss, ff);
360 #else
361         float vv = vkernel[a];
362         for (int c = 0; c < bands; c++)
363 	  f[c] += tmpBuffer[bands*b + c] * vv;
364 #endif
365 	b += dwidth;
366 	if (b >= tmpBufferSize) b -= tmpBufferSize;
367       }
368 
369 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
370       storeFloatPixels(&dstData[dstPixelOffset], bands, ff);
371 #else
372       for (int b = 0; b < bands; b++) {
373 	float res = f[b];
374 	if (res < 0)  {
375 	  res = 0;
376 	} else if (res > Values<T>::maxVal)  {
377 	  res = Values<T>::maxVal;
378 	}
379 
380 	dstData[dstPixelOffset+b] = (T) res;
381       }
382 #endif
383     }
384 
385     revolver += dwidth;
386     if (revolver == tmpBufferSize) {
387       revolver = 0;
388     }
389     kvRevolver += dwidth;
390     if (kvRevolver == tmpBufferSize) {
391       kvRevolver = 0;
392     }
393     srcScanlineOffset += srcScanlineStride;
394     dstScanlineOffset += dstScanlineStride;
395   }
396   delete[] tmpBuffer;
397 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
398   RestoreJavaMode(&oldJavaMode);
399 #endif
400 }
401 
402 template< typename T >
403 static
convolveLoop(T * srcData,T * dstData,int srcScanlineOffset,int dstScanlineOffset,int srcScanlineStride,int dstScanlineStride,int srcPixelStride,int dstPixelStride,int dheight,int dwidth,int kw,int kh,float * hValues,float * vValues)404 void convolveLoop(T *srcData, T *dstData, int srcScanlineOffset, int dstScanlineOffset,
405 		  int srcScanlineStride, int dstScanlineStride,
406 		  int srcPixelStride, int dstPixelStride,
407 		  int dheight, int dwidth, int kw, int kh,
408 		  float *hValues, float *vValues)
409 {
410 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
411   vector unsigned int oldJavaMode;
412   TurnJavaModeOff( &oldJavaMode );
413 #endif
414 
415   float *tmpBuffer = new float[kh*dwidth];
416 
417   if (tmpBuffer == NULL)
418     return; // Do no evil...
419 
420   int tmpBufferSize = kh*dwidth;
421 
422   float hkernel[kw] __attribute__ ((aligned (16)));
423   float vkernel[kh] __attribute__ ((aligned (16)));
424 
425   memcpy(hkernel, hValues, kw * sizeof(float));
426   memcpy(vkernel, vValues, kh * sizeof(float));
427 
428   int revolver = 0;
429   int kvRevolver = 0;                 // to match kernel vValues
430   for (int j = 0; j < kh-1; j++) {
431     int srcPixelOffset = srcScanlineOffset;
432 
433     for (int i = 0; i < dwidth; i++) {
434       float f;
435 
436 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
437       if (srcPixelStride == 1)
438         f = conv_line1(&srcData[srcPixelOffset], hkernel, kw);
439       else
440 #endif
441 	{
442 	  f = 0.0f;
443 	  for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset += srcPixelStride)
444 	    f += srcData[imageOffset] * hkernel[v];
445 	}
446       tmpBuffer[revolver+i] = f;
447       srcPixelOffset += srcPixelStride;
448     }
449     revolver += dwidth;
450     srcScanlineOffset += srcScanlineStride;
451   }
452 
453   const float fmaxVal = (float) Values<T>::maxVal;
454 
455   // srcScanlineStride already bumped by
456   // kh-1*scanlineStride
457   for (int j = 0; j < dheight; j++)  {
458     int srcPixelOffset = srcScanlineOffset;
459     int dstPixelOffset = dstScanlineOffset;
460 
461     for (int i = 0; i < dwidth; i++)  {
462 
463 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
464       union {
465         vector float ff;
466         float fa[4];
467         float f;
468       };
469 #else
470 	float f;
471 #endif
472 
473 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
474       if (srcPixelStride == 1)
475         f = conv_line1(&srcData[srcPixelOffset], hkernel, kw);
476       else
477 #endif
478       {
479 	f = 0.0f;
480 	for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset += srcPixelStride)
481 	  f += srcData[imageOffset] * hkernel[v];
482       }
483       tmpBuffer[revolver + i] = f;
484 
485       // A bug in gcc 4.0 causes the following code to "drop pixels", check forthcoming gcc4 updates...
486 
487 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
488       if (srcPixelStride == 1 && kh/4 != 0) {
489 	ff = (vector float) {0.5f, 0, 0, 0};
490 	// The vertical kernel must revolve as well
491 	int b = kvRevolver + i;
492 	for (int a = 0; a < kh/4; a++) {
493 	  union {
494 	    vector float tvb;
495 	    float ftb[4];
496 	  };
497 	  for (int i = 0; i < 4; i++) {
498 	    ftb[i] = tmpBuffer[b];
499 	    b += dwidth;
500 	    if (b >= tmpBufferSize) b -= tmpBufferSize;
501 	  }
502 	  vector float vv = vec_ld(0, &vkernel[4 * a]); // loadUnalignedFloat(&vkernel[4 * a]);
503 	  ff = vec_madd(vv, tvb, ff);
504 	}
505 
506 	ff = addAll(ff);
507 
508 	for (int a = 4*(kh/4); a < kh; a++){
509 	  f += tmpBuffer[b] * vkernel[a];
510 	  b += dwidth;
511 	  if (b >= tmpBufferSize) b -= tmpBufferSize;
512 	}
513       } else
514 #endif
515 	{
516           f = 0.5f;
517           // The vertical kernel must revolve as well
518           int b = kvRevolver + i;
519           for (int a = 0; a < kh; a++){
520             f += tmpBuffer[b] * vkernel[a];
521             b += dwidth;
522             if (b >= tmpBufferSize) b -= tmpBufferSize;
523           }
524 	}
525       if (f < 0.0f)  {
526 	f = 0.0f;
527       } else if (f > fmaxVal)  {
528 	f = fmaxVal;
529       }
530 
531       dstData[dstPixelOffset] = (T) f;
532       srcPixelOffset += srcPixelStride;
533       dstPixelOffset += dstPixelStride;
534     }
535 
536     revolver += dwidth;
537     if (revolver == tmpBufferSize) {
538       revolver = 0;
539     }
540     kvRevolver += dwidth;
541     if (kvRevolver == tmpBufferSize) {
542       kvRevolver = 0;
543     }
544     srcScanlineOffset += srcScanlineStride;
545     dstScanlineOffset += dstScanlineStride;
546   }
547   delete[] tmpBuffer;
548 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
549   RestoreJavaMode(&oldJavaMode);
550 #endif
551 }
552 
553 // #define VIMAGE
554 
555 #ifdef VIMAGE
556 #include "Convolution.h"
557 
convolveAVLoop(byte * srcData,byte * dstData,int srcScanlineOffset,int dstScanlineOffset,int srcScanlineStride,int dstScanlineStride,int srcPixelStride,int dstPixelStride,int dheight,int dwidth,int kw,int kh,float * hValues,float * vValues)558 void convolveAVLoop(byte *srcData, byte *dstData, int srcScanlineOffset, int dstScanlineOffset,
559 		    int srcScanlineStride, int dstScanlineStride,
560 		    int srcPixelStride, int dstPixelStride,
561 		    int dheight, int dwidth, int kw, int kh,
562 		    float *hValues, float *vValues)
563 {
564   vImage_Buffer src = { srcData + srcScanlineOffset + srcScanlineStride * dstScanlineOffset / dstScanlineStride, dheight, dwidth, srcScanlineStride };
565   vImage_Buffer dst = { dstData + dstScanlineOffset, dheight, dwidth, dstScanlineStride };
566 
567   short kernel[kw * kh];
568 
569   int rowOffset = 0;
570   for (int i = 0; i < kw; i++) {
571     float vValue = vValues[i];
572     for (int j = 0; j < kh; j++) {
573       kernel[rowOffset+j] = (short) ((vValue*hValues[j]) * Values<signed short>::maxVal + 0.5);
574     }
575     rowOffset += kh;
576   }
577 
578   vImageConvolve_Planar8(&src, &dst, NULL, 0, 0, kernel, kw, kh, Values<signed short>::maxVal, 0, kvImageEdgeExtend | kvImageDoNotTile);
579 }
580 #endif
581 
582 #ifndef AUTO_DEP
583 #include "javah/com_lightcrafts_jai_opimage_Convolutions.h"
584 #endif
585 
Java_com_lightcrafts_jai_opimage_Convolutions_cByteLoop(JNIEnv * env,jclass cls,jbyteArray src,jbyteArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)586 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cByteLoop
587 (JNIEnv *env, jclass cls,
588  jbyteArray src, jbyteArray dst,
589  jint srcScanlineOffset, jint dstScanlineOffset,
590  jint srcScanlineStride, jint dstScanlineStride,
591  jint srcPixelStride, jint dstPixelStride,
592  jint dheight, jint dwidth, jint kw, jint kh,
593  jfloatArray jhValues, jfloatArray jvValues)
594 {
595   byte *srcData = (byte *) env->GetPrimitiveArrayCritical(src, 0);
596   byte *dstData = (byte *) env->GetPrimitiveArrayCritical(dst, 0);
597   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
598   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
599   // convolveBandsLoop<byte, 3>(srcData, dstData,
600   convolveLoop(srcData, dstData,
601 	       srcScanlineOffset, dstScanlineOffset,
602 	       srcScanlineStride, dstScanlineStride,
603 	       srcPixelStride, dstPixelStride,
604 	       dheight, dwidth, kw, kh, hValues, vValues);
605   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
606   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
607   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
608   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
609 }
610 
Java_com_lightcrafts_jai_opimage_Convolutions_cShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)611 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cShortLoop
612 (JNIEnv *env, jclass cls,
613  jshortArray src, jshortArray dst,
614  jint srcScanlineOffset, jint dstScanlineOffset,
615  jint srcScanlineStride, jint dstScanlineStride,
616  jint srcPixelStride, jint dstPixelStride,
617  jint dheight, jint dwidth, jint kw, jint kh,
618  jfloatArray jhValues, jfloatArray jvValues)
619 {
620   short *srcData = (short *) env->GetPrimitiveArrayCritical(src, 0);
621   short *dstData = (short *) env->GetPrimitiveArrayCritical(dst, 0);
622   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
623   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
624 
625   convolveLoop(srcData, dstData,
626 	       srcScanlineOffset, dstScanlineOffset,
627 	       srcScanlineStride, dstScanlineStride,
628 	       srcPixelStride, dstPixelStride,
629 	       dheight, dwidth, kw, kh, hValues, vValues);
630 
631   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
632   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
633   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
634   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
635 }
636 
Java_com_lightcrafts_jai_opimage_Convolutions_cUShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)637 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cUShortLoop
638 (JNIEnv *env, jclass cls,
639  jshortArray src, jshortArray dst,
640  jint srcScanlineOffset, jint dstScanlineOffset,
641  jint srcScanlineStride, jint dstScanlineStride,
642  jint srcPixelStride, jint dstPixelStride,
643  jint dheight, jint dwidth, jint kw, jint kh,
644  jfloatArray jhValues, jfloatArray jvValues)
645 {
646   ushort *srcData = (ushort *) env->GetPrimitiveArrayCritical(src, 0);
647   ushort *dstData = (ushort *) env->GetPrimitiveArrayCritical(dst, 0);
648   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
649   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
650 
651   // convolveBandsLoop<ushort, 3>(srcData, dstData,
652   convolveLoop(srcData, dstData,
653 	       srcScanlineOffset, dstScanlineOffset,
654 	       srcScanlineStride, dstScanlineStride,
655 	       srcPixelStride, dstPixelStride,
656 	       dheight, dwidth, kw, kh, hValues, vValues);
657 
658   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
659   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
660   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
661   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
662 }
663 
Java_com_lightcrafts_jai_opimage_Convolutions_cIntLoop(JNIEnv * env,jclass cls,jintArray src,jintArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)664 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cIntLoop
665 (JNIEnv *env, jclass cls,
666  jintArray src, jintArray dst,
667  jint srcScanlineOffset, jint dstScanlineOffset,
668  jint srcScanlineStride, jint dstScanlineStride,
669  jint srcPixelStride, jint dstPixelStride,
670  jint dheight, jint dwidth, jint kw, jint kh,
671  jfloatArray jhValues, jfloatArray jvValues)
672 {
673   int *srcData = (int *) env->GetPrimitiveArrayCritical(src, 0);
674   int *dstData = (int *) env->GetPrimitiveArrayCritical(dst, 0);
675   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
676   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
677 
678   convolveLoop(srcData, dstData,
679 	       srcScanlineOffset, dstScanlineOffset,
680 	       srcScanlineStride, dstScanlineStride,
681 	       srcPixelStride, dstPixelStride,
682 	       dheight, dwidth, kw, kh, hValues, vValues);
683 
684   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
685   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
686   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
687   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
688 }
689 
Java_com_lightcrafts_jai_opimage_Convolutions_cFloatLoop(JNIEnv * env,jclass cls,jfloatArray src,jfloatArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)690 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cFloatLoop
691 (JNIEnv *env, jclass cls,
692  jfloatArray src, jfloatArray dst,
693  jint srcScanlineOffset, jint dstScanlineOffset,
694  jint srcScanlineStride, jint dstScanlineStride,
695  jint srcPixelStride, jint dstPixelStride,
696  jint dheight, jint dwidth, jint kw, jint kh,
697  jfloatArray jhValues, jfloatArray jvValues)
698 {
699   float *srcData = (float *) env->GetPrimitiveArrayCritical(src, 0);
700   float *dstData = (float *) env->GetPrimitiveArrayCritical(dst, 0);
701   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
702   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
703 
704   convolveLoop(srcData, dstData,
705 	       srcScanlineOffset, dstScanlineOffset,
706 	       srcScanlineStride, dstScanlineStride,
707 	       srcPixelStride, dstPixelStride,
708 	       dheight, dwidth, kw, kh, hValues, vValues);
709 
710   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
711   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
712   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
713   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
714 }
715 
Java_com_lightcrafts_jai_opimage_Convolutions_cDoubleLoop(JNIEnv * env,jclass cls,jdoubleArray src,jdoubleArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)716 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cDoubleLoop
717 (JNIEnv *env, jclass cls,
718  jdoubleArray src, jdoubleArray dst,
719  jint srcScanlineOffset, jint dstScanlineOffset,
720  jint srcScanlineStride, jint dstScanlineStride,
721  jint srcPixelStride, jint dstPixelStride,
722  jint dheight, jint dwidth, jint kw, jint kh,
723  jfloatArray jhValues, jfloatArray jvValues)
724 {
725   double *srcData = (double *) env->GetPrimitiveArrayCritical(src, 0);
726   double *dstData = (double *) env->GetPrimitiveArrayCritical(dst, 0);
727   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
728   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
729 
730   convolveLoop(srcData, dstData,
731 	       srcScanlineOffset, dstScanlineOffset,
732 	       srcScanlineStride, dstScanlineStride,
733 	       srcPixelStride, dstPixelStride,
734 	       dheight, dwidth, kw, kh, hValues, vValues);
735 
736   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
737   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
738   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
739   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
740 }
741 
742 // three colors interleaved special version
743 
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ByteLoop(JNIEnv * env,jclass cls,jbyteArray src,jbyteArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)744 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ByteLoop
745 (JNIEnv *env, jclass cls,
746  jbyteArray src, jbyteArray dst,
747  jint srcScanlineOffset, jint dstScanlineOffset,
748  jint srcScanlineStride, jint dstScanlineStride,
749  jint dheight, jint dwidth, jint kw, jint kh,
750  jfloatArray jhValues, jfloatArray jvValues)
751 {
752   byte *srcData = (byte *) env->GetPrimitiveArrayCritical(src, 0);
753   byte *dstData = (byte *) env->GetPrimitiveArrayCritical(dst, 0);
754   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
755   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
756   convolveBandsLoop<byte, 3>(srcData, dstData,
757 			     srcScanlineOffset, dstScanlineOffset,
758 			     srcScanlineStride, dstScanlineStride,
759 			     dheight, dwidth, kw, kh, hValues, vValues);
760   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
761   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
762   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
763   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
764 }
765 
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)766 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ShortLoop
767 (JNIEnv *env, jclass cls,
768  jshortArray src, jshortArray dst,
769  jint srcScanlineOffset, jint dstScanlineOffset,
770  jint srcScanlineStride, jint dstScanlineStride,
771  jint dheight, jint dwidth, jint kw, jint kh,
772  jfloatArray jhValues, jfloatArray jvValues)
773 {
774   short *srcData = (short *) env->GetPrimitiveArrayCritical(src, 0);
775   short *dstData = (short *) env->GetPrimitiveArrayCritical(dst, 0);
776   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
777   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
778 
779   convolveBandsLoop<short, 3>(srcData, dstData,
780 			      srcScanlineOffset, dstScanlineOffset,
781 			      srcScanlineStride, dstScanlineStride,
782 			      dheight, dwidth, kw, kh, hValues, vValues);
783 
784   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
785   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
786   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
787   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
788 }
789 
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3UShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)790 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3UShortLoop
791 (JNIEnv *env, jclass cls,
792  jshortArray src, jshortArray dst,
793  jint srcScanlineOffset, jint dstScanlineOffset,
794  jint srcScanlineStride, jint dstScanlineStride,
795  jint dheight, jint dwidth, jint kw, jint kh,
796  jfloatArray jhValues, jfloatArray jvValues)
797 {
798   ushort *srcData = (ushort *) env->GetPrimitiveArrayCritical(src, 0);
799   ushort *dstData = (ushort *) env->GetPrimitiveArrayCritical(dst, 0);
800   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
801   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
802 
803   convolveBandsLoop<ushort, 3>(srcData, dstData,
804 			       srcScanlineOffset, dstScanlineOffset,
805 			       srcScanlineStride, dstScanlineStride,
806 			       dheight, dwidth, kw, kh, hValues, vValues);
807 
808   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
809   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
810   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
811   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
812 }
813 
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3IntLoop(JNIEnv * env,jclass cls,jintArray src,jintArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)814 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3IntLoop
815 (JNIEnv *env, jclass cls,
816  jintArray src, jintArray dst,
817  jint srcScanlineOffset, jint dstScanlineOffset,
818  jint srcScanlineStride, jint dstScanlineStride,
819  jint dheight, jint dwidth, jint kw, jint kh,
820  jfloatArray jhValues, jfloatArray jvValues)
821 {
822   int *srcData = (int *) env->GetPrimitiveArrayCritical(src, 0);
823   int *dstData = (int *) env->GetPrimitiveArrayCritical(dst, 0);
824   float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
825   float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
826 
827   convolveBandsLoop<int, 3>(srcData, dstData,
828 			    srcScanlineOffset, dstScanlineOffset,
829 			    srcScanlineStride, dstScanlineStride,
830 			    dheight, dwidth, kw, kh, hValues, vValues);
831 
832   env->ReleasePrimitiveArrayCritical(src, srcData, 0);
833   env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
834   env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
835   env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
836 }
837