1 /* Copyright (C) 2005-2011 Fabio Riccardi */
2
#include <string.h>
#include <new>
4
5 typedef unsigned char byte;
6 typedef unsigned short ushort;
7
8 #define abs(x) ({typeof(x) _x = x; _x >= 0 ? _x : -_x;})
9
10 #define max(a,b) \
11 ({ typeof (a) _a = (a); \
12 typeof (b) _b = (b); \
13 _a > _b ? _a : _b; })
14
15 #define min(a,b) \
16 ({ typeof (a) _a = (a); \
17 typeof (b) _b = (b); \
18 _a < _b ? _a : _b; })
19
20 template <typename T> struct Values {
21 static const T maxVal;
22 };
23
24 template<> const byte Values<byte>::maxVal = 0xFF;
25 template<> const ushort Values<ushort>::maxVal = 0xFFFF;
26 template<> const signed char Values<signed char>::maxVal = 0x7F;
27 template<> const signed short Values<signed short>::maxVal = 0x7FFF;
28 template<> const unsigned int Values<unsigned int>::maxVal = 0xFFFFFFFF;
29 template<> const int Values<int>::maxVal = 0x7FFFFFFF;
30 template<> const float Values<float>::maxVal = 1.0;
31 template<> const double Values<double>::maxVal = 1.0;
32
33 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
34 #include <altivec.h>
35
TurnJavaModeOff(vector unsigned int * oldJavaMode)36 static void TurnJavaModeOff( vector unsigned int *oldJavaMode ) {
37 vector unsigned int javaOffMask = ( vector unsigned int ) { 0x00010000 };
38 vector unsigned int java;
39 *oldJavaMode = ( vector unsigned int ) vec_mfvscr ( );
40 java = vec_or ( *oldJavaMode, javaOffMask );
41 vec_mtvscr ( java );
42 }
43
RestoreJavaMode(vector unsigned int * oldJavaMode)44 static void RestoreJavaMode( vector unsigned int *oldJavaMode ) {
45 vec_mtvscr ( *oldJavaMode );
46 }
47
loadUnalignedChar(unsigned char * target)48 static inline vector unsigned char loadUnalignedChar( unsigned char *target )
49 {
50 vector unsigned char MSQ, LSQ;
51 vector unsigned char mask;
52
53 MSQ = vec_ld(0, target); // most significant quadword
54 LSQ = vec_ld(15, target); // least significant quadword
55 mask = vec_lvsl(0, target); // create the permute mask
56 return vec_perm(MSQ, LSQ, mask); // align the data
57 }
58
loadUnalignedShort(unsigned short * target)59 static inline vector unsigned short loadUnalignedShort( unsigned short *target )
60 {
61 vector unsigned short MSQ, LSQ;
62 vector unsigned char mask;
63
64 MSQ = vec_ld(0, target); // most significant quadword
65 LSQ = vec_ld(15, target); // least significant quadword
66 mask = vec_lvsl(0, target); // create the permute mask
67 return vec_perm(MSQ, LSQ, mask); // align the data
68 }
69
loadUnalignedInt(unsigned int * target)70 static inline vector unsigned int loadUnalignedInt( unsigned int *target )
71 {
72 vector unsigned int MSQ, LSQ;
73 vector unsigned char mask;
74
75 MSQ = vec_ld(0, target); // most significant quadword
76 LSQ = vec_ld(15, target); // least significant quadword
77 mask = vec_lvsl(0, target); // create the permute mask
78 return vec_perm(MSQ, LSQ, mask); // align the data
79 }
80
loadUnalignedFloat(float * target)81 static inline vector float loadUnalignedFloat( float *target )
82 {
83 vector float MSQ, LSQ;
84 vector unsigned char mask;
85
86 MSQ = vec_ld(0, target); // most significant quadword
87 LSQ = vec_ld(15, target); // least significant quadword
88 mask = vec_lvsl(0, target); // create the permute mask
89 return vec_perm(MSQ, LSQ, mask); // align the data
90 }
91
92 template< typename T >
93 static inline vector float loadFloatPixels(T* data) __attribute__ ((always_inline));
94
95 template< typename T >
loadFloatPixels(T * data)96 static inline vector float loadFloatPixels(T* data) {
97 vector unsigned int ll;
98 if (sizeof(T) == 1) {
99 vector unsigned char x = loadUnalignedChar((unsigned char *) data);
100 vector short xx = vec_unpackh((vector char) x);
101 ll = vec_unpackh(xx);
102 ll = vec_and(ll, (vector unsigned int) {0xFF, 0xFF, 0xFF, 0xFF});
103 } else if (sizeof(T) == 2) {
104 vector unsigned short x = loadUnalignedShort((unsigned short *) data);
105 ll = vec_unpackh((vector short) x);
106 ll = vec_and(ll, (vector unsigned int) {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF});
107 } else if (sizeof(T) == 4) {
108 ll = loadUnalignedInt((unsigned int *) data);
109 }
110 return vec_ctf(ll, 0);
111 }
112
113 template< typename T >
114 void storeFloatPixels(T* dest, int bands, vector float data) __attribute__ ((always_inline));
115
116 template< typename T >
storeFloatPixels(T * dest,int bands,vector float data)117 void storeFloatPixels(T* dest, int bands, vector float data) {
118 vector int res = vec_cts(data, 0);
119
120 if (sizeof(T) == 1) {
121 vector unsigned short sres = vec_packsu(res, res);
122 vector unsigned char bres = vec_packsu(sres, sres);
123
124 bres = vec_perm( bres, bres, vec_lvsr( 0, dest ) );
125
126 for (int b = 0; b < bands; b++)
127 vec_ste( bres, b, (unsigned char *) dest );
128 } else if (sizeof(T) == 2) {
129 vector unsigned short sres = vec_packsu(res, res);
130
131 sres = vec_perm( sres, sres, vec_lvsr( 0, dest ) );
132
133 for (int b = 0; b < bands; b++)
134 vec_ste( sres, 2 * b, (unsigned short *) dest );
135 } else if (sizeof(T) == 4) {
136 res = vec_perm( res, res, vec_lvsr( 0, (int *) dest ) );
137
138 for (int b = 0; b < bands; b++)
139 vec_ste( res, 4 * b, (int *) dest );
140 }
141 }
142
143 static inline vector float addAll(vector float t1) __attribute__ ((always_inline));
144
addAll(vector float t1)145 static inline vector float addAll(vector float t1) {
146 t1 = vec_add(t1, vec_sld(t1, t1, 8));
147 return vec_add(t1, vec_sld(t1, t1, 4));
148 }
149
/*
 * vec_perm selectors. Wn expands to the four byte indices of 32-bit word n
 * in the 32-byte concatenation of the two vec_perm sources: W0..W3 pick a
 * word of the first source, W4..W7 a word of the second.
 */
#define LC_WORD_BYTES(n) (4 * (n)), (4 * (n) + 1), (4 * (n) + 2), (4 * (n) + 3)

#define W0 LC_WORD_BYTES(0)
#define W1 LC_WORD_BYTES(1)
#define W2 LC_WORD_BYTES(2)
#define W3 LC_WORD_BYTES(3)
#define W4 LC_WORD_BYTES(4)
#define W5 LC_WORD_BYTES(5)
#define W6 LC_WORD_BYTES(6)
#define W7 LC_WORD_BYTES(7)
158
loadKernelElem(int pos,float * kernel)159 vector float loadKernelElem(int pos, float *kernel) {
160 float *ptr = &kernel[pos];
161 vector float vv = vec_lde( 0, ptr );
162 vector unsigned char moveToStart = vec_lvsl( 0, ptr );
163 vv = vec_perm( vv, vv, moveToStart );
164 vv = vec_splat( vv, 0 );
165 return vv;
166 }
167
168 template< typename T >
conv_line0(T * data,float * kernel,int kw)169 vector float conv_line0(T *data, float *kernel, int kw) {
170 vector float ff = (vector float) vec_splat_u32(0);
171
172 for (int v = 0; v < kw; v++) {
173 vector float hvv = loadKernelElem(v, kernel);
174
175 vector float ss = loadFloatPixels(&data[3 * v]);
176
177 ff = vec_madd(hvv, ss, ff);
178 }
179 return ff;
180 }
181
182 template< typename T >
conv_line3(T * data,float * kernel,int kw)183 vector float conv_line3(T *data, float *kernel, int kw) {
184 vector float ff;
185
186 if (kw/4 > 0) {
187 vector float fr, fg, fb;
188
189 fr = fg = fb = (vector float) vec_splat_u32(0);
190
191 for (int v = 0; v < kw/4; v++) {
192 vector float hv = vec_ld(0, &kernel[4 * v]);
193
194 vector float ss1 = loadFloatPixels(&data[3 * 4 * v]);
195 vector float ss2 = loadFloatPixels(&data[3 * 4 * v + 4]);
196 vector float ss3 = loadFloatPixels(&data[3 * 4 * v + 8]);
197
198 vector float ssa, ssb, ssc;
199 ssa = vec_perm(ss1, ss2, (vector unsigned char) {W0, W3, W6, W2});
200 ssb = vec_perm(ss1, ss2, (vector unsigned char) {W1, W4, W7, W5});
201 ss1 = vec_perm(ssa, ss3, (vector unsigned char) {W0, W1, W2, W5});
202 fr = vec_madd(hv, ss1, fr);
203 ssc = vec_perm(ssa, ss3, (vector unsigned char) {W3, W6, W4, W7});
204 ss2 = vec_perm(ssb, ss3, (vector unsigned char) {W0, W1, W2, W6});
205 fg = vec_madd(hv, ss2, fg);
206 ss3 = vec_perm(ssc, ssb, (vector unsigned char) {W0, W7, W2, W3});
207 fb = vec_madd(hv, ss3, fb);
208 }
209
210 fr = addAll(fr);
211 fg = addAll(fg);
212 fb = addAll(fb);
213
214 ff = vec_perm(fr, fg, (vector unsigned char) {W0, W4, W1, W2});
215 ff = vec_perm(ff, fb, (vector unsigned char) {W0, W1, W4, W2});
216 } else
217 ff = (vector float) vec_splat_u32(0);
218
219 // we have to make an extra step of the computation for the last element of the kernel...
220
221 for (int v = 4*(kw/4); v < kw; v++) {
222 vector float hvv = loadKernelElem(v, kernel);
223
224 vector float ss = loadFloatPixels(&data[3*v]);
225
226 ff = vec_madd(hvv, ss, ff);
227 }
228
229 return ff;
230 }
231
232 template< typename T >
conv_line1(T * data,float * kernel,int kw)233 float conv_line1(T *data, float *kernel, int kw) {
234 float f __attribute__ ((aligned (16)));
235
236 if (kw/4 > 0) {
237 vector float ff = (vector float) vec_splat_u32(0);
238
239 for (int v = 0; v < kw/4; v++) {
240 vector float hv = vec_ld(0, &kernel[4 * v]);
241 vector float ss = loadFloatPixels(&data[4 * v]);
242 ff = vec_madd(hv, ss, ff);
243 }
244
245 ff = addAll(ff);
246 vec_ste(ff, 0, &f);
247 } else
248 f = 0;
249
250 // we have to make an extra step of the computation for the last element of the kernel...
251
252 for (int v = 4*(kw/4); v < kw; v++) {
253 f += data[v] * kernel[v];
254 }
255
256 return f;
257 }
258
259 #endif
260
261
262 template< typename T, int bands >
263 static
convolveBandsLoop(T * srcData,T * dstData,int srcScanlineOffset,int dstScanlineOffset,int srcScanlineStride,int dstScanlineStride,int dheight,int dwidth,int kw,int kh,float * hValues,float * vValues)264 void convolveBandsLoop(T *srcData, T *dstData, int srcScanlineOffset, int dstScanlineOffset,
265 int srcScanlineStride, int dstScanlineStride,
266 int dheight, int dwidth, int kw, int kh,
267 float *hValues, float *vValues)
268 {
269 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
270 vector unsigned int oldJavaMode;
271 TurnJavaModeOff( &oldJavaMode );
272 #endif
273
274 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
275 float *tmpBuffer = new float[kh*dwidth*4]; // waste some space but go faster...
276 #else
277 float *tmpBuffer = new float[kh*dwidth*bands];
278 #endif
279 if (tmpBuffer == NULL)
280 return; // Do no evil...
281 int tmpBufferSize = kh*dwidth;
282
283 float hkernel[kw] __attribute__ ((aligned (16)));
284 float vkernel[kh] __attribute__ ((aligned (16)));
285
286 memcpy(hkernel, hValues, kw * sizeof(float));
287 memcpy(vkernel, vValues, kh * sizeof(float));
288
289 int revolver = 0;
290 int kvRevolver = 0; // to match kernel vkernel
291 for (int j = 0; j < kh-1; j++) {
292 int srcPixelOffset = srcScanlineOffset;
293 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
294 for (int i = 0; i < dwidth; i++) {
295 vector float ff = conv_line3(&srcData[srcPixelOffset], hkernel, kw);
296 vec_st(ff, 0, &tmpBuffer[4*(revolver+i)]);
297 srcPixelOffset += bands;
298 }
299 #else
300 for (int i = 0; i < dwidth; i++) {
301 float f[bands];
302 for (int b = 0; b < bands; b++)
303 f[b] = 0.0;
304
305 for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset+=bands) {
306 float hv = hkernel[v];
307 for (int b= 0; b < bands; b++) {
308 f[b] += srcData[imageOffset+b] * hv;
309 }
310 }
311
312 for (int b= 0; b < bands; b++)
313 tmpBuffer[bands*(revolver+i) + b] = f[b];
314
315 srcPixelOffset += bands;
316 }
317 #endif
318 revolver += dwidth;
319 srcScanlineOffset += srcScanlineStride;
320 }
321
322 // srcScanlineStride already bumped by
323 // kh-1*scanlineStride
324 for (int j = 0; j < dheight; j++) {
325 for (int i = 0, srcPixelOffset = srcScanlineOffset, dstPixelOffset = dstScanlineOffset;
326 i < dwidth;
327 i++, srcPixelOffset += bands, dstPixelOffset += bands) {
328 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
329 vector float ff = conv_line3(&srcData[srcPixelOffset], hkernel, kw);
330
331 vec_st(ff, 0, (vector float *)(&tmpBuffer[4*(revolver+i)]));
332 ff = (vector float) {0.5f, 0.5f, 0.5f};
333 #else
334 float f[bands];
335 for (int b = 0; b < bands; b++)
336 f[b] = 0.0;
337
338 for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset+=bands) {
339 float hv = hkernel[v];
340 for (int b= 0; b < bands; b++) {
341 f[b] += srcData[imageOffset+b] * hv;
342 }
343 }
344
345 for (int b = 0; b < bands; b++) {
346 tmpBuffer[bands*(revolver+i) + b] = f[b];
347 f[b] = 0.5;
348 }
349 #endif
350
351 // The vertical kernel must revolve as well
352 int b = kvRevolver + i;
353 for (int a=0; a < kh; a++) {
354 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
355 vector float vv = loadKernelElem(a, vkernel);
356 // vector float ss = loadUnalignedFloat(&tmpBuffer[bands*b]);
357 vector float ss = vec_ld(0, (vector float *)(&tmpBuffer[4*b]));
358
359 ff = vec_madd(vv, ss, ff);
360 #else
361 float vv = vkernel[a];
362 for (int c = 0; c < bands; c++)
363 f[c] += tmpBuffer[bands*b + c] * vv;
364 #endif
365 b += dwidth;
366 if (b >= tmpBufferSize) b -= tmpBufferSize;
367 }
368
369 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
370 storeFloatPixels(&dstData[dstPixelOffset], bands, ff);
371 #else
372 for (int b = 0; b < bands; b++) {
373 float res = f[b];
374 if (res < 0) {
375 res = 0;
376 } else if (res > Values<T>::maxVal) {
377 res = Values<T>::maxVal;
378 }
379
380 dstData[dstPixelOffset+b] = (T) res;
381 }
382 #endif
383 }
384
385 revolver += dwidth;
386 if (revolver == tmpBufferSize) {
387 revolver = 0;
388 }
389 kvRevolver += dwidth;
390 if (kvRevolver == tmpBufferSize) {
391 kvRevolver = 0;
392 }
393 srcScanlineOffset += srcScanlineStride;
394 dstScanlineOffset += dstScanlineStride;
395 }
396 delete[] tmpBuffer;
397 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
398 RestoreJavaMode(&oldJavaMode);
399 #endif
400 }
401
402 template< typename T >
403 static
convolveLoop(T * srcData,T * dstData,int srcScanlineOffset,int dstScanlineOffset,int srcScanlineStride,int dstScanlineStride,int srcPixelStride,int dstPixelStride,int dheight,int dwidth,int kw,int kh,float * hValues,float * vValues)404 void convolveLoop(T *srcData, T *dstData, int srcScanlineOffset, int dstScanlineOffset,
405 int srcScanlineStride, int dstScanlineStride,
406 int srcPixelStride, int dstPixelStride,
407 int dheight, int dwidth, int kw, int kh,
408 float *hValues, float *vValues)
409 {
410 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
411 vector unsigned int oldJavaMode;
412 TurnJavaModeOff( &oldJavaMode );
413 #endif
414
415 float *tmpBuffer = new float[kh*dwidth];
416
417 if (tmpBuffer == NULL)
418 return; // Do no evil...
419
420 int tmpBufferSize = kh*dwidth;
421
422 float hkernel[kw] __attribute__ ((aligned (16)));
423 float vkernel[kh] __attribute__ ((aligned (16)));
424
425 memcpy(hkernel, hValues, kw * sizeof(float));
426 memcpy(vkernel, vValues, kh * sizeof(float));
427
428 int revolver = 0;
429 int kvRevolver = 0; // to match kernel vValues
430 for (int j = 0; j < kh-1; j++) {
431 int srcPixelOffset = srcScanlineOffset;
432
433 for (int i = 0; i < dwidth; i++) {
434 float f;
435
436 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
437 if (srcPixelStride == 1)
438 f = conv_line1(&srcData[srcPixelOffset], hkernel, kw);
439 else
440 #endif
441 {
442 f = 0.0f;
443 for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset += srcPixelStride)
444 f += srcData[imageOffset] * hkernel[v];
445 }
446 tmpBuffer[revolver+i] = f;
447 srcPixelOffset += srcPixelStride;
448 }
449 revolver += dwidth;
450 srcScanlineOffset += srcScanlineStride;
451 }
452
453 const float fmaxVal = (float) Values<T>::maxVal;
454
455 // srcScanlineStride already bumped by
456 // kh-1*scanlineStride
457 for (int j = 0; j < dheight; j++) {
458 int srcPixelOffset = srcScanlineOffset;
459 int dstPixelOffset = dstScanlineOffset;
460
461 for (int i = 0; i < dwidth; i++) {
462
463 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
464 union {
465 vector float ff;
466 float fa[4];
467 float f;
468 };
469 #else
470 float f;
471 #endif
472
473 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
474 if (srcPixelStride == 1)
475 f = conv_line1(&srcData[srcPixelOffset], hkernel, kw);
476 else
477 #endif
478 {
479 f = 0.0f;
480 for (int v = 0, imageOffset = srcPixelOffset; v < kw; v++, imageOffset += srcPixelStride)
481 f += srcData[imageOffset] * hkernel[v];
482 }
483 tmpBuffer[revolver + i] = f;
484
485 // A bug in gcc 4.0 causes the following code to "drop pixels", check forthcoming gcc4 updates...
486
487 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
488 if (srcPixelStride == 1 && kh/4 != 0) {
489 ff = (vector float) {0.5f, 0, 0, 0};
490 // The vertical kernel must revolve as well
491 int b = kvRevolver + i;
492 for (int a = 0; a < kh/4; a++) {
493 union {
494 vector float tvb;
495 float ftb[4];
496 };
497 for (int i = 0; i < 4; i++) {
498 ftb[i] = tmpBuffer[b];
499 b += dwidth;
500 if (b >= tmpBufferSize) b -= tmpBufferSize;
501 }
502 vector float vv = vec_ld(0, &vkernel[4 * a]); // loadUnalignedFloat(&vkernel[4 * a]);
503 ff = vec_madd(vv, tvb, ff);
504 }
505
506 ff = addAll(ff);
507
508 for (int a = 4*(kh/4); a < kh; a++){
509 f += tmpBuffer[b] * vkernel[a];
510 b += dwidth;
511 if (b >= tmpBufferSize) b -= tmpBufferSize;
512 }
513 } else
514 #endif
515 {
516 f = 0.5f;
517 // The vertical kernel must revolve as well
518 int b = kvRevolver + i;
519 for (int a = 0; a < kh; a++){
520 f += tmpBuffer[b] * vkernel[a];
521 b += dwidth;
522 if (b >= tmpBufferSize) b -= tmpBufferSize;
523 }
524 }
525 if (f < 0.0f) {
526 f = 0.0f;
527 } else if (f > fmaxVal) {
528 f = fmaxVal;
529 }
530
531 dstData[dstPixelOffset] = (T) f;
532 srcPixelOffset += srcPixelStride;
533 dstPixelOffset += dstPixelStride;
534 }
535
536 revolver += dwidth;
537 if (revolver == tmpBufferSize) {
538 revolver = 0;
539 }
540 kvRevolver += dwidth;
541 if (kvRevolver == tmpBufferSize) {
542 kvRevolver = 0;
543 }
544 srcScanlineOffset += srcScanlineStride;
545 dstScanlineOffset += dstScanlineStride;
546 }
547 delete[] tmpBuffer;
548 #if defined( __POWERPC__ ) && defined( LC_USE_ALTIVEC )
549 RestoreJavaMode(&oldJavaMode);
550 #endif
551 }
552
553 // #define VIMAGE
554
555 #ifdef VIMAGE
556 #include "Convolution.h"
557
convolveAVLoop(byte * srcData,byte * dstData,int srcScanlineOffset,int dstScanlineOffset,int srcScanlineStride,int dstScanlineStride,int srcPixelStride,int dstPixelStride,int dheight,int dwidth,int kw,int kh,float * hValues,float * vValues)558 void convolveAVLoop(byte *srcData, byte *dstData, int srcScanlineOffset, int dstScanlineOffset,
559 int srcScanlineStride, int dstScanlineStride,
560 int srcPixelStride, int dstPixelStride,
561 int dheight, int dwidth, int kw, int kh,
562 float *hValues, float *vValues)
563 {
564 vImage_Buffer src = { srcData + srcScanlineOffset + srcScanlineStride * dstScanlineOffset / dstScanlineStride, dheight, dwidth, srcScanlineStride };
565 vImage_Buffer dst = { dstData + dstScanlineOffset, dheight, dwidth, dstScanlineStride };
566
567 short kernel[kw * kh];
568
569 int rowOffset = 0;
570 for (int i = 0; i < kw; i++) {
571 float vValue = vValues[i];
572 for (int j = 0; j < kh; j++) {
573 kernel[rowOffset+j] = (short) ((vValue*hValues[j]) * Values<signed short>::maxVal + 0.5);
574 }
575 rowOffset += kh;
576 }
577
578 vImageConvolve_Planar8(&src, &dst, NULL, 0, 0, kernel, kw, kh, Values<signed short>::maxVal, 0, kvImageEdgeExtend | kvImageDoNotTile);
579 }
580 #endif
581
582 #ifndef AUTO_DEP
583 #include "javah/com_lightcrafts_jai_opimage_Convolutions.h"
584 #endif
585
Java_com_lightcrafts_jai_opimage_Convolutions_cByteLoop(JNIEnv * env,jclass cls,jbyteArray src,jbyteArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)586 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cByteLoop
587 (JNIEnv *env, jclass cls,
588 jbyteArray src, jbyteArray dst,
589 jint srcScanlineOffset, jint dstScanlineOffset,
590 jint srcScanlineStride, jint dstScanlineStride,
591 jint srcPixelStride, jint dstPixelStride,
592 jint dheight, jint dwidth, jint kw, jint kh,
593 jfloatArray jhValues, jfloatArray jvValues)
594 {
595 byte *srcData = (byte *) env->GetPrimitiveArrayCritical(src, 0);
596 byte *dstData = (byte *) env->GetPrimitiveArrayCritical(dst, 0);
597 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
598 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
599 // convolveBandsLoop<byte, 3>(srcData, dstData,
600 convolveLoop(srcData, dstData,
601 srcScanlineOffset, dstScanlineOffset,
602 srcScanlineStride, dstScanlineStride,
603 srcPixelStride, dstPixelStride,
604 dheight, dwidth, kw, kh, hValues, vValues);
605 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
606 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
607 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
608 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
609 }
610
Java_com_lightcrafts_jai_opimage_Convolutions_cShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)611 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cShortLoop
612 (JNIEnv *env, jclass cls,
613 jshortArray src, jshortArray dst,
614 jint srcScanlineOffset, jint dstScanlineOffset,
615 jint srcScanlineStride, jint dstScanlineStride,
616 jint srcPixelStride, jint dstPixelStride,
617 jint dheight, jint dwidth, jint kw, jint kh,
618 jfloatArray jhValues, jfloatArray jvValues)
619 {
620 short *srcData = (short *) env->GetPrimitiveArrayCritical(src, 0);
621 short *dstData = (short *) env->GetPrimitiveArrayCritical(dst, 0);
622 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
623 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
624
625 convolveLoop(srcData, dstData,
626 srcScanlineOffset, dstScanlineOffset,
627 srcScanlineStride, dstScanlineStride,
628 srcPixelStride, dstPixelStride,
629 dheight, dwidth, kw, kh, hValues, vValues);
630
631 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
632 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
633 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
634 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
635 }
636
Java_com_lightcrafts_jai_opimage_Convolutions_cUShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)637 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cUShortLoop
638 (JNIEnv *env, jclass cls,
639 jshortArray src, jshortArray dst,
640 jint srcScanlineOffset, jint dstScanlineOffset,
641 jint srcScanlineStride, jint dstScanlineStride,
642 jint srcPixelStride, jint dstPixelStride,
643 jint dheight, jint dwidth, jint kw, jint kh,
644 jfloatArray jhValues, jfloatArray jvValues)
645 {
646 ushort *srcData = (ushort *) env->GetPrimitiveArrayCritical(src, 0);
647 ushort *dstData = (ushort *) env->GetPrimitiveArrayCritical(dst, 0);
648 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
649 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
650
651 // convolveBandsLoop<ushort, 3>(srcData, dstData,
652 convolveLoop(srcData, dstData,
653 srcScanlineOffset, dstScanlineOffset,
654 srcScanlineStride, dstScanlineStride,
655 srcPixelStride, dstPixelStride,
656 dheight, dwidth, kw, kh, hValues, vValues);
657
658 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
659 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
660 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
661 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
662 }
663
Java_com_lightcrafts_jai_opimage_Convolutions_cIntLoop(JNIEnv * env,jclass cls,jintArray src,jintArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)664 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cIntLoop
665 (JNIEnv *env, jclass cls,
666 jintArray src, jintArray dst,
667 jint srcScanlineOffset, jint dstScanlineOffset,
668 jint srcScanlineStride, jint dstScanlineStride,
669 jint srcPixelStride, jint dstPixelStride,
670 jint dheight, jint dwidth, jint kw, jint kh,
671 jfloatArray jhValues, jfloatArray jvValues)
672 {
673 int *srcData = (int *) env->GetPrimitiveArrayCritical(src, 0);
674 int *dstData = (int *) env->GetPrimitiveArrayCritical(dst, 0);
675 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
676 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
677
678 convolveLoop(srcData, dstData,
679 srcScanlineOffset, dstScanlineOffset,
680 srcScanlineStride, dstScanlineStride,
681 srcPixelStride, dstPixelStride,
682 dheight, dwidth, kw, kh, hValues, vValues);
683
684 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
685 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
686 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
687 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
688 }
689
Java_com_lightcrafts_jai_opimage_Convolutions_cFloatLoop(JNIEnv * env,jclass cls,jfloatArray src,jfloatArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)690 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cFloatLoop
691 (JNIEnv *env, jclass cls,
692 jfloatArray src, jfloatArray dst,
693 jint srcScanlineOffset, jint dstScanlineOffset,
694 jint srcScanlineStride, jint dstScanlineStride,
695 jint srcPixelStride, jint dstPixelStride,
696 jint dheight, jint dwidth, jint kw, jint kh,
697 jfloatArray jhValues, jfloatArray jvValues)
698 {
699 float *srcData = (float *) env->GetPrimitiveArrayCritical(src, 0);
700 float *dstData = (float *) env->GetPrimitiveArrayCritical(dst, 0);
701 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
702 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
703
704 convolveLoop(srcData, dstData,
705 srcScanlineOffset, dstScanlineOffset,
706 srcScanlineStride, dstScanlineStride,
707 srcPixelStride, dstPixelStride,
708 dheight, dwidth, kw, kh, hValues, vValues);
709
710 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
711 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
712 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
713 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
714 }
715
Java_com_lightcrafts_jai_opimage_Convolutions_cDoubleLoop(JNIEnv * env,jclass cls,jdoubleArray src,jdoubleArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint srcPixelStride,jint dstPixelStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)716 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cDoubleLoop
717 (JNIEnv *env, jclass cls,
718 jdoubleArray src, jdoubleArray dst,
719 jint srcScanlineOffset, jint dstScanlineOffset,
720 jint srcScanlineStride, jint dstScanlineStride,
721 jint srcPixelStride, jint dstPixelStride,
722 jint dheight, jint dwidth, jint kw, jint kh,
723 jfloatArray jhValues, jfloatArray jvValues)
724 {
725 double *srcData = (double *) env->GetPrimitiveArrayCritical(src, 0);
726 double *dstData = (double *) env->GetPrimitiveArrayCritical(dst, 0);
727 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
728 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
729
730 convolveLoop(srcData, dstData,
731 srcScanlineOffset, dstScanlineOffset,
732 srcScanlineStride, dstScanlineStride,
733 srcPixelStride, dstPixelStride,
734 dheight, dwidth, kw, kh, hValues, vValues);
735
736 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
737 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
738 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
739 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
740 }
741
// Specialized entry points for images with three pixel-interleaved bands
743
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ByteLoop(JNIEnv * env,jclass cls,jbyteArray src,jbyteArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)744 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ByteLoop
745 (JNIEnv *env, jclass cls,
746 jbyteArray src, jbyteArray dst,
747 jint srcScanlineOffset, jint dstScanlineOffset,
748 jint srcScanlineStride, jint dstScanlineStride,
749 jint dheight, jint dwidth, jint kw, jint kh,
750 jfloatArray jhValues, jfloatArray jvValues)
751 {
752 byte *srcData = (byte *) env->GetPrimitiveArrayCritical(src, 0);
753 byte *dstData = (byte *) env->GetPrimitiveArrayCritical(dst, 0);
754 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
755 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
756 convolveBandsLoop<byte, 3>(srcData, dstData,
757 srcScanlineOffset, dstScanlineOffset,
758 srcScanlineStride, dstScanlineStride,
759 dheight, dwidth, kw, kh, hValues, vValues);
760 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
761 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
762 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
763 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
764 }
765
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)766 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3ShortLoop
767 (JNIEnv *env, jclass cls,
768 jshortArray src, jshortArray dst,
769 jint srcScanlineOffset, jint dstScanlineOffset,
770 jint srcScanlineStride, jint dstScanlineStride,
771 jint dheight, jint dwidth, jint kw, jint kh,
772 jfloatArray jhValues, jfloatArray jvValues)
773 {
774 short *srcData = (short *) env->GetPrimitiveArrayCritical(src, 0);
775 short *dstData = (short *) env->GetPrimitiveArrayCritical(dst, 0);
776 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
777 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
778
779 convolveBandsLoop<short, 3>(srcData, dstData,
780 srcScanlineOffset, dstScanlineOffset,
781 srcScanlineStride, dstScanlineStride,
782 dheight, dwidth, kw, kh, hValues, vValues);
783
784 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
785 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
786 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
787 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
788 }
789
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3UShortLoop(JNIEnv * env,jclass cls,jshortArray src,jshortArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)790 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3UShortLoop
791 (JNIEnv *env, jclass cls,
792 jshortArray src, jshortArray dst,
793 jint srcScanlineOffset, jint dstScanlineOffset,
794 jint srcScanlineStride, jint dstScanlineStride,
795 jint dheight, jint dwidth, jint kw, jint kh,
796 jfloatArray jhValues, jfloatArray jvValues)
797 {
798 ushort *srcData = (ushort *) env->GetPrimitiveArrayCritical(src, 0);
799 ushort *dstData = (ushort *) env->GetPrimitiveArrayCritical(dst, 0);
800 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
801 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
802
803 convolveBandsLoop<ushort, 3>(srcData, dstData,
804 srcScanlineOffset, dstScanlineOffset,
805 srcScanlineStride, dstScanlineStride,
806 dheight, dwidth, kw, kh, hValues, vValues);
807
808 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
809 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
810 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
811 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
812 }
813
Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3IntLoop(JNIEnv * env,jclass cls,jintArray src,jintArray dst,jint srcScanlineOffset,jint dstScanlineOffset,jint srcScanlineStride,jint dstScanlineStride,jint dheight,jint dwidth,jint kw,jint kh,jfloatArray jhValues,jfloatArray jvValues)814 extern "C" JNIEXPORT void JNICALL Java_com_lightcrafts_jai_opimage_Convolutions_cInterleaved3IntLoop
815 (JNIEnv *env, jclass cls,
816 jintArray src, jintArray dst,
817 jint srcScanlineOffset, jint dstScanlineOffset,
818 jint srcScanlineStride, jint dstScanlineStride,
819 jint dheight, jint dwidth, jint kw, jint kh,
820 jfloatArray jhValues, jfloatArray jvValues)
821 {
822 int *srcData = (int *) env->GetPrimitiveArrayCritical(src, 0);
823 int *dstData = (int *) env->GetPrimitiveArrayCritical(dst, 0);
824 float *hValues = (float *) env->GetPrimitiveArrayCritical(jhValues, 0);
825 float *vValues = (float *) env->GetPrimitiveArrayCritical(jvValues, 0);
826
827 convolveBandsLoop<int, 3>(srcData, dstData,
828 srcScanlineOffset, dstScanlineOffset,
829 srcScanlineStride, dstScanlineStride,
830 dheight, dwidth, kw, kh, hValues, vValues);
831
832 env->ReleasePrimitiveArrayCritical(src, srcData, 0);
833 env->ReleasePrimitiveArrayCritical(dst, dstData, 0);
834 env->ReleasePrimitiveArrayCritical(jhValues, hValues, 0);
835 env->ReleasePrimitiveArrayCritical(jvValues, vValues, 0);
836 }
837