1 
2 /* Integer Multi-Dimensional Interpolation */
3 
4 /*
5  * Copyright 2000 - 2007 Graeme W. Gill
6  * All rights reserved.
7  *
8  * This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :-
9  * see the License.txt file for licencing details.
10  */
11 
12 /*
13  * This is the implementation of the run time code.
14  */
15 
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <math.h>
20 #include <stdarg.h>
21 #include <string.h>
22 
23 #include "imdi.h"
24 #include "imdi_tab.h"
25 #include "imdi_k.h"			/* Declaration of all the kernel functions */
26 
27 #undef VERBOSE
28 #undef VVERBOSE
29 
30 static unsigned int imdi_get_check(imdi *im);
31 static void imdi_reset_check(imdi *im);
32 static void imdi_info(imdi *s, unsigned long *size, int *gres, int *sres);
33 static void imdi_del(imdi *im);
34 static void interp_match(imdi *s, void **outp, int outst, void **inp, int inst,
35                          unsigned int npixels);
36 
37 
38 /* Create a new imdi */
39 /* Return NULL if request is not supported */
40 /* Note that we use the high level pixel layout description to locate */
41 /* a suitable run-time. */
new_imdi(int id,int od,imdi_pixrep in,int in_signed,int * inm,imdi_iprec iprec,imdi_pixrep out,int out_signed,int * outm,int res,imdi_ooptions oopt,unsigned int * checkv,imdi_options opt,void (* input_curves)(void * cntx,double * out_vals,double * in_vals),void (* md_table)(void * cntx,double * out_vals,double * in_vals),void (* output_curves)(void * cntx,double * out_vals,double * in_vals),void * cntx)42 imdi *new_imdi(
43 	int id,				  /* Number of input dimensions */
44 	int od,				  /* Number of output lookup dimensions */
45 	                      /* Number of output channels written = od - no. of oopt skip flags */
46 	imdi_pixrep in,		  /* Input pixel representation */
47 	int in_signed,		  /* Bit flag per channel, NZ if treat as signed */
48 	int *inm,			  /* Input raster channel to callback channel mapping, NULL for none. */
49 	imdi_iprec iprec,	  /* Internal processing precision */
50 	imdi_pixrep out,	  /* Output pixel representation */
51 	int out_signed,		  /* Bit flag per channel, NZ if treat as signed */
52 	int *outm,			  /* Output raster channel to callback channel mapping, NULL for none. */
53 	                      /* Mapping must include skipped channels. */
54 	int res,			  /* Desired table resolution */
55 	imdi_ooptions oopt,   /* Output per channel options (by callback channel) */
56 	unsigned int *checkv, /* Output channel check values (by callback channel, NULL == 0) */
57 	imdi_options opt,	  /* Direction and stride options */
58 
59 	/* Callbacks to lookup the imdi table values. */
60 	/* (Skip output channels are looked up) */
61 	void (*input_curves) (void *cntx, double *out_vals, double *in_vals),
62 	void (*md_table)     (void *cntx, double *out_vals, double *in_vals),
63 	void (*output_curves)(void *cntx, double *out_vals, double *in_vals),
64 	void *cntx		/* Context to callbacks */
65 ) {
66 	int i;
67 	int prec;					/* Target internal precision */
68 	int bk = -1;				/* Best table */
69 	int bfig = 0x7fffffff;		/* Best tables figure of merit */
70 	int bstres = 0;				/* Best tables target stres */
71 	genspec gs;					/* Generation specification */
72 	tabspec ts;					/* Table specifications */
73 	genspec bgs;				/* Best gen spec */
74 	tabspec bts;				/* Best tab spec */
75 	imdi_conv bcnv = conv_none;	/* Best tables conversion flags */
76 	imdi_ooptions Ooopt;		/* oopt re-aranged to correspond to output channel index */
77 
78 	imdi *im;
79 
80 	/* Compute the Output channel index oopt mask */
81 	if (outm == NULL)
82 		Ooopt = oopt;
83 	else {
84 		int ff;
85 		Ooopt = 0;
86 		for (i = 0; i < od; i++) {
87 			ff = OOPTX(oopt,outm[i]);
88 			Ooopt |= OOPT(ff,i);
89 		}
90 	}
91 
92 #ifdef VERBOSE
93 	printf("new_imdi called with id %d, od %d, res %d Ooopt 0x%x, opt 0x%x\n", id, od, res, Ooopt, opt);
94 	printf("about to checking %d kernels\n", no_kfuncs);
95 #endif
96 
97 	/* Figure out the internal precision requested */
98 	COMPUTE_IPREC(prec, in, iprec, out)
99 
100 	/* Zero out the gen and tabspecs, to give diff a place to start */
101 	memset((void *)&gs, 0, sizeof(genspec));
102 	memset((void *)&ts, 0, sizeof(tabspec));
103 
104 	/* The first thing to do is see if there is an available kernel function */
105 	for (i = 0; i < no_kfuncs; i++) {
106 		int stres;					/* Computed stres needed */
107 		imdi_conv cnv = conv_none;	/* Conversions needed for this choice */
108 		int fig;					/* Figure of merit - smaller is better */
109 		ktable[i].gentab(&gs, &ts);	/* Udate the kernel functions genspec and tabspec */
110 
111 #ifdef VERBOSE
112 	printf("\n");
113 	printf("kernel %d has id %d, od %d, irep %d orep %d oopt 0x%x, opt 0x%x\n",
114 	        i, gs.id, gs.od, gs.irep, gs.orep, gs.oopt, gs.opt);
115 	printf("Input req is id %d, od %d, irep %d orep %d, oopt 0x%x, opt 0x%x\n",
116 	        id, od, in, out, Ooopt, opt);
117 #endif
118 		/* First check mandatory things */
119 		if (!(
120 			  id == gs.id			/* Input dimension */
121 		   && od == gs.od			/* Output dimension */
122 		)) {
123 #ifdef VVERBOSE
124 		printf("  Input or Output dimension mismatch\n");
125 #endif
126 			continue;
127 		}
128 
129 		/* Check if we have an exact or conversion match for input stride */
130 		if (!(
131 		    (opt & opts_istride) == (gs.opt & opts_istride)
132 		 || (!(opt & opts_istride) && (gs.opt & opts_istride))
133 		)) {
134 #ifdef VVERBOSE
135 		printf("  Input stride mismatch\n");
136 #endif
137 			continue;
138 		}
139 
140 		/* Check if we have an exact or conversion match for output stride */
141 		if (!(
142 		    (opt & opts_ostride) == (gs.opt & opts_ostride)
143 		 || (!(opt & opts_ostride) && (gs.opt & opts_ostride))
144 		)) {
145 #ifdef VVERBOSE
146 		printf("  Output stride mismatch\n");
147 #endif
148 			continue;
149 		}
150 
151 		/* Check if we have an exact or conversion match for input representation */
152 		if (!(
153 		     in == gs.irep
154 		 || (in == pixint8 && gs.irep == planeint8 && (gs.opt & opts_istride))
155 		 || (in == pixint16 && gs.irep == planeint16 && (gs.opt & opts_istride))
156 		)) {
157 #ifdef VVERBOSE
158 		printf("  Input representation mismatch\n");
159 #endif
160 			continue;
161 		}
162 
163 		/* Check if we have an exact or conversion match for output representation */
164 		if (!(
165 		      out == gs.orep
166 		  || (out == pixint8 && gs.orep == planeint8 && (gs.opt & opts_ostride))
167 		  || (out == pixint16 && gs.orep == planeint16 && (gs.opt & opts_ostride))
168 		)) {
169 #ifdef VVERBOSE
170 		printf("  Output representation mismatch\n");
171 #endif
172 			continue;
173 		}
174 
175 		/* See if we have the output per channel options needed */
176 		if (!((Ooopt & gs.oopt) == Ooopt)) {
177 #ifdef VVERBOSE
178 		printf("  Output per channel options mismatch\n");
179 #endif
180 			continue;
181 		}
182 
183 		/* See if we have an exact or conversion match for reverse direction */
184 		if (!(
185 		    ((!(opt & opts_bwd) && !(opt & opts_fwd)))		/* Don't care */
186 		 || (((opt & opts_bwd) && (gs.opt & opts_bwd)))		/* Match */
187 		 || (((opt & opts_fwd) && (gs.opt & opts_fwd)))		/* Match */
188 		 || ((gs.opt & opts_istride) && (gs.opt & opts_ostride))	/* Can convert */
189 		)) {
190 #ifdef VVERBOSE
191 		printf("  Direction mismatch\n");
192 #endif
193 			continue;
194 		}
195 
196 #ifdef VERBOSE
197 		printf("  found match\n");
198 #endif
199 		fig = 0;
200 
201 		/* Apply penalty if we will have to do a conversion match */
202 		if ((opt & opts_istride) != (gs.opt & opts_istride)) {
203 			cnv |= conv_istr;
204 			fig += 1000;
205 #ifdef VERBOSE
206 			printf("  needs input stride conversion though\n");
207 #endif
208 		}
209 
210 		if ((opt & opts_ostride) != (gs.opt & opts_ostride)) {
211 			cnv |= conv_ostr;
212 			fig += 1000;
213 #ifdef VERBOSE
214 			printf("  needs output stride conversion though\n");
215 #endif
216 		}
217 
218 		if (in != gs.irep) {
219 			cnv |= conv_irep;
220 			fig += 5000;
221 #ifdef VERBOSE
222 			printf("  needs input representation conversion though\n");
223 #endif
224 		}
225 
226 		if (out != gs.orep) {
227 			cnv |= conv_orep;
228 			fig += 5000;
229 #ifdef VERBOSE
230 			printf("  needs output representation conversion though\n");
231 #endif
232 		}
233 
234 		/* If we don't need output checking or skipping, but we've got it */
235 		if ((~Ooopt & gs.oopt) != 0) {
236 			unsigned int tmp = (~Ooopt & gs.oopt);
237 			/* Count number of bits set that we don't need */
238   			/* (From HACKMEM 169) */
239 		    tmp = tmp - ((tmp >> 1) & 033333333333) - ((tmp >> 2) & 011111111111);
240 		    tmp = ((tmp + (tmp >> 3)) & 030707070707) % 63;
241 			fig += tmp;
242 #ifdef VERBOSE
243 			printf("  has output options we don't need though\n");
244 #endif
245 		}
246 
247 		/* If we've got output skipping, we need to skip pointers in interp. */
248 		if ((Ooopt & OOPTS_SKIP) != 0) {
249 			cnv |= conv_skip;
250 		}
251 
252 		if (((opt & opts_fwd) && (gs.opt & opts_bwd))
253 		 || ((opt & opts_bwd) && (gs.opt & opts_fwd))) {
254 			cnv |= conv_rev;
255 			fig += 1000;
256 #ifdef VERBOSE
257 			printf("  needs direction conversion though\n");
258 #endif
259 		}
260 
261 		if (prec != gs.prec) { 	/* Internal precision mismatch */
262 			fig += 100000;		/* Will have a major effect, so discourage using it */
263 #ifdef VERBOSE
264 			printf("  internal precision doesn't match though (want %d, got %d)\n",prec,gs.prec);
265 #endif
266 		}
267 
268 		/* If we've got a choice of algorithm possible */
269 		if (gs.opt & (opts_splx_sort | opts_sort_splx)) {
270 
271 			/* If we've got a preference of algorithm, */
272 			if ((opt & (opts_splx_sort | opts_sort_splx))) {
273 
274 				/* and there is a mismatch */
275 				if (((opt & opts_splx_sort) && ts.sort != 0)
276 				 || ((opt & opts_sort_splx) && ts.sort == 0)) {
277 					fig += 10000;		/* Will have a great effect, so discourage using it */
278 #ifdef VERBOSE
279 					printf("  sort/simplex algorithm mismatch\n");
280 #endif
281 				}
282 
283 			} else {	/* There is no preference chosen at run time, */
284 				/* and there is a mismatch to the compile time preference */
285 				if (((gs.opt & opts_splx_sort) && ts.sort != 0)
286 				 || ((gs.opt & opts_sort_splx) && ts.sort == 0)) {
287 					fig += 10000;		/* Will have a great effect, so discourage using it */
288 #ifdef VERBOSE
289 					printf("  sort/simplex algorithm mismatch\n");
290 #endif
291 				}
292 			}
293 		}
294 
295 		if (ts.sort) {
296 			stres = 0;
297 #ifdef VERBOSE
298 		printf("gres = %d, gs.gres = %d\n",res,gs.itres);
299 #endif
300 			/* We want one that is equals or exceeds the desired */
301 			/* resolution, but doesn't exceed it too much, or the code */
302 			/* will be inefficient. */
303 			/* If there are no routines that can meet the desired precision, */
304 			/* then it is ok to use the one closest to the desired precision. */
305 			if (gs.itres >= res) {
306 				fig += 10 * (gs.itres - res);
307 			} else {
308 				fig += 10000 + 10 * (res - gs.itres);
309 			}
310 		} else {
311 			/* compute the needed stres (Assuming not sort) */
312 			stres = ((1 << gs.prec)-1 + res-2)/(res-1);
313 
314 #ifdef VERBOSE
315 		printf("gres = %d, sres = %d, gs.gres = %d, gs.sres = %d\n",res,stres,gs.itres,gs.stres);
316 #endif
317 			/* We want one that is equals or exceeds the desired */
318 			/* resolution, but doesn't exceed it too much, or the code */
319 			/* will be inefficient. */
320 			/* If there are no routines that can meet the desired precision, */
321 			/* then it is ok to use the one closest to the desired precision. */
322 			if (gs.itres >= res && gs.stres >= stres) {
323 				fig += 10 * (gs.itres - res) + (gs.stres - stres);
324 			} {
325 				if (gs.itres < res) {
326 					fig += 10000 + 10 * (res - gs.itres);
327 				}
328 				if (gs.stres < stres) {
329 					fig += 1000 + 10 * (stres - gs.stres);
330 				}
331 			}
332 		}
333 
334 #ifdef VERBOSE
335 		printf("  figure of merit %d\n",fig);
336 #endif
337 		/* Is this the best one so far ? */
338 		if (fig < bfig) {
339 			bfig = fig;
340 			bk = i;
341 			bstres = stres;
342 			bgs = gs;		/* Structure copy */
343 			bts = ts;		/* Structure copy */
344 			bcnv = cnv;
345 #ifdef VERBOSE
346 			printf("  best so far\n");
347 #endif
348 		}
349 	}
350 
351 	if (bk < 0) {
352 #ifdef VERBOSE
353 		printf("new_imdi failed - dimensionality or representations couldn't be matched\n");
354 #endif
355 		return NULL;	/* Nothing matches */
356 	}
357 
358 	if ((im = (imdi *)calloc(1, sizeof(imdi))) == NULL) {
359 #ifdef VERBOSE
360 		printf("new_imdi malloc imdi failed\n");
361 #endif
362 		/* Should we return an error somehow ? */
363 		return NULL;
364 	}
365 
366 	/* We've decided kernel function bk is going to be the best, */
367 	/* so now setup the appropriate tables to use with it. */
368 	if (bgs.itres > res)
369 		bgs.itres = res;		/* Tell table create what the res is */
370 	if (bgs.stres > bstres)
371 		bgs.stres = bstres;
372 
373 	/* Tel table setup how to treat integer input in per channel lookups */
374 	bgs.in_signed = in_signed;
375 	bgs.out_signed = out_signed;
376 
377 #ifdef VERBOSE
378 	if (!bts.sort) {
379 		if ((bgs.stres * (bgs.itres-1)) < ((1 << bgs.prec)-1)) {
380 			printf("Warning: table chosen doesn't reach desired precision!\n");
381 			printf("Wanted %d, got %d\n", ((1 << bgs.prec)-1), (bgs.stres * (bgs.itres-1)));
382 		}
383 	}
384 #endif
385 
386 	/* Allocate and initialise the appropriate tables */
387 	im->impl = (void *)imdi_tab(&bgs, &bts, bcnv, in, out, ktable[bk].interp,
388 	                            inm, outm, oopt, checkv, input_curves, md_table,
389 	                            output_curves, cntx);
390 
391 	if (im->impl == NULL) {
392 #ifdef VERBOSE
393 		printf("imdi_tab failed\n");
394 #endif
395 		imdi_del(im);
396 		return NULL;
397 	}
398 
399 #ifdef VERBOSE
400 	if (bcnv != conv_none)
401 		printf("imdi_tab: using a runtime match, cnv flags 0x%x\n",bcnv);
402 #endif
403 
404 	if (bcnv == conv_none)		/* No runtime match conversion needed */
405 		im->interp  = ktable[bk].interp;
406 	else
407 		im->interp  = interp_match;
408 	im->get_check   = imdi_get_check;
409 	im->reset_check = imdi_reset_check;
410 	im->info        = imdi_info;
411 	im->del         = imdi_del;
412 
413 #ifdef VERBOSE
414 	printf("new_imdi returning 0x%x\n", im);
415 #endif
416 	return im;
417 }
418 
419 /* Runtime matching adapter */
420 /* We can emulate many combinations of interpolation function */
421 /* by converting to a routine that supports stride, or one that */
422 /* supports plane interleaved and stride. */
423 /* This also lets us emulate functions that can convert between */
424 /* pixel and plane interleaved at the same time as doing the */
425 /* color interpolation. Warp is in components (NOT bytes) */
426 /* We cope with skipping writes to output channels by */
427 /* only implementing that support in plane interleaved, */
428 /* and depending on that being used for pixel interleaved. */
interp_match(imdi * s,void ** outp,int outst,void ** inp,int inst,unsigned int npixels)429 static void interp_match(
430 imdi *s,
431 void **outp, int outst,		/* Output pointers and stride */
432 void **inp, int inst,		/* Input pointers and stride */
433 unsigned int npixels		/* Number of pixels */
434 ) {
435 	int j, i;
436 	void *minp[IXDI];
437 	void *moutp[IXDI];
438 	imdi_imp *impl = (imdi_imp *)s->impl;
439 	imdi_conv cnv = impl->cnv;
440 
441 	/* Need to supply default stride. */
442 	/* This is simply the input dimension for pixel interleaved, */
443 	/* and 1 for plane interleaved. */
444 	if (cnv & conv_istr) {
445 		if (impl->cirep == pixint8 || impl->cirep == pixint16)
446 			inst = impl->id;
447 		else
448 			inst = 1;
449 	}
450 	if (cnv & conv_ostr) {
451 		if (impl->corep == pixint8 || impl->corep == pixint16)
452 			outst = impl->wod;
453 		else
454 			outst = 1;
455 	}
456 
457 	/* Convert from pixel to plane by setting the plane pointers */
458 	/* to each pixel component, and using the pixel interleaved stride */
459 	if (cnv & conv_irep) {
460 		if (impl->cirep == pixint8) {	/* Convert from pix8 to plane8 */
461 			for (j = 0; j < impl->id; j++)
462 				minp[j] = (void *)((char *)inp[0] + j);
463 		} else if (impl->cirep == pixint16) {	/* Convert from pix16 to plane16 */
464 			for (j = 0; j < impl->id; j++)
465 				minp[j] = (void *)((char *)inp[0] + 2 * j);
466 		}
467 	} else {	/* Copy pointers, because we call with them. */
468 		if (impl->cirep == pixint8 || impl->cirep == pixint16) {
469 			minp[0] = inp[0];
470 		} else {
471 			for (j = 0; j < impl->id; j++)
472 				minp[j] = inp[j];
473 		}
474 	}
475 	if (cnv & conv_orep) {
476 		if (impl->corep == pixint8) {	/* Convert from pix8 to plane8 */
477 			for (j = i = 0; j < impl->od; j++) {
478 				if ((impl->skipf & (1 << j)) == 0)	/* If not skipped */
479 					moutp[j] = (void *)((char *)outp[0] + i++);
480 				else
481 					moutp[j] = NULL;
482 			}
483 		} else if (impl->corep == pixint16) {	/* Convert from pix16 to plane16 */
484 			for (j = i = 0; j < impl->od; j++)
485 				if ((impl->skipf & (1 << j)) == 0)	/* If not skipped */
486 					moutp[j] = (void *)((char *)outp[0] + 2 * i++);
487 				else
488 					moutp[j] = NULL;
489 		}
490 	} else {	/* Copy pointers, because we call with them. */
491 		if (impl->corep == pixint8 || impl->corep == pixint16) {
492 			moutp[0] = outp[0];
493 		} else {	/* Plane interleaved */
494 			for (j = i = 0; j < impl->od; j++) {
495 				if ((impl->skipf & (1 << j)) == 0)	/* If not skipped */
496 					moutp[j] = outp[i++];
497 				else
498 					moutp[j] = NULL;
499 			}
500 		}
501 	}
502 
503 	/* Convert fwd function to a backwards one, or visa-versa. */
504 	/* Move the pointers to the last pixel, and reverse the stride */
505 	if (cnv & conv_rev) {
506 		if (impl->firep == pixint8) {
507 			minp[0] = (void *)((char *)minp[0] + inst * (npixels - 1));
508 		} else if (impl->firep == pixint16) {
509 			minp[0] = (void *)((char *)minp[0] + inst * 2 * (npixels - 1));
510 		} else if (impl->firep == planeint8) {
511 			for (j = 0; j < impl->id; j++)
512 				minp[j] = (void *)((char *)minp[j] + inst * (npixels - 1));
513 		} else if (impl->firep == planeint16) {
514 			for (j = 0; j < impl->id; j++)
515 				minp[j] = (void *)((char *)minp[j] + inst * 2 * (npixels - 1));
516 		}
517 		inst = -inst;
518 		if (impl->forep == pixint8) {
519 			moutp[0] = (void *)((char *)moutp[0] + outst * (npixels - 1));
520 		} else if (impl->forep == pixint16) {
521 			moutp[0] = (void *)((char *)moutp[0] + outst * 2 * (npixels - 1));
522 		} else if (impl->forep == planeint8) {
523 			for (j = 0; j < impl->od; j++)
524 				moutp[j] = (void *)((char *)moutp[j] + outst * (npixels - 1));
525 		} else if (impl->forep == planeint16) {
526 			for (j = 0; j < impl->od; j++)
527 				moutp[j] = (void *)((char *)moutp[j] + outst * 2 * (npixels - 1));
528 		}
529 		outst = -outst;
530 	}
531 
532 	/* Call the conversion routine we have */
533 	impl->interp(s, moutp, outst, minp, inst, npixels);
534 }
535 
536 /* Get the per output channel check flags - bit corresponds to output interpolation channel */
imdi_get_check(imdi * im)537 static unsigned int imdi_get_check(imdi *im) {
538 	imdi_imp *impl = (imdi_imp *)im->impl;
539 	unsigned int rv;
540 	int i;
541 
542 	/* Convert from output channel index to callback index */
543 	for (rv = 0, i = 0; i < impl->od; i++) {
544 		unsigned int b;
545 		b = 1 & (impl->checkf >> i);		/* Output index flag */
546 		rv |= (b << impl->im_map[i]);		/* to callback index */
547 	}
548 	return rv;
549 }
550 
551 /* Reset the output check flags (not reset by interp) */
imdi_reset_check(imdi * im)552 static void imdi_reset_check(imdi *im) {
553 	imdi_imp *impl = (imdi_imp *)im->impl;
554 
555 	impl->checkf = 0;
556 }
557 
558 /* Return some information about the imdi */
imdi_info(imdi * im,unsigned long * psize,int * pgres,int * psres)559 static void imdi_info(imdi *im, unsigned long *psize, int *pgres, int *psres) {
560 	imdi_imp *impl = (imdi_imp *)im->impl;
561 	unsigned long size;
562 
563 	size = sizeof(imdi);
564 
565 	if (impl != NULL) {
566 		size += impl->size;
567 		if (psize != NULL)
568 			*psize = size;
569 
570 		if (pgres != NULL)
571 			*pgres = impl->gres;
572 
573 		if (psres != NULL)
574 			*psres = impl->sres;
575 	}
576 }
577 
578 
579 /* Delete the object */
imdi_del(imdi * im)580 static void imdi_del(imdi *im) {
581 	/* Free all the allocated tables */
582 	if (im->impl != NULL)
583 		imdi_tab_free((imdi_imp *)im->impl);
584 
585 	/* Free this structure */
586 	free(im);
587 }
588 
589 
590 
591 
592 
593 
594 
595 
596 
597 
598 
599 
600 
601 
602 
603 
604 
605 
606