1 
2 /* Integer Multi-Dimensional Interpolation */
3 
4 /*
5  * Copyright 2000 - 2007 Graeme W. Gill
6  * All rights reserved.
7  *
8  * This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :-
9  * see the License.txt file for licencing details.
10  */
11 
12 /* 'C' code color transform kernel code generator. */
13 
14 /*
15    This module generates C code routines which implement
16    an integer multi-channel transform. The input values
17    are read, passed through per channel lookup tables,
   a multi-dimensional interpolation table, and then
19    a per channel output lookup table, before being written.
20 */
21 
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26 #include <stdarg.h>
27 #include <string.h>
28 
29 #include "imdi.h"
30 #include "imdi_tab.h"
31 
#undef VERBOSE			/* Define to print table layout summaries and progress to stdout */
#define INSTHRESH 4		/* Use insertion sort when di >= INSTHRESH for best performance. */
#undef ROUND			/* Round the division after accumulation */
						/* Improves accuracy at the cost of a little speed */
36 
37 /* ------------------------------------ */
/* Generator context */
/* Carries the output stream, the current indent level, and the type and */
/* size choices worked out by gen_c_kernel() for the kernel being emitted. */
/* Members named ..et (entry type) and ..vt (variable type), as well as the */
/* pointer type arrays, hold indexes into the a->ords[] table of machine */
/* ordinal types, as returned by findord(), nord() and findnord(). */
typedef struct {
	FILE *of;			/* Output file */
	int indt;			/* Indent level (adjusted by inc() and dec()) */

	/* Other info */
	genspec *g;			/* Generation specifications */
	tabspec *t;			/* Table setup data */
	mach_arch *a;		/* Machine architecture and tuning data */

	/* Code generation information */
	/* if() conditions are for entry usage */

	/* Pixel read information */
	int ipt[IXDI];		/* Input pointer types */
	int nip;			/* Actual number of input pointers, accounting for pint */
						/* (1 if pixel interleaved, else one per input channel) */
	int chv_bits;		/* Bits in chv temp variable ?? */

	/* Input table entry */
	int itet;			/* Input table entry type */
	int itvt;			/* Input table variable type */
	int itmnb;			/* Input table minimum bits (actual is it_ab) */

	/* Interpolation index */
	int ixet;			/* Interpolation index entry type */
	int ixvt;			/* Interpolation index variable type */
	int ixmnb;			/* Interpolation index minimum bits (actual is t->ix_ab) */
	int ixmxres;		/* Interpolation table maximum resolution */

	/* Simplex index: if(!sort && it_xs) */
	int sxet;			/* Simplex index entry type  */
	int sxvt;			/* Simplex index variable type */
	int sxmnb;			/* Simplex index bits minimum (actual is sx_ab) */
	int sxmxres;		/* Simplex table maximum resolution (0 if sort) */

	/* Combination Weighting + Vertex offset values: if(it_xs && !wo_xs) */
	int woet;			/* Weighting+offset entry type  */
	int wovt;			/* Weighting+offset variable type */
	int womnb;			/* Weighting+offset index bits minimum (actual is wo_ab) */

	/* Weighting value: if(it_xs && wo_xs) */
	int weet;			/* Weighting entry type  */
	int wevt;			/* Weighting variable type */
	int wemnb;			/* Weighting index bits minimum (actual is we_ab) */

	/* Vertex offset value: if(it_xs && wo_xs) */
	int voet;			/* Vertex offset entry type  */
	int vovt;			/* Vertex offset variable type */
	int vomnb;			/* Vertex offset index bits minimum (actual is vo_ab) */

	/* Interpolation table entry: */
	int imovb;			/* Interpolation output value bits per channel required */
	int imfvt;			/* Interpolation full entry & variable type */
	int impvt;			/* Interpolation partial entry variable type */

	/* Interpolation accumulators: */
	int iaovb;			/* Accumulator bits per output value required */
	int iafvt;			/* Accumulator full variable type */
	int iapvt;			/* Accumulator partial variable type */
	int ian;			/* Total number of accumulators (full + partial entries) */

	/* Output table lookup */
	int otit;			/* Output table index type */
	int otvt;			/* Output table value type (size is ot_ts bytes) */

	/* Write information */
	int opt[IXDO];		/* Output pointer types */
	int nop;			/* Actual number of output pointers, accounting for pint */
						/* (1 if pixel interleaved, else one per output channel) */

} fileo;
108 
109 void line(fileo *f, char *fmt, ...);	/* Output one line */
110 void sline(fileo *f, char *fmt, ...);	/* Output start of line */
111 void mline(fileo *f, char *fmt, ...);	/* Output middle of line */
112 void eline(fileo *f, char *fmt, ...);	/* Output end of line */
113 void niline(fileo *f, char *fmt, ...);	/* Output one line, no indent */
/* Output a blank line */
void cr(fileo *f)
{
	line(f, "");
}
/* Increment the indent level */
void inc(fileo *f)
{
	f->indt += 1;
}
/* Decrement the indent level */
void dec(fileo *f)
{
	f->indt -= 1;
}
117 void lineinc(fileo *f, char *fmt, ...);	/* Output one line and increment indent */
118 void decline(fileo *f, char *fmt, ...);	/* Decrement indent and output one line */
119 /* ------------------------------------ */
120 
121 int findord(fileo *f, int bits);		/* Find ordinal with bits or more */
122 int nord(fileo *f, int ov);				/* Round ordinal type up to natural size */
123 int findnord(fileo *f, int bits);		/* Find ordinal with bits, or natural larger */
124 int findint(fileo *f, int bits);		/* Find integer with bits or more */
125 int nint(fileo *f, int iv);				/* Round integer type up to natural size */
126 int findnint(fileo *f, int bits);		/* Find integer with bits, or natural larger */
127 static void doheader(fileo *f);
128 
129 static int calc_bits(int dim, int res);
130 static int calc_res(int dim, int bits);
131 static int calc_obits(int dim, int res, int esize);
132 static int calc_ores(int dim, int bits, int esize);
133 
134 
/* Return a hexadecimal mask string ("0x...") with the low 'bits' bits set. */
/* bits is clamped to the range 0..64: a negative count gives "0x0" and */
/* anything of 64 or more gives the full 64 bit mask. */
/* The 32 and 64 bit masks are returned as string literals; every other */
/* result is formatted into a static buffer that is overwritten by the next */
/* such call, so the caller must use or copy it before calling again. */
/* The mask is computed in unsigned arithmetic, because (1 << 31) - 1 */
/* overflows a 32 bit signed int. (Assumes unsigned int has at least 32 bits.) */
char *hmask(int bits) {
	static char buf[20];	/* "0x" + at most 16 hex digits + nul = 19 chars */

	if (bits < 0)
		bits = 0;			/* A negative shift count would be undefined */

	if (bits >= 64) {
		return "0xffffffffffffffff";
	} else if (bits == 32) {
		return "0xffffffff";
	} else if (bits < 32) {
		sprintf(buf, "0x%x", (1u << bits) - 1u);
	} else {	/* 32 < bits < 64: high word mask followed by a full low word */
		sprintf(buf, "0x%xffffffff", (1u << (bits - 32)) - 1u);
	}
	return buf;
}
151 
152 /* Generate a source file to implement the specified */
153 /* interpolation kernel. Fill in return values and return 0 if OK. */
154 /* g->opt should be set to opts_splx_sort or opts_sort_splx if both */
155 /* are being generated, but opts_splx is what actually chooses simplex */
156 /* when available, and is not recorded in the resulting table. */
157 /* Return 1 if this kernel could be generated with a simplex table algorithm, */
158 /* and some other non-zero on another error. */
gen_c_kernel(genspec * g,tabspec * t,mach_arch * a,FILE * fp,int index,genspec * og,tabspec * ot)159 int gen_c_kernel(
160 	genspec *g,				/* Specification of what to generate */
161 	tabspec *t,				/* Tablspec that will be filled in */
162 	mach_arch *a,
163 	FILE *fp,				/* File to write to */
164 	int index,				/* Identification index, 1 = first */
165 	genspec *og,			/* Previous tables genspec (for diff) */
166 	tabspec *ot				/* Previous tables tabspec (for diff) */
167 ) {
168 	int frv = 0;		/* Function return value */
169 	unsigned char kk[] = { 0x43, 0x6F, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68,
170 	                       0x74, 0x20, 0x32, 0x30, 0x30, 0x34, 0x20, 0x47,
171 	                       0x72, 0x61, 0x65, 0x6D, 0x65, 0x20, 0x57, 0x2E,
172 	                       0x20, 0x47, 0x69, 0x6C, 0x6C, 0x00 };
173 	fileo f[1];
174 	int e, i;
175 	int timp = 0;		/* Flag to use temporary imp pointer. */
176 						/* Seem to make x86 MSVC++ slower */
177 						/* Has no effect on x86 IBMCC */
178 
179 	sprintf(g->kname, "imdi_k%d",index); /* Kernel routine base name */
180 	strcpy(g->kkeys, (char *)kk);		 /* Kernel keys for this session */
181 
182 	/* Setup the file output context */
183 	f->of = fp;
184 	f->indt = 0;			/* Start with no indentation */
185 	f->g = g;
186 	f->t = t;
187 	f->a = a;
188 
189 	/* (prec is currently permitted to be only 8 or 16) */
190 	if (g->prec == 8) {
191 		if (g->id <= 4) {		/* Simplex table can be used */
192 			frv = 1;			/* Signal caller that simplex is possible */
193 			if (g->opt & opts_splx)
194 				t->sort = 0;	/* Implicit sort using simplex table lookup */
195 			else
196 				t->sort = 1;	/* Explicit sort */
197 		} else {
198 			t->sort = 1;		/* Explicit sort */
199 		}
200 
201 	} else if (g->prec == 16) {
202 		t->sort = 1;			/* Explit sort, no simplex table */
203 
204 	} else {
205 		fprintf(stderr,"Can't cope with requested precision of %d bits\n",g->prec);
206 		exit(-1);
207 	}
208 
209 	/* Compute input read and input table lookup stuff */
210 
211 	/* Compute number of input pointers */
212 	if (g->in.pint != 0)	/* Pixel interleaved */
213 		f->nip = 1;
214 	else
215 		f->nip = g->id;
216 
217 	/* Figure out the input pointer types */
218 	for (e = 0; e < f->nip; e++) {
219 		if ((f->ipt[e] = findord(f, g->in.bpch[e])) < 0) {
220 			fprintf(stderr,"Input channel size can't be handled\n");
221 			exit(-1);
222 		}
223 	}
224 
225 	/* Do the rest of the input table size calculations after figuring */
226 	/* out simplex and interpolation table sizes. */
227 
	/* Figure out the interpolation multi-dimensional table structure */
	/* and output accumulation variable sizes. Note that the accumulator */
	/* size needs to be greater than the basic precision by some factor, */
	/* if we are not to get rounding errors due to each value being the sum */
	/* of di+1 parts with weightings that sum to 1.0. It's convenient in */
	/* the C code case to simply double the basic precision size. */
234 	if (g->prec == 8
235 	 || (g->prec == 16 && a->ords[a->nords-1].bits >= (g->prec * 4))) {
236 		int tiby;		/* Total interpolation bytes needed */
237 
238 		/* We assume that we can normally compute more than one */
239 		/* output value at a time, so we need to hold the interpolation */
240 		/* output data in the expanded fixed point format in both the */
241 		/* table and accumulator. */
242 		t->im_cd = 1;
243 		f->imovb = g->prec * 2;		/* 16 bits needed for 8 bit precision, */
244 		f->iaovb = g->prec * 2;		/* 32 bits needed for 16 bit precision */
245 		f->imfvt = a->nords-1;		/* Full variable entry type is biggest available */
246 		f->iafvt = a->nords-1;		/* Full variable accum. type is same */
247 
248 		if (a->ords[f->imfvt].bits < f->imovb) {
249 			fprintf(stderr,"Interpolation table entry size can't be handled\n");
250 			exit(-1);
251 		}
252 
253 		/* Compute details of table entry sizes, number */
254 		tiby = (f->imovb * g->od)/8;				/* Total table bytes needed */
255 		t->im_fs = a->ords[f->imfvt].bits/8;		/* Full entry bytes */
256 		t->im_fv = (t->im_fs * 8)/f->imovb;			/* output values per full entry . */
257 		t->im_fn = tiby/t->im_fs;					/* Number of full entries (may be 0) */
258 		t->im_ts = t->im_fn * t->im_fs;				/* Structure size so far */
259 		tiby -= t->im_fn * t->im_fs;				/* Remaining bytes */
260 
261 		if (tiby <= 0) {
262 			t->im_pn = 0;		/* No partials */
263 			t->im_ps = 0;
264 			t->im_pv = 0;
265 			f->impvt = 0;
266 			f->iapvt = 0;
267 
268 		} else {
269 			t->im_pn = 1;					/* Must be just 1 partial */
270 			t->im_pv = (tiby * 8)/f->imovb;	/* Partial holds remaining entries */
271 
272 #ifdef NEVER	/* For better performance ??? */
273 			if ((f->impvt = findnord(f, tiby * 8)) < 0) {
274 #else			/* Better memory footprint - minimise multi-D entry sizes */
275 				/* (but only if structure is alowed to be mis-aligned!) */
276 			if ((f->impvt = findord(f, tiby * 8)) < 0) {
277 #endif
278 				fprintf(stderr,"Can't find partial interp table entry variable size\n");
279 				exit(-1);
280 			}
281 			f->iapvt = f->impvt;
282 			t->im_ps = a->ords[f->impvt].bits/8;/* Partial entry bytes */
283 
284 			if (a->ords[f->imfvt].align)		/* If full entry's need to be aligned */
285 				t->im_ts += t->im_fs;			/* Round out struct size by full entry */
286 			else
287 				t->im_ts += t->im_ps;			/* Round out to natural size */
288 		}
289 
290 	} else {
291 		/* One 16 bit output value per entry + 32 bit accumulator. */
292 		/* We can conserve table space by not holding the table data in expanded */
293 		/* fixed point format, but expanding it when it is read. */
294 		/* Without resorting to compicated code, this restricts us */
295 		/* to only computing one output value per accumulator. */
296 		t->im_cd = 0;
297 		f->imovb = g->prec;			/* Table holds 16 bit entries with no fractions */
298 		f->iaovb = g->prec * 2;		/* 32 bits needed for 16 bit precision in comp. */
299 
300 		if ((f->imfvt = findord(f, f->imovb)) < 0) {
301 			fprintf(stderr,"Interpolation table entry size can't be handled\n");
302 			exit(-1);
303 		}
304 		if ((f->iafvt = findord(f, f->iaovb)) < 0) {
305 			fprintf(stderr,"Interpolation accumulator size can't be handled\n");
306 			exit(-1);
307 		}
308 
309 		/* Compute details of table entry sizes, number */
310 		t->im_fs = a->ords[f->imfvt].bits/8;		/* Full entry bytes */
311 		t->im_fv = 1;								/* output values per full entry . */
312 		t->im_fn = g->od;							/* Number of full entries */
313 		t->im_ts = t->im_fn * t->im_fs;				/* Total structure size */
314 
315 		t->im_pn = 0;		/* No partials */
316 		t->im_ps = 0;
317 		t->im_pv = 0;
318 		f->impvt = 0;
319 		f->iapvt = 0;
320 	}
321 	f->ian = t->im_fn + t->im_pn;			/* Total number of output accumulators */
322 
323 	/* Figure out how much of the interpolation entry offset to put in the */
324 	/* vertex offset value, and how much to make explicit in accessing the */
325 	/* interpolation table enty. */
326 	if (a->oscale > 0) {		/* We have a scaled index mode */
327 		/* Use as much of the scaled index mode as possible */
328 		/* and then do the balance by scaling the simplex index entry. */
329 		for (t->im_oc = a->oscale; ; t->im_oc >>= 1) {
330 			t->vo_om = t->im_ts/t->im_oc;		/* Simplex index multiplier */
331 			if ((t->vo_om * t->im_oc) == t->im_ts)
332 				break;				/* Got appropriate offset scale */
333 		}
334 	} else if (a->smmul) {		/* Architecure supports fast small multiply */
335 		t->im_oc = t->im_ts;	/* Do scale by structure size explicitly */
336 		t->vo_om = 1;			/* Do none in the Simplex index */
337 	} else {					/* We have no fast tricks */
338 		t->im_oc = 1;			/* Do none explicitly */
339 		t->vo_om = t->im_ts;	/* Do all in Simplex index */
340 	}
341 
342 	/* Compute the number of bits needed to hold an index into */
343 	/* the interpolation table (index is in terms of table entry size). */
344 	/* This value is used to figure out the room needed in the input */
345 	/* table to accumulate the interpolation cube base offset value. (IM_O macro) */
346 	f->ixmnb = calc_bits(g->id, g->itres);
347 
348 #ifdef VERBOSE
349 	/* Summarise the interpolation table arrangements */
350 	printf("\n");
351 	printf("Interpolation table structure:\n");
352 	printf("  Minimum bits needed to index table %d\n", f->ixmnb);
353 	printf("  Entry total size %d bytes\n", t->im_ts);
354 	printf("  Simplex entry offset scale %d\n", t->vo_om);
355 	printf("  Explicit entry offset scale %d\n", t->im_oc);
356 	printf("  %d full entries, size %d bytes\n", t->im_fn, t->im_fs);
357 	printf("  %d partial entries, size %d bytes\n", t->im_pn, t->im_ps);
358 	printf("  to hold %d output values of %d bits\n", g->od, f->imovb);
359 
360 #endif /* VERBOSE */
361 
362 	/* Number of bits needed for the weighting value */
363 	f->wemnb = g->prec+1;	/* Need to hold a weighting factor of 0 - 256 for 8 bits */
364 							/* Need to hold a weighting factor of 0 - 65536 for 16 bits */
365 
366 	/* Variable that would be used to hold it */
367 	if ((f->wevt = findnord(f, f->wemnb)) < 0) {
368 		fprintf(stderr,"Can't find entry size to hold weighting variable\n");
369 		exit(-1);
370 	}
371 
372 	/* Number of bits needed for vertex offset value */
373 	f->vomnb = calc_obits(g->id, g->itres, t->vo_om);
374 
375 	/* Variable that would be used to hold it */
376 	if ((f->vovt = findnord(f, f->vomnb)) < 0) {
377 		fprintf(stderr,"Can't find entry size to hold vertex offset variable\n");
378 		exit(-1);
379 	}
380 
381 	if (t->sort) {
382 		/* If we are using an explicit sort, we need to figure how many */
383 		/* separate entries we need to use to hold the interpolation index, */
384 		/* weighting factor and vertex offset values in the input table. */
385 
386 		/* First try all three in one entry */
387 		if ((f->itet = findord(f, f->ixmnb + f->wemnb + f->vomnb)) >= 0) {/* size to read */
388 			int rem;						/* Remainder bits */
389 
390 			t->it_xs = 0;					/* Combined interp+weight+offset */
391 			t->wo_xs = 0;
392 			t->it_ab = a->ords[f->itet].bits;	/* Bits in combined input entry */
393 			rem = t->it_ab - f->ixmnb - f->wemnb - f->vomnb; /* Spair bits */
394 			t->we_ab = f->wemnb;				/* Get minimum weight bits */
395 			t->vo_ab = f->vomnb + rem/2;		/* vertex offset index bits actually available */
396 			t->ix_ab = t->it_ab - t->vo_ab - t->we_ab;	/* interp index bits actually available */
397 			t->wo_ab = t->we_ab + t->vo_ab;		/* Weight & offset total bits */
398 			t->it_ts = a->ords[f->itet].bits/8;	/* total size in bytes */
399 			f->itvt = nord(f, f->itet);			/* Variable type */
400 
401 			if ((f->wovt = findnord(f, t->we_ab + t->vo_ab)) < 0) {
402 				fprintf(stderr,"Can't find variable size to hold weight/offset\n");
403 				exit(-1);
404 			}
405 			if ((f->wevt = findnord(f, t->we_ab)) < 0) {
406 				fprintf(stderr,"Can't find variable size to hold weighting factor\n");
407 				exit(-1);
408 			}
409 			if ((f->vovt = findnord(f, t->vo_ab)) < 0) {
410 				fprintf(stderr,"Can't find variable size to hold vertex offset index\n");
411 				exit(-1);
412 			}
413 			if ((f->ixvt = findnord(f, t->ix_ab)) < 0) {
414 				fprintf(stderr,"Interp index variable size can't be handled\n");
415 				exit(-1);
416 			}
417 		} else {	/* Interp index will be a separate entry */
418 			int wit, oft, bigt;		/* weighting type, offset type, biggest type */
419 			int combt;				/* Combined type */
420 			int sepbits, combits;	/* Total separate, combined bits */
421 
422 			t->it_xs = 1;				/* Separate interp index and weighting+offset */
423 			if ((f->ixet = findord(f, f->ixmnb)) < 0) {
424 				fprintf(stderr,"Interp index entry size can't be handled\n");
425 				exit(-1);
426 			}
427 			f->ixvt = nord(f, f->ixet);		/* Variable type */
428 			t->ix_ab = a->ords[f->ixet].bits;
429 			t->ix_es = t->ix_ab/8;
430 			t->ix_eo = 0;
431 			t->it_ts = t->ix_es;			/* Input table size so far */
432 
433 			/* Now figure weighting and vertex offset */
434 
435 			/* See if we can fit them into separately readable entries, or whether */
436 			/* they should be combined to minimise overall table size. */
437 
438 			if ((wit = findord(f, f->wemnb)) < 0) {
439 				fprintf(stderr,"Can't find entry size to hold weighting factor\n");
440 				exit(-1);
441 			}
442 			if ((oft = findord(f, f->vomnb)) < 0) {
443 				fprintf(stderr,"Can't find entry size to hold vertex offset index\n");
444 				exit(-1);
445 			}
446 			bigt = wit > oft ? wit : oft;			/* Bigest separate type */
447 
448 			if ((combt = findord(f, f->wemnb + f->vomnb)) < 0) {/* Combined isn't possible */
449 				sepbits = 2 * a->ords[bigt].bits;		/* Total separate bits */
450 				combits = sepbits;						/* Force separate entries */
451 			} else {
452 				sepbits = 2 * a->ords[bigt].bits;		/* Total separate bits */
453 				combits = a->ords[combt].bits;			/* Total combined bits */
454 			}
455 
456 			if (sepbits <= combits) {				/* We will use separate entries */
457 				t->wo_xs = 1;
458 				t->we_es = a->ords[bigt].bits/8;	/* size in bytes for weighting entry */
459 				t->we_ab = a->ords[bigt].bits;		/* bits available for weighting */
460 				t->we_eo = t->ix_es;				/* Entry offset in input table */
461 				t->vo_es = a->ords[bigt].bits/8;	/* size in bytes for vertex offset entry */
462 				t->vo_ab = a->ords[bigt].bits;		/* bits available for vertex offset */
463 				t->vo_eo = t->ix_es + t->we_es;		/* Entry offset in input table */
464 				t->wo_es = t->we_es + t->vo_es;		/* Total entry size for each vertex */
465 				t->it_ts += t->we_es + t->vo_es;	/* Total input entry size in bytes */
466 
467 				f->weet = bigt;				/* Variable type for accessing weighting entry */
468 				f->voet = bigt;				/* Variable type for accessing vertex offset entry */
469 				f->wevt = nord(f, wit);		/* Variable type for holding weight value */
470 				f->vovt = nord(f, oft);		/* Variable type for holding offset value */
471 
472 			} else {								/* We will combine the two entries */
473 				t->wo_xs = 0;
474 				t->wo_es = a->ords[combt].bits/8;	/* entry size in bytes for each entry */
475 				t->wo_ab = a->ords[combt].bits;		/* bits in weightig + offset */
476 				t->we_ab = f->wemnb;				/* bits available for weighting */
477 				t->vo_ab = t->wo_ab - t->we_ab;		/* Allow all spare bits to vertex offset */
478 				t->wo_eo = t->ix_es;				/* entry offset in input table */
479 				t->it_ts += t->wo_es;				/* Final input table size */
480 
481 				f->woet = combt;			/* Variable type for accessing combined entry */
482 				f->wovt = nord(f, combt);	/* Variable type holding weight/offset read value */
483 
484 				if ((f->wevt = findnord(f, t->we_ab)) < 0) {
485 					fprintf(stderr,"Can't find variable size to hold weighting factor\n");
486 					exit(-1);
487 				}
488 				if ((f->vovt = findnord(f, t->vo_ab)) < 0) {
489 					fprintf(stderr,"Can't find variable size to hold vertex offset index\n");
490 					exit(-1);
491 				}
492 			}
493 		}
494 #ifdef VERBOSE
495 		/* Summarise the input table arrangements */
496 		printf("\n");
497 		printf("Input table structure:\n");
498 		printf("  Input table entry size = %d bytes\n",t->it_ts);
499 		if (t->it_ix) {
500 			printf("  Input table extracts value from read values\n");
501 			if (t->wo_xs) {
502 				printf("  Separate Interp., Weighting and Offset values\n");
503 				printf("  Interp. index is at offset %d, size %d bytes\n",t->ix_eo, t->ix_es);
504 				printf("  Weighting is at offset %d, size %d bytes\n",t->we_eo, t->we_es);
505 				printf("  Vertex offset is at offset %d, size %d bytes\n",t->vo_eo, t->vo_es);
506 			} else {
507 				printf("  Separate Interp. index and Weightint+Offset value\n");
508 				printf("  Interp. index is at offset %d, size %d bytes\n",t->ix_eo, t->ix_es);
509 				printf("  Weighting+Offset is at offset %d, size %d bytes\n",t->wo_eo, t->wo_es);
510 				printf("  Weighting     = %d bits\n",t->we_ab);
511 				printf("  Vertex offset = %d bits\n",t->vo_ab);
512 			}
513 		} else {
514 			printf("  Combined InterpIndex+Weighting+Voffset values\n");
515 			printf("  Values are stored in size %d bytes\n",t->it_ts);
516 			printf("  Interp. index = %d bits\n",t->ix_ab);
517 			printf("  Weighting     = %d bits\n",t->we_ab);
518 			printf("  Vertex offset = %d bits\n",t->vo_ab);
519 		}
520 #endif /* VERBOSE */
521 
522 	} else {	/* Simplex table */
523 		/* If we are going to use a simplex table, figure out how we */
524 		/* will store the weighting value and vertex offset values in it, */
525 		/* as well as the size of index we'll need to address it. */
526 		int wit, oft, bigt;		/* weighting type, offset type, biggest type */
527 		int combt;				/* Combined type */
528 		int sepbits, combits;	/* Total separate, combined bits */
529 
530 		/* See if we can fit them into separately readable entries, or whether */
531 		/* they should be combined to minimise overall table size. */
532 
533 		if ((wit = findord(f, f->wemnb)) < 0) {
534 			fprintf(stderr,"Can't find entry size to hold weighting factor\n");
535 			exit(-1);
536 		}
537 		if ((oft = findord(f, f->vomnb)) < 0) {
538 			fprintf(stderr,"Can't find entry size to hold vertex offset index\n");
539 			exit(-1);
540 		}
541 		bigt = wit > oft ? wit : oft;			/* Bigest separate type */
542 
543 		if ((combt = findord(f, f->wemnb + f->vomnb)) < 0) {/* Combined isn't possible */
544 			sepbits = 2 * a->ords[bigt].bits;		/* Total separate bits */
545 			combits = sepbits;						/* Force separate entries */
546 		} else {
547 			sepbits = 2 * a->ords[bigt].bits;		/* Total separate bits */
548 			combits = a->ords[combt].bits;			/* Total combined bits */
549 		}
550 
551 		if (sepbits <= combits) {				/* We will use separate entries */
552 			t->wo_xs = 1;
553 			t->we_es = a->ords[bigt].bits/8;	/* size in bytes for weighting entry */
554 			t->we_ab = a->ords[bigt].bits;		/* bits available for weighting */
555 			t->we_eo = 0;						/* Entry offset in simplex table */
556 			t->vo_es = a->ords[bigt].bits/8;	/* size in bytes for vertex offset entry */
557 			t->vo_ab = a->ords[bigt].bits;		/* bits available for vertex offset */
558 			t->vo_eo = t->we_es;				/* Entry offset in simplex table */
559 			t->wo_es = t->we_es + t->vo_es;		/* Total entry size for each vertex */
560 			t->sm_ts = (g->id + 1) * (t->we_es + t->vo_es) ;	/* Total size in bytes */
561 
562 			f->weet = bigt;				/* Variable type for accessing weighting entry */
563 			f->voet = bigt;				/* Variable type for accessing vertex offset entry */
564 			f->wevt = nord(f, wit);		/* Variable type for holding weight value */
565 			f->vovt = nord(f, oft);		/* Variable type for holding offset value */
566 
567 		} else {								/* We will combine the two entries */
568 			t->wo_xs = 0;
569 			t->wo_es = a->ords[combt].bits/8;	/* entry size in bytes for each entry */
570 			t->wo_ab = a->ords[combt].bits;		/* bits in weightig + offset */
571 			t->we_ab = f->wemnb;				/* bits available for weighting */
572 			t->vo_ab = t->wo_ab - t->we_ab;		/* Allow all spare bits to vertex offset */
573 			t->wo_eo = 0;						/* entry offset in simplex table */
574 			t->sm_ts = (g->id + 1) * t->wo_es;	/* Total size in bytes */
575 
576 			f->woet = combt;			/* Variable type for accessing combined entry */
577 			f->wovt = nord(f, combt);	/* Variable type holding weight/offset read value */
578 
579 			if ((f->wevt = findnord(f, t->we_ab)) < 0) {
580 				fprintf(stderr,"Can't find variable size to hold weighting factor\n");
581 				exit(-1);
582 			}
583 			if ((f->vovt = findnord(f, t->vo_ab)) < 0) {
584 				fprintf(stderr,"Can't find variable size to hold vertex offset index\n");
585 				exit(-1);
586 			}
587 		}
588 
589 		/* Compute the number of bits needed to hold an index into */
590 		/* the simplex table (index is in terms of table entry size). */
591 		/* This value is used to figure out the room needed in the input */
592 		/* table to accumulate the simplex cube base offset value. (SW_O macro) */
593 		f->sxmnb = calc_bits(g->id, g->stres);
594 
595 #ifdef VERBOSE
596 		/* Summarise the simplex table arrangements */
597 		printf("\n");
598 		printf("Simplex table structure:\n");
599 		printf("  Minimum bits needed to index table %d\n", f->sxmnb);
600 		printf("  Total simplex entry size %d bytes to hold %d entries\n",t->sm_ts, g->id+1);
601 		if (t->wo_xs) {
602 			printf("  Separate entries for offset and weight\n");
603 			printf("  Weighting entry size %d bytes\n",t->we_es);
604 			printf("  Offset entry size %d bytes\n",t->vo_es);
605 		} else {
606 			printf("  Combined offset and weight entries in %d bytes\n",t->wo_es);
607 			printf("  Weighting entry size %d bits\n",t->we_ab);
608 			printf("  Offset entry size %d bits\n",t->vo_ab);
609 		}
610 		printf("  Vertex offset scale factor %d\n", t->vo_om);
611 #endif /* VERBOSE */
612 
613 		/* We known how big the interpolation and simplex */
614 		/* tables indexes are going to be, so complete figuring out */
615 		/* how big the input table entries have to be. */
616 		if ((f->itet = findord(f, f->sxmnb + f->ixmnb)) >= 0) {/* size to read */
617 			int rem;						/* Remainder bits */
618 
619 			t->it_xs = 0;					/* Combined simplex+interp index */
620 
621 			t->it_ab = a->ords[f->itet].bits;	/* Bits in combined input entry */
622 			rem = t->it_ab - f->sxmnb - f->ixmnb;
623 			t->sx_ab = f->sxmnb + rem/2;		/* simplex index bits actually available */
624 			t->ix_ab = t->it_ab - t->sx_ab;		/* interp index bits actually available */
625 			t->it_ts = a->ords[f->itet].bits/8;	/* total size in bytes */
626 			f->itvt = nord(f, f->itet);			/* Variable type */
627 
628 			if ((f->sxvt = findnord(f, t->sx_ab)) < 0) {
629 				fprintf(stderr,"Simplex index variable size can't be handled\n");
630 				exit(-1);
631 			}
632 			if ((f->ixvt = findnord(f, t->ix_ab)) < 0) {
633 				fprintf(stderr,"Interp index variable size can't be handled\n");
634 				exit(-1);
635 			}
636 		} else {						/* Separate entries */
637 			int bbits;					/* Largest number of bits needed */
638 
639 			t->it_xs = 1;				/* Separate simplex+interp indexes */
640 			bbits = f->sxmnb > f->ixmnb ? f->sxmnb : f->ixmnb;
641 
642 			/* Allocate same size for both so that total structure size is power of 2 */
643 			if ((f->sxet = f->ixet = findord(f, bbits)) < 0) {
644 				fprintf(stderr,"Interp/Simplex index entry size can't be handled\n");
645 				exit(-1);
646 			}
647 
648 			t->sx_ab = a->ords[f->sxet].bits;		/* Actual bits available */
649 			t->sx_es = t->sx_ab/8;					/* Entry size in bytes */
650 			t->ix_ab = a->ords[f->ixet].bits;
651 			t->ix_es = t->sx_ab/8;
652 			t->it_ts = t->sx_es + t->ix_es;		/* total size in bytes */
653 			t->sx_eo = 0;						/* simplex index offset in bytes */
654 			t->ix_eo = t->sx_es;				/* interp. index offset in bytes */
655 			f->sxvt = nord(f, f->sxet);			/* Variable type */
656 			f->ixvt = nord(f, f->ixet);			/* Variable type */
657 		}
658 
659 #ifdef VERBOSE
660 		/* Summarise the input table arrangements */
661 		printf("\n");
662 		printf("Input table structure:\n");
663 		if (t->it_ix) {
664 			printf("  Input table extracts value from read values\n");
665 		} else {
666 			printf("  Value extraction read values is explicit\n");
667 		}
668 		printf("  Input table entry size = %d bytes\n",t->it_ts);
669 		if (t->it_xs) {
670 			printf("  Separate Interp. and Simplex index values\n");
671 			printf("  Interp. index is at offset %d, size %d bytes\n",t->ix_eo, t->ix_es);
672 			printf("  Simplex index is at offset %d, size %d bytes\n",t->sx_eo, t->sx_es);
673 		} else {
674 			printf("  Combined Interp. and Simplex index values\n");
675 			printf("  Values are size %d bytes\n",t->it_ts);
676 			printf("  Interp. index = %d bits\n",t->ix_ab);
677 			printf("  Simplex index = %d bits\n",t->sx_ab);
678 		}
679 #endif /* VERBOSE */
680 	}
681 
682 	/* Figure out output table stuff */
683 	{
684 		/* A variable to hold the index into an output table */
685 		if ((f->otit = findord(f, g->prec)) < 0) {
686 			fprintf(stderr,"Can't find output table index size\n");
687 			exit(-1);
688 		}
689 		f->otit = nord(f,f->otit);				/* Make temp variable natural size */
690 
691 		if (g->out.pint != 0)	/* Pixel interleaved */
692 			f->nop = 1;			/* Use same pointers for every pixel */
693 		else
694 			f->nop = g->od;		/* Use a separate pointer for each output value */
695 
696 		/* Figure out the output pointer types */
697 		f->otvt = 0;			/* Output table value type */
698 		for (e = 0; e < f->nop; e++) {
699 			if ((f->opt[e] = findord(f, g->out.bpch[e])) < 0) {
700 				fprintf(stderr,"Output channel size can't be handled\n");
701 				exit(-1);
702 			}
703 			if (f->opt[e] > f->otvt)
704 				f->otvt = f->opt[e];	/* Make value type big enough for any channel size */
705 		}
706 		t->ot_ts = a->ords[f->otvt].bits/8;	/* Output table entry size in bytes */
707 
708 		/* Setup information on data placement in output table entries */
709 		for (e = 0; e < g->od; e++) {
710 			t->ot_off[e] = g->out.bov[e];		/* Transfer info from generation spec. */
711 			t->ot_bits[e] = g->out.bpv[e];
712 		}
713 	}
714 
715 #ifdef VERBOSE
716 	/* Summarise the output table arrangements */
717 	printf("Output table structure:\n");
718 	printf("  Entry size = %d bytes\n",t->ot_ts);
719 	printf("  Output value placement within each enry is:\n");
720 	for (e = 0; e < f->nop; e++) {
721 		printf("    %d: Offset %d bits, size %d bits\n", e, t->ot_off[e], t->ot_bits[e]);
722 	}
723 #endif /* VERBOSE */
724 
725 	/* Compute the maximum interpolation table resolution we will be able to handle */
726 	{
727 		int res, ores;
728 
729 		res = calc_res(g->id, t->ix_ab);
730 		ores = calc_ores(g->id, t->vo_ab, t->vo_om);
731 		f->ixmxres = res < ores ? res : ores;
732 	}
733 
734 	/* Compute the maximum simplex table resolution we will be able to handle */
735 	if (t->sort) {
736 		f->sxmxres = 0;
737 	} else {
738 		f->sxmxres = calc_res(g->id, t->sx_ab);
739 	}
740 
741 #ifdef VERBOSE
742 	printf("Emitting introductory code\n"); fflush(stdout);
743 #endif /* VERBOSE */
744 
745 	/* Start of code generation */
746 	doheader(f);			/* Output the header comments */
747 
748 	/* We need an include file */
749 	line(f,"#ifndef  IMDI_INCLUDED");
750 	line(f,"#include <memory.h>");
751 	line(f,"#include \"imdi_utl.h\"");
752 	line(f,"#define  IMDI_INCLUDED");
753 	line(f,"#endif  /* IMDI_INCLUDED */");
754 	cr(f);
755 
756 	/* Declare our explicit pointer type */
757 	line(f,"#ifndef DEFINED_pointer");
758 	line(f,"#define DEFINED_pointer");
759 	line(f,"typedef unsigned char * pointer;");
760 	line(f,"#endif");
761 	cr(f);
762 
763 	/* Declare our explicit structure access macros */
764 
765 #ifdef VERBOSE
766 	printf("Declaring macros\n"); fflush(stdout);
767 #endif /* VERBOSE */
768 
769 	/* Macros for accessing input table entries */
770 	if (t->sort) {
771 		if (t->it_xs) {
772 			line(f,"/* Input table interp. index */");
773 			line(f,"#define IT_IX(p, off) *((%s *)((p) + %d + (off) * %d))",
774 			     a->ords[f->ixet].name, t->ix_eo, t->it_ts);
775 			cr(f);
776 			if (t->wo_xs) {
777 				line(f,"/* Input table input weighting enty */");
778 				line(f,"#define IT_WE(p, off) *((%s *)((p) + %d + (off) * %d))",
779 				     a->ords[f->weet].name, t->we_eo, t->it_ts);
780 				cr(f);
781 				line(f,"/* Input table input offset value enty */");
782 				line(f,"#define IT_VO(p, off) *((%s *)((p) + %d + (off) * %d))",
783 				     a->ords[f->voet].name, t->vo_eo, t->it_ts);
784 				cr(f);
785 			} else {
786 				line(f,"/* Input table input weighting/offset value enty */");
787 				line(f,"#define IT_WO(p, off) *((%s *)((p) + %d + (off) * %d))",
788 				     a->ords[f->woet].name, t->wo_eo, t->it_ts);
789 				cr(f);
790 			}
791 		} else {
792 			line(f,"/* Input table interp index, weighting and vertex offset */");
793 			line(f,"#define IT_IT(p, off) *((%s *)((p) + %d + (off) * %d))",
794 			     a->ords[f->itet].name, 0, t->it_ts);
795 			cr(f);
796 		}
797 
798 		/* Sort primitive macro's */
799 		line(f,"/* Sorting macros */");
800 		if (t->wo_xs) {
801 			line(f,"#define XFR(A, AA, B, BB) A = B; AA = BB;");
802 			line(f,"#define CEX(A, AA, B, BB) if (A < B) { \\");
803 			line(f,"            A ^= B; B ^= A; A ^= B; AA ^= BB; BB ^= AA; AA ^= BB; }");
804 			line(f,"#define CXJ(A, B, BB, D, DD, L) if (A >= B) { D = B; DD = BB; goto L; }");
805 		} else {
806 			line(f,"#define XFR(A, B) A = B;");
807 			line(f,"#define CEX(A, B) if (A < B) { A ^= B; B ^= A; A ^= B; }");
808 			line(f,"#define CXJ(A, B, D, L) if (A >= B) { D = B; goto L; }");
809 		}
810 		line(f,"#define CJ(A, B, L) if (A >= B) goto L;");
811 		cr(f);
812 
813 	} else {	/* Simplex table */
814 		if (t->it_xs) {
815 			line(f,"/* Input table interp. index */");
816 			line(f,"#define IT_IX(p, off) *((%s *)((p) + %d + (off) * %d))",
817 			     a->ords[f->ixet].name, t->ix_eo, t->it_ts);
818 			cr(f);
819 			line(f,"/* Input table simplex index enty */");
820 			line(f,"#define IT_SX(p, off) *((%s *)((p) + %d + (off) * %d))",
821 			     a->ords[f->sxet].name, t->sx_eo, t->it_ts);
822 			cr(f);
823 		} else {
824 			line(f,"/* Input table inter & simplex indexes */");
825 			line(f,"#define IT_IT(p, off) *((%s *)((p) + %d + (off) * %d))",
826 			     a->ords[f->itet].name, 0, t->it_ts);
827 			cr(f);
828 		}
829 	}
830 
831 	if (!t->sort) {
832 		/* Macro for computing a simplex table entry */
833 		line(f,"/* Simplex weighting table access */");
834 		line(f,"#define SW_O(off) ((off) * %d)", t->sm_ts);
835 		cr(f);
836 
837 		/* Macros for accessing the contents of the simplex table */
838 		if (t->wo_xs) { 			/* If separate */
839 			line(f,"/* Simplex table - get weighting value */");
840 			line(f,"#define SX_WE(p, v) *((%s *)((p) + (v) * %d + %d))",
841 			     a->ords[f->weet].name, t->wo_es, t->we_eo);
842 			cr(f);
843 
844 			line(f,"/* Simplex table - get offset value */");
845 			line(f,"#define SX_VO(p, v) *((%s *)((p) + (v) * %d + %d))",
846 			     a->ords[f->voet].name, t->wo_es, t->vo_eo);
847 			cr(f);
848 
849 		} else {	/* Combined */
850 			line(f,"/* Simplex table - get weighting/offset value */");
851 			line(f,"#define SX_WO(p, v) *((%s *)((p) + (v) * %d))",
852 			     a->ords[f->woet].name, t->wo_es);
853 			cr(f);
854 		}
855 	}
856 
857 	/* Macro for computing an interpolation table entry */
858 	line(f,"/* Interpolation multi-dim. table access */");
859 	line(f,"#define IM_O(off) ((off) * %d)", t->im_ts);
860 	cr(f);
861 
862 	/* Macro for accessing an entry in the interpolation table */
863 	line(f,"/* Interpolation table - get vertex values */");
864 
865 	if (t->im_fn > 0) {
866 		/* Arguments to macro are cell base address, vertex offset, data offset */
867 
868 		if (f->imfvt == f->iafvt) {	/* Table and accumulator are the same size */
869 			if (!timp || t->im_fn == 1)
870 				line(f,"#define IM_FE(p, v, c) *((%s *)((p) + (v) * %d + (c) * %d))",
871 				     a->ords[f->imfvt].name, t->im_oc, t->im_fs);
872 			else {
873 				line(f,"#define IM_TP(p, v) ((p) + (v) * %d)", t->im_oc);
874 				line(f,"#define IM_FE(p, c) *((%s *)((p) + (c) * %d))",
875 				     a->ords[f->imfvt].name, t->im_fs);
876 			}
877 		} else {					/* Expand single table entry to accumulator size */
878 			if (!timp || t->im_fn == 1)
879 				line(f,"#define IM_FE(p, v, c) ((%s)*((%s *)((p) + (v) * %d + (c) * %d)))",
880 				     a->ords[f->iafvt].name,
881 				     a->ords[f->imfvt].name, t->im_oc, t->im_fs);
882 			else {
883 				line(f,"#define IM_TP(p, v) ((p) + (v) * %d)", t->im_oc);
884 				line(f,"#define IM_FE(p, c) ((%s)*((%s *)((p) + (c) * %d)))",
885 				     a->ords[f->iafvt].name,
886 				     a->ords[f->imfvt].name, t->im_fs);
887 			}
888 		}
889 	}
890 	if (t->im_pn > 0) {
891 		/* Arguments to macro are cell base address, vertex offset */
892 		/* There is no data offset since there can only be one partial entry */
893 
894 		if (f->imfvt == f->iafvt)	/* Table and accumulator are the same size */
895 			line(f,"#define IM_PE(p, v) *((%s *)((p) + %d + (v) * %d))",
896 			     a->ords[f->impvt].name, t->im_fn * t->im_fs, t->im_oc);
897 		else						/* Expand single table entry to accumulator size */
898 			line(f,"#define IM_PE(p, v) ((%s)*((%s *)((p) + %d + (v) * %d)))",
899 			     a->ords[f->iafvt].name,
900 			     a->ords[f->impvt].name, t->im_fn * t->im_fs, t->im_oc);
901 	}
902 	cr(f);
903 
904 	/* Macro for accessing an output table entry */
905 	line(f,"/* Output table indexes */");
906 	line(f,"#define OT_E(p, off) *((%s *)((p) + (off) * %d))",
907 	     a->ords[f->otvt].name, t->ot_ts);
908 	cr(f);
909 
910 	/* =============================================== */
911 
912 #ifdef VERBOSE
913 	printf("Starting interpolation function\n"); fflush(stdout);
914 #endif /* VERBOSE */
915 
916 	/* Declare the function */
917 	line(f,"void");
918 	line(f, "imdi_k%d(",index);
919 	line(f, "imdi *s,			/* imdi context */");
920 	line(f, "void **outp,		/* pointer to output pointers */");
921 	line(f, "int  ostride,		/* optional input component stride */");
922 	line(f, "void **inp,		/* pointer to input pointers */");
923 	line(f, "int  istride,		/* optional input component stride */");
924 	line(f, "unsigned int npix	/* Number of pixels to process */");
925 	line(f, ") {");
926 	inc(f);
927 
928 	/* We need access to the imdi_imp */
929 	line(f, "imdi_imp *p = (imdi_imp *)(s->impl);");
930 
931 	/* Declare the input pointers and init them */
932 	for (e = 0; e < f->nip; e++) {
933 		if (g->opt & opts_bwd) {
934 			if (g->opt & opts_istride)
935 				line(f, "%s *ip%d = (%s *)inp[%d] + (npix-1) * istride;",
936 				     a->ords[f->ipt[e]].name, e,
937 				     a->ords[f->ipt[e]].name, e);
938 			else
939 				line(f, "%s *ip%d = (%s *)inp[%d] + (npix-1) * %d;",
940 				     a->ords[f->ipt[e]].name, e,
941 				     a->ords[f->ipt[e]].name, e,
942 				     g->in.chi[e]);
943 		} else {
944 			g->opt |= opts_fwd;			/* Make sure it's marked for what it is */
945 			line(f, "%s *ip%d = (%s *)inp[%d];",
946 			     a->ords[f->ipt[e]].name, e, a->ords[f->ipt[e]].name, e);
947 		}
948 	}
949 
950 	/* Declare the output pointers and init them */
951 	for (e = 0; e < f->nop; e++) {
952 		if (g->opt & opts_bwd) {
953 			if (g->opt & opts_ostride)
954 				line(f, "%s *op%d = (%s *)outp[%d] + (npix-1) * ostride;",
955 				     a->ords[f->opt[e]].name, e,
956 				     a->ords[f->opt[e]].name, e);
957 			else
958 				line(f, "%s *op%d = (%s *)outp[%d] + (npix-1) * %d;",
959 				     a->ords[f->opt[e]].name, e,
960 				     a->ords[f->opt[e]].name, e,
961 				     g->out.chi[e]);
962 		} else {
963 			line(f, "%s *op%d = (%s *)outp[%d];",
964 			     a->ords[f->opt[e]].name, e, a->ords[f->opt[e]].name, e);
965 		}
966 	}
967 
968 	/* Declare and initialise the end pointer */
969 	if (g->opt & opts_bwd) {
970 		if (g->opt & opts_istride)
971 			line(f, "%s *ep = (%s *)inp[0] - istride ;",
972 				    a->ords[f->ipt[0]].name,
973 			        a->ords[f->ipt[0]].name);
974 		else
975 			line(f, "%s *ep = (%s *)inp[0] - %d ;",
976 				    a->ords[f->ipt[0]].name,
977 			        a->ords[f->ipt[0]].name, g->in.chi[0]);
978 	} else {
979 		if (g->opt & opts_istride)
980 			line(f, "%s *ep = (%s *)inp[0] + npix * istride ;",
981 				    a->ords[f->ipt[0]].name,
982 			        a->ords[f->ipt[0]].name);
983 		else
984 			line(f, "%s *ep = (%s *)inp[0] + npix * %d ;",
985 				    a->ords[f->ipt[0]].name,
986 			        a->ords[f->ipt[0]].name, g->in.chi[0]);
987 	}
988 
989 	/* Declare and initialise the input table pointers */
990 	for (e = 0; e < g->id; e++)
991 		line(f,"pointer it%d = (pointer)p->in_tables[%d];",e,e);
992 
993 	/* Declare and initialise the output table pointers */
994 	for (e = 0; e < g->od; e++)
995 		line(f,"pointer ot%d = (pointer)p->out_tables[%d];",e,e);
996 
997 	if (!t->sort) {
998 		/* Declare and initialise the Simplex weighting base pointer */
999 		line(f,"pointer sw_base = (pointer)p->sw_table;");
1000 	}
1001 
1002 	/* Declare and initialise the Interpolation multidim base pointer */
1003 	line(f,"pointer im_base = (pointer)p->im_table;");
1004 
1005 	/* Figure out whether input channel reads can be used directly as table offsets */
1006 	t->it_ix = 1;				/* Default use input table lookup to extract value */
1007 
1008 	if (g->in.packed != 0)
1009 		t->it_ix = 0;				/* Extract will be done explicitly */
1010 
1011 	for (e = 0; e < g->id; e++) {
1012 		int ee = (g->in.pint != 0) ? 0 : e;		/* bpch index */
1013 
1014 		if ((g->in.bov[e] + g->in.bpv[e]) <= 12)
1015 			continue;							/* Table can do extract */
1016 
1017 		if (g->in.bov[e] != 0 || g->in.bpv[e] != g->in.bpch[ee]) {
1018 			t->it_ix = 0;						/* Extract will be done explicitly */
1019 			break;
1020 		}
1021 	}
1022 
1023 	/* ------------------------------- */
1024 #ifdef VERBOSE
1025 	printf("Starting pixel processing loop\n"); fflush(stdout);
1026 #endif /* VERBOSE */
1027 
1028 	/* Start the pixel processing loop */
1029 	cr(f);
1030 	if (g->opt & opts_bwd) {
1031 		sline(f, "for(;ip0 != ep;");
1032 
1033 		if (g->opt & opts_istride)
1034 			for (e = 0; e < f->nip; e++)
1035 				mline(f, " ip%d -= istride,", e);
1036 		else
1037 			for (e = 0; e < f->nip; e++)
1038 				mline(f, " ip%d -= %d,", e, g->in.chi[e]);
1039 
1040 		if (g->opt & opts_ostride)
1041 			for (e = 0; e < f->nop; e++)
1042 				mline(f, " op%d -= ostride%s", e, ((e+1) < f->nop) ? "," : "");
1043 		else
1044 			for (e = 0; e < f->nop; e++)
1045 				mline(f, " op%d -= %d%s", e, g->out.chi[e], ((e+1) < f->nop) ? "," : "");
1046 	} else {
1047 		sline(f, "for(;ip0 != ep;");
1048 
1049 		if (g->opt & opts_istride)
1050 			for (e = 0; e < f->nip; e++)
1051 				mline(f, " ip%d += istride,", e);
1052 		else
1053 			for (e = 0; e < f->nip; e++)
1054 				mline(f, " ip%d += %d,", e, g->in.chi[e]);
1055 
1056 		if (g->opt & opts_ostride)
1057 			for (e = 0; e < f->nop; e++)
1058 				mline(f, " op%d += ostride%s", e, ((e+1) < f->nop) ? "," : "");
1059 		else
1060 			for (e = 0; e < f->nop; e++)
1061 				mline(f, " op%d += %d%s", e, g->out.chi[e], ((e+1) < f->nop) ? "," : "");
1062 	}
1063 	eline(f, ") {");
1064 	inc(f);
1065 
1066 	/* Declare output value accumulator(s) */
1067 	for (i = 0; i < t->im_fn; i++) {
1068 		line(f,"%s ova%d;	/* Output value accumulator */",a->ords[f->iafvt].name,i);
1069 	}
1070 	for (; i < f->ian; i++) {
1071 		line(f,"%s ova%d;	/* Output value partial accumulator */",a->ords[f->iapvt].name,i);
1072 	}
1073 
1074 	/* Context around interp/Simplex table lookup */
1075 	line(f, "{");
1076 	inc(f);
1077 
1078 	if (!t->sort)
1079 		line(f,"pointer swp;");		/* Declare Simplex weighting pointer */
1080 	line(f,"pointer imp;");			/* Declare Interpolation multidim pointer */
1081 
1082 	/* Declare the input weighting/vertex offset variables */
1083 	if (t->sort) {
1084 		for (e = 0; e < g->id; e++) {
1085 			if (t->wo_xs) {
1086 				line(f,"%s we%d;	/* Weighting value variable */",
1087 				       a->ords[f->wevt].name, e);
1088 				line(f,"%s vo%d;	/* Vertex offset variable */",
1089 				       a->ords[f->vovt].name, e);
1090 			} else {
1091 				line(f,"%s wo%d;	/* Weighting value and vertex offset variable */",
1092 				       a->ords[f->wovt].name, e);
1093 			}
1094 		}
1095 	}
1096 
1097 	/* Context around input table processing */
1098 	line(f, "{");
1099 	inc(f);
1100 
1101 	/* Declare the table index variables/input weighting/vertex offset variables */
1102 	if (t->sort) {
1103 		if (!t->it_xs)
1104 			line(f,"%s ti;		/* Input table entry variable */",a->ords[f->itvt].name);
1105 		line(f,"%s ti_i;	/* Interpolation index variable */",a->ords[f->ixvt].name);
1106 	} else {
1107 		if (t->it_xs) {
1108 			line(f,"%s ti_s;	/* Simplex index variable */",a->ords[f->sxvt].name);
1109 			line(f,"%s ti_i;	/* Interpolation index variable */",a->ords[f->ixvt].name);
1110 		} else {
1111 			line(f,"%s ti;	/* Simplex+Interpolation index variable */",a->ords[f->itvt].name);
1112 		}
1113 	}
1114 
1115 	if (g->in.packed != 0)	/* We need to unpack from a single read */
1116 		line(f,"%s rdv;		/* Read value */",a->ords[f->ipt[0]].name);
1117 
1118 	if (t->it_ix == 0) {
1119 		int bv = 0;
1120 		for (e = 0; e < f->nip; e++) {	/* Find largest input type */
1121 			if (f->ipt[e] > bv)
1122 				bv = f->ipt[e];
1123 		}
1124 		bv = nord(f, bv);
1125 		line(f,"%s chv;	/* Channel value */",a->ords[bv].name);
1126 		f->chv_bits = a->ords[bv].bits;
1127 	}
1128 	cr(f);
1129 
1130 #ifdef VERBOSE
1131 	printf("Read code\n"); fflush(stdout);
1132 #endif /* VERBOSE */
1133 
1134 	/* For all the input channels */
1135 	for (e = 0; e < g->id; e++) {
1136 		char rde[50];		/* Read expression */
1137 		char toff[50];		/* Table offset expression */
1138 		int ee = (g->in.pint != 0) ? 0 : e;		/* bpch index */
1139 
1140 		if (g->in.pint != 0) 	/* Pixel interleaved */
1141 			sprintf(rde,"ip0[%d]",e);	/* Offset from single pointer */
1142 		else
1143 			sprintf(rde,"*ip%d",e);		/* Pointer per channel */
1144 
1145 		if (g->in.packed != 0) {
1146 			if (e == 0)
1147 				line(f,"rdv = %s;",rde);	/* Do single read */
1148 			sprintf(rde,"rdv");				/* Use read value for extraction */
1149 		}
1150 
1151 		if (t->it_ix == 0) {
1152 			if (g->in.bov[e] == 0 ) {				/* No offset */
1153 				if (g->in.bpv[e] == g->in.bpch[ee])	/* No mask */
1154 					line(f,"chv = %s;",rde);
1155 				else								/* Just mask  */
1156 					line(f,"chv = (%s & %s);",rde, hmask(g->in.bpv[e]));
1157 			} else {								/* Offset */
1158 				if ((g->in.bov[e] + g->in.bpv[e]) == g->in.bpch[ee])
1159 					line(f,"chv = (%s >> %d);",rde, g->in.bov[e]);
1160 				else {								/* Offset and mask */
1161 					if (a->shfm || g->in.bpv[e] > 32) {
1162 						/* Extract using just shifts */
1163 						line(f,"chv = ((%s << %d) >> %d);", rde,
1164 						        f->chv_bits - g->in.bpv[e] - g->in.bov[e],
1165 						        f->chv_bits - g->in.bpv[e]);
1166 					} else {
1167 						/* Extract using shift and mask */
1168 						line(f,"chv = ((%s >> %d) & %s);",
1169 						        rde, g->in.bov[e], hmask(g->in.bpv[e]));
1170 					}
1171 				}
1172 			}
1173 			sprintf(toff,"chv");
1174 		} else {									/* No extraction */
1175 			sprintf(toff,"%s",rde);
1176 		}
1177 
1178 		if (t->sort) {
1179 			if (t->it_xs) {
1180 				line(f,"ti_i %s= IT_IX(it%d, %s);", e ? "+" : " ", e, toff);
1181 				if (t->wo_xs) {
1182 					line(f,"we%d   = IT_WE(it%d, %s);", e, e, toff);
1183 					line(f,"vo%d   = IT_VO(it%d, %s);", e, e, toff);
1184 				} else {
1185 					line(f,"wo%d   = IT_WO(it%d, %s);", e, e, toff);
1186 				}
1187 			} else {	/* All three combined */
1188 				line(f,"ti = IT_IT(it%d, %s);", e, toff);
1189 				if (a->shfm || t->wo_ab > 32) {
1190 					/* Extract using just shifts */
1191 					line(f,"wo%d   = ((ti << %d) >> %d);	"
1192 					     "/* Extract weighting/vertex offset value */",
1193 					     e, a->ords[f->wovt].bits - t->wo_ab, a->ords[f->wovt].bits - t->wo_ab);
1194 					line(f,"ti_i %s= (ti >> %d);	"
1195 					     "/* Extract interpolation table value */",
1196 					     e ? "+" : " ", t->wo_ab);
1197 				} else {
1198 					/* Extract using shift and mask */
1199 					line(f,"wo%d   = (ti & %s);	"
1200 					     "/* Extract weighting/vertex offset value */",
1201 					     e, hmask(t->wo_ab));
1202 					line(f,"ti_i %s= (ti >> %d);	"
1203 					     "/* Extract interpolation table value */",
1204 					     e ? "+" : " ", t->wo_ab);
1205 				}
1206 			}
1207 
1208 		} else {	/* Simplex */
1209 			if (t->it_xs) {
1210 				/* ~~~~ should toff be forced to be a temp variable ?? */
1211 				/* (ie. force use of rde (above) if t->it_xs is nonz) */
1212 				line(f,"ti_i %s= IT_IX(it%d, %s);", e ? "+" : " ", e, toff);
1213 				line(f,"ti_s %s= IT_SX(it%d, %s);", e ? "+" : " ", e, toff);
1214 			} else {
1215 				line(f,"ti %s= IT_IT(it%d, %s);", e ? "+" : " ", e, toff);
1216 			}
1217 		}
1218 	}
1219 
1220 #ifdef VERBOSE
1221 	printf("Index extraction code\n"); fflush(stdout);
1222 #endif /* VERBOSE */
1223 
1224 	cr(f);
1225 
1226 	if (t->sort) {
1227 		/* Compute the interpolation table entry pointer from the accumulated index */
1228 		line(f,"imp = im_base + IM_O(ti_i);		/* Compute interp. table entry pointer */");
1229 	} else {
1230 		if (t->it_xs) {		/* Extract Simplex and Interpolation indexes from accumulator */
1231 			line(f,"swp = sw_base + SW_O(ti_s);		/* Compute simplex table entry pointer */");
1232 			line(f,"imp = im_base + IM_O(ti_i);		/* Compute interp. table entry pointer */");
1233 		} else {
1234 			line(f,"imp = im_base + IM_O(ti >> %d);		"
1235 			     "/* Extract interp. index and comp. entry */",
1236 			     t->sx_ab);
1237 			if (a->shfm || t->sx_ab > 32) {
1238 				/* Extract using just shifts */
1239 				line(f,"swp = sw_base + SW_O((ti << %d) >> %d);	"
1240 				     "/* Extract simplex index & comp. entry */",
1241 				     a->ords[f->itvt].bits - t->sx_ab, a->ords[f->itvt].bits - t->sx_ab);
1242 			} else {
1243 				/* Extract using shift and mask */
1244 				line(f,"swp = sw_base + SW_O(ti & %s);	"
1245 				     "/* Extract simplex index and comp. entry */",
1246 				     hmask(t->sx_ab));
1247 			}
1248 		}
1249 	}
1250 
1251 	/* Do the explicit sort now */
1252 	if (t->sort) {
1253 		cr(f);
1254 		/* Sort from largest to smallest */
1255 		/* We can use a selection sort, or an insertion sort. */
1256 
1257 		line(f,"/* Sort weighting values and vertex offset values */");
1258 
1259 		if (g->id >= INSTHRESH) {
1260 			/* We do an insertion sort */
1261 			lineinc(f,"{");
1262 			if (t->wo_xs) {
1263 				line(f,"%s wet;	/* Sort temporary */", a->ords[f->wevt].name);
1264 				line(f,"%s vot;	/* Sort temporary */", a->ords[f->vovt].name);
1265 			} else
1266 				line(f,"%s wot;	/* Sort temp variable */", a->ords[f->wovt].name);
1267 			cr(f);
1268 
1269 			for (i = 1; i < g->id; i++) {
1270 				int j;
1271 
1272 				j = i;
1273 				if (j < 2) {	/* Only test & exchange needed */
1274 					if (t->wo_xs)
1275 						line(f,"CEX(we%d, vo%d, we%d, vo%d);",j-1,j-1,j,j);
1276 					else
1277 						line(f,"CEX(wo%d, wo%d);",j-1,j);
1278 
1279 				} else {
1280 					if (t->wo_xs)
1281 						line(f,"XFR(wet, vot, we%d, vo%d);",j,j);
1282 					else
1283 						line(f,"XFR(wot, wo%d);",j);
1284 					while (j > 0) {
1285 						if (j == i) {		/* First test from i */
1286 							if (t->wo_xs)
1287 								line(f,"CJ(we%d, wet, shs%d);",j-1,i);
1288 							else
1289 								line(f,"CJ(wo%d, wot, shs%d);",j-1,i);
1290 							if (t->wo_xs)
1291 								line(f,"XFR(we%d, vo%d, we%d, vo%d);",j,j,j-1,j-1);
1292 							else
1293 								line(f,"XFR(wo%d, wo%d);",j,j-1);
1294 						} else {
1295 							if (t->wo_xs)
1296 								line(f,"CXJ(we%d, wet, vot, we%d, vo%d, shs%d);",j-1,j,j,i);
1297 							else
1298 								line(f,"CXJ(wo%d, wot, wo%d, shs%d);",j-1,j,i);
1299 							if (t->wo_xs)
1300 								line(f,"XFR(we%d, vo%d, we%d, vo%d);",j,j,j-1,j-1);
1301 							else
1302 								line(f,"XFR(wo%d, wo%d);",j,j-1);
1303 						}
1304 						j--;
1305 					}
1306 					if (t->wo_xs)
1307 						line(f,"XFR(we%d, vo%d, wet, vot);",j,j);
1308 					else
1309 						line(f,"XFR(wo%d, wot);",j);
1310 					niline(f,"shs%d:;",i);
1311 				}
1312 			}
1313 			decline(f,"}");
1314 
1315 		} else {
1316 			/* Use a selection sort */
1317 			for (i = 0; i < (g->id-1); i++) {
1318 				for (e = i+1; e < g->id; e++) {
1319 					if (t->wo_xs)
1320 						line(f,"CEX(we%d, vo%d, we%d, vo%d);",i,i,e,e);
1321 					else
1322 						line(f,"CEX(wo%d, wo%d);",i,e);
1323 				}
1324 			}
1325 		}
1326 	}
1327 
1328 	/* End of input table processing context */
1329 	dec(f);
1330 	line(f,"}");
1331 
1332 	line(f,"{");	/* Context around vertex lookup and accumulation */
1333 	inc(f);
1334 
1335 	/* Declare vertex offset and weight variables */
1336 	if (t->sort && t->wo_xs == 0) {
1337 		line(f,"%s nvof;	/* Next vertex offset value */",a->ords[f->vovt].name);
1338 	} else {
1339 		if (!t->wo_xs)	/* If combined in table */
1340 			line(f,"%s vowr;	/* Vertex offset/weight value */",a->ords[f->wovt].name);
1341 	}
1342 	line(f,"%s vof;	/* Vertex offset value */",a->ords[f->vovt].name);
1343 	line(f,"%s vwe;	/* Vertex weighting */",a->ords[f->wevt].name);
1344 	if (timp && t->im_fn > 1)
1345 		line(f,"pointer timp;		/* Temporary interpolation table pointer */");
1346 	cr(f);
1347 
1348 #ifdef VERBOSE
1349 	printf("Vertex offset and weight code\n"); fflush(stdout);
1350 #endif /* VERBOSE */
1351 
1352 	/* For each vertex in the simplex */
1353 	for (e = 0; e < (g->id +1); e++) {
1354 
1355 		if (t->sort) {
1356 
1357 			if (e == 0) {
1358 				line(f,"vof = 0;				/* First vertex offset is 0 */");
1359 			} else {
1360 				if (t->wo_xs)
1361 					line(f,"vof += vo%d;			/* Move to next vertex */",e-1);
1362 				else
1363 					line(f,"vof += nvof;			/* Move to next vertex */");
1364 			}
1365 
1366 			/* Extract the vertex offset and weight values from the sorted input values */
1367 			if (e < g->id && !t->wo_xs) {
1368 				if (a->shfm || t->vo_ab > 32) {
1369 					/* Extract using just shifts */
1370 					line(f,"nvof = ((wo%d << %d) >> %d);	"
1371 					     "/* Extract offset value */",
1372 					     e, a->ords[f->vovt].bits - t->vo_ab, a->ords[f->vovt].bits - t->vo_ab);
1373 					line(f,"wo%d = (wo%d >> %d);	"
1374 					     "	/* Extract weighting value */",
1375 					     e, e, t->vo_ab);
1376 				} else {
1377 					/* Extract using shift and mask */
1378 					line(f,"nvof = (wo%d & %s);	"
1379 					     "/* Extract offset value */",
1380 					     e, hmask(t->vo_ab));
1381 					line(f,"wo%d = (wo%d >> %d);	"
1382 					     "	/* Extract weighting value */",
1383 					     e, e, t->vo_ab);
1384 				}
1385 			}
1386 			/* Compute the weighting value */
1387 			if (!t->wo_xs) {
1388 				if (e == 0) {
1389 					line(f,"vwe = %d - wo%d;		/* Baricentric weighting */", 1 << g->prec, e);
1390 				} else if (e < g->id) {
1391 					line(f,"vwe = wo%d - wo%d;		/* Baricentric weighting */", e-1, e);
1392 				} else {
1393 					line(f,"vwe = wo%d;				/* Baricentric weighting */", e-1);
1394 				}
1395 			} else {
1396 				if (e == 0) {
1397 					line(f,"vwe = %d - we%d;		/* Baricentric weighting */", 1 << g->prec, e);
1398 				} else if (e < g->id) {
1399 					line(f,"vwe = we%d - we%d;		/* Baricentric weighting */", e-1, e);
1400 				} else {
1401 					line(f,"vwe = we%d;				/* Baricentric weighting */", e-1);
1402 				}
1403 			}
1404 
1405 		} else {	/* Not sort */
1406 			/* Read the vertex offset and weight values from the simplex table */
1407 			if (t->wo_xs) { 			/* If separate */
1408 				line(f,"vof = SX_VO(swp, %d);	/* Read vertex offset value */", e);
1409 				line(f,"vwe = SX_WE(swp, %d);	/* Read vertex weighting value */", e);
1410 			} else { 			/* If combined in table */
1411 				line(f,"vowr = SX_WO(swp, %d);	/* Read vertex offset+weighting values */", e);
1412 				if (a->shfm || t->vo_ab > 32) {
1413 					/* Extract using just shifts */
1414 					line(f,"vof = ((vowr << %d) >> %d);	"
1415 					     "/* Extract offset value */",
1416 					     a->ords[f->vovt].bits - t->vo_ab, a->ords[f->vovt].bits - t->vo_ab);
1417 					line(f,"vwe = (vowr >> %d);	"
1418 					     "/* Extract weighting value */",
1419 					     t->vo_ab);
1420 				} else {
1421 					/* Extract using shift and mask */
1422 					line(f,"vof = (vowr & %s);	"
1423 					     "/* Extract offset value */",
1424 					     hmask(t->vo_ab));
1425 					line(f,"vwe = (vowr >> %d);	"
1426 					     "/* Extract weighting value */",
1427 					     t->vo_ab);
1428 				}
1429 			}
1430 		}
1431 
1432 		/* Lookup the vertex value, weight it, and accumulate it into output value */
1433 		if (timp && t->im_fn > 1)
1434 			line(f,"timp = IM_TP(imp, vof);	/* Vertex address */");
1435 		for (i = 0; i < f->ian; i++) {		/* For each output accumulation chunk */
1436 			if (i < t->im_fn) { 	/* Full entry */
1437 				if (!timp || t->im_fn == 1)
1438 					line(f,"ova%d %s= IM_FE(imp, vof, %d) * vwe;	"
1439 					     "/* Accumulate weighted output values */",
1440 					     i, e ? "+" : " ", i);
1441 				else
1442 					line(f,"ova%d %s= IM_FE(timp, %d) * vwe;	"
1443 					     "/* Accumulate weighted output values */",
1444 					     i, e ? "+" : " ", i);
1445 			} else				/* One partial entry */
1446 				line(f,"ova%d %s= IM_PE(imp, vof) * vwe;	"
1447 				     "/* Accumulate last weighted output values */",
1448 				     i, e ? "+" : " ");
1449 		}
1450 	}
1451 
1452 	dec(f);
1453 	line(f, "}"); 	/* End of output value lookup context */
1454 
1455 	dec(f);
1456 	line(f, "}"); 	/* End of output value accumulation context */
1457 
1458 	/* Start of output lookup and write */
1459 	line(f,"{");
1460 	inc(f);
1461 
1462 #ifdef VERBOSE
1463 	printf("Output table code\n"); fflush(stdout);
1464 #endif /* VERBOSE */
1465 
1466 	{
1467 		char wre[50];		/* Write destination expression */
1468 
1469 		if (g->out.packed != 0)	/* We need to pack results into a single write */
1470 			line(f,"%s wrv;		/* Write value */",a->ords[f->ipt[0]].name);
1471 
1472 		/* Declare temporary to hold index into output lookup table */
1473 		line(f,"%s oti;	/* Vertex offset value */",a->ords[f->otit].name);
1474 		if (g->oopt & OOPTS_CHECK)
1475 			line(f,"%s otv;	/* Output temporary value */",a->ords[f->otvt].name);
1476 
1477 		/* For each accumulator value */
1478 		/* (Assume they are in output order for the moment ?) */
1479 		for (e = i = 0; i < f->ian; i++) {		/* For each output accumulation chunk */
1480 			int vpa = i < t->im_fn ? t->im_fv : t->im_pv;		/* Chanel values per accumulator */
1481 			int oat = i < t->im_fn ? f->iafvt : f->iapvt;		/* Output accumulator type */
1482 			int ee;		/* Relative e to this accumulator */
1483 
1484 			/* For each output value in this accumulator */
1485 			for (ee = 0; ee < vpa && e < g->od; ee++, e++) {
1486 				int off, size;		/* Bits to be extracted */
1487 
1488 				/* Extract wanted 8 bits from the 8.8 bit result in accumulator */
1489 				/* (or 16 bits from 16.16) */
1490 				off = ee * f->iaovb + (f->iaovb - g->prec);
1491 				size = g->prec;
1492 
1493 				if (e == 0 || g->out.packed == 0) {
1494 					if (g->out.pint != 0) 			/* Pixel interleaved */
1495 						sprintf(wre,"op0[%d]",e);	/* Offset from single pointer */
1496 					else
1497 						sprintf(wre,"*op%d",e);		/* Pointer per channel */
1498 				}
1499 
1500 				if (a->shfm || size > 32) {
1501 					/* Extract using just shifts */
1502 #ifdef ROUND
1503 					line(f,"oti = (((ova%d + (1 << %d)) << %d) >> %d);	"
1504 					     "/* Extract integer part of result */",
1505 					     i, off-1, a->ords[oat].bits - off - size, a->ords[oat].bits - size);
1506 #else
1507 					line(f,"oti = ((ova%d << %d) >> %d);	"
1508 					     "/* Extract integer part of result */",
1509 					     i, a->ords[oat].bits - off - size, a->ords[oat].bits - size);
1510 #endif
1511 				} else {
1512 					/* Extract using shift and mask */
1513 #ifdef ROUND
1514 					line(f,"oti = (((ova%d + 0x%x) >> %d) & %s);	"
1515 					     "/* Extract integer part of result */",
1516 					     i, (1 << off-1), off, hmask(size));
1517 #else
1518 					line(f,"oti = ((ova%d >> %d) & %s);	"
1519 					     "/* Extract integer part of result */",
1520 					     i, off, hmask(size));
1521 #endif
1522 				}
1523 
1524 				if (g->oopt & OOPT(oopts_check,e)) {	/* Lookup with check */
1525 					line(f,"otv = OT_E(ot%d, oti);	/* Fetch result */", e);
1526 					line(f,"if (otv != p->checkv[%d])	/* Do output value check */", e);
1527 					line(f,"	p->checkf |= (1 << %d);	/* Set check flag */", e);
1528 					if (g->out.packed != 0) {
1529 						if (g->oopt & OOPT(oopts_skip,e))
1530 							return 2;		/* Error, can't skip on pixel interleaved */
1531 						line(f,"wrv %s= otv;", e ? "+" : "", e);
1532 					} else {
1533 						if (g->oopt & OOPT(oopts_skip,e)) {
1534 							line(f,"if ((p->skipf & (1 << %d)) == 0)	/* If not being skipped */", e);
1535 							line(f,"	%s = otv;	/* Write result */", wre);
1536 						} else
1537 							line(f,"%s = otv;	/* Write result */", wre);
1538 					}
1539 				} else {		/* Normal lookup output table */
1540 					/* Lookup in output table and write to destination */
1541 					if (g->out.packed != 0) {
1542 						if (g->oopt & OOPT(oopts_skip,e))
1543 							return 2;		/* Error, can't skip on pixel interleaved */
1544 						line(f,"wrv %s= OT_E(ot%d, oti);", e ? "+" : "", e);
1545 					} else {
1546 						if (g->oopt & OOPT(oopts_skip,e)) {
1547 							line(f,"if ((p->skipf & (1 << %d)) == 0)	/* If not being skipped */", e);
1548 							line(f,"	%s = OT_E(ot%d, oti);	/* Write result */", wre, e);
1549 						} else
1550 							line(f,"%s = OT_E(ot%d, oti);	/* Write result */", wre, e);
1551 					}
1552 				}
1553 			}
1554 		}
1555 
1556 		if (g->out.packed != 0) {	/* Write out the accumulated value */
1557 			line(f,"%s = wrv;	/* Write result */", wre);
1558 		}
1559 	}
1560 
1561 	/* The end of the output lookup and write */
1562 	dec(f);
1563 	line(f, "}");
1564 
1565 	/* The end of the pixel processing loop */
1566 	dec(f);
1567 	line(f, "}");
1568 
1569 	/* The end of the function */
1570 	dec(f);
1571 	line(f, "}");
1572 
1573 	/* Undefine all the macros */
1574 	if (t->sort) {
1575 		if (t->it_xs) {
1576 			if (t->wo_xs) {
1577 				line(f,"#undef IT_WE");
1578 				line(f,"#undef IT_VO");
1579 			} else
1580 				line(f,"#undef IT_WO");
1581 			line(f,"#undef IT_IX");
1582 		} else {
1583 			line(f,"#undef IT_IT");
1584 		}
1585 		line(f,"#undef CXJ");
1586 		line(f,"#undef CJ");
1587 		line(f,"#undef XFR");
1588 		line(f,"#undef CEX");
1589 	} else {
1590 		if (t->it_xs) {
1591 			line(f,"#undef IT_IX");
1592 			line(f,"#undef IT_SX");
1593 		} else {
1594 			line(f,"#undef IT_IT");
1595 		}
1596 
1597 		line(f,"#undef SW_O");
1598 		if (t->wo_xs) {
1599 			line(f,"#undef SX_WE");
1600 			line(f,"#undef SX_VO");
1601 		} else {
1602 			line(f,"#undef SX_WO");
1603 		}
1604 	}
1605 	line(f,"#undef IM_O");
1606 	if (t->im_fn > 0) {
1607 		if (timp && t->im_fn > 1)
1608 			line(f,"#undef IM_TP");
1609 		line(f,"#undef IM_FE");
1610 	}
1611 	if (t->im_pn > 0) {
1612 		line(f,"#undef IM_PE");
1613 	}
1614 	line(f,"#undef OT_E");
1615 
1616 	/* =============================================== */
1617 #ifdef VERBOSE
1618 	printf("Done interpolation code\n"); fflush(stdout);
1619 #endif /* VERBOSE */
1620 
1621 	/* =============================================== */
1622 
1623 	/* !genspec and tabspec delta code! */
1624 	/* We generate code that updates any entries in the genspec and */
	/* tabspec structures that are different for this kernel, */
1626 	/* compared to the previously generated kernel. */
1627 	/* In this way, we save a lot of space, at the price */
1628 	/* of having to access the table of kernels sequentially. */
1629 
	/* If the genspec or tabspec structures are modified, */
1631 	/* then corresponding changes need to be made to the code here. */
1632 	{
1633 		int i;
1634 		int s_stres, s_itres;	/* Save values */
1635 		imdi_options s_opt;
1636 
1637 		s_stres = g->stres;
1638 		s_itres = g->itres;
1639 		s_opt = g->opt;
1640 		g->stres = f->sxmxres;			/* Set maximum values */
1641 		g->itres = f->ixmxres;
1642 		g->opt &= ~opts_splx;			/* Don't care about this, only about opts_splx/sort */
1643 		if (frv == 0) {					/* Simplex algorithm wasn't possible */
1644 			g->opt &= ~opts_splx_sort;	/* Therefore we don't care about preference */
1645 			g->opt &= ~opts_sort_splx;
1646 		}
1647 
1648 		/* Declare the genspec & tabspec update function */
1649 		cr(f);
1650 		line(f,"void");
1651 		line(f, "imdi_k%d_gentab(",index);
1652 		line(f, "genspec *g,		/* structure to be updated */");
1653 		line(f, "tabspec *t		/* structure to be updated */");
1654 		line(f, ") {");
1655 		inc(f);
1656 
1657 #define GSET_ENTRY(KEY) if (g->KEY != og->KEY) line(f, "g->%s = %d;",#KEY,g->KEY)
1658 #define GSET_ARRAY(KEY,IX) if (g->KEY[IX] != og->KEY[IX]) line(f, "g->%s[%d] = %d;",#KEY,IX,g->KEY[IX])
1659 #define TSET_ENTRY(KEY) if (t->KEY != ot->KEY) line(f, "t->%s = %d;",#KEY,t->KEY)
1660 #define TSET_ARRAY(KEY,IX) if (t->KEY[IX] != ot->KEY[IX]) line(f, "t->%s[%d] = %d;",#KEY,IX,t->KEY[IX])
1661 
1662 		/* Create code that updates the genspec structure from og to g */
1663 		GSET_ENTRY(prec);
1664 		GSET_ENTRY(id);
1665 		GSET_ENTRY(od);
1666 		GSET_ENTRY(irep);
1667 		GSET_ENTRY(orep);
1668 		GSET_ENTRY(in_signed);
1669 		GSET_ENTRY(out_signed);
1670 
1671 		/* pixlayout structure */
1672 		for (i = 0; i < IXDIDO; i++) {
1673 			GSET_ARRAY(in.bpch,i);
1674 			GSET_ARRAY(in.chi,i);
1675 			GSET_ARRAY(in.bov,i);
1676 			GSET_ARRAY(in.bpv,i);
1677 		}
1678 		GSET_ENTRY(in.pint);
1679 		GSET_ENTRY(in.packed);
1680 
1681 		/* pixlayout structure */
1682 		for (i = 0; i < IXDIDO; i++) {
1683 			GSET_ARRAY(out.bpch,i);
1684 			GSET_ARRAY(out.chi,i);
1685 			GSET_ARRAY(out.bov,i);
1686 			GSET_ARRAY(out.bpv,i);
1687 		}
1688 		GSET_ENTRY(out.pint);
1689 		GSET_ENTRY(out.packed);
1690 
1691 		GSET_ENTRY(oopt);
1692 		GSET_ENTRY(opt);
1693 		GSET_ENTRY(itres);
1694 		GSET_ENTRY(stres);
1695 
1696 		for (i = 0; i < 100; i++) {
1697 			GSET_ARRAY(kkeys,i);
1698 		}
1699 		for (i = 0; i < 100; i++) {
1700 			GSET_ARRAY(kdesc,i);
1701 		}
1702 		for (i = 0; i < 100; i++) {
1703 			GSET_ARRAY(kname,i);
1704 		}
1705 
		/* Create code that updates the tabspec structure from ot to t */
1707 		TSET_ENTRY(sort);
1708 		TSET_ENTRY(it_xs);
1709 		TSET_ENTRY(wo_xs);
1710 		TSET_ENTRY(it_ix);
1711 		TSET_ENTRY(it_ab);
1712 		TSET_ENTRY(it_ts);
1713 		TSET_ENTRY(ix_ab);
1714 		TSET_ENTRY(ix_es);
1715 		TSET_ENTRY(ix_eo);
1716 		TSET_ENTRY(sx_ab);
1717 		TSET_ENTRY(sx_es);
1718 		TSET_ENTRY(sx_eo);
1719 		TSET_ENTRY(sm_ts);
1720 		TSET_ENTRY(wo_ab);
1721 		TSET_ENTRY(wo_es);
1722 		TSET_ENTRY(wo_eo);
1723 		TSET_ENTRY(we_ab);
1724 		TSET_ENTRY(we_es);
1725 		TSET_ENTRY(we_eo);
1726 		TSET_ENTRY(vo_ab);
1727 		TSET_ENTRY(vo_es);
1728 		TSET_ENTRY(vo_eo);
1729 		TSET_ENTRY(vo_om);
1730 		TSET_ENTRY(im_cd);
1731 		TSET_ENTRY(im_ts);
1732 		TSET_ENTRY(im_oc);
1733 		TSET_ENTRY(im_fs);
1734 		TSET_ENTRY(im_fn);
1735 		TSET_ENTRY(im_fv);
1736 		TSET_ENTRY(im_ps);
1737 		TSET_ENTRY(im_pn);
1738 		TSET_ENTRY(im_pv);
1739 		TSET_ENTRY(ot_ts);
1740 		for (i = 0; i < IXDO; i++) {
1741 			TSET_ARRAY(ot_off, i);
1742 		}
1743 		for (i = 0; i < IXDO; i++) {
1744 			TSET_ARRAY(ot_bits,i);
1745 		}
1746 
1747 #undef GSET_ENTRY
1748 #undef GSET_ARRAY
1749 #undef TSET_ENTRY
1750 #undef TSET_ARRAY
1751 
1752 		/* The end of the function */
1753 		dec(f);
1754 		line(f, "}");
1755 
1756 		g->opt = s_opt;			/* Restore entry values */
1757 		g->stres = s_stres;
1758 		g->itres = s_itres;
1759 	}
1760 
1761 	/* =============================================== */
1762 
1763 	cr(f); cr(f); cr(f); cr(f); cr(f); cr(f);
1764 
1765 	return frv;
1766 }
1767 
1768 
1769 /* Return bits needed to store index into table of */
1770 /* given resolution and dimensionality. */
1771 static int
1772 calc_bits(
1773 int dim,
1774 int res) {
1775 
1776 	return (int)ceil(log((double)res) * (double)dim/log(2.0) - 1e-14);
1777 }
1778 
1779 /* Return maximum resolution possible given dimensionality */
1780 /* and number of index bits. */
1781 static int
1782 calc_res(
1783 int dim,
1784 int bits) {
1785 	double fres;
1786 
1787 	fres = log(2.0) * (double)bits/(double)dim;
1788 	if (fres > 12 || (fres = exp(fres)) > 65536.0)
1789 		fres = 65536.0;		/* Limit to a sane value */
1790 	return (int)(fres + 1e-14);
1791 }
1792 
1793 /* Return bits needed to store a relative offset of 1, */
1794 /* into a table of given resolution, dimensionality , and */
1795 /* entry size. */
1796 static int
1797 calc_obits(
1798 int dim,
1799 int res,
1800 int esize) {
1801 	double off;		/* Maximum diagonal offset value */
1802 	int bits;
1803 
1804 	if (res == 0 || res == 1)
1805 		return 0;
1806 	if (dim == 1)
1807 		off = esize;
1808 	else {
1809 		off = (double)esize * floor(exp(log((double)res) * dim - log(res-1.0)));
1810 	}
1811 
1812 	bits = (int)ceil(log(off)/log(2.0) - 1e-14);
1813 	return bits;
1814 }
1815 
/* Search for the largest resolution whose offset, as computed */
/* by calc_obits() for the given dimensionality and entry size, */
/* still fits in the given number of bits. */
/* The search is linear and gives up at an arbitrary 65536. */
static int
calc_ores(
int dim,
int bits,
int esize) {
	int trial = 1;		/* Resolution currently being tried */

	/* Advance while the trial resolution still fits */
	while (trial < 65537 && calc_obits(dim, trial, esize) <= bits)
		trial++;

	/* The last resolution that fitted */
	return trial-1;
}
1835 
1836 
1837 
1838 /* Output the introductory comments */
1839 static void
1840 doheader(
1841 	fileo *f
1842 ) {
1843 	genspec *g = f->g;
1844 	tabspec *t = f->t;
1845 	mach_arch *a  = f->a;
1846 	int e;
1847 
1848 	/* - - - - - - - - - - - - */
1849 	/* Output file title block */
1850 	line(f,"/* Integer Multi-Dimensional Interpolation */");
1851 	line(f,"/* Interpolation Kernel Code */");
1852 	line(f,"/* Generated by cgen */");
1853 	line(f,"/* Copyright 2000 - 2007 Graeme W. Gill */");
1854 	line(f,"/* All rights reserved. */");
1855 	line(f,"/* This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :- */\n");
1856 	line(f,"/* see the License.txt file for licencing details.*/\n");
1857 	cr(f);
1858 
1859 	/* - - - - - - - - - - - - */
1860 	/* Output the specification */
1861 	line(f,"/*");
1862 	line(f,"   Interpolation kernel specs:");
1863 	cr(f);
1864 	line(f,"   Input channels per pixel = %d",g->id);
1865 	for (e = 0; e < g->id; e++) {
1866 		line(f,"   Input channel %d bits = %d",e, g->in.bpch[e]);
1867 		line(f,"   Input channel %d increment = %d",e, g->in.chi[e]);
1868 	}
1869 	if (g->in.pint != 0)
1870 		line(f,"   Input is channel interleaved");
1871 	else
1872 		line(f,"   Input is plane interleaved");
1873 
1874 	if (g->in.packed != 0)
1875 		line(f,"   Input channels are packed into one word");
1876 	else
1877 		line(f,"   Input channels are separate words");
1878 
1879 	if (t->it_ix)
1880 		line(f,"   Input value extraction is done in input table lookup");
1881 	cr(f);
1882 
1883 	line(f,"   Output channels per pixel = %d",g->od);
1884 	for (e = 0; e < g->od; e++) {
1885 		line(f,"   Output channel %d bits = %d",e, g->out.bpch[e]);
1886 		line(f,"   Output channel %d increment = %d",e, g->out.chi[e]);
1887 		if (g->oopt & OOPT(oopts_check,e))
1888 			line(f,"   Output channel %d has value check",e);
1889 		if (g->oopt & OOPT(oopts_skip,e))
1890 			line(f,"   Output channel %d has skip available",e);
1891 	}
1892 	if (g->out.pint != 0)
1893 		line(f,"   Output is channel interleaved");
1894 	else
1895 		line(f,"   Output is plane interleaved");
1896 	if (g->out.packed != 0)
1897 		line(f,"   Output channels are packed into one word");
1898 	else
1899 		line(f,"   Output channels are separate words");
1900 	cr(f);
1901 
1902 	line(f,"   Basic Internal precision bits  = %d",g->prec);
1903 	if (t->sort)
1904 		line(f,"   Weight+voffset bits       = %d",t->sx_ab);
1905 	else
1906 		line(f,"   Simplex table index bits       = %d",t->sx_ab);
1907 	line(f,"   Interpolation table index bits = %d",t->ix_ab);
1908 	if (!t->sort)
1909 		line(f,"   Simplex table max resolution = %d",f->sxmxres);
1910 	line(f,"   Interpolation table max resolution = %d",f->ixmxres);
1911 	cr(f);
1912 	line(f,"   Processing direction is %s",g->opt & opts_bwd ? "backwards" : "forwards" );
1913 	line(f,"   Input stride is %ssupported",g->opt & opts_istride ? "" : "not " );
1914 	line(f,"   Output stride is %ssupported",g->opt & opts_ostride ? "" : "not " );
1915 	if (g->opt & opts_splx_sort)
1916 		line(f,"   Prefer simplex over sort algorithm");
1917 	if (g->opt & opts_sort_splx)
1918 		line(f,"   Prefer sort over simplex");
1919 	line(f," */");
1920 	cr(f);
1921 
1922 	/* - - - - - - - - - - - - */
1923 	line(f,"/*");
1924 	line(f,"   Machine architecture specs:");
1925 	cr(f);
1926 	if (a->bigend != 0)
1927 		line(f,"   Big Endian");
1928 	else
1929 		line(f,"   Little endian");
1930 
1931 	if (a->uwa != 0)
1932 		line(f,"   Using maximum sized memory accesses where possible");
1933 	else
1934 		line(f,"   Reading and writing pixel values separately");
1935 
1936 	line(f,"   Pointer size = %d bits",a->pbits);
1937 	cr(f);
1938 
1939 	for (e = 0; e < a->nords; e++) {
1940 		line(f,"   Ordinal size %2d bits is known as '%s'",
1941 		        a->ords[e].bits,a->ords[e].name);
1942 	}
1943 	line(f,"   Natural ordinal is '%s'", a->ords[a->natord].name);
1944 	cr(f);
1945 
1946 	for (e = 0; e < a->nints; e++) {
1947 		line(f,"   Integer size %2d bits is known as '%s'",
1948 		        a->ints[e].bits,a->ints[e].name);
1949 	}
1950 	line(f,"   Natural integer is '%s'", a->ints[a->natint].name);
1951 	cr(f);
1952 
1953 	line(f," */");
1954 	cr(f);
1955 }
1956 
1957 
1958 /* ---------------------------------------- */
1959 /* Architecture support */
1960 /* Find an ordinal with at least bits size */
1961 /* Return -1 if failed */
1962 int findord(
1963 fileo *f,
1964 int bits
1965 ) {
1966 	mach_arch *a  = f->a;
1967 	int i;
1968 
1969 	for (i = 0; i < a->nords; i++) {
1970 		if (a->ords[i].bits >= bits)
1971 			return i;
1972 	}
1973 	return -1;
1974 }
1975 
1976 /* Round ordinal type up to natural size */
1977 int nord(
1978 	fileo *f,
1979 	int ov
1980 ) {
1981 	if (ov >= 0 && ov < f->a->natord)
1982 		ov = f->a->natord;
1983 	return ov;
1984 }
1985 
1986 /* Find an ordinal with at least bits size, */
1987 /* or natural size, whichever is greater. */
1988 /* Return -1 if failed */
1989 int findnord(
1990 	fileo *f,
1991 	int bits
1992 ) {
1993 	int ov;
1994 
1995 	ov = findord(f, bits);
1996 	ov = nord(f, ov);
1997 	return ov;
1998 }
1999 
2000 /* Find an integer with at least bits size */
2001 /* Return -1 if failed */
2002 int findint(
2003 	fileo *f,
2004 	int bits
2005 ) {
2006 	mach_arch *a  = f->a;
2007 	int i;
2008 
2009 	for (i = 0; i < a->nints; i++) {
2010 		if (a->ints[i].bits >= bits)
2011 			return i;
2012 	}
2013 	return -1;
2014 }
2015 
2016 /* Round integer type up to natural size */
2017 int nint(
2018 	fileo *f,
2019 	int iv
2020 ) {
2021 	if (iv >= 0 && iv < f->a->natint)
2022 		iv = f->a->natint;
2023 	return iv;
2024 }
2025 
2026 /* Find an interger with at least bits size, */
2027 /* or natural size, whichever is greater. */
2028 /* Return -1 if failed */
2029 int findnint(
2030 	fileo *f,
2031 	int bits
2032 ) {
2033 	int iv;
2034 
2035 	iv = findint(f, bits);
2036 	iv = nint(f, iv);
2037 	return iv;
2038 }
2039 
2040 
2041 /* ------------------------------------ */
2042 /* File output support */
2043 
2044 /* Output a line to the file (including trailing \n) */
2045 void
2046 line(fileo *f, char *fmt, ...)
2047 {
2048 	int i;
2049 	va_list args;
2050 
2051 	/* Indent to the correct level */
2052 	for (i = 0; i < f->indt; i++)
2053 		fprintf(f->of,"	");
2054 
2055 	va_start(args, fmt);
2056 	vfprintf(f->of, fmt, args);
2057 	va_end(args);
2058 	fprintf(f->of, "\n");
2059 }
2060 
2061 /* Output the start of a line to the file) */
2062 void
2063 sline(fileo *f, char *fmt, ...)
2064 {
2065 	int i;
2066 	va_list args;
2067 
2068 	/* Indent to the correct level */
2069 	for (i = 0; i < f->indt; i++)
2070 		fprintf(f->of,"	");
2071 
2072 	va_start(args, fmt);
2073 	vfprintf(f->of, fmt, args);
2074 	va_end(args);
2075 }
2076 
2077 /* Output the middle of a line to the file) */
2078 void
2079 mline(fileo *f, char *fmt, ...)
2080 {
2081 	va_list args;
2082 
2083 	va_start(args, fmt);
2084 	vfprintf(f->of, fmt, args);
2085 	va_end(args);
2086 }
2087 
2088 /* Output the end of a line to the file (including trailing \n) */
2089 void
2090 eline(fileo *f, char *fmt, ...)
2091 {
2092 	va_list args;
2093 
2094 	va_start(args, fmt);
2095 	vfprintf(f->of, fmt, args);
2096 	va_end(args);
2097 	fprintf(f->of, "\n");
2098 }
2099 
2100 /* Output a line to the file (including trailing \n) */
2101 /* No indent */
2102 void
2103 niline(fileo *f, char *fmt, ...)
2104 {
2105 	va_list args;
2106 
2107 	va_start(args, fmt);
2108 	vfprintf(f->of, fmt, args);
2109 	va_end(args);
2110 	fprintf(f->of, "\n");
2111 }
2112 
2113 /* Output one line and increment indent */
2114 void lineinc(fileo *f, char *fmt, ...) {
2115 	int i;
2116 	va_list args;
2117 
2118 	/* Indent to the correct level */
2119 	for (i = 0; i < f->indt; i++)
2120 		fprintf(f->of,"	");
2121 
2122 	va_start(args, fmt);
2123 	vfprintf(f->of, fmt, args);
2124 	va_end(args);
2125 	fprintf(f->of, "\n");
2126 	f->indt++;
2127 }
2128 
2129 /* Decrement indent and output one line */
2130 void decline(fileo *f, char *fmt, ...) {
2131 	int i;
2132 	va_list args;
2133 
2134 	f->indt--;
2135 	/* Indent to the correct level */
2136 	for (i = 0; i < f->indt; i++)
2137 		fprintf(f->of,"	");
2138 
2139 	va_start(args, fmt);
2140 	vfprintf(f->of, fmt, args);
2141 	va_end(args);
2142 	fprintf(f->of, "\n");
2143 }
2144 
2145 
2146 /* ------------------------------------ */
2147 
2148 
2149 
2150 
2151