1
2 /* Integer Multi-Dimensional Interpolation */
3
4 /*
5 * Copyright 2000 - 2007 Graeme W. Gill
6 * All rights reserved.
7 *
8 * This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :-
9 * see the License.txt file for licencing details.
10 */
11
12 /* 'C' code color transform kernel code generator. */
13
/*
   This module generates C code routines which implement
   an integer multi-channel transform. The input values
   are read, passed through per channel lookup tables,
   a multi-dimensional interpolation table, and then
   a per channel output lookup table, before being written.
 */
21
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26 #include <stdarg.h>
27 #include <string.h>
28
29 #include "imdi.h"
30 #include "imdi_tab.h"
31
32 #undef VERBOSE
#define INSTHRESH 4	/* Use insertion sort if di >= INSTHRESH for best performance. */
34 #undef ROUND /* Round the division after accumulation */
35 /* Improves accuracy at the cost of a little speed */
36
37 /* ------------------------------------ */
38 /* Generator context */
39 typedef struct {
40 FILE *of; /* Output file */
41 int indt; /* Indent */
42
43 /* Other info */
44 genspec *g; /* Generation specifications */
45 tabspec *t; /* Table setup data */
46 mach_arch *a; /* Machine architecture and tuning data */
47
48 /* Code generation information */
49 /* if() conditions are for entry usage */
50
51 /* Pixel read information */
52 int ipt[IXDI]; /* Input pointer types */
53 int nip; /* Actual number of input pointers, accounting for pint */
54 int chv_bits; /* Bits in chv temp variable ?? */
55
56 /* Input table entry */
57 int itet; /* Input table entry type */
58 int itvt; /* Input table variable type */
59 int itmnb; /* Input table minimum bits (actual is it_ab) */
60
61 /* Interpolation index */
62 int ixet; /* Interpolation index entry type */
63 int ixvt; /* Interpolation index variable type */
64 int ixmnb; /* Interpolation index minimum bits (actual is ix_ab ???) */
65 int ixmxres; /* Interpolation table maximum resolution */
66
67 /* Simplex index: if(!sort && it_xs) */
68 int sxet; /* Simplex index entry type */
69 int sxvt; /* Simplex index variable type */
70 int sxmnb; /* Simplex index bits minimum (actual is sx_ab) */
71 int sxmxres; /* Simplex table maximum resolution (0 if sort) */
72
73 /* Combination Weighting + Vertex offset values: if(it_xs && !wo_xs) */
74 int woet; /* Weighting+offset entry type */
75 int wovt; /* Weighting+offset variable type */
76 int womnb; /* Weighting+offset index bits minimum (actual is wo_ab) */
77
78 /* Weighting value: if(it_xs && wo_xs) */
79 int weet; /* Weighting entry type */
80 int wevt; /* Weighting variable type */
81 int wemnb; /* Weighting index bits minimum (actual is we_ab) */
82
83 /* Vertex offset value: if(it_xs && wo_xs) */
84 int voet; /* Vertex offset entry type */
85 int vovt; /* Vertex offset variable type */
86 int vomnb; /* Vertex offset index bits minimum (actual is vo_ab) */
87
88 /* Interpolation table entry: */
89 int imovb; /* Interpolation output value bits per channel required */
90 int imfvt; /* Interpolation full entry & variable type */
91 int impvt; /* Interpolation partial entry variable type */
92
93 /* Interpolation accumulators: */
94 int iaovb; /* Interpolation output value bits per channel required */
95 int iafvt; /* Interpolation full entry & variable type */
96 int iapvt; /* Interpolation partial entry variable type */
97 int ian; /* Total number of accumulators */
98
99 /* Output table lookup */
100 int otit; /* Output table index type */
101 int otvt; /* Output table value type (size is ot_ts bytes) */
102
103 /* Write information */
104 int opt[IXDO]; /* Output pointer types */
105 int nop; /* Actual number of output pointers, accounting for pint */
106
107 } fileo;
108
/* Formatted output helpers (format string plus arguments). */

/* Output one complete line */
void line(fileo *f, char *fmt, ...);

/* Output the start of a line */
void sline(fileo *f, char *fmt, ...);

/* Output the middle of a line */
void mline(fileo *f, char *fmt, ...);

/* Output the end of a line */
void eline(fileo *f, char *fmt, ...);

/* Output one complete line with no indent */
void niline(fileo *f, char *fmt, ...);

/* Output a blank line */
void cr(fileo *f)
{
	line(f, "");
}

/* Go one indent level deeper */
void inc(fileo *f)
{
	f->indt += 1;
}

/* Come back one indent level */
void dec(fileo *f)
{
	f->indt -= 1;
}

/* Output one line, then increment the indent */
void lineinc(fileo *f, char *fmt, ...);

/* Decrement the indent, then output one line */
void decline(fileo *f, char *fmt, ...);
119 /* ------------------------------------ */
120
121 int findord(fileo *f, int bits); /* Find ordinal with bits or more */
122 int nord(fileo *f, int ov); /* Round ordinal type up to natural size */
123 int findnord(fileo *f, int bits); /* Find ordinal with bits, or natural larger */
124 int findint(fileo *f, int bits); /* Find integer with bits or more */
125 int nint(fileo *f, int iv); /* Round integer type up to natural size */
126 int findnint(fileo *f, int bits); /* Find integer with bits, or natural larger */
127 static void doheader(fileo *f);
128
129 static int calc_bits(int dim, int res);
130 static int calc_res(int dim, int bits);
131 static int calc_obits(int dim, int res, int esize);
132 static int calc_ores(int dim, int bits, int esize);
133
134
/*
 * Return a hexadecimal mask string ("0x...") that has the low
 * 'bits' bits set, e.g. hmask(8) returns "0xff".
 *
 * bits is clamped to the range 0..64: a negative count gives "0x0"
 * and anything above 64 gives the full 64 bit mask.
 *
 * The result is either a string literal or a static buffer that is
 * overwritten by the next call, so the caller must not modify or
 * free it, and must use it before calling hmask() again.
 */
char *hmask(int bits) {
	static char buf[20];	/* "0x" + at most 16 hex digits + nul = 19 bytes */

	if (bits < 0)
		bits = 0;

	if (bits >= 64)
		return "0xffffffffffffffff";
	if (bits == 32)
		return "0xffffffff";

	/* The shifts are done in unsigned long (at least 32 bits wide) with */
	/* a shift count of 0..31, so they are always well defined. A signed */
	/* (1 << 31) is not. */
	if (bits < 32) {
		sprintf(buf, "0x%lx", (1UL << bits) - 1UL);
	} else {	/* 32 < bits < 64: upper part, then 32 set low bits */
		sprintf(buf, "0x%lxffffffff", (1UL << (bits - 32)) - 1UL);
	}
	return buf;
}
151
152 /* Generate a source file to implement the specified */
153 /* interpolation kernel. Fill in return values and return 0 if OK. */
154 /* g->opt should be set to opts_splx_sort or opts_sort_splx if both */
155 /* are being generated, but opts_splx is what actually chooses simplex */
156 /* when available, and is not recorded in the resulting table. */
157 /* Return 1 if this kernel could be generated with a simplex table algorithm, */
158 /* and some other non-zero on another error. */
gen_c_kernel(genspec * g,tabspec * t,mach_arch * a,FILE * fp,int index,genspec * og,tabspec * ot)159 int gen_c_kernel(
160 genspec *g, /* Specification of what to generate */
161 tabspec *t, /* Tablspec that will be filled in */
162 mach_arch *a,
163 FILE *fp, /* File to write to */
164 int index, /* Identification index, 1 = first */
165 genspec *og, /* Previous tables genspec (for diff) */
166 tabspec *ot /* Previous tables tabspec (for diff) */
167 ) {
168 int frv = 0; /* Function return value */
169 unsigned char kk[] = { 0x43, 0x6F, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68,
170 0x74, 0x20, 0x32, 0x30, 0x30, 0x34, 0x20, 0x47,
171 0x72, 0x61, 0x65, 0x6D, 0x65, 0x20, 0x57, 0x2E,
172 0x20, 0x47, 0x69, 0x6C, 0x6C, 0x00 };
173 fileo f[1];
174 int e, i;
175 int timp = 0; /* Flag to use temporary imp pointer. */
176 /* Seem to make x86 MSVC++ slower */
177 /* Has no effect on x86 IBMCC */
178
179 sprintf(g->kname, "imdi_k%d",index); /* Kernel routine base name */
180 strcpy(g->kkeys, (char *)kk); /* Kernel keys for this session */
181
182 /* Setup the file output context */
183 f->of = fp;
184 f->indt = 0; /* Start with no indentation */
185 f->g = g;
186 f->t = t;
187 f->a = a;
188
189 /* (prec is currently permitted to be only 8 or 16) */
190 if (g->prec == 8) {
191 if (g->id <= 4) { /* Simplex table can be used */
192 frv = 1; /* Signal caller that simplex is possible */
193 if (g->opt & opts_splx)
194 t->sort = 0; /* Implicit sort using simplex table lookup */
195 else
196 t->sort = 1; /* Explicit sort */
197 } else {
198 t->sort = 1; /* Explicit sort */
199 }
200
201 } else if (g->prec == 16) {
202 t->sort = 1; /* Explit sort, no simplex table */
203
204 } else {
205 fprintf(stderr,"Can't cope with requested precision of %d bits\n",g->prec);
206 exit(-1);
207 }
208
209 /* Compute input read and input table lookup stuff */
210
211 /* Compute number of input pointers */
212 if (g->in.pint != 0) /* Pixel interleaved */
213 f->nip = 1;
214 else
215 f->nip = g->id;
216
217 /* Figure out the input pointer types */
218 for (e = 0; e < f->nip; e++) {
219 if ((f->ipt[e] = findord(f, g->in.bpch[e])) < 0) {
220 fprintf(stderr,"Input channel size can't be handled\n");
221 exit(-1);
222 }
223 }
224
225 /* Do the rest of the input table size calculations after figuring */
226 /* out simplex and interpolation table sizes. */
227
/* Figure out the interpolation multi-dimensional table structure */
/* and output accumulation variable sizes. Note that the accumulator */
/* size needs to be greater than the basic precision by some factor, */
/* if we are not to get rounding errors due to each value being the sum */
/* of di+1 parts with weighting that sum to 1.0. It's convenient in */
/* C code case to simply double the basic precision size. */
234 if (g->prec == 8
235 || (g->prec == 16 && a->ords[a->nords-1].bits >= (g->prec * 4))) {
236 int tiby; /* Total interpolation bytes needed */
237
238 /* We assume that we can normally compute more than one */
239 /* output value at a time, so we need to hold the interpolation */
240 /* output data in the expanded fixed point format in both the */
241 /* table and accumulator. */
242 t->im_cd = 1;
243 f->imovb = g->prec * 2; /* 16 bits needed for 8 bit precision, */
244 f->iaovb = g->prec * 2; /* 32 bits needed for 16 bit precision */
245 f->imfvt = a->nords-1; /* Full variable entry type is biggest available */
246 f->iafvt = a->nords-1; /* Full variable accum. type is same */
247
248 if (a->ords[f->imfvt].bits < f->imovb) {
249 fprintf(stderr,"Interpolation table entry size can't be handled\n");
250 exit(-1);
251 }
252
253 /* Compute details of table entry sizes, number */
254 tiby = (f->imovb * g->od)/8; /* Total table bytes needed */
255 t->im_fs = a->ords[f->imfvt].bits/8; /* Full entry bytes */
256 t->im_fv = (t->im_fs * 8)/f->imovb; /* output values per full entry . */
257 t->im_fn = tiby/t->im_fs; /* Number of full entries (may be 0) */
258 t->im_ts = t->im_fn * t->im_fs; /* Structure size so far */
259 tiby -= t->im_fn * t->im_fs; /* Remaining bytes */
260
261 if (tiby <= 0) {
262 t->im_pn = 0; /* No partials */
263 t->im_ps = 0;
264 t->im_pv = 0;
265 f->impvt = 0;
266 f->iapvt = 0;
267
268 } else {
269 t->im_pn = 1; /* Must be just 1 partial */
270 t->im_pv = (tiby * 8)/f->imovb; /* Partial holds remaining entries */
271
272 #ifdef NEVER /* For better performance ??? */
273 if ((f->impvt = findnord(f, tiby * 8)) < 0) {
274 #else /* Better memory footprint - minimise multi-D entry sizes */
275 /* (but only if structure is alowed to be mis-aligned!) */
276 if ((f->impvt = findord(f, tiby * 8)) < 0) {
277 #endif
278 fprintf(stderr,"Can't find partial interp table entry variable size\n");
279 exit(-1);
280 }
281 f->iapvt = f->impvt;
282 t->im_ps = a->ords[f->impvt].bits/8;/* Partial entry bytes */
283
284 if (a->ords[f->imfvt].align) /* If full entry's need to be aligned */
285 t->im_ts += t->im_fs; /* Round out struct size by full entry */
286 else
287 t->im_ts += t->im_ps; /* Round out to natural size */
288 }
289
290 } else {
/* One 16 bit output value per entry + 32 bit accumulator. */
/* We can conserve table space by not holding the table data in expanded */
/* fixed point format, but expanding it when it is read. */
/* Without resorting to complicated code, this restricts us */
/* to only computing one output value per accumulator. */
296 t->im_cd = 0;
297 f->imovb = g->prec; /* Table holds 16 bit entries with no fractions */
298 f->iaovb = g->prec * 2; /* 32 bits needed for 16 bit precision in comp. */
299
300 if ((f->imfvt = findord(f, f->imovb)) < 0) {
301 fprintf(stderr,"Interpolation table entry size can't be handled\n");
302 exit(-1);
303 }
304 if ((f->iafvt = findord(f, f->iaovb)) < 0) {
305 fprintf(stderr,"Interpolation accumulator size can't be handled\n");
306 exit(-1);
307 }
308
309 /* Compute details of table entry sizes, number */
310 t->im_fs = a->ords[f->imfvt].bits/8; /* Full entry bytes */
311 t->im_fv = 1; /* output values per full entry . */
312 t->im_fn = g->od; /* Number of full entries */
313 t->im_ts = t->im_fn * t->im_fs; /* Total structure size */
314
315 t->im_pn = 0; /* No partials */
316 t->im_ps = 0;
317 t->im_pv = 0;
318 f->impvt = 0;
319 f->iapvt = 0;
320 }
321 f->ian = t->im_fn + t->im_pn; /* Total number of output accumulators */
322
/* Figure out how much of the interpolation entry offset to put in the */
/* vertex offset value, and how much to make explicit in accessing the */
/* interpolation table entry. */
326 if (a->oscale > 0) { /* We have a scaled index mode */
327 /* Use as much of the scaled index mode as possible */
328 /* and then do the balance by scaling the simplex index entry. */
329 for (t->im_oc = a->oscale; ; t->im_oc >>= 1) {
330 t->vo_om = t->im_ts/t->im_oc; /* Simplex index multiplier */
331 if ((t->vo_om * t->im_oc) == t->im_ts)
332 break; /* Got appropriate offset scale */
333 }
334 } else if (a->smmul) { /* Architecure supports fast small multiply */
335 t->im_oc = t->im_ts; /* Do scale by structure size explicitly */
336 t->vo_om = 1; /* Do none in the Simplex index */
337 } else { /* We have no fast tricks */
338 t->im_oc = 1; /* Do none explicitly */
339 t->vo_om = t->im_ts; /* Do all in Simplex index */
340 }
341
342 /* Compute the number of bits needed to hold an index into */
343 /* the interpolation table (index is in terms of table entry size). */
344 /* This value is used to figure out the room needed in the input */
345 /* table to accumulate the interpolation cube base offset value. (IM_O macro) */
346 f->ixmnb = calc_bits(g->id, g->itres);
347
348 #ifdef VERBOSE
349 /* Summarise the interpolation table arrangements */
350 printf("\n");
351 printf("Interpolation table structure:\n");
352 printf(" Minimum bits needed to index table %d\n", f->ixmnb);
353 printf(" Entry total size %d bytes\n", t->im_ts);
354 printf(" Simplex entry offset scale %d\n", t->vo_om);
355 printf(" Explicit entry offset scale %d\n", t->im_oc);
356 printf(" %d full entries, size %d bytes\n", t->im_fn, t->im_fs);
357 printf(" %d partial entries, size %d bytes\n", t->im_pn, t->im_ps);
358 printf(" to hold %d output values of %d bits\n", g->od, f->imovb);
359
360 #endif /* VERBOSE */
361
362 /* Number of bits needed for the weighting value */
363 f->wemnb = g->prec+1; /* Need to hold a weighting factor of 0 - 256 for 8 bits */
364 /* Need to hold a weighting factor of 0 - 65536 for 16 bits */
365
366 /* Variable that would be used to hold it */
367 if ((f->wevt = findnord(f, f->wemnb)) < 0) {
368 fprintf(stderr,"Can't find entry size to hold weighting variable\n");
369 exit(-1);
370 }
371
372 /* Number of bits needed for vertex offset value */
373 f->vomnb = calc_obits(g->id, g->itres, t->vo_om);
374
375 /* Variable that would be used to hold it */
376 if ((f->vovt = findnord(f, f->vomnb)) < 0) {
377 fprintf(stderr,"Can't find entry size to hold vertex offset variable\n");
378 exit(-1);
379 }
380
381 if (t->sort) {
382 /* If we are using an explicit sort, we need to figure how many */
383 /* separate entries we need to use to hold the interpolation index, */
384 /* weighting factor and vertex offset values in the input table. */
385
386 /* First try all three in one entry */
387 if ((f->itet = findord(f, f->ixmnb + f->wemnb + f->vomnb)) >= 0) {/* size to read */
388 int rem; /* Remainder bits */
389
390 t->it_xs = 0; /* Combined interp+weight+offset */
391 t->wo_xs = 0;
392 t->it_ab = a->ords[f->itet].bits; /* Bits in combined input entry */
393 rem = t->it_ab - f->ixmnb - f->wemnb - f->vomnb; /* Spair bits */
394 t->we_ab = f->wemnb; /* Get minimum weight bits */
395 t->vo_ab = f->vomnb + rem/2; /* vertex offset index bits actually available */
396 t->ix_ab = t->it_ab - t->vo_ab - t->we_ab; /* interp index bits actually available */
397 t->wo_ab = t->we_ab + t->vo_ab; /* Weight & offset total bits */
398 t->it_ts = a->ords[f->itet].bits/8; /* total size in bytes */
399 f->itvt = nord(f, f->itet); /* Variable type */
400
401 if ((f->wovt = findnord(f, t->we_ab + t->vo_ab)) < 0) {
402 fprintf(stderr,"Can't find variable size to hold weight/offset\n");
403 exit(-1);
404 }
405 if ((f->wevt = findnord(f, t->we_ab)) < 0) {
406 fprintf(stderr,"Can't find variable size to hold weighting factor\n");
407 exit(-1);
408 }
409 if ((f->vovt = findnord(f, t->vo_ab)) < 0) {
410 fprintf(stderr,"Can't find variable size to hold vertex offset index\n");
411 exit(-1);
412 }
413 if ((f->ixvt = findnord(f, t->ix_ab)) < 0) {
414 fprintf(stderr,"Interp index variable size can't be handled\n");
415 exit(-1);
416 }
417 } else { /* Interp index will be a separate entry */
418 int wit, oft, bigt; /* weighting type, offset type, biggest type */
419 int combt; /* Combined type */
420 int sepbits, combits; /* Total separate, combined bits */
421
422 t->it_xs = 1; /* Separate interp index and weighting+offset */
423 if ((f->ixet = findord(f, f->ixmnb)) < 0) {
424 fprintf(stderr,"Interp index entry size can't be handled\n");
425 exit(-1);
426 }
427 f->ixvt = nord(f, f->ixet); /* Variable type */
428 t->ix_ab = a->ords[f->ixet].bits;
429 t->ix_es = t->ix_ab/8;
430 t->ix_eo = 0;
431 t->it_ts = t->ix_es; /* Input table size so far */
432
433 /* Now figure weighting and vertex offset */
434
435 /* See if we can fit them into separately readable entries, or whether */
436 /* they should be combined to minimise overall table size. */
437
438 if ((wit = findord(f, f->wemnb)) < 0) {
439 fprintf(stderr,"Can't find entry size to hold weighting factor\n");
440 exit(-1);
441 }
442 if ((oft = findord(f, f->vomnb)) < 0) {
443 fprintf(stderr,"Can't find entry size to hold vertex offset index\n");
444 exit(-1);
445 }
446 bigt = wit > oft ? wit : oft; /* Bigest separate type */
447
448 if ((combt = findord(f, f->wemnb + f->vomnb)) < 0) {/* Combined isn't possible */
449 sepbits = 2 * a->ords[bigt].bits; /* Total separate bits */
450 combits = sepbits; /* Force separate entries */
451 } else {
452 sepbits = 2 * a->ords[bigt].bits; /* Total separate bits */
453 combits = a->ords[combt].bits; /* Total combined bits */
454 }
455
456 if (sepbits <= combits) { /* We will use separate entries */
457 t->wo_xs = 1;
458 t->we_es = a->ords[bigt].bits/8; /* size in bytes for weighting entry */
459 t->we_ab = a->ords[bigt].bits; /* bits available for weighting */
460 t->we_eo = t->ix_es; /* Entry offset in input table */
461 t->vo_es = a->ords[bigt].bits/8; /* size in bytes for vertex offset entry */
462 t->vo_ab = a->ords[bigt].bits; /* bits available for vertex offset */
463 t->vo_eo = t->ix_es + t->we_es; /* Entry offset in input table */
464 t->wo_es = t->we_es + t->vo_es; /* Total entry size for each vertex */
465 t->it_ts += t->we_es + t->vo_es; /* Total input entry size in bytes */
466
467 f->weet = bigt; /* Variable type for accessing weighting entry */
468 f->voet = bigt; /* Variable type for accessing vertex offset entry */
469 f->wevt = nord(f, wit); /* Variable type for holding weight value */
470 f->vovt = nord(f, oft); /* Variable type for holding offset value */
471
472 } else { /* We will combine the two entries */
473 t->wo_xs = 0;
474 t->wo_es = a->ords[combt].bits/8; /* entry size in bytes for each entry */
475 t->wo_ab = a->ords[combt].bits; /* bits in weightig + offset */
476 t->we_ab = f->wemnb; /* bits available for weighting */
477 t->vo_ab = t->wo_ab - t->we_ab; /* Allow all spare bits to vertex offset */
478 t->wo_eo = t->ix_es; /* entry offset in input table */
479 t->it_ts += t->wo_es; /* Final input table size */
480
481 f->woet = combt; /* Variable type for accessing combined entry */
482 f->wovt = nord(f, combt); /* Variable type holding weight/offset read value */
483
484 if ((f->wevt = findnord(f, t->we_ab)) < 0) {
485 fprintf(stderr,"Can't find variable size to hold weighting factor\n");
486 exit(-1);
487 }
488 if ((f->vovt = findnord(f, t->vo_ab)) < 0) {
489 fprintf(stderr,"Can't find variable size to hold vertex offset index\n");
490 exit(-1);
491 }
492 }
493 }
494 #ifdef VERBOSE
495 /* Summarise the input table arrangements */
496 printf("\n");
497 printf("Input table structure:\n");
498 printf(" Input table entry size = %d bytes\n",t->it_ts);
499 if (t->it_ix) {
500 printf(" Input table extracts value from read values\n");
501 if (t->wo_xs) {
502 printf(" Separate Interp., Weighting and Offset values\n");
503 printf(" Interp. index is at offset %d, size %d bytes\n",t->ix_eo, t->ix_es);
504 printf(" Weighting is at offset %d, size %d bytes\n",t->we_eo, t->we_es);
505 printf(" Vertex offset is at offset %d, size %d bytes\n",t->vo_eo, t->vo_es);
506 } else {
507 printf(" Separate Interp. index and Weightint+Offset value\n");
508 printf(" Interp. index is at offset %d, size %d bytes\n",t->ix_eo, t->ix_es);
509 printf(" Weighting+Offset is at offset %d, size %d bytes\n",t->wo_eo, t->wo_es);
510 printf(" Weighting = %d bits\n",t->we_ab);
511 printf(" Vertex offset = %d bits\n",t->vo_ab);
512 }
513 } else {
514 printf(" Combined InterpIndex+Weighting+Voffset values\n");
515 printf(" Values are stored in size %d bytes\n",t->it_ts);
516 printf(" Interp. index = %d bits\n",t->ix_ab);
517 printf(" Weighting = %d bits\n",t->we_ab);
518 printf(" Vertex offset = %d bits\n",t->vo_ab);
519 }
520 #endif /* VERBOSE */
521
522 } else { /* Simplex table */
523 /* If we are going to use a simplex table, figure out how we */
524 /* will store the weighting value and vertex offset values in it, */
525 /* as well as the size of index we'll need to address it. */
526 int wit, oft, bigt; /* weighting type, offset type, biggest type */
527 int combt; /* Combined type */
528 int sepbits, combits; /* Total separate, combined bits */
529
530 /* See if we can fit them into separately readable entries, or whether */
531 /* they should be combined to minimise overall table size. */
532
533 if ((wit = findord(f, f->wemnb)) < 0) {
534 fprintf(stderr,"Can't find entry size to hold weighting factor\n");
535 exit(-1);
536 }
537 if ((oft = findord(f, f->vomnb)) < 0) {
538 fprintf(stderr,"Can't find entry size to hold vertex offset index\n");
539 exit(-1);
540 }
541 bigt = wit > oft ? wit : oft; /* Bigest separate type */
542
543 if ((combt = findord(f, f->wemnb + f->vomnb)) < 0) {/* Combined isn't possible */
544 sepbits = 2 * a->ords[bigt].bits; /* Total separate bits */
545 combits = sepbits; /* Force separate entries */
546 } else {
547 sepbits = 2 * a->ords[bigt].bits; /* Total separate bits */
548 combits = a->ords[combt].bits; /* Total combined bits */
549 }
550
551 if (sepbits <= combits) { /* We will use separate entries */
552 t->wo_xs = 1;
553 t->we_es = a->ords[bigt].bits/8; /* size in bytes for weighting entry */
554 t->we_ab = a->ords[bigt].bits; /* bits available for weighting */
555 t->we_eo = 0; /* Entry offset in simplex table */
556 t->vo_es = a->ords[bigt].bits/8; /* size in bytes for vertex offset entry */
557 t->vo_ab = a->ords[bigt].bits; /* bits available for vertex offset */
558 t->vo_eo = t->we_es; /* Entry offset in simplex table */
559 t->wo_es = t->we_es + t->vo_es; /* Total entry size for each vertex */
560 t->sm_ts = (g->id + 1) * (t->we_es + t->vo_es) ; /* Total size in bytes */
561
562 f->weet = bigt; /* Variable type for accessing weighting entry */
563 f->voet = bigt; /* Variable type for accessing vertex offset entry */
564 f->wevt = nord(f, wit); /* Variable type for holding weight value */
565 f->vovt = nord(f, oft); /* Variable type for holding offset value */
566
567 } else { /* We will combine the two entries */
568 t->wo_xs = 0;
569 t->wo_es = a->ords[combt].bits/8; /* entry size in bytes for each entry */
570 t->wo_ab = a->ords[combt].bits; /* bits in weightig + offset */
571 t->we_ab = f->wemnb; /* bits available for weighting */
572 t->vo_ab = t->wo_ab - t->we_ab; /* Allow all spare bits to vertex offset */
573 t->wo_eo = 0; /* entry offset in simplex table */
574 t->sm_ts = (g->id + 1) * t->wo_es; /* Total size in bytes */
575
576 f->woet = combt; /* Variable type for accessing combined entry */
577 f->wovt = nord(f, combt); /* Variable type holding weight/offset read value */
578
579 if ((f->wevt = findnord(f, t->we_ab)) < 0) {
580 fprintf(stderr,"Can't find variable size to hold weighting factor\n");
581 exit(-1);
582 }
583 if ((f->vovt = findnord(f, t->vo_ab)) < 0) {
584 fprintf(stderr,"Can't find variable size to hold vertex offset index\n");
585 exit(-1);
586 }
587 }
588
589 /* Compute the number of bits needed to hold an index into */
590 /* the simplex table (index is in terms of table entry size). */
591 /* This value is used to figure out the room needed in the input */
592 /* table to accumulate the simplex cube base offset value. (SW_O macro) */
593 f->sxmnb = calc_bits(g->id, g->stres);
594
595 #ifdef VERBOSE
596 /* Summarise the simplex table arrangements */
597 printf("\n");
598 printf("Simplex table structure:\n");
599 printf(" Minimum bits needed to index table %d\n", f->sxmnb);
600 printf(" Total simplex entry size %d bytes to hold %d entries\n",t->sm_ts, g->id+1);
601 if (t->wo_xs) {
602 printf(" Separate entries for offset and weight\n");
603 printf(" Weighting entry size %d bytes\n",t->we_es);
604 printf(" Offset entry size %d bytes\n",t->vo_es);
605 } else {
606 printf(" Combined offset and weight entries in %d bytes\n",t->wo_es);
607 printf(" Weighting entry size %d bits\n",t->we_ab);
608 printf(" Offset entry size %d bits\n",t->vo_ab);
609 }
610 printf(" Vertex offset scale factor %d\n", t->vo_om);
611 #endif /* VERBOSE */
612
613 /* We known how big the interpolation and simplex */
614 /* tables indexes are going to be, so complete figuring out */
615 /* how big the input table entries have to be. */
616 if ((f->itet = findord(f, f->sxmnb + f->ixmnb)) >= 0) {/* size to read */
617 int rem; /* Remainder bits */
618
619 t->it_xs = 0; /* Combined simplex+interp index */
620
621 t->it_ab = a->ords[f->itet].bits; /* Bits in combined input entry */
622 rem = t->it_ab - f->sxmnb - f->ixmnb;
623 t->sx_ab = f->sxmnb + rem/2; /* simplex index bits actually available */
624 t->ix_ab = t->it_ab - t->sx_ab; /* interp index bits actually available */
625 t->it_ts = a->ords[f->itet].bits/8; /* total size in bytes */
626 f->itvt = nord(f, f->itet); /* Variable type */
627
628 if ((f->sxvt = findnord(f, t->sx_ab)) < 0) {
629 fprintf(stderr,"Simplex index variable size can't be handled\n");
630 exit(-1);
631 }
632 if ((f->ixvt = findnord(f, t->ix_ab)) < 0) {
633 fprintf(stderr,"Interp index variable size can't be handled\n");
634 exit(-1);
635 }
636 } else { /* Separate entries */
637 int bbits; /* Largest number of bits needed */
638
639 t->it_xs = 1; /* Separate simplex+interp indexes */
640 bbits = f->sxmnb > f->ixmnb ? f->sxmnb : f->ixmnb;
641
642 /* Allocate same size for both so that total structure size is power of 2 */
643 if ((f->sxet = f->ixet = findord(f, bbits)) < 0) {
644 fprintf(stderr,"Interp/Simplex index entry size can't be handled\n");
645 exit(-1);
646 }
647
648 t->sx_ab = a->ords[f->sxet].bits; /* Actual bits available */
649 t->sx_es = t->sx_ab/8; /* Entry size in bytes */
650 t->ix_ab = a->ords[f->ixet].bits;
651 t->ix_es = t->sx_ab/8;
652 t->it_ts = t->sx_es + t->ix_es; /* total size in bytes */
653 t->sx_eo = 0; /* simplex index offset in bytes */
654 t->ix_eo = t->sx_es; /* interp. index offset in bytes */
655 f->sxvt = nord(f, f->sxet); /* Variable type */
656 f->ixvt = nord(f, f->ixet); /* Variable type */
657 }
658
659 #ifdef VERBOSE
660 /* Summarise the input table arrangements */
661 printf("\n");
662 printf("Input table structure:\n");
663 if (t->it_ix) {
664 printf(" Input table extracts value from read values\n");
665 } else {
666 printf(" Value extraction read values is explicit\n");
667 }
668 printf(" Input table entry size = %d bytes\n",t->it_ts);
669 if (t->it_xs) {
670 printf(" Separate Interp. and Simplex index values\n");
671 printf(" Interp. index is at offset %d, size %d bytes\n",t->ix_eo, t->ix_es);
672 printf(" Simplex index is at offset %d, size %d bytes\n",t->sx_eo, t->sx_es);
673 } else {
674 printf(" Combined Interp. and Simplex index values\n");
675 printf(" Values are size %d bytes\n",t->it_ts);
676 printf(" Interp. index = %d bits\n",t->ix_ab);
677 printf(" Simplex index = %d bits\n",t->sx_ab);
678 }
679 #endif /* VERBOSE */
680 }
681
682 /* Figure out output table stuff */
683 {
684 /* A variable to hold the index into an output table */
685 if ((f->otit = findord(f, g->prec)) < 0) {
686 fprintf(stderr,"Can't find output table index size\n");
687 exit(-1);
688 }
689 f->otit = nord(f,f->otit); /* Make temp variable natural size */
690
691 if (g->out.pint != 0) /* Pixel interleaved */
692 f->nop = 1; /* Use same pointers for every pixel */
693 else
694 f->nop = g->od; /* Use a separate pointer for each output value */
695
696 /* Figure out the output pointer types */
697 f->otvt = 0; /* Output table value type */
698 for (e = 0; e < f->nop; e++) {
699 if ((f->opt[e] = findord(f, g->out.bpch[e])) < 0) {
700 fprintf(stderr,"Output channel size can't be handled\n");
701 exit(-1);
702 }
703 if (f->opt[e] > f->otvt)
704 f->otvt = f->opt[e]; /* Make value type big enough for any channel size */
705 }
706 t->ot_ts = a->ords[f->otvt].bits/8; /* Output table entry size in bytes */
707
708 /* Setup information on data placement in output table entries */
709 for (e = 0; e < g->od; e++) {
710 t->ot_off[e] = g->out.bov[e]; /* Transfer info from generation spec. */
711 t->ot_bits[e] = g->out.bpv[e];
712 }
713 }
714
715 #ifdef VERBOSE
716 /* Summarise the output table arrangements */
717 printf("Output table structure:\n");
718 printf(" Entry size = %d bytes\n",t->ot_ts);
719 printf(" Output value placement within each enry is:\n");
720 for (e = 0; e < f->nop; e++) {
721 printf(" %d: Offset %d bits, size %d bits\n", e, t->ot_off[e], t->ot_bits[e]);
722 }
723 #endif /* VERBOSE */
724
725 /* Compute the maximum interpolation table resolution we will be able to handle */
726 {
727 int res, ores;
728
729 res = calc_res(g->id, t->ix_ab);
730 ores = calc_ores(g->id, t->vo_ab, t->vo_om);
731 f->ixmxres = res < ores ? res : ores;
732 }
733
734 /* Compute the maximum simplex table resolution we will be able to handle */
735 if (t->sort) {
736 f->sxmxres = 0;
737 } else {
738 f->sxmxres = calc_res(g->id, t->sx_ab);
739 }
740
741 #ifdef VERBOSE
742 printf("Emitting introductory code\n"); fflush(stdout);
743 #endif /* VERBOSE */
744
745 /* Start of code generation */
746 doheader(f); /* Output the header comments */
747
748 /* We need an include file */
749 line(f,"#ifndef IMDI_INCLUDED");
750 line(f,"#include <memory.h>");
751 line(f,"#include \"imdi_utl.h\"");
752 line(f,"#define IMDI_INCLUDED");
753 line(f,"#endif /* IMDI_INCLUDED */");
754 cr(f);
755
756 /* Declare our explicit pointer type */
757 line(f,"#ifndef DEFINED_pointer");
758 line(f,"#define DEFINED_pointer");
759 line(f,"typedef unsigned char * pointer;");
760 line(f,"#endif");
761 cr(f);
762
763 /* Declare our explicit structure access macros */
764
765 #ifdef VERBOSE
766 printf("Declaring macros\n"); fflush(stdout);
767 #endif /* VERBOSE */
768
769 /* Macros for accessing input table entries */
770 if (t->sort) {
771 if (t->it_xs) {
772 line(f,"/* Input table interp. index */");
773 line(f,"#define IT_IX(p, off) *((%s *)((p) + %d + (off) * %d))",
774 a->ords[f->ixet].name, t->ix_eo, t->it_ts);
775 cr(f);
776 if (t->wo_xs) {
777 line(f,"/* Input table input weighting enty */");
778 line(f,"#define IT_WE(p, off) *((%s *)((p) + %d + (off) * %d))",
779 a->ords[f->weet].name, t->we_eo, t->it_ts);
780 cr(f);
781 line(f,"/* Input table input offset value enty */");
782 line(f,"#define IT_VO(p, off) *((%s *)((p) + %d + (off) * %d))",
783 a->ords[f->voet].name, t->vo_eo, t->it_ts);
784 cr(f);
785 } else {
786 line(f,"/* Input table input weighting/offset value enty */");
787 line(f,"#define IT_WO(p, off) *((%s *)((p) + %d + (off) * %d))",
788 a->ords[f->woet].name, t->wo_eo, t->it_ts);
789 cr(f);
790 }
791 } else {
792 line(f,"/* Input table interp index, weighting and vertex offset */");
793 line(f,"#define IT_IT(p, off) *((%s *)((p) + %d + (off) * %d))",
794 a->ords[f->itet].name, 0, t->it_ts);
795 cr(f);
796 }
797
798 /* Sort primitive macro's */
799 line(f,"/* Sorting macros */");
800 if (t->wo_xs) {
801 line(f,"#define XFR(A, AA, B, BB) A = B; AA = BB;");
802 line(f,"#define CEX(A, AA, B, BB) if (A < B) { \\");
803 line(f," A ^= B; B ^= A; A ^= B; AA ^= BB; BB ^= AA; AA ^= BB; }");
804 line(f,"#define CXJ(A, B, BB, D, DD, L) if (A >= B) { D = B; DD = BB; goto L; }");
805 } else {
806 line(f,"#define XFR(A, B) A = B;");
807 line(f,"#define CEX(A, B) if (A < B) { A ^= B; B ^= A; A ^= B; }");
808 line(f,"#define CXJ(A, B, D, L) if (A >= B) { D = B; goto L; }");
809 }
810 line(f,"#define CJ(A, B, L) if (A >= B) goto L;");
811 cr(f);
812
813 } else { /* Simplex table */
814 if (t->it_xs) {
815 line(f,"/* Input table interp. index */");
816 line(f,"#define IT_IX(p, off) *((%s *)((p) + %d + (off) * %d))",
817 a->ords[f->ixet].name, t->ix_eo, t->it_ts);
818 cr(f);
819 line(f,"/* Input table simplex index enty */");
820 line(f,"#define IT_SX(p, off) *((%s *)((p) + %d + (off) * %d))",
821 a->ords[f->sxet].name, t->sx_eo, t->it_ts);
822 cr(f);
823 } else {
824 line(f,"/* Input table inter & simplex indexes */");
825 line(f,"#define IT_IT(p, off) *((%s *)((p) + %d + (off) * %d))",
826 a->ords[f->itet].name, 0, t->it_ts);
827 cr(f);
828 }
829 }
830
831 if (!t->sort) {
832 /* Macro for computing a simplex table entry */
833 line(f,"/* Simplex weighting table access */");
834 line(f,"#define SW_O(off) ((off) * %d)", t->sm_ts);
835 cr(f);
836
837 /* Macros for accessing the contents of the simplex table */
838 if (t->wo_xs) { /* If separate */
839 line(f,"/* Simplex table - get weighting value */");
840 line(f,"#define SX_WE(p, v) *((%s *)((p) + (v) * %d + %d))",
841 a->ords[f->weet].name, t->wo_es, t->we_eo);
842 cr(f);
843
844 line(f,"/* Simplex table - get offset value */");
845 line(f,"#define SX_VO(p, v) *((%s *)((p) + (v) * %d + %d))",
846 a->ords[f->voet].name, t->wo_es, t->vo_eo);
847 cr(f);
848
849 } else { /* Combined */
850 line(f,"/* Simplex table - get weighting/offset value */");
851 line(f,"#define SX_WO(p, v) *((%s *)((p) + (v) * %d))",
852 a->ords[f->woet].name, t->wo_es);
853 cr(f);
854 }
855 }
856
857 /* Macro for computing an interpolation table entry */
858 line(f,"/* Interpolation multi-dim. table access */");
859 line(f,"#define IM_O(off) ((off) * %d)", t->im_ts);
860 cr(f);
861
862 /* Macro for accessing an entry in the interpolation table */
863 line(f,"/* Interpolation table - get vertex values */");
864
865 if (t->im_fn > 0) {
866 /* Arguments to macro are cell base address, vertex offset, data offset */
867
868 if (f->imfvt == f->iafvt) { /* Table and accumulator are the same size */
869 if (!timp || t->im_fn == 1)
870 line(f,"#define IM_FE(p, v, c) *((%s *)((p) + (v) * %d + (c) * %d))",
871 a->ords[f->imfvt].name, t->im_oc, t->im_fs);
872 else {
873 line(f,"#define IM_TP(p, v) ((p) + (v) * %d)", t->im_oc);
874 line(f,"#define IM_FE(p, c) *((%s *)((p) + (c) * %d))",
875 a->ords[f->imfvt].name, t->im_fs);
876 }
877 } else { /* Expand single table entry to accumulator size */
878 if (!timp || t->im_fn == 1)
879 line(f,"#define IM_FE(p, v, c) ((%s)*((%s *)((p) + (v) * %d + (c) * %d)))",
880 a->ords[f->iafvt].name,
881 a->ords[f->imfvt].name, t->im_oc, t->im_fs);
882 else {
883 line(f,"#define IM_TP(p, v) ((p) + (v) * %d)", t->im_oc);
884 line(f,"#define IM_FE(p, c) ((%s)*((%s *)((p) + (c) * %d)))",
885 a->ords[f->iafvt].name,
886 a->ords[f->imfvt].name, t->im_fs);
887 }
888 }
889 }
890 if (t->im_pn > 0) {
891 /* Arguments to macro are cell base address, vertex offset */
892 /* There is no data offset since there can be only be one partial entry */
893
894 if (f->imfvt == f->iafvt) /* Table and accumulator are the same size */
895 line(f,"#define IM_PE(p, v) *((%s *)((p) + %d + (v) * %d))",
896 a->ords[f->impvt].name, t->im_fn * t->im_fs, t->im_oc);
897 else /* Expand single table entry to accumulator size */
898 line(f,"#define IM_PE(p, v) ((%s)*((%s *)((p) + %d + (v) * %d)))",
899 a->ords[f->iafvt].name,
900 a->ords[f->impvt].name, t->im_fn * t->im_fs, t->im_oc);
901 }
902 cr(f);
903
904 /* Macro for accessing an output table entry */
905 line(f,"/* Output table indexes */");
906 line(f,"#define OT_E(p, off) *((%s *)((p) + (off) * %d))",
907 a->ords[f->otvt].name, t->ot_ts);
908 cr(f);
909
910 /* =============================================== */
911
912 #ifdef VERBOSE
913 printf("Starting interpolation function\n"); fflush(stdout);
914 #endif /* VERBOSE */
915
916 /* Declare the function */
917 line(f,"void");
918 line(f, "imdi_k%d(",index);
919 line(f, "imdi *s, /* imdi context */");
920 line(f, "void **outp, /* pointer to output pointers */");
921 line(f, "int ostride, /* optional input component stride */");
922 line(f, "void **inp, /* pointer to input pointers */");
923 line(f, "int istride, /* optional input component stride */");
924 line(f, "unsigned int npix /* Number of pixels to process */");
925 line(f, ") {");
926 inc(f);
927
928 /* We need access to the imdi_imp */
929 line(f, "imdi_imp *p = (imdi_imp *)(s->impl);");
930
931 /* Declare the input pointers and init them */
932 for (e = 0; e < f->nip; e++) {
933 if (g->opt & opts_bwd) {
934 if (g->opt & opts_istride)
935 line(f, "%s *ip%d = (%s *)inp[%d] + (npix-1) * istride;",
936 a->ords[f->ipt[e]].name, e,
937 a->ords[f->ipt[e]].name, e);
938 else
939 line(f, "%s *ip%d = (%s *)inp[%d] + (npix-1) * %d;",
940 a->ords[f->ipt[e]].name, e,
941 a->ords[f->ipt[e]].name, e,
942 g->in.chi[e]);
943 } else {
944 g->opt |= opts_fwd; /* Make sure it's marked for what it is */
945 line(f, "%s *ip%d = (%s *)inp[%d];",
946 a->ords[f->ipt[e]].name, e, a->ords[f->ipt[e]].name, e);
947 }
948 }
949
950 /* Declare the output pointers and init them */
951 for (e = 0; e < f->nop; e++) {
952 if (g->opt & opts_bwd) {
953 if (g->opt & opts_ostride)
954 line(f, "%s *op%d = (%s *)outp[%d] + (npix-1) * ostride;",
955 a->ords[f->opt[e]].name, e,
956 a->ords[f->opt[e]].name, e);
957 else
958 line(f, "%s *op%d = (%s *)outp[%d] + (npix-1) * %d;",
959 a->ords[f->opt[e]].name, e,
960 a->ords[f->opt[e]].name, e,
961 g->out.chi[e]);
962 } else {
963 line(f, "%s *op%d = (%s *)outp[%d];",
964 a->ords[f->opt[e]].name, e, a->ords[f->opt[e]].name, e);
965 }
966 }
967
/* Declare and initialise the end pointer */
969 if (g->opt & opts_bwd) {
970 if (g->opt & opts_istride)
971 line(f, "%s *ep = (%s *)inp[0] - istride ;",
972 a->ords[f->ipt[0]].name,
973 a->ords[f->ipt[0]].name);
974 else
975 line(f, "%s *ep = (%s *)inp[0] - %d ;",
976 a->ords[f->ipt[0]].name,
977 a->ords[f->ipt[0]].name, g->in.chi[0]);
978 } else {
979 if (g->opt & opts_istride)
980 line(f, "%s *ep = (%s *)inp[0] + npix * istride ;",
981 a->ords[f->ipt[0]].name,
982 a->ords[f->ipt[0]].name);
983 else
984 line(f, "%s *ep = (%s *)inp[0] + npix * %d ;",
985 a->ords[f->ipt[0]].name,
986 a->ords[f->ipt[0]].name, g->in.chi[0]);
987 }
988
989 /* Declare and initialise the input table pointers */
990 for (e = 0; e < g->id; e++)
991 line(f,"pointer it%d = (pointer)p->in_tables[%d];",e,e);
992
993 /* Declare and initialise the output table pointers */
994 for (e = 0; e < g->od; e++)
995 line(f,"pointer ot%d = (pointer)p->out_tables[%d];",e,e);
996
997 if (!t->sort) {
998 /* Declare and initialise the Simplex weighting base pointer */
999 line(f,"pointer sw_base = (pointer)p->sw_table;");
1000 }
1001
1002 /* Declare and initialise the Interpolation multidim base pointer */
1003 line(f,"pointer im_base = (pointer)p->im_table;");
1004
1005 /* Figure out whether input channel reads can be used directly as table offsets */
1006 t->it_ix = 1; /* Default use input table lookup to extract value */
1007
1008 if (g->in.packed != 0)
1009 t->it_ix = 0; /* Extract will be done explicitly */
1010
1011 for (e = 0; e < g->id; e++) {
1012 int ee = (g->in.pint != 0) ? 0 : e; /* bpch index */
1013
1014 if ((g->in.bov[e] + g->in.bpv[e]) <= 12)
1015 continue; /* Table can do extract */
1016
1017 if (g->in.bov[e] != 0 || g->in.bpv[e] != g->in.bpch[ee]) {
1018 t->it_ix = 0; /* Extract will be done explicitly */
1019 break;
1020 }
1021 }
1022
1023 /* ------------------------------- */
1024 #ifdef VERBOSE
1025 printf("Starting pixel processing loop\n"); fflush(stdout);
1026 #endif /* VERBOSE */
1027
1028 /* Start the pixel processing loop */
1029 cr(f);
1030 if (g->opt & opts_bwd) {
1031 sline(f, "for(;ip0 != ep;");
1032
1033 if (g->opt & opts_istride)
1034 for (e = 0; e < f->nip; e++)
1035 mline(f, " ip%d -= istride,", e);
1036 else
1037 for (e = 0; e < f->nip; e++)
1038 mline(f, " ip%d -= %d,", e, g->in.chi[e]);
1039
1040 if (g->opt & opts_ostride)
1041 for (e = 0; e < f->nop; e++)
1042 mline(f, " op%d -= ostride%s", e, ((e+1) < f->nop) ? "," : "");
1043 else
1044 for (e = 0; e < f->nop; e++)
1045 mline(f, " op%d -= %d%s", e, g->out.chi[e], ((e+1) < f->nop) ? "," : "");
1046 } else {
1047 sline(f, "for(;ip0 != ep;");
1048
1049 if (g->opt & opts_istride)
1050 for (e = 0; e < f->nip; e++)
1051 mline(f, " ip%d += istride,", e);
1052 else
1053 for (e = 0; e < f->nip; e++)
1054 mline(f, " ip%d += %d,", e, g->in.chi[e]);
1055
1056 if (g->opt & opts_ostride)
1057 for (e = 0; e < f->nop; e++)
1058 mline(f, " op%d += ostride%s", e, ((e+1) < f->nop) ? "," : "");
1059 else
1060 for (e = 0; e < f->nop; e++)
1061 mline(f, " op%d += %d%s", e, g->out.chi[e], ((e+1) < f->nop) ? "," : "");
1062 }
1063 eline(f, ") {");
1064 inc(f);
1065
1066 /* Declare output value accumulator(s) */
1067 for (i = 0; i < t->im_fn; i++) {
1068 line(f,"%s ova%d; /* Output value accumulator */",a->ords[f->iafvt].name,i);
1069 }
1070 for (; i < f->ian; i++) {
1071 line(f,"%s ova%d; /* Output value partial accumulator */",a->ords[f->iapvt].name,i);
1072 }
1073
1074 /* Context around interp/Simplex table lookup */
1075 line(f, "{");
1076 inc(f);
1077
1078 if (!t->sort)
1079 line(f,"pointer swp;"); /* Declare Simplex weighting pointer */
1080 line(f,"pointer imp;"); /* Declare Interpolation multidim pointer */
1081
1082 /* Declare the input weighting/vertex offset variables */
1083 if (t->sort) {
1084 for (e = 0; e < g->id; e++) {
1085 if (t->wo_xs) {
1086 line(f,"%s we%d; /* Weighting value variable */",
1087 a->ords[f->wevt].name, e);
1088 line(f,"%s vo%d; /* Vertex offset variable */",
1089 a->ords[f->vovt].name, e);
1090 } else {
1091 line(f,"%s wo%d; /* Weighting value and vertex offset variable */",
1092 a->ords[f->wovt].name, e);
1093 }
1094 }
1095 }
1096
1097 /* Context around input table processing */
1098 line(f, "{");
1099 inc(f);
1100
1101 /* Declare the table index variables/input weighting/vertex offset variables */
1102 if (t->sort) {
1103 if (!t->it_xs)
1104 line(f,"%s ti; /* Input table entry variable */",a->ords[f->itvt].name);
1105 line(f,"%s ti_i; /* Interpolation index variable */",a->ords[f->ixvt].name);
1106 } else {
1107 if (t->it_xs) {
1108 line(f,"%s ti_s; /* Simplex index variable */",a->ords[f->sxvt].name);
1109 line(f,"%s ti_i; /* Interpolation index variable */",a->ords[f->ixvt].name);
1110 } else {
1111 line(f,"%s ti; /* Simplex+Interpolation index variable */",a->ords[f->itvt].name);
1112 }
1113 }
1114
1115 if (g->in.packed != 0) /* We need to unpack from a single read */
1116 line(f,"%s rdv; /* Read value */",a->ords[f->ipt[0]].name);
1117
1118 if (t->it_ix == 0) {
1119 int bv = 0;
1120 for (e = 0; e < f->nip; e++) { /* Find largest input type */
1121 if (f->ipt[e] > bv)
1122 bv = f->ipt[e];
1123 }
1124 bv = nord(f, bv);
1125 line(f,"%s chv; /* Channel value */",a->ords[bv].name);
1126 f->chv_bits = a->ords[bv].bits;
1127 }
1128 cr(f);
1129
1130 #ifdef VERBOSE
1131 printf("Read code\n"); fflush(stdout);
1132 #endif /* VERBOSE */
1133
1134 /* For all the input channels */
1135 for (e = 0; e < g->id; e++) {
1136 char rde[50]; /* Read expression */
1137 char toff[50]; /* Table offset expression */
1138 int ee = (g->in.pint != 0) ? 0 : e; /* bpch index */
1139
1140 if (g->in.pint != 0) /* Pixel interleaved */
1141 sprintf(rde,"ip0[%d]",e); /* Offset from single pointer */
1142 else
1143 sprintf(rde,"*ip%d",e); /* Pointer per channel */
1144
1145 if (g->in.packed != 0) {
1146 if (e == 0)
1147 line(f,"rdv = %s;",rde); /* Do single read */
1148 sprintf(rde,"rdv"); /* Use read value for extraction */
1149 }
1150
1151 if (t->it_ix == 0) {
1152 if (g->in.bov[e] == 0 ) { /* No offset */
1153 if (g->in.bpv[e] == g->in.bpch[ee]) /* No mask */
1154 line(f,"chv = %s;",rde);
1155 else /* Just mask */
1156 line(f,"chv = (%s & %s);",rde, hmask(g->in.bpv[e]));
1157 } else { /* Offset */
1158 if ((g->in.bov[e] + g->in.bpv[e]) == g->in.bpch[ee])
1159 line(f,"chv = (%s >> %d);",rde, g->in.bov[e]);
1160 else { /* Offset and mask */
1161 if (a->shfm || g->in.bpv[e] > 32) {
1162 /* Extract using just shifts */
1163 line(f,"chv = ((%s << %d) >> %d);", rde,
1164 f->chv_bits - g->in.bpv[e] - g->in.bov[e],
1165 f->chv_bits - g->in.bpv[e]);
1166 } else {
1167 /* Extract using shift and mask */
1168 line(f,"chv = ((%s >> %d) & %s);",
1169 rde, g->in.bov[e], hmask(g->in.bpv[e]));
1170 }
1171 }
1172 }
1173 sprintf(toff,"chv");
1174 } else { /* No extraction */
1175 sprintf(toff,"%s",rde);
1176 }
1177
1178 if (t->sort) {
1179 if (t->it_xs) {
1180 line(f,"ti_i %s= IT_IX(it%d, %s);", e ? "+" : " ", e, toff);
1181 if (t->wo_xs) {
1182 line(f,"we%d = IT_WE(it%d, %s);", e, e, toff);
1183 line(f,"vo%d = IT_VO(it%d, %s);", e, e, toff);
1184 } else {
1185 line(f,"wo%d = IT_WO(it%d, %s);", e, e, toff);
1186 }
1187 } else { /* All three combined */
1188 line(f,"ti = IT_IT(it%d, %s);", e, toff);
1189 if (a->shfm || t->wo_ab > 32) {
1190 /* Extract using just shifts */
1191 line(f,"wo%d = ((ti << %d) >> %d); "
1192 "/* Extract weighting/vertex offset value */",
1193 e, a->ords[f->wovt].bits - t->wo_ab, a->ords[f->wovt].bits - t->wo_ab);
1194 line(f,"ti_i %s= (ti >> %d); "
1195 "/* Extract interpolation table value */",
1196 e ? "+" : " ", t->wo_ab);
1197 } else {
1198 /* Extract using shift and mask */
1199 line(f,"wo%d = (ti & %s); "
1200 "/* Extract weighting/vertex offset value */",
1201 e, hmask(t->wo_ab));
1202 line(f,"ti_i %s= (ti >> %d); "
1203 "/* Extract interpolation table value */",
1204 e ? "+" : " ", t->wo_ab);
1205 }
1206 }
1207
1208 } else { /* Simplex */
1209 if (t->it_xs) {
1210 /* ~~~~ should toff be forced to be a temp variable ?? */
1211 /* (ie. force use of rde (above) if t->it_xs is nonz) */
1212 line(f,"ti_i %s= IT_IX(it%d, %s);", e ? "+" : " ", e, toff);
1213 line(f,"ti_s %s= IT_SX(it%d, %s);", e ? "+" : " ", e, toff);
1214 } else {
1215 line(f,"ti %s= IT_IT(it%d, %s);", e ? "+" : " ", e, toff);
1216 }
1217 }
1218 }
1219
1220 #ifdef VERBOSE
1221 printf("Index extraction code\n"); fflush(stdout);
1222 #endif /* VERBOSE */
1223
1224 cr(f);
1225
1226 if (t->sort) {
1227 /* Extract Simplex and Interpolation indexes from accumulator */
1228 line(f,"imp = im_base + IM_O(ti_i); /* Compute interp. table entry pointer */");
1229 } else {
1230 if (t->it_xs) { /* Extract Simplex and Interpolation indexes from accumulator */
1231 line(f,"swp = sw_base + SW_O(ti_s); /* Compute simplex table entry pointer */");
1232 line(f,"imp = im_base + IM_O(ti_i); /* Compute interp. table entry pointer */");
1233 } else {
1234 line(f,"imp = im_base + IM_O(ti >> %d); "
1235 "/* Extract interp. index and comp. entry */",
1236 t->sx_ab);
1237 if (a->shfm || t->sx_ab > 32) {
1238 /* Extract using just shifts */
1239 line(f,"swp = sw_base + SW_O((ti << %d) >> %d); "
1240 "/* Extract simplex index & comp. entry */",
1241 a->ords[f->itvt].bits - t->sx_ab, a->ords[f->itvt].bits - t->sx_ab);
1242 } else {
1243 /* Extract using shift and mask */
1244 line(f,"swp = sw_base + SW_O(ti & %s); "
1245 "/* Extract simplex index and comp. entry */",
1246 hmask(t->sx_ab));
1247 }
1248 }
1249 }
1250
1251 /* Do the explicit sort now */
1252 if (t->sort) {
1253 cr(f);
1254 /* Sort from largest to smallest */
/* We can use a selection sort, or an insertion sort. */
1256
1257 line(f,"/* Sort weighting values and vertex offset values */");
1258
1259 if (g->id >= INSTHRESH) {
1260 /* We do an insertion sort */
1261 lineinc(f,"{");
1262 if (t->wo_xs) {
1263 line(f,"%s wet; /* Sort temporary */", a->ords[f->wevt].name);
1264 line(f,"%s vot; /* Sort temporary */", a->ords[f->vovt].name);
1265 } else
1266 line(f,"%s wot; /* Sort temp variable */", a->ords[f->wovt].name);
1267 cr(f);
1268
1269 for (i = 1; i < g->id; i++) {
1270 int j;
1271
1272 j = i;
1273 if (j < 2) { /* Only test & exchange needed */
1274 if (t->wo_xs)
1275 line(f,"CEX(we%d, vo%d, we%d, vo%d);",j-1,j-1,j,j);
1276 else
1277 line(f,"CEX(wo%d, wo%d);",j-1,j);
1278
1279 } else {
1280 if (t->wo_xs)
1281 line(f,"XFR(wet, vot, we%d, vo%d);",j,j);
1282 else
1283 line(f,"XFR(wot, wo%d);",j);
1284 while (j > 0) {
1285 if (j == i) { /* First test from i */
1286 if (t->wo_xs)
1287 line(f,"CJ(we%d, wet, shs%d);",j-1,i);
1288 else
1289 line(f,"CJ(wo%d, wot, shs%d);",j-1,i);
1290 if (t->wo_xs)
1291 line(f,"XFR(we%d, vo%d, we%d, vo%d);",j,j,j-1,j-1);
1292 else
1293 line(f,"XFR(wo%d, wo%d);",j,j-1);
1294 } else {
1295 if (t->wo_xs)
1296 line(f,"CXJ(we%d, wet, vot, we%d, vo%d, shs%d);",j-1,j,j,i);
1297 else
1298 line(f,"CXJ(wo%d, wot, wo%d, shs%d);",j-1,j,i);
1299 if (t->wo_xs)
1300 line(f,"XFR(we%d, vo%d, we%d, vo%d);",j,j,j-1,j-1);
1301 else
1302 line(f,"XFR(wo%d, wo%d);",j,j-1);
1303 }
1304 j--;
1305 }
1306 if (t->wo_xs)
1307 line(f,"XFR(we%d, vo%d, wet, vot);",j,j);
1308 else
1309 line(f,"XFR(wo%d, wot);",j);
1310 niline(f,"shs%d:;",i);
1311 }
1312 }
1313 decline(f,"}");
1314
1315 } else {
1316 /* Use a selection sort */
1317 for (i = 0; i < (g->id-1); i++) {
1318 for (e = i+1; e < g->id; e++) {
1319 if (t->wo_xs)
1320 line(f,"CEX(we%d, vo%d, we%d, vo%d);",i,i,e,e);
1321 else
1322 line(f,"CEX(wo%d, wo%d);",i,e);
1323 }
1324 }
1325 }
1326 }
1327
1328 /* End of input table processing context */
1329 dec(f);
1330 line(f,"}");
1331
1332 line(f,"{"); /* Context around vertex lookup and accumulation */
1333 inc(f);
1334
1335 /* Declare vertex offset and weight variables */
1336 if (t->sort && t->wo_xs == 0) {
1337 line(f,"%s nvof; /* Next vertex offset value */",a->ords[f->vovt].name);
1338 } else {
1339 if (!t->wo_xs) /* If combined in table */
1340 line(f,"%s vowr; /* Vertex offset/weight value */",a->ords[f->wovt].name);
1341 }
1342 line(f,"%s vof; /* Vertex offset value */",a->ords[f->vovt].name);
1343 line(f,"%s vwe; /* Vertex weighting */",a->ords[f->wevt].name);
1344 if (timp && t->im_fn > 1)
1345 line(f,"pointer timp; /* Temporary interpolation table pointer */");
1346 cr(f);
1347
1348 #ifdef VERBOSE
1349 printf("Vertex offset and weight code\n"); fflush(stdout);
1350 #endif /* VERBOSE */
1351
1352 /* For each vertex in the simplex */
1353 for (e = 0; e < (g->id +1); e++) {
1354
1355 if (t->sort) {
1356
1357 if (e == 0) {
1358 line(f,"vof = 0; /* First vertex offset is 0 */");
1359 } else {
1360 if (t->wo_xs)
1361 line(f,"vof += vo%d; /* Move to next vertex */",e-1);
1362 else
1363 line(f,"vof += nvof; /* Move to next vertex */");
1364 }
1365
1366 /* Extract the vertex offset and weight values from the sorted input values */
1367 if (e < g->id && !t->wo_xs) {
1368 if (a->shfm || t->vo_ab > 32) {
1369 /* Extract using just shifts */
1370 line(f,"nvof = ((wo%d << %d) >> %d); "
1371 "/* Extract offset value */",
1372 e, a->ords[f->vovt].bits - t->vo_ab, a->ords[f->vovt].bits - t->vo_ab);
1373 line(f,"wo%d = (wo%d >> %d); "
1374 " /* Extract weighting value */",
1375 e, e, t->vo_ab);
1376 } else {
1377 /* Extract using shift and mask */
1378 line(f,"nvof = (wo%d & %s); "
1379 "/* Extract offset value */",
1380 e, hmask(t->vo_ab));
1381 line(f,"wo%d = (wo%d >> %d); "
1382 " /* Extract weighting value */",
1383 e, e, t->vo_ab);
1384 }
1385 }
1386 /* Compute the weighting value */
1387 if (!t->wo_xs) {
1388 if (e == 0) {
1389 line(f,"vwe = %d - wo%d; /* Baricentric weighting */", 1 << g->prec, e);
1390 } else if (e < g->id) {
1391 line(f,"vwe = wo%d - wo%d; /* Baricentric weighting */", e-1, e);
1392 } else {
1393 line(f,"vwe = wo%d; /* Baricentric weighting */", e-1);
1394 }
1395 } else {
1396 if (e == 0) {
1397 line(f,"vwe = %d - we%d; /* Baricentric weighting */", 1 << g->prec, e);
1398 } else if (e < g->id) {
1399 line(f,"vwe = we%d - we%d; /* Baricentric weighting */", e-1, e);
1400 } else {
1401 line(f,"vwe = we%d; /* Baricentric weighting */", e-1);
1402 }
1403 }
1404
1405 } else { /* Not sort */
1406 /* Read the vertex offset and weight values from the simplex table */
1407 if (t->wo_xs) { /* If separate */
1408 line(f,"vof = SX_VO(swp, %d); /* Read vertex offset value */", e);
1409 line(f,"vwe = SX_WE(swp, %d); /* Read vertex weighting value */", e);
1410 } else { /* If combined in table */
1411 line(f,"vowr = SX_WO(swp, %d); /* Read vertex offset+weighting values */", e);
1412 if (a->shfm || t->vo_ab > 32) {
1413 /* Extract using just shifts */
1414 line(f,"vof = ((vowr << %d) >> %d); "
1415 "/* Extract offset value */",
1416 a->ords[f->vovt].bits - t->vo_ab, a->ords[f->vovt].bits - t->vo_ab);
1417 line(f,"vwe = (vowr >> %d); "
1418 "/* Extract weighting value */",
1419 t->vo_ab);
1420 } else {
1421 /* Extract using shift and mask */
1422 line(f,"vof = (vowr & %s); "
1423 "/* Extract offset value */",
1424 hmask(t->vo_ab));
1425 line(f,"vwe = (vowr >> %d); "
1426 "/* Extract weighting value */",
1427 t->vo_ab);
1428 }
1429 }
1430 }
1431
1432 /* Lookup the vertex value, weight it, and accumulate it into output value */
1433 if (timp && t->im_fn > 1)
1434 line(f,"timp = IM_TP(imp, vof); /* Vertex address */");
1435 for (i = 0; i < f->ian; i++) { /* For each output accumulation chunk */
1436 if (i < t->im_fn) { /* Full entry */
1437 if (!timp || t->im_fn == 1)
1438 line(f,"ova%d %s= IM_FE(imp, vof, %d) * vwe; "
1439 "/* Accumulate weighted output values */",
1440 i, e ? "+" : " ", i);
1441 else
1442 line(f,"ova%d %s= IM_FE(timp, %d) * vwe; "
1443 "/* Accumulate weighted output values */",
1444 i, e ? "+" : " ", i);
1445 } else /* One partial entry */
1446 line(f,"ova%d %s= IM_PE(imp, vof) * vwe; "
1447 "/* Accumulate last weighted output values */",
1448 i, e ? "+" : " ");
1449 }
1450 }
1451
1452 dec(f);
1453 line(f, "}"); /* End of output value lookup context */
1454
1455 dec(f);
1456 line(f, "}"); /* End of output value accumulation context */
1457
1458 /* Start of output lookup and write */
1459 line(f,"{");
1460 inc(f);
1461
1462 #ifdef VERBOSE
1463 printf("Output table code\n"); fflush(stdout);
1464 #endif /* VERBOSE */
1465
1466 {
1467 char wre[50]; /* Write destination expression */
1468
1469 if (g->out.packed != 0) /* We need to pack results into a single write */
1470 line(f,"%s wrv; /* Write value */",a->ords[f->ipt[0]].name);
1471
1472 /* Declare temporary to hold index into output lookup table */
1473 line(f,"%s oti; /* Vertex offset value */",a->ords[f->otit].name);
1474 if (g->oopt & OOPTS_CHECK)
1475 line(f,"%s otv; /* Output temporary value */",a->ords[f->otvt].name);
1476
1477 /* For each accumulator value */
1478 /* (Assume they are in output order for the moment ?) */
1479 for (e = i = 0; i < f->ian; i++) { /* For each output accumulation chunk */
1480 int vpa = i < t->im_fn ? t->im_fv : t->im_pv; /* Chanel values per accumulator */
1481 int oat = i < t->im_fn ? f->iafvt : f->iapvt; /* Output accumulator type */
1482 int ee; /* Relative e to this accumulator */
1483
1484 /* For each output value in this accumulator */
1485 for (ee = 0; ee < vpa && e < g->od; ee++, e++) {
1486 int off, size; /* Bits to be extracted */
1487
1488 /* Extract wanted 8 bits from the 8.8 bit result in accumulator */
1489 /* (or 16 bits from 16.16) */
1490 off = ee * f->iaovb + (f->iaovb - g->prec);
1491 size = g->prec;
1492
1493 if (e == 0 || g->out.packed == 0) {
1494 if (g->out.pint != 0) /* Pixel interleaved */
1495 sprintf(wre,"op0[%d]",e); /* Offset from single pointer */
1496 else
1497 sprintf(wre,"*op%d",e); /* Pointer per channel */
1498 }
1499
1500 if (a->shfm || size > 32) {
1501 /* Extract using just shifts */
1502 #ifdef ROUND
1503 line(f,"oti = (((ova%d + (1 << %d)) << %d) >> %d); "
1504 "/* Extract integer part of result */",
1505 i, off-1, a->ords[oat].bits - off - size, a->ords[oat].bits - size);
1506 #else
1507 line(f,"oti = ((ova%d << %d) >> %d); "
1508 "/* Extract integer part of result */",
1509 i, a->ords[oat].bits - off - size, a->ords[oat].bits - size);
1510 #endif
1511 } else {
1512 /* Extract using shift and mask */
1513 #ifdef ROUND
1514 line(f,"oti = (((ova%d + 0x%x) >> %d) & %s); "
1515 "/* Extract integer part of result */",
1516 i, (1 << off-1), off, hmask(size));
1517 #else
1518 line(f,"oti = ((ova%d >> %d) & %s); "
1519 "/* Extract integer part of result */",
1520 i, off, hmask(size));
1521 #endif
1522 }
1523
1524 if (g->oopt & OOPT(oopts_check,e)) { /* Lookup with check */
1525 line(f,"otv = OT_E(ot%d, oti); /* Fetch result */", e);
1526 line(f,"if (otv != p->checkv[%d]) /* Do output value check */", e);
1527 line(f," p->checkf |= (1 << %d); /* Set check flag */", e);
1528 if (g->out.packed != 0) {
1529 if (g->oopt & OOPT(oopts_skip,e))
1530 return 2; /* Error, can't skip on pixel interleaved */
1531 line(f,"wrv %s= otv;", e ? "+" : "", e);
1532 } else {
1533 if (g->oopt & OOPT(oopts_skip,e)) {
1534 line(f,"if ((p->skipf & (1 << %d)) == 0) /* If not being skipped */", e);
1535 line(f," %s = otv; /* Write result */", wre);
1536 } else
1537 line(f,"%s = otv; /* Write result */", wre);
1538 }
1539 } else { /* Normal lookup output table */
1540 /* Lookup in output table and write to destination */
1541 if (g->out.packed != 0) {
1542 if (g->oopt & OOPT(oopts_skip,e))
1543 return 2; /* Error, can't skip on pixel interleaved */
1544 line(f,"wrv %s= OT_E(ot%d, oti);", e ? "+" : "", e);
1545 } else {
1546 if (g->oopt & OOPT(oopts_skip,e)) {
1547 line(f,"if ((p->skipf & (1 << %d)) == 0) /* If not being skipped */", e);
1548 line(f," %s = OT_E(ot%d, oti); /* Write result */", wre, e);
1549 } else
1550 line(f,"%s = OT_E(ot%d, oti); /* Write result */", wre, e);
1551 }
1552 }
1553 }
1554 }
1555
1556 if (g->out.packed != 0) { /* Write out the accumulated value */
1557 line(f,"%s = wrv; /* Write result */", wre);
1558 }
1559 }
1560
1561 /* The end of the output lookup and write */
1562 dec(f);
1563 line(f, "}");
1564
1565 /* The end of the pixel processing loop */
1566 dec(f);
1567 line(f, "}");
1568
1569 /* The end of the function */
1570 dec(f);
1571 line(f, "}");
1572
1573 /* Undefine all the macros */
1574 if (t->sort) {
1575 if (t->it_xs) {
1576 if (t->wo_xs) {
1577 line(f,"#undef IT_WE");
1578 line(f,"#undef IT_VO");
1579 } else
1580 line(f,"#undef IT_WO");
1581 line(f,"#undef IT_IX");
1582 } else {
1583 line(f,"#undef IT_IT");
1584 }
1585 line(f,"#undef CXJ");
1586 line(f,"#undef CJ");
1587 line(f,"#undef XFR");
1588 line(f,"#undef CEX");
1589 } else {
1590 if (t->it_xs) {
1591 line(f,"#undef IT_IX");
1592 line(f,"#undef IT_SX");
1593 } else {
1594 line(f,"#undef IT_IT");
1595 }
1596
1597 line(f,"#undef SW_O");
1598 if (t->wo_xs) {
1599 line(f,"#undef SX_WE");
1600 line(f,"#undef SX_VO");
1601 } else {
1602 line(f,"#undef SX_WO");
1603 }
1604 }
1605 line(f,"#undef IM_O");
1606 if (t->im_fn > 0) {
1607 if (timp && t->im_fn > 1)
1608 line(f,"#undef IM_TP");
1609 line(f,"#undef IM_FE");
1610 }
1611 if (t->im_pn > 0) {
1612 line(f,"#undef IM_PE");
1613 }
1614 line(f,"#undef OT_E");
1615
1616 /* =============================================== */
1617 #ifdef VERBOSE
1618 printf("Done interpolation code\n"); fflush(stdout);
1619 #endif /* VERBOSE */
1620
1621 /* =============================================== */
1622
/* !genspec and tabspec delta code! */
/* We generate code that updates any entries in the genspec and */
/* tabspec structures that are different for this kernel, */
/* compared to the previously generated kernel. */
/* In this way, we save a lot of space, at the price */
/* of having to access the table of kernels sequentially. */

/* If the genspec or tabspec structures are modified, */
/* then corresponding changes need to be made to the code here. */
1632 {
1633 int i;
1634 int s_stres, s_itres; /* Save values */
1635 imdi_options s_opt;
1636
1637 s_stres = g->stres;
1638 s_itres = g->itres;
1639 s_opt = g->opt;
1640 g->stres = f->sxmxres; /* Set maximum values */
1641 g->itres = f->ixmxres;
1642 g->opt &= ~opts_splx; /* Don't care about this, only about opts_splx/sort */
1643 if (frv == 0) { /* Simplex algorithm wasn't possible */
1644 g->opt &= ~opts_splx_sort; /* Therefore we don't care about preference */
1645 g->opt &= ~opts_sort_splx;
1646 }
1647
1648 /* Declare the genspec & tabspec update function */
1649 cr(f);
1650 line(f,"void");
1651 line(f, "imdi_k%d_gentab(",index);
1652 line(f, "genspec *g, /* structure to be updated */");
1653 line(f, "tabspec *t /* structure to be updated */");
1654 line(f, ") {");
1655 inc(f);
1656
1657 #define GSET_ENTRY(KEY) if (g->KEY != og->KEY) line(f, "g->%s = %d;",#KEY,g->KEY)
1658 #define GSET_ARRAY(KEY,IX) if (g->KEY[IX] != og->KEY[IX]) line(f, "g->%s[%d] = %d;",#KEY,IX,g->KEY[IX])
1659 #define TSET_ENTRY(KEY) if (t->KEY != ot->KEY) line(f, "t->%s = %d;",#KEY,t->KEY)
1660 #define TSET_ARRAY(KEY,IX) if (t->KEY[IX] != ot->KEY[IX]) line(f, "t->%s[%d] = %d;",#KEY,IX,t->KEY[IX])
1661
1662 /* Create code that updates the genspec structure from og to g */
1663 GSET_ENTRY(prec);
1664 GSET_ENTRY(id);
1665 GSET_ENTRY(od);
1666 GSET_ENTRY(irep);
1667 GSET_ENTRY(orep);
1668 GSET_ENTRY(in_signed);
1669 GSET_ENTRY(out_signed);
1670
1671 /* pixlayout structure */
1672 for (i = 0; i < IXDIDO; i++) {
1673 GSET_ARRAY(in.bpch,i);
1674 GSET_ARRAY(in.chi,i);
1675 GSET_ARRAY(in.bov,i);
1676 GSET_ARRAY(in.bpv,i);
1677 }
1678 GSET_ENTRY(in.pint);
1679 GSET_ENTRY(in.packed);
1680
1681 /* pixlayout structure */
1682 for (i = 0; i < IXDIDO; i++) {
1683 GSET_ARRAY(out.bpch,i);
1684 GSET_ARRAY(out.chi,i);
1685 GSET_ARRAY(out.bov,i);
1686 GSET_ARRAY(out.bpv,i);
1687 }
1688 GSET_ENTRY(out.pint);
1689 GSET_ENTRY(out.packed);
1690
1691 GSET_ENTRY(oopt);
1692 GSET_ENTRY(opt);
1693 GSET_ENTRY(itres);
1694 GSET_ENTRY(stres);
1695
1696 for (i = 0; i < 100; i++) {
1697 GSET_ARRAY(kkeys,i);
1698 }
1699 for (i = 0; i < 100; i++) {
1700 GSET_ARRAY(kdesc,i);
1701 }
1702 for (i = 0; i < 100; i++) {
1703 GSET_ARRAY(kname,i);
1704 }
1705
1706 /* Create code that updates the tabspec structure from og to g */
1707 TSET_ENTRY(sort);
1708 TSET_ENTRY(it_xs);
1709 TSET_ENTRY(wo_xs);
1710 TSET_ENTRY(it_ix);
1711 TSET_ENTRY(it_ab);
1712 TSET_ENTRY(it_ts);
1713 TSET_ENTRY(ix_ab);
1714 TSET_ENTRY(ix_es);
1715 TSET_ENTRY(ix_eo);
1716 TSET_ENTRY(sx_ab);
1717 TSET_ENTRY(sx_es);
1718 TSET_ENTRY(sx_eo);
1719 TSET_ENTRY(sm_ts);
1720 TSET_ENTRY(wo_ab);
1721 TSET_ENTRY(wo_es);
1722 TSET_ENTRY(wo_eo);
1723 TSET_ENTRY(we_ab);
1724 TSET_ENTRY(we_es);
1725 TSET_ENTRY(we_eo);
1726 TSET_ENTRY(vo_ab);
1727 TSET_ENTRY(vo_es);
1728 TSET_ENTRY(vo_eo);
1729 TSET_ENTRY(vo_om);
1730 TSET_ENTRY(im_cd);
1731 TSET_ENTRY(im_ts);
1732 TSET_ENTRY(im_oc);
1733 TSET_ENTRY(im_fs);
1734 TSET_ENTRY(im_fn);
1735 TSET_ENTRY(im_fv);
1736 TSET_ENTRY(im_ps);
1737 TSET_ENTRY(im_pn);
1738 TSET_ENTRY(im_pv);
1739 TSET_ENTRY(ot_ts);
1740 for (i = 0; i < IXDO; i++) {
1741 TSET_ARRAY(ot_off, i);
1742 }
1743 for (i = 0; i < IXDO; i++) {
1744 TSET_ARRAY(ot_bits,i);
1745 }
1746
1747 #undef GSET_ENTRY
1748 #undef GSET_ARRAY
1749 #undef TSET_ENTRY
1750 #undef TSET_ARRAY
1751
1752 /* The end of the function */
1753 dec(f);
1754 line(f, "}");
1755
1756 g->opt = s_opt; /* Restore entry values */
1757 g->stres = s_stres;
1758 g->itres = s_itres;
1759 }
1760
1761 /* =============================================== */
1762
1763 cr(f); cr(f); cr(f); cr(f); cr(f); cr(f);
1764
1765 return frv;
1766 }
1767
1768
1769 /* Return bits needed to store index into table of */
1770 /* given resolution and dimensionality. */
1771 static int
1772 calc_bits(
1773 int dim,
1774 int res) {
1775
1776 return (int)ceil(log((double)res) * (double)dim/log(2.0) - 1e-14);
1777 }
1778
1779 /* Return maximum resolution possible given dimensionality */
1780 /* and number of index bits. */
1781 static int
1782 calc_res(
1783 int dim,
1784 int bits) {
1785 double fres;
1786
1787 fres = log(2.0) * (double)bits/(double)dim;
1788 if (fres > 12 || (fres = exp(fres)) > 65536.0)
1789 fres = 65536.0; /* Limit to a sane value */
1790 return (int)(fres + 1e-14);
1791 }
1792
1793 /* Return bits needed to store a relative offset of 1, */
1794 /* into a table of given resolution, dimensionality , and */
1795 /* entry size. */
1796 static int
1797 calc_obits(
1798 int dim,
1799 int res,
1800 int esize) {
1801 double off; /* Maximum diagonal offset value */
1802 int bits;
1803
1804 if (res == 0 || res == 1)
1805 return 0;
1806 if (dim == 1)
1807 off = esize;
1808 else {
1809 off = (double)esize * floor(exp(log((double)res) * dim - log(res-1.0)));
1810 }
1811
1812 bits = (int)ceil(log(off)/log(2.0) - 1e-14);
1813 return bits;
1814 }
1815
/* Return the maximum resolution possible given the dimensionality, */
/* the number of offset bits available, and the entry size. */
/* Resolutions are tried upwards from 1, and the last one whose */
/* calc_obits() value still fits in bits is returned. The search */
/* stops at an arbitrary limit of 65536. */
static int
calc_ores(int dim, int bits, int esize) {
	int trial = 1;

	/* Advance while the trial resolution still fits */
	while (trial < 65537 && calc_obits(dim, trial, esize) <= bits)
		trial++;

	/* trial is now the first resolution that failed (or the limit + 1) */
	return trial - 1;
}
1835
1836
1837
1838 /* Output the introductory comments */
1839 static void
1840 doheader(
1841 fileo *f
1842 ) {
1843 genspec *g = f->g;
1844 tabspec *t = f->t;
1845 mach_arch *a = f->a;
1846 int e;
1847
1848 /* - - - - - - - - - - - - */
1849 /* Output file title block */
1850 line(f,"/* Integer Multi-Dimensional Interpolation */");
1851 line(f,"/* Interpolation Kernel Code */");
1852 line(f,"/* Generated by cgen */");
1853 line(f,"/* Copyright 2000 - 2007 Graeme W. Gill */");
1854 line(f,"/* All rights reserved. */");
1855 line(f,"/* This material is licenced under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3 :- */\n");
1856 line(f,"/* see the License.txt file for licencing details.*/\n");
1857 cr(f);
1858
1859 /* - - - - - - - - - - - - */
1860 /* Output the specification */
1861 line(f,"/*");
1862 line(f," Interpolation kernel specs:");
1863 cr(f);
1864 line(f," Input channels per pixel = %d",g->id);
1865 for (e = 0; e < g->id; e++) {
1866 line(f," Input channel %d bits = %d",e, g->in.bpch[e]);
1867 line(f," Input channel %d increment = %d",e, g->in.chi[e]);
1868 }
1869 if (g->in.pint != 0)
1870 line(f," Input is channel interleaved");
1871 else
1872 line(f," Input is plane interleaved");
1873
1874 if (g->in.packed != 0)
1875 line(f," Input channels are packed into one word");
1876 else
1877 line(f," Input channels are separate words");
1878
1879 if (t->it_ix)
1880 line(f," Input value extraction is done in input table lookup");
1881 cr(f);
1882
1883 line(f," Output channels per pixel = %d",g->od);
1884 for (e = 0; e < g->od; e++) {
1885 line(f," Output channel %d bits = %d",e, g->out.bpch[e]);
1886 line(f," Output channel %d increment = %d",e, g->out.chi[e]);
1887 if (g->oopt & OOPT(oopts_check,e))
1888 line(f," Output channel %d has value check",e);
1889 if (g->oopt & OOPT(oopts_skip,e))
1890 line(f," Output channel %d has skip available",e);
1891 }
1892 if (g->out.pint != 0)
1893 line(f," Output is channel interleaved");
1894 else
1895 line(f," Output is plane interleaved");
1896 if (g->out.packed != 0)
1897 line(f," Output channels are packed into one word");
1898 else
1899 line(f," Output channels are separate words");
1900 cr(f);
1901
1902 line(f," Basic Internal precision bits = %d",g->prec);
1903 if (t->sort)
1904 line(f," Weight+voffset bits = %d",t->sx_ab);
1905 else
1906 line(f," Simplex table index bits = %d",t->sx_ab);
1907 line(f," Interpolation table index bits = %d",t->ix_ab);
1908 if (!t->sort)
1909 line(f," Simplex table max resolution = %d",f->sxmxres);
1910 line(f," Interpolation table max resolution = %d",f->ixmxres);
1911 cr(f);
1912 line(f," Processing direction is %s",g->opt & opts_bwd ? "backwards" : "forwards" );
1913 line(f," Input stride is %ssupported",g->opt & opts_istride ? "" : "not " );
1914 line(f," Output stride is %ssupported",g->opt & opts_ostride ? "" : "not " );
1915 if (g->opt & opts_splx_sort)
1916 line(f," Prefer simplex over sort algorithm");
1917 if (g->opt & opts_sort_splx)
1918 line(f," Prefer sort over simplex");
1919 line(f," */");
1920 cr(f);
1921
1922 /* - - - - - - - - - - - - */
1923 line(f,"/*");
1924 line(f," Machine architecture specs:");
1925 cr(f);
1926 if (a->bigend != 0)
1927 line(f," Big Endian");
1928 else
1929 line(f," Little endian");
1930
1931 if (a->uwa != 0)
1932 line(f," Using maximum sized memory accesses where possible");
1933 else
1934 line(f," Reading and writing pixel values separately");
1935
1936 line(f," Pointer size = %d bits",a->pbits);
1937 cr(f);
1938
1939 for (e = 0; e < a->nords; e++) {
1940 line(f," Ordinal size %2d bits is known as '%s'",
1941 a->ords[e].bits,a->ords[e].name);
1942 }
1943 line(f," Natural ordinal is '%s'", a->ords[a->natord].name);
1944 cr(f);
1945
1946 for (e = 0; e < a->nints; e++) {
1947 line(f," Integer size %2d bits is known as '%s'",
1948 a->ints[e].bits,a->ints[e].name);
1949 }
1950 line(f," Natural integer is '%s'", a->ints[a->natint].name);
1951 cr(f);
1952
1953 line(f," */");
1954 cr(f);
1955 }
1956
1957
1958 /* ---------------------------------------- */
1959 /* Architecture support */
1960 /* Find an ordinal with at least bits size */
1961 /* Return -1 if failed */
1962 int findord(
1963 fileo *f,
1964 int bits
1965 ) {
1966 mach_arch *a = f->a;
1967 int i;
1968
1969 for (i = 0; i < a->nords; i++) {
1970 if (a->ords[i].bits >= bits)
1971 return i;
1972 }
1973 return -1;
1974 }
1975
1976 /* Round ordinal type up to natural size */
1977 int nord(
1978 fileo *f,
1979 int ov
1980 ) {
1981 if (ov >= 0 && ov < f->a->natord)
1982 ov = f->a->natord;
1983 return ov;
1984 }
1985
1986 /* Find an ordinal with at least bits size, */
1987 /* or natural size, whichever is greater. */
1988 /* Return -1 if failed */
1989 int findnord(
1990 fileo *f,
1991 int bits
1992 ) {
1993 int ov;
1994
1995 ov = findord(f, bits);
1996 ov = nord(f, ov);
1997 return ov;
1998 }
1999
2000 /* Find an integer with at least bits size */
2001 /* Return -1 if failed */
2002 int findint(
2003 fileo *f,
2004 int bits
2005 ) {
2006 mach_arch *a = f->a;
2007 int i;
2008
2009 for (i = 0; i < a->nints; i++) {
2010 if (a->ints[i].bits >= bits)
2011 return i;
2012 }
2013 return -1;
2014 }
2015
2016 /* Round integer type up to natural size */
2017 int nint(
2018 fileo *f,
2019 int iv
2020 ) {
2021 if (iv >= 0 && iv < f->a->natint)
2022 iv = f->a->natint;
2023 return iv;
2024 }
2025
2026 /* Find an interger with at least bits size, */
2027 /* or natural size, whichever is greater. */
2028 /* Return -1 if failed */
2029 int findnint(
2030 fileo *f,
2031 int bits
2032 ) {
2033 int iv;
2034
2035 iv = findint(f, bits);
2036 iv = nint(f, iv);
2037 return iv;
2038 }
2039
2040
2041 /* ------------------------------------ */
2042 /* File output support */
2043
2044 /* Output a line to the file (including trailing \n) */
2045 void
2046 line(fileo *f, char *fmt, ...)
2047 {
2048 int i;
2049 va_list args;
2050
2051 /* Indent to the correct level */
2052 for (i = 0; i < f->indt; i++)
2053 fprintf(f->of," ");
2054
2055 va_start(args, fmt);
2056 vfprintf(f->of, fmt, args);
2057 va_end(args);
2058 fprintf(f->of, "\n");
2059 }
2060
2061 /* Output the start of a line to the file) */
2062 void
2063 sline(fileo *f, char *fmt, ...)
2064 {
2065 int i;
2066 va_list args;
2067
2068 /* Indent to the correct level */
2069 for (i = 0; i < f->indt; i++)
2070 fprintf(f->of," ");
2071
2072 va_start(args, fmt);
2073 vfprintf(f->of, fmt, args);
2074 va_end(args);
2075 }
2076
2077 /* Output the middle of a line to the file) */
2078 void
2079 mline(fileo *f, char *fmt, ...)
2080 {
2081 va_list args;
2082
2083 va_start(args, fmt);
2084 vfprintf(f->of, fmt, args);
2085 va_end(args);
2086 }
2087
2088 /* Output the end of a line to the file (including trailing \n) */
2089 void
2090 eline(fileo *f, char *fmt, ...)
2091 {
2092 va_list args;
2093
2094 va_start(args, fmt);
2095 vfprintf(f->of, fmt, args);
2096 va_end(args);
2097 fprintf(f->of, "\n");
2098 }
2099
2100 /* Output a line to the file (including trailing \n) */
2101 /* No indent */
2102 void
2103 niline(fileo *f, char *fmt, ...)
2104 {
2105 va_list args;
2106
2107 va_start(args, fmt);
2108 vfprintf(f->of, fmt, args);
2109 va_end(args);
2110 fprintf(f->of, "\n");
2111 }
2112
2113 /* Output one line and increment indent */
2114 void lineinc(fileo *f, char *fmt, ...) {
2115 int i;
2116 va_list args;
2117
2118 /* Indent to the correct level */
2119 for (i = 0; i < f->indt; i++)
2120 fprintf(f->of," ");
2121
2122 va_start(args, fmt);
2123 vfprintf(f->of, fmt, args);
2124 va_end(args);
2125 fprintf(f->of, "\n");
2126 f->indt++;
2127 }
2128
2129 /* Decrement indent and output one line */
2130 void decline(fileo *f, char *fmt, ...) {
2131 int i;
2132 va_list args;
2133
2134 f->indt--;
2135 /* Indent to the correct level */
2136 for (i = 0; i < f->indt; i++)
2137 fprintf(f->of," ");
2138
2139 va_start(args, fmt);
2140 vfprintf(f->of, fmt, args);
2141 va_end(args);
2142 fprintf(f->of, "\n");
2143 }
2144
2145
2146 /* ------------------------------------ */
2147
2148
2149
2150
2151