1 /*
2     FLAM3 - cosmic recursive fractal flames
3     Copyright (C) 1992-2009 Spotworks LLC
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 3 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 /* this file is included into flam3.c once for each buffer bit-width */
20 
21 /*
22  * for batch
23  *   generate de filters
24  *   for temporal_sample_batch
25  *     interpolate
26  *     compute colormap
27  *     for subbatch
28  *       compute samples
29  *       buckets += cmap[samples]
30  *   accum += time_filter[temporal_sample_batch] * log[buckets] * de_filter
31  * image = filter(accum)
32  */
33 
34 
35 /* allow this many iterations for settling into attractor */
36 #define FUSE_27 15
37 #define FUSE_28 100
38 #define WHITE_LEVEL 255
39 
40 
41 typedef struct {
42 
43    bucket *b;
44    abucket *accumulate;
45    int width, height, oversample;
46    flam3_de_helper *de;
47    double k1,k2;
48    double curve;
49    int last_thread;
50    int start_row, end_row;
51    flam3_frame *spec;
52    int *aborted;
53    int progress_size;
54 
55 } de_thread_helper;
56 
de_thread(void * dth)57 static void de_thread(void *dth) {
58 
59    de_thread_helper *dthp = (de_thread_helper *)dth;
60    int oversample = dthp->oversample;
61    int ss = (int)floor(oversample / 2.0);
62    int scf = !(oversample & 1);
63    double scfact = pow(oversample/(oversample+1.0), 2.0);
64    int wid=dthp->width;
65    int hig=dthp->height;
66    int ps =dthp->progress_size;
67    int str = (oversample-1)+dthp->start_row;
68    int enr = (oversample-1)+dthp->end_row;
69    int i,j;
70    time_t progress_timer=0;
71    struct timespec pauset;
72    int progress_count = 0;
73    int pixel_num;
74 
75    pauset.tv_sec = 0;
76    pauset.tv_nsec = 100000000;
77 
78    /* Density estimation code */
79    for (j = str; j < enr; j++) {
80       for (i = oversample-1; i < wid-(oversample-1); i++) {
81 
82          int ii,jj;
83          double f_select=0.0;
84          int f_select_int,f_coef_idx;
85          int arr_filt_width;
86          bucket *b;
87          double c[4],ls;
88 
89          b = dthp->b + i + j*wid;
90 
91          /* Don't do anything if there's no hits here */
92          if (b[0][4] == 0 || b[0][3] == 0)
93             continue;
94 
95          /* Count density in ssxss area   */
96          /* Scale if OS>1 for equal iters */
97          for (ii=-ss; ii<=ss; ii++) {
98             for (jj=-ss; jj<=ss; jj++) {
99                b = dthp->b + (i + ii) + (j + jj)*wid;
100                f_select += b[0][4]/255.0;
101             }
102          }
103 
104          if (scf)
105             f_select *= scfact;
106 
107          if (f_select > dthp->de->max_filtered_counts)
108             f_select_int = dthp->de->max_filter_index;
109          else if (f_select<=DE_THRESH)
110             f_select_int = (int)ceil(f_select)-1;
111          else
112             f_select_int = (int)DE_THRESH +
113                (int)floor(pow(f_select-DE_THRESH,dthp->curve));
114 
115          /* If the filter selected below the min specified clamp it to the min */
116          if (f_select_int > dthp->de->max_filter_index)
117             f_select_int = dthp->de->max_filter_index;
118 
119          /* We only have to calculate the values for ~1/8 of the square */
120          f_coef_idx = f_select_int*dthp->de->kernel_size;
121 
122          arr_filt_width = (int)ceil(dthp->de->filter_widths[f_select_int])-1;
123 
124          b = dthp->b + i + j*wid;
125 
126          for (jj=0; jj<=arr_filt_width; jj++) {
127             for (ii=0; ii<=jj; ii++) {
128 
129                /* Skip if coef is 0 */
130                if (dthp->de->filter_coefs[f_coef_idx]==0.0) {
131                   f_coef_idx++;
132                   continue;
133                }
134 
135                c[0] = (double) b[0][0];
136                c[1] = (double) b[0][1];
137                c[2] = (double) b[0][2];
138                c[3] = (double) b[0][3];
139 
140                ls = dthp->de->filter_coefs[f_coef_idx]*(dthp->k1 * log(1.0 + c[3] * dthp->k2))/c[3];
141 
142                c[0] *= ls;
143                c[1] *= ls;
144                c[2] *= ls;
145                c[3] *= ls;
146 
147                if (jj==0 && ii==0) {
148                   add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c);
149                }
150                else if (ii==0) {
151                   add_c_to_accum(dthp->accumulate,i,jj,j,0,wid,hig,c);
152                   add_c_to_accum(dthp->accumulate,i,-jj,j,0,wid,hig,c);
153                   add_c_to_accum(dthp->accumulate,i,0,j,jj,wid,hig,c);
154                   add_c_to_accum(dthp->accumulate,i,0,j,-jj,wid,hig,c);
155                } else if (jj==ii) {
156                   add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c);
157                   add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c);
158                   add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c);
159                   add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c);
160                } else {
161                   add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c);
162                   add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c);
163                   add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c);
164                   add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c);
165                   add_c_to_accum(dthp->accumulate,i,jj,j,ii,wid,hig,c);
166                   add_c_to_accum(dthp->accumulate,i,-jj,j,ii,wid,hig,c);
167                   add_c_to_accum(dthp->accumulate,i,jj,j,-ii,wid,hig,c);
168                   add_c_to_accum(dthp->accumulate,i,-jj,j,-ii,wid,hig,c);
169                }
170 
171                f_coef_idx++;
172 
173             }
174          }
175       }
176 
177       pixel_num = (j-str+1)*wid;
178 
179       if (dthp->last_thread) {
180          /* Standard progress function */
181          if (dthp->spec->verbose && time(NULL) != progress_timer) {
182             progress_timer = time(NULL);
183             fprintf(stderr, "\rdensity estimation: %d/%d          ", j-str, enr-str);
184             fflush(stderr);
185          }
186 
187       }
188       /* Custom progress function */
189       if (dthp->spec->progress && pixel_num > progress_count + ps) {
190 
191          progress_count = ps * floor(pixel_num/(double)ps);
192 
193          if (dthp->last_thread) {
194 
195             int rv = (*dthp->spec->progress)(dthp->spec->progress_parameter,
196 				      100*(j-str)/(double)(enr-str), 1, 0);
197 
198             if (rv==2) { /* PAUSE */
199 
200                *(dthp->aborted) = -1;
201 
202                do {
203 #if defined(_WIN32) /* mingw or msvc */
204 				   Sleep(100);
205 #else
206 				   nanosleep(&pauset,NULL);
207 #endif
208                   rv = (*dthp->spec->progress)(dthp->spec->progress_parameter,
209                      100*(j-str)/(double)(enr-str), 1, 0);
210                } while (rv==2);
211 
212                *(dthp->aborted) = 0;
213 
214             }
215 
216 	        if (rv==1) {
217 			   *(dthp->aborted) = 1;
218 #ifdef HAVE_LIBPTHREAD
219                pthread_exit((void *)0);
220 #else
221                return;
222 #endif
223             }
224          } else {
225 #ifdef HAVE_LIBPTHREAD
226 
227             if (*(dthp->aborted)<0) {
228 
229                 do {
230 #if defined(_WIN32) /* mingw or msvc */
231 				   Sleep(100);
232 #else
233 				   nanosleep(&pauset,NULL);
234 #endif
235                 } while (*(dthp->aborted)<0);
236             }
237 
238             if (*(dthp->aborted)>0) pthread_exit((void *)0);
239 #else
240             if (*(dthp->aborted)>0) return;
241 #endif
242          }
243       }
244 
245    }
246 
247    #ifdef HAVE_LIBPTHREAD
248      pthread_exit((void *)0);
249    #endif
250 
251 }
252 
iter_thread(void * fth)253 static void iter_thread(void *fth) {
254    double sub_batch;
255    int j;
256    flam3_thread_helper *fthp = (flam3_thread_helper *)fth;
257    flam3_iter_constants *ficp = fthp->fic;
258    struct timespec pauset;
259    int SBS = ficp->spec->sub_batch_size;
260    int fuse;
261    int cmap_size = ficp->cmap_size;
262    int cmap_size_m1 = ficp->cmap_size-1;
263 
264    double eta = 0.0;
265 
266    fuse = (ficp->spec->earlyclip) ? FUSE_28 : FUSE_27;
267 
268    pauset.tv_sec = 0;
269    pauset.tv_nsec = 100000000;
270 
271 
272    if (fthp->timer_initialize) {
273    	*(ficp->progress_timer) = 0;
274    	memset(ficp->progress_timer_history,0,64*sizeof(time_t));
275    	memset(ficp->progress_history,0,64*sizeof(double));
276    	*(ficp->progress_history_mark) = 0;
277    }
278 
279    for (sub_batch = 0; sub_batch < ficp->batch_size; sub_batch+=SBS) {
280       int sub_batch_size, badcount;
281       time_t newt = time(NULL);
282       /* sub_batch is double so this is sketchy */
283       sub_batch_size = (sub_batch + SBS > ficp->batch_size) ?
284                            (ficp->batch_size - sub_batch) : SBS;
285 
286       if (fthp->first_thread && newt != *(ficp->progress_timer)) {
287          double percent = 100.0 *
288              ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num)
289              / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches;
290          int old_mark = 0;
291          int ticker;
292 
293          if (ficp->spec->verbose)
294             fprintf(stderr, "\rchaos: %5.1f%%", percent);
295 
296          *(ficp->progress_timer) = newt;
297          if (ficp->progress_timer_history[*(ficp->progress_history_mark)] &&
298                 ficp->progress_history[*(ficp->progress_history_mark)] < percent)
299             old_mark = *(ficp->progress_history_mark);
300 
301          if (percent > 0) {
302             eta = (100 - percent) * (*(ficp->progress_timer) - ficp->progress_timer_history[old_mark])
303                   / (percent - ficp->progress_history[old_mark]);
304 
305             if (ficp->spec->verbose) {
306                ticker = (*(ficp->progress_timer)&1)?':':'.';
307                if (eta < 1000)
308                   ticker = ':';
309                if (eta > 100)
310                   fprintf(stderr, "  ETA%c %.1f minutes", ticker, eta / 60);
311                else
312                   fprintf(stderr, "  ETA%c %ld seconds ", ticker, (long) ceil(eta));
313                fprintf(stderr, "              \r");
314                fflush(stderr);
315             }
316          }
317 
318          ficp->progress_timer_history[*(ficp->progress_history_mark)] = *(ficp->progress_timer);
319          ficp->progress_history[*(ficp->progress_history_mark)] = percent;
320          *(ficp->progress_history_mark) = (*(ficp->progress_history_mark) + 1) % 64;
321       }
322 
323       /* Custom progress function */
324       if (ficp->spec->progress) {
325          if (fthp->first_thread) {
326 
327             int rv;
328 
329             /* Recalculate % done, as the other calculation only updates once per second */
330             double percent = 100.0 *
331                 ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num)
332                 / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches;
333 
334             rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta);
335 
336             if (rv==2) { /* PAUSE */
337 
338                time_t tnow = time(NULL);
339                time_t tend;
340                int lastpt;
341 
342                ficp->aborted = -1;
343 
344                do {
345 #if defined(_WIN32) /* mingw or msvc */
346 				   Sleep(100);
347 #else
348 				   nanosleep(&pauset,NULL);
349 #endif
350                   rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta);
351                } while (rv==2);
352 
353                /* modify the timer history to compensate for the pause */
354                tend = time(NULL)-tnow;
355 
356                ficp->aborted = 0;
357 
358                for (lastpt=0;lastpt<64;lastpt++) {
359                   if (ficp->progress_timer_history[lastpt]) {
360                       ficp->progress_timer_history[lastpt] += tend;
361                   }
362                }
363 
364             }
365 
366             if (rv==1) { /* ABORT */
367 				   ficp->aborted = 1;
368 #ifdef HAVE_LIBPTHREAD
369                pthread_exit((void *)0);
370 #else
371                return;
372 #endif
373             }
374          } else {
375             if (ficp->aborted<0) {
376 
377             do {
378 #if defined(_WIN32) /* mingw or msvc */
379                Sleep(100);
380 #else
381                nanosleep(&pauset,NULL);
382 #endif
383             } while (ficp->aborted==-1);
384             }
385 #ifdef HAVE_LIBPTHREAD
386             if (ficp->aborted>0) pthread_exit((void *)0);
387 #else
388             if (ficp->aborted>0) return;
389 #endif
390          }
391       }
392 
393       /* Seed iterations */
394       fthp->iter_storage[0] = flam3_random_isaac_11(&(fthp->rc));
395       fthp->iter_storage[1] = flam3_random_isaac_11(&(fthp->rc));
396       fthp->iter_storage[2] = flam3_random_isaac_01(&(fthp->rc));
397       fthp->iter_storage[3] = flam3_random_isaac_01(&(fthp->rc));
398 
399       /* Execute iterations */
400       badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, fthp->iter_storage, ficp->xform_distrib, &(fthp->rc));
401 
402       #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS)
403         /* Lock mutex for access to accumulator */
404         pthread_mutex_lock(&ficp->bucket_mutex);
405       #endif
406 
407       /* Add the badcount to the counter */
408       ficp->badvals += badcount;
409 
410       /* Put them in the bucket accumulator */
411       for (j = 0; j < sub_batch_size*4; j+=4) {
412          double p0, p1, p00, p11;
413          double dbl_index0,dbl_frac;
414          double interpcolor[4];
415          int ci, color_index0;
416          double *p = &(fthp->iter_storage[j]);
417          bucket *b;
418 
419          if (fthp->cp.rotate != 0.0) {
420             p00 = p[0] - fthp->cp.rot_center[0];
421             p11 = p[1] - fthp->cp.rot_center[1];
422             p0 = p00 * ficp->rot[0][0] + p11 * ficp->rot[0][1] + fthp->cp.rot_center[0];
423             p1 = p00 * ficp->rot[1][0] + p11 * ficp->rot[1][1] + fthp->cp.rot_center[1];
424          } else {
425             p0 = p[0];
426             p1 = p[1];
427          }
428 
429          if (p0 >= ficp->bounds[0] && p1 >= ficp->bounds[1] && p0 <= ficp->bounds[2] && p1 <= ficp->bounds[3]) {
430 
431             double logvis=1.0;
432             bucket *buckets = (bucket *)(ficp->buckets);
433 
434             /* Skip if invisible */
435             if (p[3]==0)
436                continue;
437             else
438                logvis = p[3];
439 
440             b = buckets + (int)(ficp->ws0 * p0 - ficp->wb0s0) +
441                 ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1);
442 
443 #ifdef USE_FLOAT_INDICES
444             color_index0 = 0;
445 
446             //fprintf(stdout,"%.16f\n",p[2]*256.0);
447 
448             while(color_index0 < cmap_size_m1) {
449             	if (ficp->dmap[color_index0+1].index > p[2])
450             	   break;
451             	else
452             	   color_index0++;
453             }
454 
455             if (p[3]==1.0) {
456                bump_no_overflow(b[0][0], ficp->dmap[color_index0].color[0]);
457                bump_no_overflow(b[0][1], ficp->dmap[color_index0].color[1]);
458                bump_no_overflow(b[0][2], ficp->dmap[color_index0].color[2]);
459                bump_no_overflow(b[0][3], ficp->dmap[color_index0].color[3]);
460                bump_no_overflow(b[0][4], 255.0);
461             } else {
462                bump_no_overflow(b[0][0], logvis*ficp->dmap[color_index0].color[0]);
463                bump_no_overflow(b[0][1], logvis*ficp->dmap[color_index0].color[1]);
464                bump_no_overflow(b[0][2], logvis*ficp->dmap[color_index0].color[2]);
465                bump_no_overflow(b[0][3], logvis*ficp->dmap[color_index0].color[3]);
466                bump_no_overflow(b[0][4], logvis*255.0);
467 #else
468             dbl_index0 = p[2] * cmap_size;
469             color_index0 = (int) (dbl_index0);
470 
471             if (flam3_palette_mode_linear == fthp->cp.palette_mode) {
472                if (color_index0 < 0) {
473                   color_index0 = 0;
474                   dbl_frac = 0;
475                } else if (color_index0 >= cmap_size_m1) {
476                   color_index0 = cmap_size_m1-1;
477                   dbl_frac = 1.0;
478                } else {
479                   /* interpolate between color_index0 and color_index0+1 */
480                   dbl_frac = dbl_index0 - (double)color_index0;
481                }
482 
483                for (ci=0;ci<4;ci++) {
484                   interpcolor[ci] = ficp->dmap[color_index0].color[ci] * (1.0-dbl_frac) +
485                                     ficp->dmap[color_index0+1].color[ci] * dbl_frac;
486                }
487 
488             } else { /* Palette mode step */
489 
490                if (color_index0 < 0) {
491                   color_index0 = 0;
492                } else if (color_index0 >= cmap_size_m1) {
493                   color_index0 = cmap_size_m1;
494                }
495 
496                for (ci=0;ci<4;ci++)
497                   interpcolor[ci] = ficp->dmap[color_index0].color[ci];
498             }
499 
500             if (p[3]==1.0) {
501                bump_no_overflow(b[0][0], interpcolor[0]);
502                bump_no_overflow(b[0][1], interpcolor[1]);
503                bump_no_overflow(b[0][2], interpcolor[2]);
504                bump_no_overflow(b[0][3], interpcolor[3]);
505                bump_no_overflow(b[0][4], 255.0);
506             } else {
507                bump_no_overflow(b[0][0], logvis*interpcolor[0]);
508                bump_no_overflow(b[0][1], logvis*interpcolor[1]);
509                bump_no_overflow(b[0][2], logvis*interpcolor[2]);
510                bump_no_overflow(b[0][3], logvis*interpcolor[3]);
511                bump_no_overflow(b[0][4], logvis*255.0);
512             }
513 #endif
514 
515          }
516       }
517 
518       #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS)
519         /* Release mutex */
520         pthread_mutex_unlock(&ficp->bucket_mutex);
521       #endif
522 
523    }
524    #ifdef HAVE_LIBPTHREAD
525      pthread_exit((void *)0);
526    #endif
527 }
528 
529 static int render_rectangle(flam3_frame *spec, void *out,
530 			     int field, int nchan, int transp, stat_struct *stats) {
531    long nbuckets;
532    int i, j, k, batch_num, temporal_sample_num;
533    double nsamples, batch_size;
534    bucket  *buckets;
535    abucket *accumulate;
536    double *points;
537    double *filter, *temporal_filter, *temporal_deltas, *batch_filter;
538    double ppux=0, ppuy=0;
539    int image_width, image_height;    /* size of the image to produce */
540    int out_width;
541    int filter_width=0;
542    int bytes_per_channel = spec->bytes_per_channel;
543    int oversample;
544    double highpow;
545    int nbatches;
546    int ntemporal_samples;
547    flam3_palette dmap;
548    int gutter_width;
549    double vibrancy = 0.0;
550    double gamma = 0.0;
551    double background[3];
552    int vib_gam_n = 0;
553    time_t progress_began=0;
554    int verbose = spec->verbose;
555    int gnm_idx,max_gnm_de_fw,de_offset;
556    flam3_genome cp;
557    unsigned short *xform_distrib;
558    flam3_iter_constants fic;
559    flam3_thread_helper *fth;
560 #ifdef HAVE_LIBPTHREAD
561    pthread_attr_t pt_attr;
562    pthread_t *myThreads=NULL;
563 #endif
564    int thi;
565    time_t tstart,tend;
566    double sumfilt;
567    char *ai;
568    int cmap_size;
569 
570    char *last_block;
571    size_t memory_rqd;
572 
573    /* Per-render progress timers */
574    time_t progress_timer=0;
575    time_t progress_timer_history[64];
576    double progress_history[64];
577    int progress_history_mark = 0;
578 
579    tstart = time(NULL);
580 
581    fic.badvals = 0;
582    fic.aborted = 0;
583 
584    stats->num_iters = 0;
585 
586    /* correct for apophysis's use of 255 colors in the palette rather than all 256 */
587    cmap_size = 256 - argi("apo_palette",0);
588 
589    memset(&cp,0, sizeof(flam3_genome));
590 
591    /* interpolate and get a control point                      */
592    flam3_interpolate(spec->genomes, spec->ngenomes, spec->time, 0, &cp);
593    oversample = cp.spatial_oversample;
594    highpow = cp.highlight_power;
595    nbatches = cp.nbatches;
596    ntemporal_samples = cp.ntemporal_samples;
597 
598    if (nbatches < 1) {
599        fprintf(stderr, "nbatches must be positive, not %d.\n", nbatches);
600        return(1);
601    }
602 
603    if (oversample < 1) {
604        fprintf(stderr, "oversample must be positive, not %d.\n", oversample);
605        return(1);
606    }
607 
608    /* Initialize the thread helper structures */
609    fth = (flam3_thread_helper *)calloc(spec->nthreads,sizeof(flam3_thread_helper));
610    for (i=0;i<spec->nthreads;i++)
611       fth[i].cp.final_xform_index=-1;
612 
613    /* Set up the output image dimensions, adjusted for scanline */
614    image_width = cp.width;
615    out_width = image_width;
616    if (field) {
617       image_height = cp.height / 2;
618 
619       if (field == flam3_field_odd)
620          out = (unsigned char *)out + nchan * bytes_per_channel * out_width;
621 
622       out_width *= 2;
623    } else
624       image_height = cp.height;
625 
626 
627    /* Spatial Filter kernel creation */
628    filter_width = flam3_create_spatial_filter(spec, field, &filter);
629 
630    /* handle error */
631    if (filter_width<0) {
632       fprintf(stderr,"flam3_create_spatial_filter returned error: aborting\n");
633       return(1);
634    }
635 
636    /* note we must free 'filter' at the end */
637 
638    /* batch filter */
639    /* may want to revisit this at some point */
640    batch_filter = (double *) malloc(sizeof(double) * nbatches);
641    for (i=0; i<nbatches; i++)
642       batch_filter[i]=1.0/(double)nbatches;
643 
644    /* temporal filter - we must free temporal_filter and temporal_deltas at the end */
645    sumfilt = flam3_create_temporal_filter(nbatches*ntemporal_samples,
646                                           cp.temporal_filter_type,
647                                           cp.temporal_filter_exp,
648                                           cp.temporal_filter_width,
649                                           &temporal_filter, &temporal_deltas);
650 
651 
652    /*
653       the number of additional rows of buckets we put at the edge so
654       that the filter doesn't go off the edge
655    */
656    gutter_width = (filter_width - oversample) / 2;
657 
658    /*
659       Check the size of the density estimation filter.
660       If the 'radius' of the density estimation filter is greater than the
661       gutter width, we have to pad with more.  Otherwise, we can use the same value.
662    */
663    max_gnm_de_fw=0;
664    for (gnm_idx = 0; gnm_idx < spec->ngenomes; gnm_idx++) {
665       int this_width = (int)ceil(spec->genomes[gnm_idx].estimator) * oversample;
666       if (this_width > max_gnm_de_fw)
667          max_gnm_de_fw = this_width;
668    }
669 
670    /* Add OS-1 for the averaging at the edges, if it's > 0 already */
671    if (max_gnm_de_fw>0)
672       max_gnm_de_fw += (oversample-1);
673 
674    /* max_gnm_de_fw is now the number of pixels of additional gutter      */
675    /* necessary to appropriately perform the density estimation filtering */
676    /* Check to see if it's greater than the gutter_width                  */
677    if (max_gnm_de_fw > gutter_width) {
678       de_offset = max_gnm_de_fw - gutter_width;
679       gutter_width = max_gnm_de_fw;
680    } else
681       de_offset = 0;
682 
683 
684    /* Allocate the space required to render the image */
685    fic.height = oversample * image_height + 2 * gutter_width;
686    fic.width  = oversample * image_width  + 2 * gutter_width;
687 
688    nbuckets = (long)fic.width * (long)fic.height;
689    memory_rqd = (sizeof(bucket) * nbuckets + sizeof(abucket) * nbuckets +
690                  4 * sizeof(double) * (size_t)(spec->sub_batch_size) * spec->nthreads);
691    last_block = (char *) malloc(memory_rqd);
692    if (NULL == last_block) {
693       fprintf(stderr, "render_rectangle: cannot malloc %g bytes.\n", (double)memory_rqd);
694       fprintf(stderr, "render_rectangle: w=%d h=%d nb=%ld.\n", fic.width, fic.height, nbuckets);
695       return(1);
696    }
697 
698    /* Just free buckets at the end */
699    buckets = (bucket *) last_block;
700    accumulate = (abucket *) (last_block + sizeof(bucket) * nbuckets);
701    points = (double *)  (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets);
702 
703    if (verbose) {
704       fprintf(stderr, "chaos: ");
705       progress_began = time(NULL);
706    }
707 
708    background[0] = background[1] = background[2] = 0.0;
709    memset((char *) accumulate, 0, sizeof(abucket) * nbuckets);
710 
711 
712    /* Batch loop - outermost */
713    for (batch_num = 0; batch_num < nbatches; batch_num++) {
714       double de_time;
715       double sample_density=0.0;
716       double k1, area, k2;
717       flam3_de_helper de;
718 
719       de_time = spec->time + temporal_deltas[batch_num*ntemporal_samples];
720 
721       memset((char *) buckets, 0, sizeof(bucket) * nbuckets);
722 
723       /* interpolate and get a control point                      */
724       /* ONLY FOR DENSITY FILTER WIDTH PURPOSES                   */
725       /* additional interpolation will be done in the temporal_sample loop */
726       flam3_interpolate(spec->genomes, spec->ngenomes, de_time, 0, &cp);
727 
728       /* if instructed to by the genome, create the density estimation */
729       /* filter kernels.  Check boundary conditions as well.           */
730       if (cp.estimator < 0.0 || cp.estimator_minimum < 0.0) {
731          fprintf(stderr,"density estimator filter widths must be >= 0\n");
732          return(1);
733       }
734 
735       if (spec->bits <= 32) {
736          if (cp.estimator > 0.0) {
737             fprintf(stderr, "warning: density estimation disabled with %d bit buffers.\n", spec->bits);
738             cp.estimator = 0.0;
739          }
740       }
741 
742       /* Create DE filters */
743       if (cp.estimator > 0.0) {
744          de = flam3_create_de_filters(cp.estimator,cp.estimator_minimum,
745                                       cp.estimator_curve,oversample);
746          if (de.kernel_size<0) {
747             fprintf(stderr,"de.kernel_size returned 0 - aborting.\n");
748             return(1);
749          }
750       } else
751          de.max_filter_index = 0;
752 
753       /* Temporal sample loop */
754       for (temporal_sample_num = 0; temporal_sample_num < ntemporal_samples; temporal_sample_num++) {
755 
756          double temporal_sample_time;
757          double color_scalar = temporal_filter[batch_num*ntemporal_samples + temporal_sample_num];
758 
759          temporal_sample_time = spec->time +
760             temporal_deltas[batch_num*ntemporal_samples + temporal_sample_num];
761 
762          /* Interpolate and get a control point */
763          flam3_interpolate(spec->genomes, spec->ngenomes, temporal_sample_time, 0, &cp);
764 
765          /* Get the xforms ready to render */
766          if (prepare_precalc_flags(&cp)) {
767             fprintf(stderr,"prepare xform pointers returned error: aborting.\n");
768             return(1);
769          }
770          xform_distrib = flam3_create_xform_distrib(&cp);
771          if (xform_distrib==NULL) {
772             fprintf(stderr,"create xform distrib returned error: aborting.\n");
773             return(1);
774          }
775 
776          /* compute the colormap entries.                             */
777          /* the input colormap is 256 long with entries from 0 to 1.0 */
778          for (j = 0; j < CMAP_SIZE; j++) {
779             dmap[j].index = cp.palette[(j * 256) / CMAP_SIZE].index / 256.0;
780             for (k = 0; k < 4; k++)
781                dmap[j].color[k] = (cp.palette[(j * 256) / CMAP_SIZE].color[k] * WHITE_LEVEL) * color_scalar;
782          }
783 
784          /* compute camera */
785          if (1) {
786             double t0, t1, shift=0.0, corner0, corner1;
787             double scale;
788 
789             if (cp.sample_density <= 0.0) {
790               fprintf(stderr,
791                  "sample density (quality) must be greater than zero,"
792                  " not %g.\n", cp.sample_density);
793               return(1);
794             }
795 
796             scale = pow(2.0, cp.zoom);
797             sample_density = cp.sample_density * scale * scale;
798 
799             ppux = cp.pixels_per_unit * scale;
800             ppuy = field ? (ppux / 2.0) : ppux;
801             ppux /=  spec->pixel_aspect_ratio;
802             switch (field) {
803                case flam3_field_both: shift =  0.0; break;
804                case flam3_field_even: shift = -0.5; break;
805                case flam3_field_odd:  shift =  0.5; break;
806             }
807             shift = shift / ppux;
808             t0 = (double) gutter_width / (oversample * ppux);
809             t1 = (double) gutter_width / (oversample * ppuy);
810             corner0 = cp.center[0] - image_width / ppux / 2.0;
811             corner1 = cp.center[1] - image_height / ppuy / 2.0;
812             fic.bounds[0] = corner0 - t0;
813             fic.bounds[1] = corner1 - t1 + shift;
814             fic.bounds[2] = corner0 + image_width  / ppux + t0;
815             fic.bounds[3] = corner1 + image_height / ppuy + t1 + shift;
816             fic.size[0] = 1.0 / (fic.bounds[2] - fic.bounds[0]);
817             fic.size[1] = 1.0 / (fic.bounds[3] - fic.bounds[1]);
818             fic.rot[0][0] = cos(cp.rotate * 2 * M_PI / 360.0);
819             fic.rot[0][1] = -sin(cp.rotate * 2 * M_PI / 360.0);
820             fic.rot[1][0] = -fic.rot[0][1];
821             fic.rot[1][1] = fic.rot[0][0];
822             fic.ws0 = fic.width * fic.size[0];
823             fic.wb0s0 = fic.ws0 * fic.bounds[0];
824             fic.hs1 = fic.height * fic.size[1];
825             fic.hb1s1 = fic.hs1 * fic.bounds[1];
826 
827          }
828 
829          /* number of samples is based only on the output image size */
830          nsamples = sample_density * image_width * image_height;
831 
832          /* how many of these samples are rendered in this loop? */
833          batch_size = nsamples / (nbatches * ntemporal_samples);
834 
835          stats->num_iters += batch_size;
836 
837          /* Fill in the iter constants */
838          fic.xform_distrib = xform_distrib;
839          fic.spec = spec;
840          fic.batch_size = batch_size / (double)spec->nthreads;
841          fic.temporal_sample_num = temporal_sample_num;
842          fic.ntemporal_samples = ntemporal_samples;
843          fic.batch_num = batch_num;
844          fic.nbatches = nbatches;
845          fic.cmap_size = cmap_size;
846 
847          fic.dmap = (flam3_palette_entry *)dmap;
848          fic.color_scalar = color_scalar;
849          fic.buckets = (void *)buckets;
850 
851          /* Need a timer per job */
852          fic.progress_timer = &progress_timer;
853          fic.progress_timer_history = &(progress_timer_history[0]);
854          fic.progress_history = &(progress_history[0]);
855          fic.progress_history_mark = &progress_history_mark;
856 
857          /* Initialize the thread helper structures */
858          for (thi = 0; thi < spec->nthreads; thi++) {
859 
860             int rk;
861             /* Create a new isaac state for this thread */
862             for (rk = 0; rk < RANDSIZ; rk++)
863                fth[thi].rc.randrsl[rk] = irand(&spec->rc);
864 
865             irandinit(&(fth[thi].rc),1);
866 
867             if (0==thi) {
868 
869                fth[thi].first_thread=1;
870                if (0==batch_num && 0==temporal_sample_num)
871                	fth[thi].timer_initialize = 1;
872                else
873                	fth[thi].timer_initialize = 0;
874 
875             } else {
876                fth[thi].first_thread=0;
877 	         	fth[thi].timer_initialize = 0;
878             }
879 
880             fth[thi].iter_storage = &(points[thi*(spec->sub_batch_size)*4]);
881             fth[thi].fic = &fic;
882             flam3_copy(&(fth[thi].cp),&cp);
883 
884          }
885 
886 #ifdef HAVE_LIBPTHREAD
887          /* Let's make some threads */
888          myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t));
889 
890          #if defined(USE_LOCKS)
891          pthread_mutex_init(&fic.bucket_mutex, NULL);
892          #endif
893 
894          pthread_attr_init(&pt_attr);
895          pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE);
896 
897          for (thi=0; thi <spec->nthreads; thi ++)
898             pthread_create(&myThreads[thi], &pt_attr, (void *)iter_thread, (void *)(&(fth[thi])));
899 
900          pthread_attr_destroy(&pt_attr);
901 
902          /* Wait for them to return */
903          for (thi=0; thi < spec->nthreads; thi++)
904             pthread_join(myThreads[thi], NULL);
905 
906          #if defined(USE_LOCKS)
907          pthread_mutex_destroy(&fic.bucket_mutex);
908          #endif
909 
910          free(myThreads);
911 #else
912          for (thi=0; thi < spec->nthreads; thi++)
913             iter_thread( (void *)(&(fth[thi])) );
914 #endif
915 
916          /* Free the xform_distrib array */
917          free(xform_distrib);
918 
919          if (fic.aborted) {
920             if (verbose) fprintf(stderr, "\naborted!\n");
921             goto done;
922          }
923 
924          vibrancy += cp.vibrancy;
925          gamma += cp.gamma;
926          background[0] += cp.background[0];
927          background[1] += cp.background[1];
928          background[2] += cp.background[2];
929          vib_gam_n++;
930 
931       }
932 
933       k1 =(cp.contrast * cp.brightness *
934       PREFILTER_WHITE * 268.0 * batch_filter[batch_num]) / 256;
935       area = image_width * image_height / (ppux * ppuy);
936       k2 = (oversample * oversample * nbatches) /
937              (cp.contrast * area * WHITE_LEVEL * sample_density * sumfilt);
938 #if 0
939       printf("iw=%d,ih=%d,ppux=%f,ppuy=%f\n",image_width,image_height,ppux,ppuy);
940       printf("contrast=%f, brightness=%f, PREFILTER=%d, temporal_filter=%f\n",
941         cp.contrast, cp.brightness, PREFILTER_WHITE, temporal_filter[batch_num]);
942       printf("oversample=%d, nbatches=%d, area = %f, WHITE_LEVEL=%d, sample_density=%f\n",
943         oversample, nbatches, area, WHITE_LEVEL, sample_density);
944       printf("k1=%f,k2=%15.12f\n",k1,k2);
945 #endif
946 
947       if (de.max_filter_index == 0) {
948 
949          for (j = 0; j < fic.height; j++) {
950             for (i = 0; i < fic.width; i++) {
951 
952                abucket *a = accumulate + i + j * fic.width;
953                bucket *b = buckets + i + j * fic.width;
954                double c[4], ls;
955 
956                c[0] = (double) b[0][0];
957                c[1] = (double) b[0][1];
958                c[2] = (double) b[0][2];
959                c[3] = (double) b[0][3];
960                if (0.0 == c[3])
961                   continue;
962 
963                ls = (k1 * log(1.0 + c[3] * k2))/c[3];
964                c[0] *= ls;
965                c[1] *= ls;
966                c[2] *= ls;
967                c[3] *= ls;
968 
969                abump_no_overflow(a[0][0], c[0]);
970                abump_no_overflow(a[0][1], c[1]);
971                abump_no_overflow(a[0][2], c[2]);
972                abump_no_overflow(a[0][3], c[3]);
973             }
974          }
975       } else {
976 
977          de_thread_helper *deth;
978          int de_aborted=0;
979          int myspan = (fic.height-2*(oversample-1)+1);
980          int swath = myspan/(spec->nthreads);
981 
982          /* Create the de helper structures */
983          deth = (de_thread_helper *)calloc(spec->nthreads,sizeof(de_thread_helper));
984 
985          for (thi=0;thi<(spec->nthreads);thi++) {
986 
987             /* Set up the contents of the helper structure */
988             deth[thi].b = buckets;
989             deth[thi].accumulate = accumulate;
990             deth[thi].width = fic.width;
991             deth[thi].height = fic.height;
992             deth[thi].oversample = oversample;
993             deth[thi].progress_size = spec->sub_batch_size/10;
994             deth[thi].de = &de;
995             deth[thi].k1 = k1;
996             deth[thi].k2 = k2;
997             deth[thi].curve = cp.estimator_curve;
998             deth[thi].spec = spec;
999             deth[thi].aborted = &de_aborted;
1000             if ( (spec->nthreads)>myspan) { /* More threads than rows */
1001                deth[thi].start_row=0;
1002                if (thi==spec->nthreads-1) {
1003                   deth[thi].end_row=myspan;
1004                   deth[thi].last_thread=1;
1005                } else {
1006                   deth[thi].end_row=-1;
1007                   deth[thi].last_thread=0;
1008                }
1009             } else { /* Normal case */
1010                deth[thi].start_row=thi*swath;
1011                deth[thi].end_row=(thi+1)*swath;
1012                if (thi==spec->nthreads-1) {
1013                   deth[thi].end_row=myspan;
1014                   deth[thi].last_thread=1;
1015                } else {
1016                   deth[thi].last_thread=0;
1017                }
1018             }
1019          }
1020 
1021 #ifdef HAVE_LIBPTHREAD
1022          /* Let's make some threads */
1023          myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t));
1024 
1025          pthread_attr_init(&pt_attr);
1026          pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE);
1027 
1028          for (thi=0; thi <spec->nthreads; thi ++)
1029             pthread_create(&myThreads[thi], &pt_attr, (void *)de_thread, (void *)(&(deth[thi])));
1030 
1031          pthread_attr_destroy(&pt_attr);
1032 
1033          /* Wait for them to return */
1034          for (thi=0; thi < spec->nthreads; thi++)
1035             pthread_join(myThreads[thi], NULL);
1036 
1037          free(myThreads);
1038 #else
1039          for (thi=0; thi <spec->nthreads; thi ++)
1040             de_thread((void *)(&(deth[thi])));
1041 #endif
1042 
1043          free(deth);
1044 
1045          if (de_aborted) {
1046             if (verbose) fprintf(stderr, "\naborted!\n");
1047             goto done;
1048          }
1049 
1050       } /* End density estimation loop */
1051 
1052 
1053       /* If allocated, free the de filter memory for the next batch */
1054       if (de.max_filter_index > 0) {
1055          free(de.filter_coefs);
1056          free(de.filter_widths);
1057       }
1058 
1059    }
1060 
1061    if (verbose) {
1062      fprintf(stderr, "\rchaos: 100.0%%  took: %ld seconds   \n", time(NULL) - progress_began);
1063      fprintf(stderr, "filtering...");
1064    }
1065 
1066 
1067    /* filter the accumulation buffer down into the image */
1068    if (1) {
1069       int x, y;
1070       double t[4],newrgb[3];
1071       double g = 1.0 / (gamma / vib_gam_n);
1072       double tmp,a;
1073       double alpha,ls;
1074       int rgbi;
1075 
1076       double linrange = cp.gam_lin_thresh;
1077 
1078       vibrancy /= vib_gam_n;
1079       background[0] /= vib_gam_n/256.0;
1080       background[1] /= vib_gam_n/256.0;
1081       background[2] /= vib_gam_n/256.0;
1082 
1083       /* If we're in the early clip mode, perform this first step to  */
1084       /* apply the gamma correction and clipping before the spat filt */
1085 
1086       if (spec->earlyclip) {
1087 
1088          for (j = 0; j < fic.height; j++) {
1089             for (i = 0; i < fic.width; i++) {
1090 
1091                abucket *ac = accumulate + i + j*fic.width;
1092 
1093                if (ac[0][3]<=0) {
1094                   alpha = 0.0;
1095                   ls = 0.0;
1096                } else {
1097                   tmp=ac[0][3]/PREFILTER_WHITE;
1098                   alpha = flam3_calc_alpha(tmp,g,linrange);
1099                   ls = vibrancy * 256.0 * alpha / tmp;
1100                   if (alpha<0.0) alpha = 0.0;
1101                   if (alpha>1.0) alpha = 1.0;
1102                }
1103 
1104                t[0] = (double)ac[0][0];
1105                t[1] = (double)ac[0][1];
1106                t[2] = (double)ac[0][2];
1107                t[3] = (double)ac[0][3];
1108 
1109                flam3_calc_newrgb(t, ls, highpow, newrgb);
1110 
1111                for (rgbi=0;rgbi<3;rgbi++) {
1112                   a = newrgb[rgbi];
1113                   a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g);
1114                   if (nchan<=3 || transp==0)
1115                      a += ((1.0 - alpha) * background[rgbi]);
1116                   else {
1117                      if (alpha>0)
1118                         a /= alpha;
1119                      else
1120                         a = 0;
1121                   }
1122 
1123                   /* Clamp here to ensure proper filter functionality */
1124                   if (a>255) a = 255;
1125                   if (a<0) a = 0;
1126 
1127                   /* Replace values in accumulation buffer with these new ones */
1128                   ac[0][rgbi] = a;
1129                }
1130 
1131                ac[0][3] = alpha;
1132 
1133             }
1134          }
1135       }
1136 
1137       /* Apply the spatial filter */
1138       y = de_offset;
1139       for (j = 0; j < image_height; j++) {
1140          x = de_offset;
1141          for (i = 0; i < image_width; i++) {
1142             int ii, jj,rgbi;
1143             void *p;
1144             unsigned short *p16;
1145             unsigned char *p8;
1146             t[0] = t[1] = t[2] = t[3] = 0.0;
1147             for (ii = 0; ii < filter_width; ii++) {
1148                for (jj = 0; jj < filter_width; jj++) {
1149                   double k = filter[ii + jj * filter_width];
1150                   abucket *ac = accumulate + x+ii + (y+jj)*fic.width;
1151 
1152 
1153                   t[0] += k * ac[0][0];
1154                   t[1] += k * ac[0][1];
1155                   t[2] += k * ac[0][2];
1156                   t[3] += k * ac[0][3];
1157 
1158 
1159                }
1160             }
1161 
1162             p = (unsigned char *)out + nchan * bytes_per_channel * (i + j * out_width);
1163             p8 = (unsigned char *)p;
1164             p16 = (unsigned short *)p;
1165 
1166             /* The old way, spatial filter first and then clip after gamma */
1167             if (!spec->earlyclip) {
1168 
1169                tmp=t[3]/PREFILTER_WHITE;
1170 
1171                if (t[3]<=0) {
1172                   alpha = 0.0;
1173                   ls = 0.0;
1174                } else {
1175                   alpha = flam3_calc_alpha(tmp,g,linrange);
1176                   ls = vibrancy * 256.0 * alpha / tmp;
1177                   if (alpha<0.0) alpha = 0.0;
1178                   if (alpha>1.0) alpha = 1.0;
1179                }
1180 
1181                flam3_calc_newrgb(t, ls, highpow, newrgb);
1182 
1183                for (rgbi=0;rgbi<3;rgbi++) {
1184                   a = newrgb[rgbi];
1185                   a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g);
1186                   if (nchan<=3 || transp==0)
1187                      a += ((1.0 - alpha) * background[rgbi]);
1188                   else {
1189                      if (alpha>0)
1190                         a /= alpha;
1191                      else
1192                         a = 0;
1193                   }
1194 
1195                   /* Clamp here to ensure proper filter functionality */
1196                   if (a>255) a = 255;
1197                   if (a<0) a = 0;
1198 
1199                   /* Replace values in accumulation buffer with these new ones */
1200                   t[rgbi] = a;
1201                }
1202                t[3] = alpha;
1203             }
1204 
1205             for (rgbi=0;rgbi<3;rgbi++) {
1206 
1207                a = t[rgbi];
1208 
1209                if (a > 255)
1210                   a = 255;
1211                if (a < 0)
1212                   a = 0;
1213 
1214                if (2==bytes_per_channel) {
1215                   a *= 256.0; /* Scales to [0-65280] */
1216                   p16[rgbi] = (unsigned short) a;
1217                } else {
1218                   p8[rgbi] = (unsigned char) a;
1219                }
1220             }
1221 
1222 
1223             if (t[3]>1)
1224                t[3]=1;
1225             if (t[3]<0)
1226                t[3]=0;
1227 
1228             /* alpha */
1229             if (nchan>3) {
1230                if (transp==1) {
1231                   if (2==bytes_per_channel)
1232                      p16[3] = (unsigned short) (t[3] * 65535);
1233                   else
1234                      p8[3] = (unsigned char) (t[3] * 255);
1235                } else {
1236                   if (2==bytes_per_channel)
1237                      p16[3] = 65535;
1238                   else
1239                      p8[3] = 255;
1240                }
1241             }
1242 
1243             x += oversample;
1244          }
1245          y += oversample;
1246       }
1247    }
1248 
1249  done:
1250 
1251    stats->badvals = fic.badvals;
1252 
1253    free(temporal_filter);
1254    free(temporal_deltas);
1255    free(batch_filter);
1256    free(filter);
1257    free(buckets);
1258 //   free(accumulate);
1259 //   free(points);
1260    /* We have to clear the cps in fth first */
1261    for (thi = 0; thi < spec->nthreads; thi++) {
1262       clear_cp(&(fth[thi].cp),0);
1263    }
1264    free(fth);
1265    clear_cp(&cp,0);
1266 
1267    if (getenv("insert_palette")) {
1268      int ph = 100;
1269      if (ph >= image_height) ph = image_height;
1270      /* insert the palette into the image */
1271      for (j = 0; j < ph; j++) {
1272        for (i = 0; i < image_width; i++) {
1273 	 unsigned char *p = (unsigned char *)out + nchan * (i + j * out_width);
1274 	 p[0] = (unsigned char)dmap[i * 256 / image_width].color[0];
1275 	 p[1] = (unsigned char)dmap[i * 256 / image_width].color[1];
1276 	 p[2] = (unsigned char)dmap[i * 256 / image_width].color[2];
1277        }
1278      }
1279    }
1280 
1281    tend = time(NULL);
1282    stats->render_seconds = (int)(tend-tstart);
1283 
1284    return(0);
1285 
1286 }
1287