1 /*
2 FLAM3 - cosmic recursive fractal flames
3 Copyright (C) 1992-2009 Spotworks LLC
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 /* this file is included into flam3.c once for each buffer bit-width */
20
21 /*
22 * for batch
23 * generate de filters
24 * for temporal_sample_batch
25 * interpolate
26 * compute colormap
27 * for subbatch
28 * compute samples
29 * buckets += cmap[samples]
30 * accum += time_filter[temporal_sample_batch] * log[buckets] * de_filter
31 * image = filter(accum)
32 */
33
34
35 /* allow this many iterations for settling into attractor */
36 #define FUSE_27 15
37 #define FUSE_28 100
38 #define WHITE_LEVEL 255
39
40
41 typedef struct {
42
43 bucket *b;
44 abucket *accumulate;
45 int width, height, oversample;
46 flam3_de_helper *de;
47 double k1,k2;
48 double curve;
49 int last_thread;
50 int start_row, end_row;
51 flam3_frame *spec;
52 int *aborted;
53 int progress_size;
54
55 } de_thread_helper;
56
de_thread(void * dth)57 static void de_thread(void *dth) {
58
59 de_thread_helper *dthp = (de_thread_helper *)dth;
60 int oversample = dthp->oversample;
61 int ss = (int)floor(oversample / 2.0);
62 int scf = !(oversample & 1);
63 double scfact = pow(oversample/(oversample+1.0), 2.0);
64 int wid=dthp->width;
65 int hig=dthp->height;
66 int ps =dthp->progress_size;
67 int str = (oversample-1)+dthp->start_row;
68 int enr = (oversample-1)+dthp->end_row;
69 int i,j;
70 time_t progress_timer=0;
71 struct timespec pauset;
72 int progress_count = 0;
73 int pixel_num;
74
75 pauset.tv_sec = 0;
76 pauset.tv_nsec = 100000000;
77
78 /* Density estimation code */
79 for (j = str; j < enr; j++) {
80 for (i = oversample-1; i < wid-(oversample-1); i++) {
81
82 int ii,jj;
83 double f_select=0.0;
84 int f_select_int,f_coef_idx;
85 int arr_filt_width;
86 bucket *b;
87 double c[4],ls;
88
89 b = dthp->b + i + j*wid;
90
91 /* Don't do anything if there's no hits here */
92 if (b[0][4] == 0 || b[0][3] == 0)
93 continue;
94
95 /* Count density in ssxss area */
96 /* Scale if OS>1 for equal iters */
97 for (ii=-ss; ii<=ss; ii++) {
98 for (jj=-ss; jj<=ss; jj++) {
99 b = dthp->b + (i + ii) + (j + jj)*wid;
100 f_select += b[0][4]/255.0;
101 }
102 }
103
104 if (scf)
105 f_select *= scfact;
106
107 if (f_select > dthp->de->max_filtered_counts)
108 f_select_int = dthp->de->max_filter_index;
109 else if (f_select<=DE_THRESH)
110 f_select_int = (int)ceil(f_select)-1;
111 else
112 f_select_int = (int)DE_THRESH +
113 (int)floor(pow(f_select-DE_THRESH,dthp->curve));
114
115 /* If the filter selected below the min specified clamp it to the min */
116 if (f_select_int > dthp->de->max_filter_index)
117 f_select_int = dthp->de->max_filter_index;
118
119 /* We only have to calculate the values for ~1/8 of the square */
120 f_coef_idx = f_select_int*dthp->de->kernel_size;
121
122 arr_filt_width = (int)ceil(dthp->de->filter_widths[f_select_int])-1;
123
124 b = dthp->b + i + j*wid;
125
126 for (jj=0; jj<=arr_filt_width; jj++) {
127 for (ii=0; ii<=jj; ii++) {
128
129 /* Skip if coef is 0 */
130 if (dthp->de->filter_coefs[f_coef_idx]==0.0) {
131 f_coef_idx++;
132 continue;
133 }
134
135 c[0] = (double) b[0][0];
136 c[1] = (double) b[0][1];
137 c[2] = (double) b[0][2];
138 c[3] = (double) b[0][3];
139
140 ls = dthp->de->filter_coefs[f_coef_idx]*(dthp->k1 * log(1.0 + c[3] * dthp->k2))/c[3];
141
142 c[0] *= ls;
143 c[1] *= ls;
144 c[2] *= ls;
145 c[3] *= ls;
146
147 if (jj==0 && ii==0) {
148 add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c);
149 }
150 else if (ii==0) {
151 add_c_to_accum(dthp->accumulate,i,jj,j,0,wid,hig,c);
152 add_c_to_accum(dthp->accumulate,i,-jj,j,0,wid,hig,c);
153 add_c_to_accum(dthp->accumulate,i,0,j,jj,wid,hig,c);
154 add_c_to_accum(dthp->accumulate,i,0,j,-jj,wid,hig,c);
155 } else if (jj==ii) {
156 add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c);
157 add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c);
158 add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c);
159 add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c);
160 } else {
161 add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c);
162 add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c);
163 add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c);
164 add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c);
165 add_c_to_accum(dthp->accumulate,i,jj,j,ii,wid,hig,c);
166 add_c_to_accum(dthp->accumulate,i,-jj,j,ii,wid,hig,c);
167 add_c_to_accum(dthp->accumulate,i,jj,j,-ii,wid,hig,c);
168 add_c_to_accum(dthp->accumulate,i,-jj,j,-ii,wid,hig,c);
169 }
170
171 f_coef_idx++;
172
173 }
174 }
175 }
176
177 pixel_num = (j-str+1)*wid;
178
179 if (dthp->last_thread) {
180 /* Standard progress function */
181 if (dthp->spec->verbose && time(NULL) != progress_timer) {
182 progress_timer = time(NULL);
183 fprintf(stderr, "\rdensity estimation: %d/%d ", j-str, enr-str);
184 fflush(stderr);
185 }
186
187 }
188 /* Custom progress function */
189 if (dthp->spec->progress && pixel_num > progress_count + ps) {
190
191 progress_count = ps * floor(pixel_num/(double)ps);
192
193 if (dthp->last_thread) {
194
195 int rv = (*dthp->spec->progress)(dthp->spec->progress_parameter,
196 100*(j-str)/(double)(enr-str), 1, 0);
197
198 if (rv==2) { /* PAUSE */
199
200 *(dthp->aborted) = -1;
201
202 do {
203 #if defined(_WIN32) /* mingw or msvc */
204 Sleep(100);
205 #else
206 nanosleep(&pauset,NULL);
207 #endif
208 rv = (*dthp->spec->progress)(dthp->spec->progress_parameter,
209 100*(j-str)/(double)(enr-str), 1, 0);
210 } while (rv==2);
211
212 *(dthp->aborted) = 0;
213
214 }
215
216 if (rv==1) {
217 *(dthp->aborted) = 1;
218 #ifdef HAVE_LIBPTHREAD
219 pthread_exit((void *)0);
220 #else
221 return;
222 #endif
223 }
224 } else {
225 #ifdef HAVE_LIBPTHREAD
226
227 if (*(dthp->aborted)<0) {
228
229 do {
230 #if defined(_WIN32) /* mingw or msvc */
231 Sleep(100);
232 #else
233 nanosleep(&pauset,NULL);
234 #endif
235 } while (*(dthp->aborted)<0);
236 }
237
238 if (*(dthp->aborted)>0) pthread_exit((void *)0);
239 #else
240 if (*(dthp->aborted)>0) return;
241 #endif
242 }
243 }
244
245 }
246
247 #ifdef HAVE_LIBPTHREAD
248 pthread_exit((void *)0);
249 #endif
250
251 }
252
iter_thread(void * fth)253 static void iter_thread(void *fth) {
254 double sub_batch;
255 int j;
256 flam3_thread_helper *fthp = (flam3_thread_helper *)fth;
257 flam3_iter_constants *ficp = fthp->fic;
258 struct timespec pauset;
259 int SBS = ficp->spec->sub_batch_size;
260 int fuse;
261 int cmap_size = ficp->cmap_size;
262 int cmap_size_m1 = ficp->cmap_size-1;
263
264 double eta = 0.0;
265
266 fuse = (ficp->spec->earlyclip) ? FUSE_28 : FUSE_27;
267
268 pauset.tv_sec = 0;
269 pauset.tv_nsec = 100000000;
270
271
272 if (fthp->timer_initialize) {
273 *(ficp->progress_timer) = 0;
274 memset(ficp->progress_timer_history,0,64*sizeof(time_t));
275 memset(ficp->progress_history,0,64*sizeof(double));
276 *(ficp->progress_history_mark) = 0;
277 }
278
279 for (sub_batch = 0; sub_batch < ficp->batch_size; sub_batch+=SBS) {
280 int sub_batch_size, badcount;
281 time_t newt = time(NULL);
282 /* sub_batch is double so this is sketchy */
283 sub_batch_size = (sub_batch + SBS > ficp->batch_size) ?
284 (ficp->batch_size - sub_batch) : SBS;
285
286 if (fthp->first_thread && newt != *(ficp->progress_timer)) {
287 double percent = 100.0 *
288 ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num)
289 / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches;
290 int old_mark = 0;
291 int ticker;
292
293 if (ficp->spec->verbose)
294 fprintf(stderr, "\rchaos: %5.1f%%", percent);
295
296 *(ficp->progress_timer) = newt;
297 if (ficp->progress_timer_history[*(ficp->progress_history_mark)] &&
298 ficp->progress_history[*(ficp->progress_history_mark)] < percent)
299 old_mark = *(ficp->progress_history_mark);
300
301 if (percent > 0) {
302 eta = (100 - percent) * (*(ficp->progress_timer) - ficp->progress_timer_history[old_mark])
303 / (percent - ficp->progress_history[old_mark]);
304
305 if (ficp->spec->verbose) {
306 ticker = (*(ficp->progress_timer)&1)?':':'.';
307 if (eta < 1000)
308 ticker = ':';
309 if (eta > 100)
310 fprintf(stderr, " ETA%c %.1f minutes", ticker, eta / 60);
311 else
312 fprintf(stderr, " ETA%c %ld seconds ", ticker, (long) ceil(eta));
313 fprintf(stderr, " \r");
314 fflush(stderr);
315 }
316 }
317
318 ficp->progress_timer_history[*(ficp->progress_history_mark)] = *(ficp->progress_timer);
319 ficp->progress_history[*(ficp->progress_history_mark)] = percent;
320 *(ficp->progress_history_mark) = (*(ficp->progress_history_mark) + 1) % 64;
321 }
322
323 /* Custom progress function */
324 if (ficp->spec->progress) {
325 if (fthp->first_thread) {
326
327 int rv;
328
329 /* Recalculate % done, as the other calculation only updates once per second */
330 double percent = 100.0 *
331 ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num)
332 / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches;
333
334 rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta);
335
336 if (rv==2) { /* PAUSE */
337
338 time_t tnow = time(NULL);
339 time_t tend;
340 int lastpt;
341
342 ficp->aborted = -1;
343
344 do {
345 #if defined(_WIN32) /* mingw or msvc */
346 Sleep(100);
347 #else
348 nanosleep(&pauset,NULL);
349 #endif
350 rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta);
351 } while (rv==2);
352
353 /* modify the timer history to compensate for the pause */
354 tend = time(NULL)-tnow;
355
356 ficp->aborted = 0;
357
358 for (lastpt=0;lastpt<64;lastpt++) {
359 if (ficp->progress_timer_history[lastpt]) {
360 ficp->progress_timer_history[lastpt] += tend;
361 }
362 }
363
364 }
365
366 if (rv==1) { /* ABORT */
367 ficp->aborted = 1;
368 #ifdef HAVE_LIBPTHREAD
369 pthread_exit((void *)0);
370 #else
371 return;
372 #endif
373 }
374 } else {
375 if (ficp->aborted<0) {
376
377 do {
378 #if defined(_WIN32) /* mingw or msvc */
379 Sleep(100);
380 #else
381 nanosleep(&pauset,NULL);
382 #endif
383 } while (ficp->aborted==-1);
384 }
385 #ifdef HAVE_LIBPTHREAD
386 if (ficp->aborted>0) pthread_exit((void *)0);
387 #else
388 if (ficp->aborted>0) return;
389 #endif
390 }
391 }
392
393 /* Seed iterations */
394 fthp->iter_storage[0] = flam3_random_isaac_11(&(fthp->rc));
395 fthp->iter_storage[1] = flam3_random_isaac_11(&(fthp->rc));
396 fthp->iter_storage[2] = flam3_random_isaac_01(&(fthp->rc));
397 fthp->iter_storage[3] = flam3_random_isaac_01(&(fthp->rc));
398
399 /* Execute iterations */
400 badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, fthp->iter_storage, ficp->xform_distrib, &(fthp->rc));
401
402 #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS)
403 /* Lock mutex for access to accumulator */
404 pthread_mutex_lock(&ficp->bucket_mutex);
405 #endif
406
407 /* Add the badcount to the counter */
408 ficp->badvals += badcount;
409
410 /* Put them in the bucket accumulator */
411 for (j = 0; j < sub_batch_size*4; j+=4) {
412 double p0, p1, p00, p11;
413 double dbl_index0,dbl_frac;
414 double interpcolor[4];
415 int ci, color_index0;
416 double *p = &(fthp->iter_storage[j]);
417 bucket *b;
418
419 if (fthp->cp.rotate != 0.0) {
420 p00 = p[0] - fthp->cp.rot_center[0];
421 p11 = p[1] - fthp->cp.rot_center[1];
422 p0 = p00 * ficp->rot[0][0] + p11 * ficp->rot[0][1] + fthp->cp.rot_center[0];
423 p1 = p00 * ficp->rot[1][0] + p11 * ficp->rot[1][1] + fthp->cp.rot_center[1];
424 } else {
425 p0 = p[0];
426 p1 = p[1];
427 }
428
429 if (p0 >= ficp->bounds[0] && p1 >= ficp->bounds[1] && p0 <= ficp->bounds[2] && p1 <= ficp->bounds[3]) {
430
431 double logvis=1.0;
432 bucket *buckets = (bucket *)(ficp->buckets);
433
434 /* Skip if invisible */
435 if (p[3]==0)
436 continue;
437 else
438 logvis = p[3];
439
440 b = buckets + (int)(ficp->ws0 * p0 - ficp->wb0s0) +
441 ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1);
442
443 #ifdef USE_FLOAT_INDICES
444 color_index0 = 0;
445
446 //fprintf(stdout,"%.16f\n",p[2]*256.0);
447
448 while(color_index0 < cmap_size_m1) {
449 if (ficp->dmap[color_index0+1].index > p[2])
450 break;
451 else
452 color_index0++;
453 }
454
455 if (p[3]==1.0) {
456 bump_no_overflow(b[0][0], ficp->dmap[color_index0].color[0]);
457 bump_no_overflow(b[0][1], ficp->dmap[color_index0].color[1]);
458 bump_no_overflow(b[0][2], ficp->dmap[color_index0].color[2]);
459 bump_no_overflow(b[0][3], ficp->dmap[color_index0].color[3]);
460 bump_no_overflow(b[0][4], 255.0);
461 } else {
462 bump_no_overflow(b[0][0], logvis*ficp->dmap[color_index0].color[0]);
463 bump_no_overflow(b[0][1], logvis*ficp->dmap[color_index0].color[1]);
464 bump_no_overflow(b[0][2], logvis*ficp->dmap[color_index0].color[2]);
465 bump_no_overflow(b[0][3], logvis*ficp->dmap[color_index0].color[3]);
466 bump_no_overflow(b[0][4], logvis*255.0);
467 #else
468 dbl_index0 = p[2] * cmap_size;
469 color_index0 = (int) (dbl_index0);
470
471 if (flam3_palette_mode_linear == fthp->cp.palette_mode) {
472 if (color_index0 < 0) {
473 color_index0 = 0;
474 dbl_frac = 0;
475 } else if (color_index0 >= cmap_size_m1) {
476 color_index0 = cmap_size_m1-1;
477 dbl_frac = 1.0;
478 } else {
479 /* interpolate between color_index0 and color_index0+1 */
480 dbl_frac = dbl_index0 - (double)color_index0;
481 }
482
483 for (ci=0;ci<4;ci++) {
484 interpcolor[ci] = ficp->dmap[color_index0].color[ci] * (1.0-dbl_frac) +
485 ficp->dmap[color_index0+1].color[ci] * dbl_frac;
486 }
487
488 } else { /* Palette mode step */
489
490 if (color_index0 < 0) {
491 color_index0 = 0;
492 } else if (color_index0 >= cmap_size_m1) {
493 color_index0 = cmap_size_m1;
494 }
495
496 for (ci=0;ci<4;ci++)
497 interpcolor[ci] = ficp->dmap[color_index0].color[ci];
498 }
499
500 if (p[3]==1.0) {
501 bump_no_overflow(b[0][0], interpcolor[0]);
502 bump_no_overflow(b[0][1], interpcolor[1]);
503 bump_no_overflow(b[0][2], interpcolor[2]);
504 bump_no_overflow(b[0][3], interpcolor[3]);
505 bump_no_overflow(b[0][4], 255.0);
506 } else {
507 bump_no_overflow(b[0][0], logvis*interpcolor[0]);
508 bump_no_overflow(b[0][1], logvis*interpcolor[1]);
509 bump_no_overflow(b[0][2], logvis*interpcolor[2]);
510 bump_no_overflow(b[0][3], logvis*interpcolor[3]);
511 bump_no_overflow(b[0][4], logvis*255.0);
512 }
513 #endif
514
515 }
516 }
517
518 #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS)
519 /* Release mutex */
520 pthread_mutex_unlock(&ficp->bucket_mutex);
521 #endif
522
523 }
524 #ifdef HAVE_LIBPTHREAD
525 pthread_exit((void *)0);
526 #endif
527 }
528
529 static int render_rectangle(flam3_frame *spec, void *out,
530 int field, int nchan, int transp, stat_struct *stats) {
531 long nbuckets;
532 int i, j, k, batch_num, temporal_sample_num;
533 double nsamples, batch_size;
534 bucket *buckets;
535 abucket *accumulate;
536 double *points;
537 double *filter, *temporal_filter, *temporal_deltas, *batch_filter;
538 double ppux=0, ppuy=0;
539 int image_width, image_height; /* size of the image to produce */
540 int out_width;
541 int filter_width=0;
542 int bytes_per_channel = spec->bytes_per_channel;
543 int oversample;
544 double highpow;
545 int nbatches;
546 int ntemporal_samples;
547 flam3_palette dmap;
548 int gutter_width;
549 double vibrancy = 0.0;
550 double gamma = 0.0;
551 double background[3];
552 int vib_gam_n = 0;
553 time_t progress_began=0;
554 int verbose = spec->verbose;
555 int gnm_idx,max_gnm_de_fw,de_offset;
556 flam3_genome cp;
557 unsigned short *xform_distrib;
558 flam3_iter_constants fic;
559 flam3_thread_helper *fth;
560 #ifdef HAVE_LIBPTHREAD
561 pthread_attr_t pt_attr;
562 pthread_t *myThreads=NULL;
563 #endif
564 int thi;
565 time_t tstart,tend;
566 double sumfilt;
567 char *ai;
568 int cmap_size;
569
570 char *last_block;
571 size_t memory_rqd;
572
573 /* Per-render progress timers */
574 time_t progress_timer=0;
575 time_t progress_timer_history[64];
576 double progress_history[64];
577 int progress_history_mark = 0;
578
579 tstart = time(NULL);
580
581 fic.badvals = 0;
582 fic.aborted = 0;
583
584 stats->num_iters = 0;
585
586 /* correct for apophysis's use of 255 colors in the palette rather than all 256 */
587 cmap_size = 256 - argi("apo_palette",0);
588
589 memset(&cp,0, sizeof(flam3_genome));
590
591 /* interpolate and get a control point */
592 flam3_interpolate(spec->genomes, spec->ngenomes, spec->time, 0, &cp);
593 oversample = cp.spatial_oversample;
594 highpow = cp.highlight_power;
595 nbatches = cp.nbatches;
596 ntemporal_samples = cp.ntemporal_samples;
597
598 if (nbatches < 1) {
599 fprintf(stderr, "nbatches must be positive, not %d.\n", nbatches);
600 return(1);
601 }
602
603 if (oversample < 1) {
604 fprintf(stderr, "oversample must be positive, not %d.\n", oversample);
605 return(1);
606 }
607
608 /* Initialize the thread helper structures */
609 fth = (flam3_thread_helper *)calloc(spec->nthreads,sizeof(flam3_thread_helper));
610 for (i=0;i<spec->nthreads;i++)
611 fth[i].cp.final_xform_index=-1;
612
613 /* Set up the output image dimensions, adjusted for scanline */
614 image_width = cp.width;
615 out_width = image_width;
616 if (field) {
617 image_height = cp.height / 2;
618
619 if (field == flam3_field_odd)
620 out = (unsigned char *)out + nchan * bytes_per_channel * out_width;
621
622 out_width *= 2;
623 } else
624 image_height = cp.height;
625
626
627 /* Spatial Filter kernel creation */
628 filter_width = flam3_create_spatial_filter(spec, field, &filter);
629
630 /* handle error */
631 if (filter_width<0) {
632 fprintf(stderr,"flam3_create_spatial_filter returned error: aborting\n");
633 return(1);
634 }
635
636 /* note we must free 'filter' at the end */
637
638 /* batch filter */
639 /* may want to revisit this at some point */
640 batch_filter = (double *) malloc(sizeof(double) * nbatches);
641 for (i=0; i<nbatches; i++)
642 batch_filter[i]=1.0/(double)nbatches;
643
644 /* temporal filter - we must free temporal_filter and temporal_deltas at the end */
645 sumfilt = flam3_create_temporal_filter(nbatches*ntemporal_samples,
646 cp.temporal_filter_type,
647 cp.temporal_filter_exp,
648 cp.temporal_filter_width,
649 &temporal_filter, &temporal_deltas);
650
651
652 /*
653 the number of additional rows of buckets we put at the edge so
654 that the filter doesn't go off the edge
655 */
656 gutter_width = (filter_width - oversample) / 2;
657
658 /*
659 Check the size of the density estimation filter.
660 If the 'radius' of the density estimation filter is greater than the
661 gutter width, we have to pad with more. Otherwise, we can use the same value.
662 */
663 max_gnm_de_fw=0;
664 for (gnm_idx = 0; gnm_idx < spec->ngenomes; gnm_idx++) {
665 int this_width = (int)ceil(spec->genomes[gnm_idx].estimator) * oversample;
666 if (this_width > max_gnm_de_fw)
667 max_gnm_de_fw = this_width;
668 }
669
670 /* Add OS-1 for the averaging at the edges, if it's > 0 already */
671 if (max_gnm_de_fw>0)
672 max_gnm_de_fw += (oversample-1);
673
674 /* max_gnm_de_fw is now the number of pixels of additional gutter */
675 /* necessary to appropriately perform the density estimation filtering */
676 /* Check to see if it's greater than the gutter_width */
677 if (max_gnm_de_fw > gutter_width) {
678 de_offset = max_gnm_de_fw - gutter_width;
679 gutter_width = max_gnm_de_fw;
680 } else
681 de_offset = 0;
682
683
684 /* Allocate the space required to render the image */
685 fic.height = oversample * image_height + 2 * gutter_width;
686 fic.width = oversample * image_width + 2 * gutter_width;
687
688 nbuckets = (long)fic.width * (long)fic.height;
689 memory_rqd = (sizeof(bucket) * nbuckets + sizeof(abucket) * nbuckets +
690 4 * sizeof(double) * (size_t)(spec->sub_batch_size) * spec->nthreads);
691 last_block = (char *) malloc(memory_rqd);
692 if (NULL == last_block) {
693 fprintf(stderr, "render_rectangle: cannot malloc %g bytes.\n", (double)memory_rqd);
694 fprintf(stderr, "render_rectangle: w=%d h=%d nb=%ld.\n", fic.width, fic.height, nbuckets);
695 return(1);
696 }
697
698 /* Just free buckets at the end */
699 buckets = (bucket *) last_block;
700 accumulate = (abucket *) (last_block + sizeof(bucket) * nbuckets);
701 points = (double *) (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets);
702
703 if (verbose) {
704 fprintf(stderr, "chaos: ");
705 progress_began = time(NULL);
706 }
707
708 background[0] = background[1] = background[2] = 0.0;
709 memset((char *) accumulate, 0, sizeof(abucket) * nbuckets);
710
711
712 /* Batch loop - outermost */
713 for (batch_num = 0; batch_num < nbatches; batch_num++) {
714 double de_time;
715 double sample_density=0.0;
716 double k1, area, k2;
717 flam3_de_helper de;
718
719 de_time = spec->time + temporal_deltas[batch_num*ntemporal_samples];
720
721 memset((char *) buckets, 0, sizeof(bucket) * nbuckets);
722
723 /* interpolate and get a control point */
724 /* ONLY FOR DENSITY FILTER WIDTH PURPOSES */
725 /* additional interpolation will be done in the temporal_sample loop */
726 flam3_interpolate(spec->genomes, spec->ngenomes, de_time, 0, &cp);
727
728 /* if instructed to by the genome, create the density estimation */
729 /* filter kernels. Check boundary conditions as well. */
730 if (cp.estimator < 0.0 || cp.estimator_minimum < 0.0) {
731 fprintf(stderr,"density estimator filter widths must be >= 0\n");
732 return(1);
733 }
734
735 if (spec->bits <= 32) {
736 if (cp.estimator > 0.0) {
737 fprintf(stderr, "warning: density estimation disabled with %d bit buffers.\n", spec->bits);
738 cp.estimator = 0.0;
739 }
740 }
741
742 /* Create DE filters */
743 if (cp.estimator > 0.0) {
744 de = flam3_create_de_filters(cp.estimator,cp.estimator_minimum,
745 cp.estimator_curve,oversample);
746 if (de.kernel_size<0) {
747 fprintf(stderr,"de.kernel_size returned 0 - aborting.\n");
748 return(1);
749 }
750 } else
751 de.max_filter_index = 0;
752
753 /* Temporal sample loop */
754 for (temporal_sample_num = 0; temporal_sample_num < ntemporal_samples; temporal_sample_num++) {
755
756 double temporal_sample_time;
757 double color_scalar = temporal_filter[batch_num*ntemporal_samples + temporal_sample_num];
758
759 temporal_sample_time = spec->time +
760 temporal_deltas[batch_num*ntemporal_samples + temporal_sample_num];
761
762 /* Interpolate and get a control point */
763 flam3_interpolate(spec->genomes, spec->ngenomes, temporal_sample_time, 0, &cp);
764
765 /* Get the xforms ready to render */
766 if (prepare_precalc_flags(&cp)) {
767 fprintf(stderr,"prepare xform pointers returned error: aborting.\n");
768 return(1);
769 }
770 xform_distrib = flam3_create_xform_distrib(&cp);
771 if (xform_distrib==NULL) {
772 fprintf(stderr,"create xform distrib returned error: aborting.\n");
773 return(1);
774 }
775
776 /* compute the colormap entries. */
777 /* the input colormap is 256 long with entries from 0 to 1.0 */
778 for (j = 0; j < CMAP_SIZE; j++) {
779 dmap[j].index = cp.palette[(j * 256) / CMAP_SIZE].index / 256.0;
780 for (k = 0; k < 4; k++)
781 dmap[j].color[k] = (cp.palette[(j * 256) / CMAP_SIZE].color[k] * WHITE_LEVEL) * color_scalar;
782 }
783
784 /* compute camera */
785 if (1) {
786 double t0, t1, shift=0.0, corner0, corner1;
787 double scale;
788
789 if (cp.sample_density <= 0.0) {
790 fprintf(stderr,
791 "sample density (quality) must be greater than zero,"
792 " not %g.\n", cp.sample_density);
793 return(1);
794 }
795
796 scale = pow(2.0, cp.zoom);
797 sample_density = cp.sample_density * scale * scale;
798
799 ppux = cp.pixels_per_unit * scale;
800 ppuy = field ? (ppux / 2.0) : ppux;
801 ppux /= spec->pixel_aspect_ratio;
802 switch (field) {
803 case flam3_field_both: shift = 0.0; break;
804 case flam3_field_even: shift = -0.5; break;
805 case flam3_field_odd: shift = 0.5; break;
806 }
807 shift = shift / ppux;
808 t0 = (double) gutter_width / (oversample * ppux);
809 t1 = (double) gutter_width / (oversample * ppuy);
810 corner0 = cp.center[0] - image_width / ppux / 2.0;
811 corner1 = cp.center[1] - image_height / ppuy / 2.0;
812 fic.bounds[0] = corner0 - t0;
813 fic.bounds[1] = corner1 - t1 + shift;
814 fic.bounds[2] = corner0 + image_width / ppux + t0;
815 fic.bounds[3] = corner1 + image_height / ppuy + t1 + shift;
816 fic.size[0] = 1.0 / (fic.bounds[2] - fic.bounds[0]);
817 fic.size[1] = 1.0 / (fic.bounds[3] - fic.bounds[1]);
818 fic.rot[0][0] = cos(cp.rotate * 2 * M_PI / 360.0);
819 fic.rot[0][1] = -sin(cp.rotate * 2 * M_PI / 360.0);
820 fic.rot[1][0] = -fic.rot[0][1];
821 fic.rot[1][1] = fic.rot[0][0];
822 fic.ws0 = fic.width * fic.size[0];
823 fic.wb0s0 = fic.ws0 * fic.bounds[0];
824 fic.hs1 = fic.height * fic.size[1];
825 fic.hb1s1 = fic.hs1 * fic.bounds[1];
826
827 }
828
829 /* number of samples is based only on the output image size */
830 nsamples = sample_density * image_width * image_height;
831
832 /* how many of these samples are rendered in this loop? */
833 batch_size = nsamples / (nbatches * ntemporal_samples);
834
835 stats->num_iters += batch_size;
836
837 /* Fill in the iter constants */
838 fic.xform_distrib = xform_distrib;
839 fic.spec = spec;
840 fic.batch_size = batch_size / (double)spec->nthreads;
841 fic.temporal_sample_num = temporal_sample_num;
842 fic.ntemporal_samples = ntemporal_samples;
843 fic.batch_num = batch_num;
844 fic.nbatches = nbatches;
845 fic.cmap_size = cmap_size;
846
847 fic.dmap = (flam3_palette_entry *)dmap;
848 fic.color_scalar = color_scalar;
849 fic.buckets = (void *)buckets;
850
851 /* Need a timer per job */
852 fic.progress_timer = &progress_timer;
853 fic.progress_timer_history = &(progress_timer_history[0]);
854 fic.progress_history = &(progress_history[0]);
855 fic.progress_history_mark = &progress_history_mark;
856
857 /* Initialize the thread helper structures */
858 for (thi = 0; thi < spec->nthreads; thi++) {
859
860 int rk;
861 /* Create a new isaac state for this thread */
862 for (rk = 0; rk < RANDSIZ; rk++)
863 fth[thi].rc.randrsl[rk] = irand(&spec->rc);
864
865 irandinit(&(fth[thi].rc),1);
866
867 if (0==thi) {
868
869 fth[thi].first_thread=1;
870 if (0==batch_num && 0==temporal_sample_num)
871 fth[thi].timer_initialize = 1;
872 else
873 fth[thi].timer_initialize = 0;
874
875 } else {
876 fth[thi].first_thread=0;
877 fth[thi].timer_initialize = 0;
878 }
879
880 fth[thi].iter_storage = &(points[thi*(spec->sub_batch_size)*4]);
881 fth[thi].fic = &fic;
882 flam3_copy(&(fth[thi].cp),&cp);
883
884 }
885
886 #ifdef HAVE_LIBPTHREAD
887 /* Let's make some threads */
888 myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t));
889
890 #if defined(USE_LOCKS)
891 pthread_mutex_init(&fic.bucket_mutex, NULL);
892 #endif
893
894 pthread_attr_init(&pt_attr);
895 pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE);
896
897 for (thi=0; thi <spec->nthreads; thi ++)
898 pthread_create(&myThreads[thi], &pt_attr, (void *)iter_thread, (void *)(&(fth[thi])));
899
900 pthread_attr_destroy(&pt_attr);
901
902 /* Wait for them to return */
903 for (thi=0; thi < spec->nthreads; thi++)
904 pthread_join(myThreads[thi], NULL);
905
906 #if defined(USE_LOCKS)
907 pthread_mutex_destroy(&fic.bucket_mutex);
908 #endif
909
910 free(myThreads);
911 #else
912 for (thi=0; thi < spec->nthreads; thi++)
913 iter_thread( (void *)(&(fth[thi])) );
914 #endif
915
916 /* Free the xform_distrib array */
917 free(xform_distrib);
918
919 if (fic.aborted) {
920 if (verbose) fprintf(stderr, "\naborted!\n");
921 goto done;
922 }
923
924 vibrancy += cp.vibrancy;
925 gamma += cp.gamma;
926 background[0] += cp.background[0];
927 background[1] += cp.background[1];
928 background[2] += cp.background[2];
929 vib_gam_n++;
930
931 }
932
933 k1 =(cp.contrast * cp.brightness *
934 PREFILTER_WHITE * 268.0 * batch_filter[batch_num]) / 256;
935 area = image_width * image_height / (ppux * ppuy);
936 k2 = (oversample * oversample * nbatches) /
937 (cp.contrast * area * WHITE_LEVEL * sample_density * sumfilt);
938 #if 0
939 printf("iw=%d,ih=%d,ppux=%f,ppuy=%f\n",image_width,image_height,ppux,ppuy);
940 printf("contrast=%f, brightness=%f, PREFILTER=%d, temporal_filter=%f\n",
941 cp.contrast, cp.brightness, PREFILTER_WHITE, temporal_filter[batch_num]);
942 printf("oversample=%d, nbatches=%d, area = %f, WHITE_LEVEL=%d, sample_density=%f\n",
943 oversample, nbatches, area, WHITE_LEVEL, sample_density);
944 printf("k1=%f,k2=%15.12f\n",k1,k2);
945 #endif
946
947 if (de.max_filter_index == 0) {
948
949 for (j = 0; j < fic.height; j++) {
950 for (i = 0; i < fic.width; i++) {
951
952 abucket *a = accumulate + i + j * fic.width;
953 bucket *b = buckets + i + j * fic.width;
954 double c[4], ls;
955
956 c[0] = (double) b[0][0];
957 c[1] = (double) b[0][1];
958 c[2] = (double) b[0][2];
959 c[3] = (double) b[0][3];
960 if (0.0 == c[3])
961 continue;
962
963 ls = (k1 * log(1.0 + c[3] * k2))/c[3];
964 c[0] *= ls;
965 c[1] *= ls;
966 c[2] *= ls;
967 c[3] *= ls;
968
969 abump_no_overflow(a[0][0], c[0]);
970 abump_no_overflow(a[0][1], c[1]);
971 abump_no_overflow(a[0][2], c[2]);
972 abump_no_overflow(a[0][3], c[3]);
973 }
974 }
975 } else {
976
977 de_thread_helper *deth;
978 int de_aborted=0;
979 int myspan = (fic.height-2*(oversample-1)+1);
980 int swath = myspan/(spec->nthreads);
981
982 /* Create the de helper structures */
983 deth = (de_thread_helper *)calloc(spec->nthreads,sizeof(de_thread_helper));
984
985 for (thi=0;thi<(spec->nthreads);thi++) {
986
987 /* Set up the contents of the helper structure */
988 deth[thi].b = buckets;
989 deth[thi].accumulate = accumulate;
990 deth[thi].width = fic.width;
991 deth[thi].height = fic.height;
992 deth[thi].oversample = oversample;
993 deth[thi].progress_size = spec->sub_batch_size/10;
994 deth[thi].de = &de;
995 deth[thi].k1 = k1;
996 deth[thi].k2 = k2;
997 deth[thi].curve = cp.estimator_curve;
998 deth[thi].spec = spec;
999 deth[thi].aborted = &de_aborted;
1000 if ( (spec->nthreads)>myspan) { /* More threads than rows */
1001 deth[thi].start_row=0;
1002 if (thi==spec->nthreads-1) {
1003 deth[thi].end_row=myspan;
1004 deth[thi].last_thread=1;
1005 } else {
1006 deth[thi].end_row=-1;
1007 deth[thi].last_thread=0;
1008 }
1009 } else { /* Normal case */
1010 deth[thi].start_row=thi*swath;
1011 deth[thi].end_row=(thi+1)*swath;
1012 if (thi==spec->nthreads-1) {
1013 deth[thi].end_row=myspan;
1014 deth[thi].last_thread=1;
1015 } else {
1016 deth[thi].last_thread=0;
1017 }
1018 }
1019 }
1020
1021 #ifdef HAVE_LIBPTHREAD
1022 /* Let's make some threads */
1023 myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t));
1024
1025 pthread_attr_init(&pt_attr);
1026 pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE);
1027
1028 for (thi=0; thi <spec->nthreads; thi ++)
1029 pthread_create(&myThreads[thi], &pt_attr, (void *)de_thread, (void *)(&(deth[thi])));
1030
1031 pthread_attr_destroy(&pt_attr);
1032
1033 /* Wait for them to return */
1034 for (thi=0; thi < spec->nthreads; thi++)
1035 pthread_join(myThreads[thi], NULL);
1036
1037 free(myThreads);
1038 #else
1039 for (thi=0; thi <spec->nthreads; thi ++)
1040 de_thread((void *)(&(deth[thi])));
1041 #endif
1042
1043 free(deth);
1044
1045 if (de_aborted) {
1046 if (verbose) fprintf(stderr, "\naborted!\n");
1047 goto done;
1048 }
1049
1050 } /* End density estimation loop */
1051
1052
1053 /* If allocated, free the de filter memory for the next batch */
1054 if (de.max_filter_index > 0) {
1055 free(de.filter_coefs);
1056 free(de.filter_widths);
1057 }
1058
1059 }
1060
1061 if (verbose) {
1062 fprintf(stderr, "\rchaos: 100.0%% took: %ld seconds \n", time(NULL) - progress_began);
1063 fprintf(stderr, "filtering...");
1064 }
1065
1066
1067 /* filter the accumulation buffer down into the image */
1068 if (1) {
1069 int x, y;
1070 double t[4],newrgb[3];
1071 double g = 1.0 / (gamma / vib_gam_n);
1072 double tmp,a;
1073 double alpha,ls;
1074 int rgbi;
1075
1076 double linrange = cp.gam_lin_thresh;
1077
1078 vibrancy /= vib_gam_n;
1079 background[0] /= vib_gam_n/256.0;
1080 background[1] /= vib_gam_n/256.0;
1081 background[2] /= vib_gam_n/256.0;
1082
1083 /* If we're in the early clip mode, perform this first step to */
1084 /* apply the gamma correction and clipping before the spat filt */
1085
1086 if (spec->earlyclip) {
1087
1088 for (j = 0; j < fic.height; j++) {
1089 for (i = 0; i < fic.width; i++) {
1090
1091 abucket *ac = accumulate + i + j*fic.width;
1092
1093 if (ac[0][3]<=0) {
1094 alpha = 0.0;
1095 ls = 0.0;
1096 } else {
1097 tmp=ac[0][3]/PREFILTER_WHITE;
1098 alpha = flam3_calc_alpha(tmp,g,linrange);
1099 ls = vibrancy * 256.0 * alpha / tmp;
1100 if (alpha<0.0) alpha = 0.0;
1101 if (alpha>1.0) alpha = 1.0;
1102 }
1103
1104 t[0] = (double)ac[0][0];
1105 t[1] = (double)ac[0][1];
1106 t[2] = (double)ac[0][2];
1107 t[3] = (double)ac[0][3];
1108
1109 flam3_calc_newrgb(t, ls, highpow, newrgb);
1110
1111 for (rgbi=0;rgbi<3;rgbi++) {
1112 a = newrgb[rgbi];
1113 a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g);
1114 if (nchan<=3 || transp==0)
1115 a += ((1.0 - alpha) * background[rgbi]);
1116 else {
1117 if (alpha>0)
1118 a /= alpha;
1119 else
1120 a = 0;
1121 }
1122
1123 /* Clamp here to ensure proper filter functionality */
1124 if (a>255) a = 255;
1125 if (a<0) a = 0;
1126
1127 /* Replace values in accumulation buffer with these new ones */
1128 ac[0][rgbi] = a;
1129 }
1130
1131 ac[0][3] = alpha;
1132
1133 }
1134 }
1135 }
1136
1137 /* Apply the spatial filter */
1138 y = de_offset;
1139 for (j = 0; j < image_height; j++) {
1140 x = de_offset;
1141 for (i = 0; i < image_width; i++) {
1142 int ii, jj,rgbi;
1143 void *p;
1144 unsigned short *p16;
1145 unsigned char *p8;
1146 t[0] = t[1] = t[2] = t[3] = 0.0;
1147 for (ii = 0; ii < filter_width; ii++) {
1148 for (jj = 0; jj < filter_width; jj++) {
1149 double k = filter[ii + jj * filter_width];
1150 abucket *ac = accumulate + x+ii + (y+jj)*fic.width;
1151
1152
1153 t[0] += k * ac[0][0];
1154 t[1] += k * ac[0][1];
1155 t[2] += k * ac[0][2];
1156 t[3] += k * ac[0][3];
1157
1158
1159 }
1160 }
1161
1162 p = (unsigned char *)out + nchan * bytes_per_channel * (i + j * out_width);
1163 p8 = (unsigned char *)p;
1164 p16 = (unsigned short *)p;
1165
1166 /* The old way, spatial filter first and then clip after gamma */
1167 if (!spec->earlyclip) {
1168
1169 tmp=t[3]/PREFILTER_WHITE;
1170
1171 if (t[3]<=0) {
1172 alpha = 0.0;
1173 ls = 0.0;
1174 } else {
1175 alpha = flam3_calc_alpha(tmp,g,linrange);
1176 ls = vibrancy * 256.0 * alpha / tmp;
1177 if (alpha<0.0) alpha = 0.0;
1178 if (alpha>1.0) alpha = 1.0;
1179 }
1180
1181 flam3_calc_newrgb(t, ls, highpow, newrgb);
1182
1183 for (rgbi=0;rgbi<3;rgbi++) {
1184 a = newrgb[rgbi];
1185 a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g);
1186 if (nchan<=3 || transp==0)
1187 a += ((1.0 - alpha) * background[rgbi]);
1188 else {
1189 if (alpha>0)
1190 a /= alpha;
1191 else
1192 a = 0;
1193 }
1194
1195 /* Clamp here to ensure proper filter functionality */
1196 if (a>255) a = 255;
1197 if (a<0) a = 0;
1198
1199 /* Replace values in accumulation buffer with these new ones */
1200 t[rgbi] = a;
1201 }
1202 t[3] = alpha;
1203 }
1204
1205 for (rgbi=0;rgbi<3;rgbi++) {
1206
1207 a = t[rgbi];
1208
1209 if (a > 255)
1210 a = 255;
1211 if (a < 0)
1212 a = 0;
1213
1214 if (2==bytes_per_channel) {
1215 a *= 256.0; /* Scales to [0-65280] */
1216 p16[rgbi] = (unsigned short) a;
1217 } else {
1218 p8[rgbi] = (unsigned char) a;
1219 }
1220 }
1221
1222
1223 if (t[3]>1)
1224 t[3]=1;
1225 if (t[3]<0)
1226 t[3]=0;
1227
1228 /* alpha */
1229 if (nchan>3) {
1230 if (transp==1) {
1231 if (2==bytes_per_channel)
1232 p16[3] = (unsigned short) (t[3] * 65535);
1233 else
1234 p8[3] = (unsigned char) (t[3] * 255);
1235 } else {
1236 if (2==bytes_per_channel)
1237 p16[3] = 65535;
1238 else
1239 p8[3] = 255;
1240 }
1241 }
1242
1243 x += oversample;
1244 }
1245 y += oversample;
1246 }
1247 }
1248
1249 done:
1250
1251 stats->badvals = fic.badvals;
1252
1253 free(temporal_filter);
1254 free(temporal_deltas);
1255 free(batch_filter);
1256 free(filter);
1257 free(buckets);
1258 // free(accumulate);
1259 // free(points);
1260 /* We have to clear the cps in fth first */
1261 for (thi = 0; thi < spec->nthreads; thi++) {
1262 clear_cp(&(fth[thi].cp),0);
1263 }
1264 free(fth);
1265 clear_cp(&cp,0);
1266
1267 if (getenv("insert_palette")) {
1268 int ph = 100;
1269 if (ph >= image_height) ph = image_height;
1270 /* insert the palette into the image */
1271 for (j = 0; j < ph; j++) {
1272 for (i = 0; i < image_width; i++) {
1273 unsigned char *p = (unsigned char *)out + nchan * (i + j * out_width);
1274 p[0] = (unsigned char)dmap[i * 256 / image_width].color[0];
1275 p[1] = (unsigned char)dmap[i * 256 / image_width].color[1];
1276 p[2] = (unsigned char)dmap[i * 256 / image_width].color[2];
1277 }
1278 }
1279 }
1280
1281 tend = time(NULL);
1282 stats->render_seconds = (int)(tend-tstart);
1283
1284 return(0);
1285
1286 }
1287