1 /**************************************************************************
2 * This file is part of the Fraqtive program
3 * Copyright (C) 2004-2012 Michał Męciński
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 **************************************************************************/
18
19 #include "generatorcore.h"
20
21 #include <qglobal.h>
22
23 #include <math.h>
24 #include <cstdlib>
25
26 #if defined( HAVE_SSE2 )
27 # include <emmintrin.h>
28 #endif
29
30 // see http://wiki.mimec.org/wiki/Fraqtive/Generator_Core for a description of this code
31
32 namespace GeneratorCore
33 {
34
35 static const double BailoutRadius = 64.0;
36
37 #if defined( Q_CC_MSVC )
38 # pragma float_control( precise, off )
39 # pragma intrinsic( log, sqrt, exp, atan2, sin, cos, fabs )
40 #endif
41
42 static const double BailoutLog = log( 2.0 * log( BailoutRadius ) );
43
calculateResult(int maxIterations,int count,double final,double exponent)44 static inline double calculateResult( int maxIterations, int count, double final, double exponent )
45 {
46 if ( count == 0 )
47 return 0.0;
48
49 double value = ( maxIterations - count ) + ( BailoutLog - log( log( sqrt( final ) ) ) ) / log( exponent );
50
51 return sqrt( value );
52 }
53
54 template<Variant VARIANT>
55 static void adjust( double& /*zx*/, double& /*zy*/ );
56
57 template<>
adjust(double &,double &)58 inline void adjust<NormalVariant>( double& /*zx*/, double& /*zy*/ )
59 {
60 }
61
62 template<>
adjust(double &,double & zy)63 inline void adjust<ConjugateVariant>( double& /*zx*/, double& zy )
64 {
65 zy = -zy;
66 }
67
68 template<>
adjust(double & zx,double & zy)69 inline void adjust<AbsoluteVariant>( double& zx, double& zy )
70 {
71 zx = fabs( zx );
72 zy = fabs( zy );
73 }
74
75 template<>
adjust(double &,double & zy)76 inline void adjust<AbsoluteImVariant>( double& /*zx*/, double& zy )
77 {
78 zy = fabs( zy );
79 }
80
81 template<Variant VARIANT>
calculate(double x,double y,double cx,double cy,double exponent,int maxIterations)82 static inline double calculate( double x, double y, double cx, double cy, double exponent, int maxIterations )
83 {
84 double zx = x;
85 double zy = y;
86 double radius;
87
88 double exp2 = 0.5 * exponent;
89
90 for ( int k = maxIterations; k > 0; k-- ) {
91 adjust<VARIANT>( zx, zy );
92
93 double zxx = zx * zx;
94 double zyy = zy * zy;
95 radius = zxx + zyy;
96
97 if ( radius >= BailoutRadius )
98 return calculateResult( maxIterations, k, radius, exponent );
99
100 double z = exp( log( radius ) * exp2 );
101 double fi = exponent * atan2( zy, zx );
102
103 zx = z * cos( fi ) + cx;
104 zy = z * sin( fi ) + cy;
105 }
106
107 return 0.0;
108 }
109
110 #if defined( Q_CC_MSVC )
111 # pragma float_control( precise, on )
112 # pragma function( log, sqrt, exp, atan2, sin, cos, fabs )
113 #endif
114
115 class MandelbrotParams
116 {
117 public:
MandelbrotParams(double exponent)118 MandelbrotParams( double exponent ) :
119 m_exponent( exponent )
120 {
121 }
122
123 protected:
124 double m_exponent;
125 };
126
127 template<Variant VARIANT>
128 class MandelbrotFunctor : public Functor, public MandelbrotParams
129 {
130 public:
MandelbrotFunctor(const MandelbrotParams & params)131 MandelbrotFunctor( const MandelbrotParams& params ) : MandelbrotParams( params )
132 {
133 }
134
operator ()(double zx,double zy,int maxIterations)135 double operator()( double zx, double zy, int maxIterations )
136 {
137 return calculate<VARIANT>( zx, zy, zx, zy, m_exponent, maxIterations );
138 }
139 };
140
141 class JuliaParams : public MandelbrotParams
142 {
143 public:
JuliaParams(double cx,double cy,double exponent)144 JuliaParams( double cx, double cy, double exponent ) : MandelbrotParams( exponent ),
145 m_cx( cx ),
146 m_cy( cy )
147 {
148 }
149
150 protected:
151 double m_cx;
152 double m_cy;
153 };
154
155 template<Variant VARIANT>
156 class JuliaFunctor : public Functor, public JuliaParams
157 {
158 public:
JuliaFunctor(const JuliaParams & params)159 JuliaFunctor( const JuliaParams& params ) : JuliaParams( params )
160 {
161 }
162
operator ()(double zx,double zy,int maxIterations)163 double operator()( double zx, double zy, int maxIterations )
164 {
165 return calculate<VARIANT>( zx, zy, m_cx, m_cy, m_exponent, maxIterations );
166 }
167 };
168
169 template<typename BASE, template<Variant VARIANT> class FACTORY>
170 class VariantDispatcher
171 {
172 public:
173 template<typename PARAMS>
create(Variant variant,const PARAMS & params)174 static BASE* create( Variant variant, const PARAMS& params )
175 {
176 switch ( variant ) {
177 case NormalVariant:
178 return FACTORY<NormalVariant>::create( params );
179 case ConjugateVariant:
180 return FACTORY<ConjugateVariant>::create( params );
181 case AbsoluteVariant:
182 return FACTORY<AbsoluteVariant>::create( params );
183 case AbsoluteImVariant:
184 return FACTORY<AbsoluteImVariant>::create( params );
185 }
186 return NULL;
187 }
188 };
189
190 template<typename BASE, template<Variant VARIANT> class FUNCTOR>
191 class FunctorFactory
192 {
193 public:
194 template<typename PARAMS>
create(Variant variant,const PARAMS & params)195 static BASE* create( Variant variant, const PARAMS& params )
196 {
197 return VariantDispatcher<BASE, InnerFactory>::create( variant, params );
198 }
199
200 private:
201 template<Variant VARIANT>
202 class InnerFactory
203 {
204 public:
205 template<typename PARAMS>
create(const PARAMS & params)206 static BASE* create( const PARAMS& params )
207 {
208 return new FUNCTOR<VARIANT>( params );
209 }
210 };
211 };
212
createMandelbrotFunctor(double exponent,Variant variant)213 Functor* createMandelbrotFunctor( double exponent, Variant variant )
214 {
215 return FunctorFactory<Functor, MandelbrotFunctor>::create( variant, MandelbrotParams( exponent ) );
216 }
217
createJuliaFunctor(double cx,double cy,double exponent,Variant variant)218 Functor* createJuliaFunctor( double cx, double cy, double exponent, Variant variant )
219 {
220 return FunctorFactory<Functor, JuliaFunctor>::create( variant, JuliaParams( cx, cy, exponent ) );
221 }
222
223 template<int N>
calculatePower(double & zx,double & zy,double & radius)224 static inline void calculatePower( double& zx, double& zy, double& radius )
225 {
226 if ( N % 2 == 0 ) {
227 calculatePower<N / 2>( zx, zy, radius );
228
229 double zxx = zx * zx;
230 double zyy = zy * zy;
231 double zxy = zx * zy;
232
233 zx = zxx - zyy;
234 zy = zxy + zxy;
235 } else {
236 double zx2 = zx;
237 double zy2 = zy;
238 calculatePower<N - 1>( zx2, zy2, radius );
239
240 double zxx2 = zx * zx2;
241 double zxy2 = zx * zy2;
242 double zyx2 = zy * zx2;
243 double zyy2 = zy * zy2;
244
245 zx = zxx2 - zyy2;
246 zy = zxy2 + zyx2;
247 }
248 }
249
250 template<>
calculatePower(double & zx,double & zy,double & radius)251 inline void calculatePower<2>( double& zx, double& zy, double& radius )
252 {
253 double zxx = zx * zx;
254 double zyy = zy * zy;
255 double zxy = zx * zy;
256
257 zx = zxx - zyy;
258 zy = zxy + zxy;
259 radius = zxx + zyy;
260 }
261
262 template<>
calculatePower(double &,double &,double &)263 inline void calculatePower<1>( double& /*zx*/, double& /*zy*/, double& /*radius*/ )
264 {
265 }
266
267 template<int N, Variant VARIANT>
calculateFast(double x,double y,double cx,double cy,int maxIterations)268 static double calculateFast( double x, double y, double cx, double cy, int maxIterations )
269 {
270 double zx = x;
271 double zy = y;
272
273 for ( int k = maxIterations; k > 0; k-- ) {
274 adjust<VARIANT>( zx, zy );
275
276 double radius;
277 calculatePower<N>( zx, zy, radius );
278
279 if ( radius >= BailoutRadius )
280 return calculateResult( maxIterations, k, radius, N );
281
282 zx += cx;
283 zy += cy;
284 }
285
286 return 0.0;
287 }
288
289 class MandelbrotFastParams
290 {
291 public:
MandelbrotFastParams()292 MandelbrotFastParams()
293 {
294 }
295 };
296
297 template<int N, Variant VARIANT>
298 class MandelbrotFastFunctor : public Functor, public MandelbrotFastParams
299 {
300 public:
MandelbrotFastFunctor(const MandelbrotFastParams & params)301 MandelbrotFastFunctor( const MandelbrotFastParams& params ) : MandelbrotFastParams( params )
302 {
303 }
304
operator ()(double zx,double zy,int maxIterations)305 double operator()( double zx, double zy, int maxIterations )
306 {
307 return calculateFast<N, VARIANT>( zx, zy, zx, zy, maxIterations );
308 }
309 };
310
311 class JuliaFastParams : public MandelbrotFastParams
312 {
313 public:
JuliaFastParams(double cx,double cy)314 JuliaFastParams( double cx, double cy ) : MandelbrotFastParams(),
315 m_cx( cx ),
316 m_cy( cy )
317 {
318 }
319
320 protected:
321 double m_cx;
322 double m_cy;
323 };
324
325 template<int N, Variant VARIANT>
326 class JuliaFastFunctor : public Functor, public JuliaFastParams
327 {
328 public:
JuliaFastFunctor(const JuliaFastParams & params)329 JuliaFastFunctor( const JuliaFastParams& params ) : JuliaFastParams( params )
330 {
331 }
332
operator ()(double zx,double zy,int maxIterations)333 double operator()( double zx, double zy, int maxIterations )
334 {
335 return calculateFast<N, VARIANT>( zx, zy, m_cx, m_cy, maxIterations );
336 }
337
338 };
339
340 template<typename BASE, template<int N, Variant VARIANT> class FACTORY, int EXPONENT = MaxExponent>
341 class ExponentDispatcher
342 {
343 public:
344 template<typename PARAMS>
create(int exponent,Variant variant,const PARAMS & params)345 static BASE* create( int exponent, Variant variant, const PARAMS& params )
346 {
347 if ( exponent == EXPONENT )
348 return VariantDispatcher<BASE, FactoryAdapter>::create( variant, params );
349 return ExponentDispatcher<BASE, FACTORY, EXPONENT - 1>::create( exponent, variant, params );
350 }
351
352 private:
353 template<Variant VARIANT>
354 class FactoryAdapter
355 {
356 public:
357 template<typename PARAMS>
create(const PARAMS & params)358 static BASE* create( const PARAMS& params )
359 {
360 return FACTORY<EXPONENT, VARIANT>::create( params );
361 }
362 };
363 };
364
365 template<typename BASE, template<int N, Variant VARIANT> class FACTORY>
366 class ExponentDispatcher<BASE, FACTORY, 1>
367 {
368 public:
369 template<typename PARAMS>
create(int,Variant,const PARAMS &)370 static BASE* create( int /*exponent*/, Variant /*variant*/, const PARAMS& /*params*/ )
371 {
372 return NULL;
373 }
374 };
375
376 template<typename BASE, template<int N, Variant VARIANT> class FUNCTOR>
377 class FastFunctorFactory
378 {
379 public:
380 template<typename PARAMS>
create(int exponent,Variant variant,const PARAMS & params)381 static BASE* create( int exponent, Variant variant, const PARAMS& params )
382 {
383 return ExponentDispatcher<BASE, InnerFactory>::create( exponent, variant, params );
384 }
385
386 private:
387 template<int N, Variant VARIANT>
388 class InnerFactory
389 {
390 public:
391 template<typename PARAMS>
create(const PARAMS & params)392 static BASE* create( const PARAMS& params )
393 {
394 return new FUNCTOR<N, VARIANT>( params );
395 }
396 };
397 };
398
createMandelbrotFastFunctor(int exponent,Variant variant)399 Functor* createMandelbrotFastFunctor( int exponent, Variant variant )
400 {
401 return FastFunctorFactory<Functor, MandelbrotFastFunctor>::create( exponent, variant, MandelbrotFastParams() );
402 }
403
createJuliaFastFunctor(double cx,double cy,int exponent,Variant variant)404 Functor* createJuliaFastFunctor( double cx, double cy, int exponent, Variant variant )
405 {
406 return FastFunctorFactory<Functor, JuliaFastFunctor>::create( exponent, variant, JuliaFastParams( cx, cy ) );
407 }
408
generatePreview(const Input & input,const Output & output,Functor * functor,int maxIterations)409 void generatePreview( const Input& input, const Output& output, Functor* functor, int maxIterations )
410 {
411 for ( int y = 0; y < output.m_height; y += CellSize ) {
412 double* row = output.m_buffer + output.m_stride * y;
413 for ( int x = 0; x < output.m_width; x += CellSize ) {
414 double zx = input.m_x + input.m_ca * x + input.m_sa * y;
415 double zy = input.m_y - input.m_sa * x + input.m_ca * y;
416 row[ x ] = ( *functor )( zx, zy, maxIterations );
417 }
418 }
419 }
420
checkThreshold(double p1,double p2,double threshold)421 static inline bool checkThreshold( double p1, double p2, double threshold )
422 {
423 double pmin, pmax;
424 if ( p1 < p2 )
425 pmin = p1, pmax = p2;
426 else
427 pmin = p2, pmax = p1;
428
429 if ( pmin == 0.0 && pmax != 0.0 )
430 return true;
431
432 if ( ( pmax - pmin ) > threshold )
433 return true;
434
435 return false;
436 }
437
checkThreshold(double p1,double p2,double p3,double p4,double threshold)438 static inline bool checkThreshold( double p1, double p2, double p3, double p4, double threshold )
439 {
440 return checkThreshold( p1, p2, threshold )
441 || checkThreshold( p3, p4, threshold )
442 || checkThreshold( p1, p3, threshold )
443 || checkThreshold( p2, p4, threshold );
444 }
445
generateDetails(const Input & input,const Output & output,Functor * functor,int maxIterations,double threshold)446 void generateDetails( const Input& input, const Output& output, Functor* functor, int maxIterations, double threshold )
447 {
448 for ( int y = 0; y < output.m_height; y += CellSize ) {
449 double* row = output.m_buffer + output.m_stride * y;
450 for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
451 double p1 = row[ x ];
452 double p2 = row[ x + CellSize ];
453 if ( checkThreshold( p1, p2, threshold ) ) {
454 for ( int i = 1; i < CellSize; i++ ) {
455 double zx = input.m_x + input.m_ca * ( x + i ) + input.m_sa * y;
456 double zy = input.m_y - input.m_sa * ( x + i ) + input.m_ca * y;
457 row[ x + i ] = ( *functor )( zx, zy, maxIterations );
458 }
459 }
460 }
461 }
462
463 for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
464 double* row = output.m_buffer + output.m_stride * y;
465 for ( int x = 0; x < output.m_width; x += CellSize ) {
466 double p1 = row[ x ];
467 double p2 = row[ output.m_stride * CellSize + x ];
468 if ( checkThreshold( p1, p2, threshold ) ) {
469 for ( int i = 1; i < CellSize; i++ ) {
470 double zx = input.m_x + input.m_ca * x + input.m_sa * ( y + i );
471 double zy = input.m_y - input.m_sa * x + input.m_ca * ( y + i );
472 row[ output.m_stride * i + x ] = ( *functor )( zx, zy, maxIterations );
473 }
474 }
475 }
476 }
477
478 for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
479 double* row = output.m_buffer + output.m_stride * y;
480 for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
481 double p1 = row[ x ];
482 double p2 = row[ x + CellSize ];
483 double p3 = row[ output.m_stride * CellSize + x ];
484 double p4 = row[ output.m_stride * CellSize + x + CellSize ];
485 if ( checkThreshold( p1, p2, p3, p4, threshold ) ) {
486 for ( int i = 1; i < CellSize; i++ ) {
487 for ( int j = 1; j < CellSize; j++ ) {
488 double zx = input.m_x + input.m_ca * ( x + j ) + input.m_sa * ( y + i );
489 double zy = input.m_y - input.m_sa * ( x + j ) + input.m_ca * ( y + i );
490 row[ output.m_stride * i + x + j ] = ( *functor )( zx, zy, maxIterations );
491 }
492 }
493 }
494 }
495 }
496 }
497
interpolate(const Output & output)498 void interpolate( const Output& output )
499 {
500 for ( int y = 0; y < output.m_height; y += CellSize ) {
501 double* row = output.m_buffer + output.m_stride * y;
502 for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
503 double p1 = row[ x ];
504 double p2 = row[ x + CellSize ];
505 for ( int i = 1; i < CellSize; i++ )
506 row[ x + i ] = (double)( CellSize - i ) / (double)CellSize * p1 + (double)i / (double)CellSize * p2;
507 }
508 }
509 for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
510 double* row = output.m_buffer + output.m_stride * y;
511 for ( int x = 0; x < output.m_width; x++ ) {
512 double p1 = row[ x ];
513 double p2 = row[ output.m_stride * CellSize + x ];
514 for ( int i = 1; i < CellSize; i++ )
515 row[ output.m_stride * i + x ] = (double)( CellSize - i ) / (double)CellSize * p1 + (double)i / (double)CellSize * p2;
516 }
517 }
518 }
519
520 #if defined( HAVE_SSE2 )
521
522 #if defined( Q_CC_MSVC )
523 # define ALIGNXMM( var ) __declspec(align(16)) var
524 #else
525 # define ALIGNXMM( var ) var __attribute__((aligned(16)))
526 #endif
527
528 enum CPUFeatures
529 {
530 MMX = 1,
531 SSE = 2,
532 SSE2 = 4
533 };
534
535 // based on qdrawhelper.cpp
detectCPUFeatures()536 static int detectCPUFeatures()
537 {
538 #if defined( __x86_64__ ) || defined( __ia64__ ) || defined( Q_OS_WIN64 )
539 return MMX | SSE | SSE2;
540 #elif defined( __i386__ ) || defined( _M_IX86 )
541 int result = 0;
542 #if defined( Q_CC_GNU )
543 asm( "push %%ebx\n"
544 "pushf\n"
545 "pop %%eax\n"
546 "mov %%eax, %%ebx\n"
547 "xor $0x00200000, %%eax\n"
548 "push %%eax\n"
549 "popf\n"
550 "pushf\n"
551 "pop %%eax\n"
552 "xor %%edx, %%edx\n"
553 "xor %%ebx, %%eax\n"
554 "jz 1f\n"
555 "mov $0x00000001, %%eax\n"
556 "cpuid\n"
557 "1:\n"
558 "pop %%ebx\n"
559 "mov %%edx, %0\n"
560 : "=r" ( result )
561 :
562 : "%eax", "%ecx", "%edx"
563 );
564 #elif defined ( Q_OS_WIN )
565 _asm {
566 push eax
567 push ebx
568 push ecx
569 push edx
570 pushfd
571 pop eax
572 mov ebx, eax
573 xor eax, 00200000h
574 push eax
575 popfd
576 pushfd
577 pop eax
578 mov edx, 0
579 xor eax, ebx
580 jz skip
581 mov eax, 1
582 cpuid
583 mov result, edx
584 skip:
585 pop edx
586 pop ecx
587 pop ebx
588 pop eax
589 }
590 #endif
591 int features = 0;
592 if ( result & ( 1 << 23 ) )
593 features |= MMX;
594 if ( result & ( 1 << 25 ) )
595 features |= SSE;
596 if ( result & ( 1 << 26 ) )
597 features |= SSE2;
598 return features;
599 #else
600 return 0;
601 #endif
602 }
603
604 static const int AvailableCPUFeatures = detectCPUFeatures();
605
isSSE2Available()606 bool isSSE2Available()
607 {
608 return AvailableCPUFeatures & SSE2;
609 }
610
611 template<int N>
calculatePowerSSE2(__m128d & zx,__m128d & zy,__m128d & radius)612 static inline void calculatePowerSSE2( __m128d& zx, __m128d& zy, __m128d& radius )
613 {
614 if ( N % 2 == 0 ) {
615 calculatePowerSSE2<N / 2>( zx, zy, radius );
616
617 __m128d zxx = _mm_mul_pd( zx, zx );
618 __m128d zyy = _mm_mul_pd( zy, zy );
619 __m128d zxy = _mm_mul_pd( zx, zy );
620
621 zx = _mm_sub_pd( zxx, zyy );
622 zy = _mm_add_pd( zxy, zxy );
623 } else {
624 __m128d zx2 = zx;
625 __m128d zy2 = zy;
626 calculatePowerSSE2<N - 1>( zx2, zy2, radius );
627
628 __m128d zxx2 = _mm_mul_pd( zx, zx2 );
629 __m128d zxy2 = _mm_mul_pd( zx, zy2 );
630 __m128d zyx2 = _mm_mul_pd( zy, zx2 );
631 __m128d zyy2 = _mm_mul_pd( zy, zy2 );
632
633 zx = _mm_sub_pd( zxx2, zyy2 );
634 zy = _mm_add_pd( zxy2, zyx2 );
635 }
636 }
637
638 template<>
calculatePowerSSE2(__m128d & zx,__m128d & zy,__m128d & radius)639 inline void calculatePowerSSE2<2>( __m128d& zx, __m128d& zy, __m128d& radius )
640 {
641 __m128d zxx = _mm_mul_pd( zx, zx );
642 __m128d zyy = _mm_mul_pd( zy, zy );
643 __m128d zxy = _mm_mul_pd( zx, zy );
644
645 zx = _mm_sub_pd( zxx, zyy );
646 zy = _mm_add_pd( zxy, zxy );
647 radius = _mm_add_pd( zxx, zyy );
648 }
649
650 template<>
calculatePowerSSE2(__m128d &,__m128d &,__m128d &)651 inline void calculatePowerSSE2<1>( __m128d& /*zx*/, __m128d& /*zy*/, __m128d& /*radius*/ )
652 {
653 }
654
655 template<Variant VARIANT>
656 static void adjustSSE2( __m128d& /*zx*/, __m128d& /*zy*/ );
657
658 template<>
adjustSSE2(__m128d &,__m128d &)659 inline void adjustSSE2<NormalVariant>( __m128d& /*zx*/, __m128d& /*zy*/ )
660 {
661 }
662
663 template<>
adjustSSE2(__m128d &,__m128d & zy)664 inline void adjustSSE2<ConjugateVariant>( __m128d& /*zx*/, __m128d& zy )
665 {
666 __m128d mask = _mm_castsi128_pd( _mm_set_epi32( int( 0x80000000 ), 0, int( 0x80000000 ), 0 ) );
667 zy = _mm_xor_pd( mask, zy );
668 }
669
670 template<>
adjustSSE2(__m128d & zx,__m128d & zy)671 inline void adjustSSE2<AbsoluteVariant>( __m128d& zx, __m128d& zy )
672 {
673 __m128d mask = _mm_castsi128_pd( _mm_set_epi32( 0x7fffffff, int( 0xffffffff ), 0x7fffffff, int( 0xffffffff ) ) );
674 zx = _mm_and_pd( zx, mask );
675 zy = _mm_and_pd( zy, mask );
676 }
677
678 template<>
adjustSSE2(__m128d &,__m128d & zy)679 inline void adjustSSE2<AbsoluteImVariant>( __m128d& /*zx*/, __m128d& zy )
680 {
681 __m128d mask = _mm_castsi128_pd( _mm_set_epi32( 0x7fffffff, int( 0xffffffff ), 0x7fffffff, int( 0xffffffff ) ) );
682 zy = _mm_and_pd( zy, mask );
683 }
684
685 template<int N, Variant VARIANT>
calculateStepSSE2(int k,__m128d & zx,__m128d & zy,__m128d cx,__m128d cy,__m128d rmax,int count[],double final[])686 static inline bool calculateStepSSE2( int k, __m128d& zx, __m128d& zy, __m128d cx, __m128d cy, __m128d rmax, int count[], double final[] )
687 {
688 adjustSSE2<VARIANT>( zx, zy );
689
690 __m128d radius;
691 calculatePowerSSE2<N>( zx, zy, radius );
692
693 int mask = _mm_movemask_pd( _mm_cmpge_pd( radius, rmax ) );
694
695 zx = _mm_add_pd( zx, cx );
696 zy = _mm_add_pd( zy, cy );
697
698 if ( mask ) {
699 if ( mask & 1 ) {
700 if ( !count[ 0 ] ) {
701 count[ 0 ] = k;
702 _mm_storel_pd( &final[ 0 ], radius );
703
704 if ( count[ 1 ] )
705 return true;
706 }
707 }
708 if ( mask & 2 ) {
709 if ( !count[ 1 ] ) {
710 count[ 1 ] = k;
711 _mm_storeh_pd( &final[ 1 ], radius );
712
713 if ( count[ 0 ] )
714 return true;
715 }
716 }
717 }
718
719 return false;
720 }
721
722 template<int N, Variant VARIANT, int STEPS>
723 class RepeatStepsSSE2
724 {
725 public:
calculate(int k,__m128d & zx,__m128d & zy,__m128d cx,__m128d cy,__m128d rmax,int count[],double final[])726 static inline bool calculate( int k, __m128d& zx, __m128d& zy, __m128d cx, __m128d cy, __m128d rmax, int count[], double final[] )
727 {
728 if ( calculateStepSSE2<N, VARIANT>( k, zx, zy, cx, cy, rmax, count, final ) )
729 return true;
730
731 if ( RepeatStepsSSE2<N, VARIANT, STEPS - 1>::calculate( k - 1, zx, zy, cx, cy, rmax, count, final ) )
732 return true;
733
734 return false;
735 }
736
737 static const int Steps = STEPS;
738 };
739
740 template<int N, Variant VARIANT>
741 class RepeatStepsSSE2<N, VARIANT, 0>
742 {
743 public:
calculate(int,__m128d &,__m128d &,__m128d,__m128d,__m128d,int[],double[])744 static inline bool calculate( int /*k*/, __m128d& /*zx*/, __m128d& /*zy*/, __m128d /*cx*/, __m128d /*cy*/, __m128d /*rmax*/, int /*count*/[], double /*final*/[] )
745 {
746 return false;
747 }
748 };
749
750 template<int N, Variant VARIANT>
751 class AutoStepsSSE2 : public RepeatStepsSSE2<N, VARIANT, ( N <= 5 ) ? 2 : 1>
752 {
753 };
754
755 template<int N, Variant VARIANT>
calculateSSE2(double result[],double x[],double y[],double cx[],double cy[],int maxIterations)756 static inline void calculateSSE2( double result[], double x[], double y[], double cx[], double cy[], int maxIterations )
757 {
758 __m128d zx = _mm_load_pd( x );
759 __m128d zy = _mm_load_pd( y );
760
761 __m128d rcx = _mm_load_pd( cx );
762 __m128d rcy = _mm_load_pd( cy );
763
764 __m128d rmax = _mm_set1_pd( BailoutRadius );
765
766 int count[ 2 ] = { 0, 0 };
767 double final[ 2 ] = { 0.0, 0.0 };
768
769 for ( int k = maxIterations; k > 0; k -= AutoStepsSSE2<N, VARIANT>::Steps ) {
770 if ( AutoStepsSSE2<N, VARIANT>::calculate( k, zx, zy, rcx, rcy, rmax, count, final ) )
771 break;
772 }
773
774 result[ 0 ] = count[ 0 ] ? calculateResult( maxIterations, count[ 0 ], final[ 0 ], N ) : 0.0;
775 result[ 1 ] = count[ 1 ] ? calculateResult( maxIterations, count[ 1 ], final[ 1 ], N ) : 0.0;
776 }
777
778 template<int N, Variant VARIANT>
779 class MandelbrotFunctorSSE2 : public FunctorSSE2, public MandelbrotFastParams
780 {
781 public:
MandelbrotFunctorSSE2(const MandelbrotFastParams & params)782 MandelbrotFunctorSSE2( const MandelbrotFastParams& params ) : MandelbrotFastParams( params )
783 {
784 }
785
operator ()(double result[],double zx[],double zy[],int maxIterations)786 void operator()( double result[], double zx[], double zy[], int maxIterations )
787 {
788 calculateSSE2<N, VARIANT>( result, zx, zy, zx, zy, maxIterations );
789 }
790 };
791
792 template<int N, Variant VARIANT>
793 class JuliaFunctorSSE2 : public FunctorSSE2, public JuliaFastParams
794 {
795 public:
JuliaFunctorSSE2(const JuliaFastParams & params)796 JuliaFunctorSSE2( const JuliaFastParams& params ) : JuliaFastParams( params )
797 {
798 }
799
operator ()(double result[],double zx[],double zy[],int maxIterations)800 void operator()( double result[], double zx[], double zy[], int maxIterations )
801 {
802 ALIGNXMM( double cx[ 2 ] ) = { m_cx, m_cx };
803 ALIGNXMM( double cy[ 2 ] ) = { m_cy, m_cy };
804 calculateSSE2<N, VARIANT>( result, zx, zy, cx, cy, maxIterations );
805 }
806 };
807
createMandelbrotFunctorSSE2(int exponent,Variant variant)808 FunctorSSE2* createMandelbrotFunctorSSE2( int exponent, Variant variant )
809 {
810 return FastFunctorFactory<FunctorSSE2, MandelbrotFunctorSSE2>::create( exponent, variant, MandelbrotFastParams() );
811 }
812
createJuliaFunctorSSE2(double cx,double cy,int exponent,Variant variant)813 FunctorSSE2* createJuliaFunctorSSE2( double cx, double cy, int exponent, Variant variant )
814 {
815 return FastFunctorFactory<FunctorSSE2, JuliaFunctorSSE2>::create( exponent, variant, JuliaFastParams( cx, cy ) );
816 }
817
generatePreviewSSE2(const Input & input,const Output & output,FunctorSSE2 * functor,int maxIterations)818 void generatePreviewSSE2( const Input& input, const Output& output, FunctorSSE2* functor, int maxIterations )
819 {
820 ALIGNXMM( double zx[ 2 ] );
821 ALIGNXMM( double zy[ 2 ] );
822
823 double result[ 2 ];
824
825 for ( int y = 0; y < output.m_height; y += CellSize ) {
826 double* row = output.m_buffer + output.m_stride * y;
827 for ( int x = 0; x < output.m_width; x += CellSize ) {
828 zx[ 0 ] = input.m_x + input.m_ca * x + input.m_sa * y;
829 zx[ 1 ] = zx[ 0 ] + input.m_ca * CellSize;
830 zy[ 0 ] = input.m_y - input.m_sa * x + input.m_ca * y;
831 zy[ 1 ] = zy[ 0 ] - input.m_sa * CellSize;
832 ( *functor )( result, zx, zy, maxIterations );
833 row[ x ] = result[ 0 ];
834 if ( x + CellSize < output.m_width )
835 row[ x + CellSize ] = result[ 1 ];
836 }
837 }
838 }
839
generateDetailsSSE2(const Input & input,const Output & output,FunctorSSE2 * functor,int maxIterations,double threshold)840 void generateDetailsSSE2( const Input& input, const Output& output, FunctorSSE2* functor, int maxIterations, double threshold )
841 {
842 ALIGNXMM( double zx[ 2 ] );
843 ALIGNXMM( double zy[ 2 ] );
844
845 double result[ 2 ];
846
847 for ( int y = 0; y < output.m_height; y += CellSize ) {
848 double* row = output.m_buffer + output.m_stride * y;
849 for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
850 double p1 = row[ x ];
851 double p2 = row[ x + CellSize ];
852 if ( checkThreshold( p1, p2, threshold ) ) {
853 for ( int i = 1; i < CellSize; i += 2 ) {
854 zx[ 0 ] = input.m_x + input.m_ca * ( x + i ) + input.m_sa * y;
855 zx[ 1 ] = zx[ 0 ] + input.m_ca;
856 zy[ 0 ] = input.m_y - input.m_sa * ( x + i ) + input.m_ca * y;
857 zy[ 1 ] = zy[ 0 ] - input.m_sa;
858 ( *functor )( result, zx, zy, maxIterations );
859 row[ x + i ] = result[ 0 ];
860 if ( i + 1 < CellSize )
861 row[ x + i + 1 ] = result[ 1 ];
862 }
863 }
864 }
865 }
866
867 for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
868 double* row = output.m_buffer + output.m_stride * y;
869 for ( int x = 0; x < output.m_width; x += CellSize ) {
870 double p1 = row[ x ];
871 double p2 = row[ output.m_stride * CellSize + x ];
872 if ( checkThreshold( p1, p2, threshold ) ) {
873 for ( int i = 1; i < CellSize; i += 2 ) {
874 zx[ 0 ] = input.m_x + input.m_ca * x + input.m_sa * ( y + i );
875 zx[ 1 ] = zx[ 0 ] + input.m_sa;
876 zy[ 0 ] = input.m_y - input.m_sa * x + input.m_ca * ( y + i );
877 zy[ 1 ] = zy[ 0 ] + input.m_ca;
878 ( *functor )( result, zx, zy, maxIterations );
879 row[ output.m_stride * i + x ] = result[ 0 ];
880 if ( i + 1 < CellSize )
881 row[ output.m_stride * ( i + 1 ) + x ] = result[ 1 ];
882 }
883 }
884 }
885 }
886
887 for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
888 double* row = output.m_buffer + output.m_stride * y;
889 for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
890 double p1 = row[ x ];
891 double p2 = row[ x + CellSize ];
892 double p3 = row[ output.m_stride * CellSize + x ];
893 double p4 = row[ output.m_stride * CellSize + x + CellSize ];
894 if ( checkThreshold( p1, p2, p3, p4, threshold ) ) {
895 for ( int i = 1; i < CellSize; i++ ) {
896 for ( int j = 1; j < CellSize; j += 2 ) {
897 zx[ 0 ] = input.m_x + input.m_ca * ( x + j ) + input.m_sa * ( y + i );
898 zx[ 1 ] = zx[ 0 ] + input.m_ca;
899 zy[ 0 ] = input.m_y - input.m_sa * ( x + j ) + input.m_ca * ( y + i );
900 zy[ 1 ] = zy[ 0 ] - input.m_sa;
901 ( *functor )( result, zx, zy, maxIterations );
902 row[ output.m_stride * i + x + j ] = result[ 0 ];
903 if ( j + 1 < CellSize )
904 row[ output.m_stride * i + x + j + 1 ] = result[ 1 ];
905 }
906 }
907 }
908 }
909 }
910 }
911
912 #endif // defined( HAVE_SSE2 )
913
914 } // namespace GeneratorCore
915