1 /**************************************************************************
2 * This file is part of the Fraqtive program
3 * Copyright (C) 2004-2012 Michał Męciński
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 **************************************************************************/
18 
19 #include "generatorcore.h"
20 
21 #include <qglobal.h>
22 
23 #include <math.h>
24 #include <cstdlib>
25 
26 #if defined( HAVE_SSE2 )
27 # include <emmintrin.h>
28 #endif
29 
30 // see http://wiki.mimec.org/wiki/Fraqtive/Generator_Core for a description of this code
31 
32 namespace GeneratorCore
33 {
34 
35 static const double BailoutRadius = 64.0;
36 
37 #if defined( Q_CC_MSVC )
38 # pragma float_control( precise, off )
39 # pragma intrinsic( log, sqrt, exp, atan2, sin, cos, fabs )
40 #endif
41 
42 static const double BailoutLog = log( 2.0 * log( BailoutRadius ) );
43 
calculateResult(int maxIterations,int count,double final,double exponent)44 static inline double calculateResult( int maxIterations, int count, double final, double exponent )
45 {
46     if ( count == 0 )
47         return 0.0;
48 
49     double value = ( maxIterations - count ) + ( BailoutLog - log( log( sqrt( final ) ) ) ) / log( exponent );
50 
51     return sqrt( value );
52 }
53 
54 template<Variant VARIANT>
55 static void adjust( double& /*zx*/, double& /*zy*/ );
56 
57 template<>
adjust(double &,double &)58 inline void adjust<NormalVariant>( double& /*zx*/, double& /*zy*/ )
59 {
60 }
61 
62 template<>
adjust(double &,double & zy)63 inline void adjust<ConjugateVariant>( double& /*zx*/, double& zy )
64 {
65     zy = -zy;
66 }
67 
68 template<>
adjust(double & zx,double & zy)69 inline void adjust<AbsoluteVariant>( double& zx, double& zy )
70 {
71     zx = fabs( zx );
72     zy = fabs( zy );
73 }
74 
75 template<>
adjust(double &,double & zy)76 inline void adjust<AbsoluteImVariant>( double& /*zx*/, double& zy )
77 {
78     zy = fabs( zy );
79 }
80 
81 template<Variant VARIANT>
calculate(double x,double y,double cx,double cy,double exponent,int maxIterations)82 static inline double calculate( double x, double y, double cx, double cy, double exponent, int maxIterations )
83 {
84     double zx = x;
85     double zy = y;
86     double radius;
87 
88     double exp2 = 0.5 * exponent;
89 
90     for ( int k = maxIterations; k > 0; k-- ) {
91         adjust<VARIANT>( zx, zy );
92 
93         double zxx = zx * zx;
94         double zyy = zy * zy;
95         radius = zxx + zyy;
96 
97         if ( radius >= BailoutRadius )
98             return calculateResult( maxIterations, k, radius, exponent );
99 
100         double z = exp( log( radius ) * exp2 );
101         double fi = exponent * atan2( zy, zx );
102 
103         zx = z * cos( fi ) + cx;
104         zy = z * sin( fi ) + cy;
105     }
106 
107     return 0.0;
108 }
109 
110 #if defined( Q_CC_MSVC )
111 # pragma float_control( precise, on )
112 # pragma function( log, sqrt, exp, atan2, sin, cos, fabs )
113 #endif
114 
115 class MandelbrotParams
116 {
117 public:
MandelbrotParams(double exponent)118     MandelbrotParams( double exponent ) :
119         m_exponent( exponent )
120     {
121     }
122 
123 protected:
124     double m_exponent;
125 };
126 
127 template<Variant VARIANT>
128 class MandelbrotFunctor : public Functor, public MandelbrotParams
129 {
130 public:
MandelbrotFunctor(const MandelbrotParams & params)131     MandelbrotFunctor( const MandelbrotParams& params ) : MandelbrotParams( params )
132     {
133     }
134 
operator ()(double zx,double zy,int maxIterations)135     double operator()( double zx, double zy, int maxIterations )
136     {
137         return calculate<VARIANT>( zx, zy, zx, zy, m_exponent, maxIterations );
138     }
139 };
140 
141 class JuliaParams : public MandelbrotParams
142 {
143 public:
JuliaParams(double cx,double cy,double exponent)144     JuliaParams( double cx, double cy, double exponent ) : MandelbrotParams( exponent ),
145         m_cx( cx ),
146         m_cy( cy )
147     {
148     }
149 
150 protected:
151     double m_cx;
152     double m_cy;
153 };
154 
155 template<Variant VARIANT>
156 class JuliaFunctor : public Functor, public JuliaParams
157 {
158 public:
JuliaFunctor(const JuliaParams & params)159     JuliaFunctor( const JuliaParams& params ) : JuliaParams( params )
160     {
161     }
162 
operator ()(double zx,double zy,int maxIterations)163     double operator()( double zx, double zy, int maxIterations )
164     {
165         return calculate<VARIANT>( zx, zy, m_cx, m_cy, m_exponent, maxIterations );
166     }
167 };
168 
169 template<typename BASE, template<Variant VARIANT> class FACTORY>
170 class VariantDispatcher
171 {
172 public:
173     template<typename PARAMS>
create(Variant variant,const PARAMS & params)174     static BASE* create( Variant variant, const PARAMS& params )
175     {
176         switch ( variant ) {
177             case NormalVariant:
178                 return FACTORY<NormalVariant>::create( params );
179             case ConjugateVariant:
180                 return FACTORY<ConjugateVariant>::create( params );
181             case AbsoluteVariant:
182                 return FACTORY<AbsoluteVariant>::create( params );
183             case AbsoluteImVariant:
184                 return FACTORY<AbsoluteImVariant>::create( params );
185         }
186         return NULL;
187     }
188 };
189 
190 template<typename BASE, template<Variant VARIANT> class FUNCTOR>
191 class FunctorFactory
192 {
193 public:
194     template<typename PARAMS>
create(Variant variant,const PARAMS & params)195     static BASE* create( Variant variant, const PARAMS& params )
196     {
197         return VariantDispatcher<BASE, InnerFactory>::create( variant, params );
198     }
199 
200 private:
201     template<Variant VARIANT>
202     class InnerFactory
203     {
204     public:
205         template<typename PARAMS>
create(const PARAMS & params)206         static BASE* create( const PARAMS& params )
207         {
208             return new FUNCTOR<VARIANT>( params );
209         }
210     };
211 };
212 
createMandelbrotFunctor(double exponent,Variant variant)213 Functor* createMandelbrotFunctor( double exponent, Variant variant )
214 {
215     return FunctorFactory<Functor, MandelbrotFunctor>::create( variant, MandelbrotParams( exponent ) );
216 }
217 
createJuliaFunctor(double cx,double cy,double exponent,Variant variant)218 Functor* createJuliaFunctor( double cx, double cy, double exponent, Variant variant )
219 {
220     return FunctorFactory<Functor, JuliaFunctor>::create( variant, JuliaParams( cx, cy, exponent ) );
221 }
222 
223 template<int N>
calculatePower(double & zx,double & zy,double & radius)224 static inline void calculatePower( double& zx, double& zy, double& radius )
225 {
226     if ( N % 2 == 0 ) {
227         calculatePower<N / 2>( zx, zy, radius );
228 
229         double zxx = zx * zx;
230         double zyy = zy * zy;
231         double zxy = zx * zy;
232 
233         zx = zxx - zyy;
234         zy = zxy + zxy;
235     } else {
236         double zx2 = zx;
237         double zy2 = zy;
238         calculatePower<N - 1>( zx2, zy2, radius );
239 
240         double zxx2 = zx * zx2;
241         double zxy2 = zx * zy2;
242         double zyx2 = zy * zx2;
243         double zyy2 = zy * zy2;
244 
245         zx = zxx2 - zyy2;
246         zy = zxy2 + zyx2;
247     }
248 }
249 
250 template<>
calculatePower(double & zx,double & zy,double & radius)251 inline void calculatePower<2>( double& zx, double& zy, double& radius )
252 {
253     double zxx = zx * zx;
254     double zyy = zy * zy;
255     double zxy = zx * zy;
256 
257     zx = zxx - zyy;
258     zy = zxy + zxy;
259     radius = zxx + zyy;
260 }
261 
262 template<>
calculatePower(double &,double &,double &)263 inline void calculatePower<1>( double& /*zx*/, double& /*zy*/, double& /*radius*/ )
264 {
265 }
266 
267 template<int N, Variant VARIANT>
calculateFast(double x,double y,double cx,double cy,int maxIterations)268 static double calculateFast( double x, double y, double cx, double cy, int maxIterations )
269 {
270     double zx = x;
271     double zy = y;
272 
273     for ( int k = maxIterations; k > 0; k-- ) {
274         adjust<VARIANT>( zx, zy );
275 
276         double radius;
277         calculatePower<N>( zx, zy, radius );
278 
279         if ( radius >= BailoutRadius )
280             return calculateResult( maxIterations, k, radius, N );
281 
282         zx += cx;
283         zy += cy;
284     }
285 
286     return 0.0;
287 }
288 
289 class MandelbrotFastParams
290 {
291 public:
MandelbrotFastParams()292     MandelbrotFastParams()
293     {
294     }
295 };
296 
297 template<int N, Variant VARIANT>
298 class MandelbrotFastFunctor : public Functor, public MandelbrotFastParams
299 {
300 public:
MandelbrotFastFunctor(const MandelbrotFastParams & params)301     MandelbrotFastFunctor( const MandelbrotFastParams& params ) : MandelbrotFastParams( params )
302     {
303     }
304 
operator ()(double zx,double zy,int maxIterations)305     double operator()( double zx, double zy, int maxIterations )
306     {
307         return calculateFast<N, VARIANT>( zx, zy, zx, zy, maxIterations );
308     }
309 };
310 
311 class JuliaFastParams : public MandelbrotFastParams
312 {
313 public:
JuliaFastParams(double cx,double cy)314     JuliaFastParams( double cx, double cy ) : MandelbrotFastParams(),
315         m_cx( cx ),
316         m_cy( cy )
317     {
318     }
319 
320 protected:
321     double m_cx;
322     double m_cy;
323 };
324 
325 template<int N, Variant VARIANT>
326 class JuliaFastFunctor : public Functor, public JuliaFastParams
327 {
328 public:
JuliaFastFunctor(const JuliaFastParams & params)329     JuliaFastFunctor( const JuliaFastParams& params ) : JuliaFastParams( params )
330     {
331     }
332 
operator ()(double zx,double zy,int maxIterations)333     double operator()( double zx, double zy, int maxIterations )
334     {
335         return calculateFast<N, VARIANT>( zx, zy, m_cx, m_cy, maxIterations );
336     }
337 
338 };
339 
340 template<typename BASE, template<int N, Variant VARIANT> class FACTORY, int EXPONENT = MaxExponent>
341 class ExponentDispatcher
342 {
343 public:
344     template<typename PARAMS>
create(int exponent,Variant variant,const PARAMS & params)345     static BASE* create( int exponent, Variant variant, const PARAMS& params )
346     {
347         if ( exponent == EXPONENT )
348             return VariantDispatcher<BASE, FactoryAdapter>::create( variant, params );
349         return ExponentDispatcher<BASE, FACTORY, EXPONENT - 1>::create( exponent, variant, params );
350     }
351 
352 private:
353     template<Variant VARIANT>
354     class FactoryAdapter
355     {
356     public:
357         template<typename PARAMS>
create(const PARAMS & params)358         static BASE* create( const PARAMS& params )
359         {
360             return FACTORY<EXPONENT, VARIANT>::create( params );
361         }
362     };
363 };
364 
365 template<typename BASE, template<int N, Variant VARIANT> class FACTORY>
366 class ExponentDispatcher<BASE, FACTORY, 1>
367 {
368 public:
369     template<typename PARAMS>
create(int,Variant,const PARAMS &)370     static BASE* create( int /*exponent*/, Variant /*variant*/, const PARAMS& /*params*/ )
371     {
372         return NULL;
373     }
374 };
375 
376 template<typename BASE, template<int N, Variant VARIANT> class FUNCTOR>
377 class FastFunctorFactory
378 {
379 public:
380     template<typename PARAMS>
create(int exponent,Variant variant,const PARAMS & params)381     static BASE* create( int exponent, Variant variant, const PARAMS& params )
382     {
383         return ExponentDispatcher<BASE, InnerFactory>::create( exponent, variant, params );
384     }
385 
386 private:
387     template<int N, Variant VARIANT>
388     class InnerFactory
389     {
390     public:
391         template<typename PARAMS>
create(const PARAMS & params)392         static BASE* create( const PARAMS& params )
393         {
394             return new FUNCTOR<N, VARIANT>( params );
395         }
396     };
397 };
398 
createMandelbrotFastFunctor(int exponent,Variant variant)399 Functor* createMandelbrotFastFunctor( int exponent, Variant variant )
400 {
401     return FastFunctorFactory<Functor, MandelbrotFastFunctor>::create( exponent, variant, MandelbrotFastParams() );
402 }
403 
createJuliaFastFunctor(double cx,double cy,int exponent,Variant variant)404 Functor* createJuliaFastFunctor( double cx, double cy, int exponent, Variant variant )
405 {
406     return FastFunctorFactory<Functor, JuliaFastFunctor>::create( exponent, variant, JuliaFastParams( cx, cy ) );
407 }
408 
generatePreview(const Input & input,const Output & output,Functor * functor,int maxIterations)409 void generatePreview( const Input& input, const Output& output, Functor* functor, int maxIterations )
410 {
411     for ( int y = 0; y < output.m_height; y += CellSize ) {
412         double* row = output.m_buffer + output.m_stride * y;
413         for ( int x = 0; x < output.m_width; x += CellSize ) {
414             double zx = input.m_x + input.m_ca * x + input.m_sa * y;
415             double zy = input.m_y - input.m_sa * x + input.m_ca * y;
416             row[ x ] = ( *functor )( zx, zy, maxIterations );
417         }
418     }
419 }
420 
checkThreshold(double p1,double p2,double threshold)421 static inline bool checkThreshold( double p1, double p2, double threshold )
422 {
423     double pmin, pmax;
424     if ( p1 < p2 )
425         pmin = p1, pmax = p2;
426     else
427         pmin = p2, pmax = p1;
428 
429     if ( pmin == 0.0 && pmax != 0.0 )
430         return true;
431 
432     if ( ( pmax - pmin ) > threshold )
433         return true;
434 
435     return false;
436 }
437 
checkThreshold(double p1,double p2,double p3,double p4,double threshold)438 static inline bool checkThreshold( double p1, double p2, double p3, double p4, double threshold )
439 {
440     return checkThreshold( p1, p2, threshold )
441         || checkThreshold( p3, p4, threshold )
442         || checkThreshold( p1, p3, threshold )
443         || checkThreshold( p2, p4, threshold );
444 }
445 
generateDetails(const Input & input,const Output & output,Functor * functor,int maxIterations,double threshold)446 void generateDetails( const Input& input, const Output& output, Functor* functor, int maxIterations, double threshold )
447 {
448     for ( int y = 0; y < output.m_height; y += CellSize ) {
449         double* row = output.m_buffer + output.m_stride * y;
450         for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
451             double p1 = row[ x ];
452             double p2 = row[ x + CellSize ];
453             if ( checkThreshold( p1, p2, threshold ) ) {
454                 for ( int i = 1; i < CellSize; i++ ) {
455                     double zx = input.m_x + input.m_ca * ( x + i ) + input.m_sa * y;
456                     double zy = input.m_y - input.m_sa * ( x + i ) + input.m_ca * y;
457                     row[ x + i ] = ( *functor )( zx, zy, maxIterations );
458                 }
459             }
460         }
461     }
462 
463     for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
464         double* row = output.m_buffer + output.m_stride * y;
465         for ( int x = 0; x < output.m_width; x += CellSize ) {
466             double p1 = row[ x ];
467             double p2 = row[ output.m_stride * CellSize + x ];
468             if ( checkThreshold( p1, p2, threshold ) ) {
469                 for ( int i = 1; i < CellSize; i++ ) {
470                     double zx = input.m_x + input.m_ca * x + input.m_sa * ( y + i );
471                     double zy = input.m_y - input.m_sa * x + input.m_ca * ( y + i );
472                     row[ output.m_stride * i + x ] = ( *functor )( zx, zy, maxIterations );
473                 }
474             }
475         }
476     }
477 
478     for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
479         double* row = output.m_buffer + output.m_stride * y;
480         for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
481             double p1 = row[ x ];
482             double p2 = row[ x + CellSize ];
483             double p3 = row[ output.m_stride * CellSize + x ];
484             double p4 = row[ output.m_stride * CellSize + x + CellSize ];
485             if ( checkThreshold( p1, p2, p3, p4, threshold ) ) {
486                 for ( int i = 1; i < CellSize; i++ ) {
487                     for ( int j = 1; j < CellSize; j++ ) {
488                         double zx = input.m_x + input.m_ca * ( x + j ) + input.m_sa * ( y + i );
489                         double zy = input.m_y - input.m_sa * ( x + j ) + input.m_ca * ( y + i );
490                         row[ output.m_stride * i + x + j ] = ( *functor )( zx, zy, maxIterations );
491                     }
492                 }
493             }
494         }
495     }
496 }
497 
interpolate(const Output & output)498 void interpolate( const Output& output )
499 {
500     for ( int y = 0; y < output.m_height; y += CellSize ) {
501         double* row = output.m_buffer + output.m_stride * y;
502         for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
503             double p1 = row[ x ];
504             double p2 = row[ x + CellSize ];
505             for ( int i = 1; i < CellSize; i++ )
506                 row[ x + i ] = (double)( CellSize - i ) / (double)CellSize * p1 + (double)i / (double)CellSize * p2;
507         }
508     }
509     for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
510         double* row = output.m_buffer + output.m_stride * y;
511         for ( int x = 0; x < output.m_width; x++ ) {
512             double p1 = row[ x ];
513             double p2 = row[ output.m_stride * CellSize + x ];
514             for ( int i = 1; i < CellSize; i++ )
515                 row[ output.m_stride * i + x ] = (double)( CellSize - i ) / (double)CellSize * p1 + (double)i / (double)CellSize * p2;
516         }
517     }
518 }
519 
520 #if defined( HAVE_SSE2 )
521 
522 #if defined( Q_CC_MSVC )
523 # define ALIGNXMM( var ) __declspec(align(16)) var
524 #else
525 # define ALIGNXMM( var ) var __attribute__((aligned(16)))
526 #endif
527 
528 enum CPUFeatures
529 {
530     MMX = 1,
531     SSE = 2,
532     SSE2 = 4
533 };
534 
535 // based on qdrawhelper.cpp
detectCPUFeatures()536 static int detectCPUFeatures()
537 {
538 #if defined( __x86_64__ ) || defined( __ia64__ ) || defined( Q_OS_WIN64 )
539     return MMX | SSE | SSE2;
540 #elif defined( __i386__ ) || defined( _M_IX86 )
541     int result = 0;
542 #if defined( Q_CC_GNU )
543     asm( "push %%ebx\n"
544          "pushf\n"
545          "pop %%eax\n"
546          "mov %%eax, %%ebx\n"
547          "xor $0x00200000, %%eax\n"
548          "push %%eax\n"
549          "popf\n"
550          "pushf\n"
551          "pop %%eax\n"
552          "xor %%edx, %%edx\n"
553          "xor %%ebx, %%eax\n"
554          "jz 1f\n"
555          "mov $0x00000001, %%eax\n"
556          "cpuid\n"
557          "1:\n"
558          "pop %%ebx\n"
559          "mov %%edx, %0\n"
560         : "=r" ( result )
561         :
562         : "%eax", "%ecx", "%edx"
563         );
564 #elif defined ( Q_OS_WIN )
565     _asm {
566         push eax
567         push ebx
568         push ecx
569         push edx
570         pushfd
571         pop eax
572         mov ebx, eax
573         xor eax, 00200000h
574         push eax
575         popfd
576         pushfd
577         pop eax
578         mov edx, 0
579         xor eax, ebx
580         jz skip
581         mov eax, 1
582         cpuid
583         mov result, edx
584     skip:
585         pop edx
586         pop ecx
587         pop ebx
588         pop eax
589     }
590 #endif
591     int features = 0;
592     if ( result & ( 1 << 23 ) )
593         features |= MMX;
594     if ( result & ( 1 << 25 ) )
595         features |= SSE;
596     if ( result & ( 1 << 26 ) )
597         features |= SSE2;
598     return features;
599 #else
600     return 0;
601 #endif
602 }
603 
604 static const int AvailableCPUFeatures = detectCPUFeatures();
605 
isSSE2Available()606 bool isSSE2Available()
607 {
608     return AvailableCPUFeatures & SSE2;
609 }
610 
611 template<int N>
calculatePowerSSE2(__m128d & zx,__m128d & zy,__m128d & radius)612 static inline void calculatePowerSSE2( __m128d& zx, __m128d& zy, __m128d& radius )
613 {
614     if ( N % 2 == 0 ) {
615         calculatePowerSSE2<N / 2>( zx, zy, radius );
616 
617         __m128d zxx = _mm_mul_pd( zx, zx );
618         __m128d zyy = _mm_mul_pd( zy, zy );
619         __m128d zxy = _mm_mul_pd( zx, zy );
620 
621         zx = _mm_sub_pd( zxx, zyy );
622         zy = _mm_add_pd( zxy, zxy );
623     } else {
624         __m128d zx2 = zx;
625         __m128d zy2 = zy;
626         calculatePowerSSE2<N - 1>( zx2, zy2, radius );
627 
628         __m128d zxx2 = _mm_mul_pd( zx, zx2 );
629         __m128d zxy2 = _mm_mul_pd( zx, zy2 );
630         __m128d zyx2 = _mm_mul_pd( zy, zx2 );
631         __m128d zyy2 = _mm_mul_pd( zy, zy2 );
632 
633         zx = _mm_sub_pd( zxx2, zyy2 );
634         zy = _mm_add_pd( zxy2, zyx2 );
635     }
636 }
637 
638 template<>
calculatePowerSSE2(__m128d & zx,__m128d & zy,__m128d & radius)639 inline void calculatePowerSSE2<2>( __m128d& zx, __m128d& zy, __m128d& radius )
640 {
641     __m128d zxx = _mm_mul_pd( zx, zx );
642     __m128d zyy = _mm_mul_pd( zy, zy );
643     __m128d zxy = _mm_mul_pd( zx, zy );
644 
645     zx = _mm_sub_pd( zxx, zyy );
646     zy = _mm_add_pd( zxy, zxy );
647     radius = _mm_add_pd( zxx, zyy );
648 }
649 
650 template<>
calculatePowerSSE2(__m128d &,__m128d &,__m128d &)651 inline void calculatePowerSSE2<1>( __m128d& /*zx*/, __m128d& /*zy*/, __m128d& /*radius*/ )
652 {
653 }
654 
655 template<Variant VARIANT>
656 static void adjustSSE2( __m128d& /*zx*/, __m128d& /*zy*/ );
657 
658 template<>
adjustSSE2(__m128d &,__m128d &)659 inline void adjustSSE2<NormalVariant>( __m128d& /*zx*/, __m128d& /*zy*/ )
660 {
661 }
662 
663 template<>
adjustSSE2(__m128d &,__m128d & zy)664 inline void adjustSSE2<ConjugateVariant>( __m128d& /*zx*/, __m128d& zy )
665 {
666     __m128d mask = _mm_castsi128_pd( _mm_set_epi32( int( 0x80000000 ), 0, int( 0x80000000 ), 0 ) );
667     zy = _mm_xor_pd( mask, zy );
668 }
669 
670 template<>
adjustSSE2(__m128d & zx,__m128d & zy)671 inline void adjustSSE2<AbsoluteVariant>( __m128d& zx, __m128d& zy )
672 {
673     __m128d mask = _mm_castsi128_pd( _mm_set_epi32( 0x7fffffff, int( 0xffffffff ), 0x7fffffff, int( 0xffffffff ) ) );
674     zx = _mm_and_pd( zx, mask );
675     zy = _mm_and_pd( zy, mask );
676 }
677 
678 template<>
adjustSSE2(__m128d &,__m128d & zy)679 inline void adjustSSE2<AbsoluteImVariant>( __m128d& /*zx*/, __m128d& zy )
680 {
681     __m128d mask = _mm_castsi128_pd( _mm_set_epi32( 0x7fffffff, int( 0xffffffff ), 0x7fffffff, int( 0xffffffff ) ) );
682     zy = _mm_and_pd( zy, mask );
683 }
684 
685 template<int N, Variant VARIANT>
calculateStepSSE2(int k,__m128d & zx,__m128d & zy,__m128d cx,__m128d cy,__m128d rmax,int count[],double final[])686 static inline bool calculateStepSSE2( int k, __m128d& zx, __m128d& zy, __m128d cx, __m128d cy, __m128d rmax, int count[], double final[] )
687 {
688     adjustSSE2<VARIANT>( zx, zy );
689 
690     __m128d radius;
691     calculatePowerSSE2<N>( zx, zy, radius );
692 
693     int mask = _mm_movemask_pd( _mm_cmpge_pd( radius, rmax ) );
694 
695     zx = _mm_add_pd( zx, cx );
696     zy = _mm_add_pd( zy, cy );
697 
698     if ( mask ) {
699         if ( mask & 1 ) {
700             if ( !count[ 0 ] ) {
701                 count[ 0 ] = k;
702                 _mm_storel_pd( &final[ 0 ], radius );
703 
704                 if ( count[ 1 ] )
705                     return true;
706             }
707         }
708         if ( mask & 2 ) {
709             if ( !count[ 1 ] ) {
710                 count[ 1 ] = k;
711                 _mm_storeh_pd( &final[ 1 ], radius );
712 
713                 if ( count[ 0 ] )
714                     return true;
715             }
716         }
717     }
718 
719     return false;
720 }
721 
722 template<int N, Variant VARIANT, int STEPS>
723 class RepeatStepsSSE2
724 {
725 public:
calculate(int k,__m128d & zx,__m128d & zy,__m128d cx,__m128d cy,__m128d rmax,int count[],double final[])726     static inline bool calculate( int k, __m128d& zx, __m128d& zy, __m128d cx, __m128d cy, __m128d rmax, int count[], double final[] )
727     {
728         if ( calculateStepSSE2<N, VARIANT>( k, zx, zy, cx, cy, rmax, count, final ) )
729             return true;
730 
731         if ( RepeatStepsSSE2<N, VARIANT, STEPS - 1>::calculate( k - 1, zx, zy, cx, cy, rmax, count, final ) )
732             return true;
733 
734         return false;
735     }
736 
737     static const int Steps = STEPS;
738 };
739 
740 template<int N, Variant VARIANT>
741 class RepeatStepsSSE2<N, VARIANT, 0>
742 {
743 public:
calculate(int,__m128d &,__m128d &,__m128d,__m128d,__m128d,int[],double[])744     static inline bool calculate( int /*k*/, __m128d& /*zx*/, __m128d& /*zy*/, __m128d /*cx*/, __m128d /*cy*/, __m128d /*rmax*/, int /*count*/[], double /*final*/[] )
745     {
746         return false;
747     }
748 };
749 
750 template<int N, Variant VARIANT>
751 class AutoStepsSSE2 : public RepeatStepsSSE2<N, VARIANT, ( N <= 5 ) ? 2 : 1>
752 {
753 };
754 
755 template<int N, Variant VARIANT>
calculateSSE2(double result[],double x[],double y[],double cx[],double cy[],int maxIterations)756 static inline void calculateSSE2( double result[], double x[], double y[], double cx[], double cy[], int maxIterations )
757 {
758     __m128d zx = _mm_load_pd( x );
759     __m128d zy = _mm_load_pd( y );
760 
761     __m128d rcx = _mm_load_pd( cx );
762     __m128d rcy = _mm_load_pd( cy );
763 
764     __m128d rmax = _mm_set1_pd( BailoutRadius );
765 
766     int count[ 2 ] = { 0, 0 };
767     double final[ 2 ] = { 0.0, 0.0 };
768 
769     for ( int k = maxIterations; k > 0; k -= AutoStepsSSE2<N, VARIANT>::Steps ) {
770         if ( AutoStepsSSE2<N, VARIANT>::calculate( k, zx, zy, rcx, rcy, rmax, count, final ) )
771             break;
772     }
773 
774     result[ 0 ] = count[ 0 ] ? calculateResult( maxIterations, count[ 0 ], final[ 0 ], N ) : 0.0;
775     result[ 1 ] = count[ 1 ] ? calculateResult( maxIterations, count[ 1 ], final[ 1 ], N ) : 0.0;
776 }
777 
778 template<int N, Variant VARIANT>
779 class MandelbrotFunctorSSE2 : public FunctorSSE2, public MandelbrotFastParams
780 {
781 public:
MandelbrotFunctorSSE2(const MandelbrotFastParams & params)782     MandelbrotFunctorSSE2( const MandelbrotFastParams& params ) : MandelbrotFastParams( params )
783     {
784     }
785 
operator ()(double result[],double zx[],double zy[],int maxIterations)786     void operator()( double result[], double zx[], double zy[], int maxIterations )
787     {
788         calculateSSE2<N, VARIANT>( result, zx, zy, zx, zy, maxIterations );
789     }
790 };
791 
792 template<int N, Variant VARIANT>
793 class JuliaFunctorSSE2 : public FunctorSSE2, public JuliaFastParams
794 {
795 public:
JuliaFunctorSSE2(const JuliaFastParams & params)796     JuliaFunctorSSE2( const JuliaFastParams& params ) : JuliaFastParams( params )
797     {
798     }
799 
operator ()(double result[],double zx[],double zy[],int maxIterations)800     void operator()( double result[], double zx[], double zy[], int maxIterations )
801     {
802         ALIGNXMM( double cx[ 2 ] ) = { m_cx, m_cx };
803         ALIGNXMM( double cy[ 2 ] ) = { m_cy, m_cy };
804         calculateSSE2<N, VARIANT>( result, zx, zy, cx, cy, maxIterations );
805     }
806 };
807 
createMandelbrotFunctorSSE2(int exponent,Variant variant)808 FunctorSSE2* createMandelbrotFunctorSSE2( int exponent, Variant variant )
809 {
810     return FastFunctorFactory<FunctorSSE2, MandelbrotFunctorSSE2>::create( exponent, variant, MandelbrotFastParams() );
811 }
812 
createJuliaFunctorSSE2(double cx,double cy,int exponent,Variant variant)813 FunctorSSE2* createJuliaFunctorSSE2( double cx, double cy, int exponent, Variant variant )
814 {
815     return FastFunctorFactory<FunctorSSE2, JuliaFunctorSSE2>::create( exponent, variant, JuliaFastParams( cx, cy ) );
816 }
817 
generatePreviewSSE2(const Input & input,const Output & output,FunctorSSE2 * functor,int maxIterations)818 void generatePreviewSSE2( const Input& input, const Output& output, FunctorSSE2* functor, int maxIterations )
819 {
820     ALIGNXMM( double zx[ 2 ] );
821     ALIGNXMM( double zy[ 2 ] );
822 
823     double result[ 2 ];
824 
825     for ( int y = 0; y < output.m_height; y += CellSize ) {
826         double* row = output.m_buffer + output.m_stride * y;
827         for ( int x = 0; x < output.m_width; x += CellSize ) {
828             zx[ 0 ] = input.m_x + input.m_ca * x + input.m_sa * y;
829             zx[ 1 ] = zx[ 0 ] + input.m_ca * CellSize;
830             zy[ 0 ] = input.m_y - input.m_sa * x + input.m_ca * y;
831             zy[ 1 ] = zy[ 0 ] - input.m_sa * CellSize;
832             ( *functor )( result, zx, zy, maxIterations );
833             row[ x ] = result[ 0 ];
834             if ( x + CellSize < output.m_width )
835                 row[ x + CellSize ] = result[ 1 ];
836         }
837     }
838 }
839 
generateDetailsSSE2(const Input & input,const Output & output,FunctorSSE2 * functor,int maxIterations,double threshold)840 void generateDetailsSSE2( const Input& input, const Output& output, FunctorSSE2* functor, int maxIterations, double threshold )
841 {
842     ALIGNXMM( double zx[ 2 ] );
843     ALIGNXMM( double zy[ 2 ] );
844 
845     double result[ 2 ];
846 
847     for ( int y = 0; y < output.m_height; y += CellSize ) {
848         double* row = output.m_buffer + output.m_stride * y;
849         for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
850             double p1 = row[ x ];
851             double p2 = row[ x + CellSize ];
852             if ( checkThreshold( p1, p2, threshold ) ) {
853                 for ( int i = 1; i < CellSize; i += 2 ) {
854                     zx[ 0 ] = input.m_x + input.m_ca * ( x + i ) + input.m_sa * y;
855                     zx[ 1 ] = zx[ 0 ] + input.m_ca;
856                     zy[ 0 ] = input.m_y - input.m_sa * ( x + i ) + input.m_ca * y;
857                     zy[ 1 ] = zy[ 0 ] - input.m_sa;
858                     ( *functor )( result, zx, zy, maxIterations );
859                     row[ x + i ] = result[ 0 ];
860                     if ( i + 1 < CellSize )
861                         row[ x + i + 1 ] = result[ 1 ];
862                 }
863             }
864         }
865     }
866 
867     for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
868         double* row = output.m_buffer + output.m_stride * y;
869         for ( int x = 0; x < output.m_width; x += CellSize ) {
870             double p1 = row[ x ];
871             double p2 = row[ output.m_stride * CellSize + x ];
872             if ( checkThreshold( p1, p2, threshold ) ) {
873                 for ( int i = 1; i < CellSize; i += 2 ) {
874                     zx[ 0 ] = input.m_x + input.m_ca * x + input.m_sa * ( y + i );
875                     zx[ 1 ] = zx[ 0 ] + input.m_sa;
876                     zy[ 0 ] = input.m_y - input.m_sa * x + input.m_ca * ( y + i );
877                     zy[ 1 ] = zy[ 0 ] + input.m_ca;
878                     ( *functor )( result, zx, zy, maxIterations );
879                     row[ output.m_stride * i + x ] = result[ 0 ];
880                     if ( i + 1 < CellSize )
881                         row[ output.m_stride * ( i + 1 ) + x ] = result[ 1 ];
882                 }
883             }
884         }
885     }
886 
887     for ( int y = 0; y < output.m_height - CellSize; y += CellSize ) {
888         double* row = output.m_buffer + output.m_stride * y;
889         for ( int x = 0; x < output.m_width - CellSize; x += CellSize ) {
890             double p1 = row[ x ];
891             double p2 = row[ x + CellSize ];
892             double p3 = row[ output.m_stride * CellSize + x ];
893             double p4 = row[ output.m_stride * CellSize + x + CellSize ];
894             if ( checkThreshold( p1, p2, p3, p4, threshold ) ) {
895                 for ( int i = 1; i < CellSize; i++ ) {
896                     for ( int j = 1; j < CellSize; j += 2 ) {
897                         zx[ 0 ] = input.m_x + input.m_ca * ( x + j ) + input.m_sa * ( y + i );
898                         zx[ 1 ] = zx[ 0 ] + input.m_ca;
899                         zy[ 0 ] = input.m_y - input.m_sa * ( x + j ) + input.m_ca * ( y + i );
900                         zy[ 1 ] = zy[ 0 ] - input.m_sa;
901                         ( *functor )( result, zx, zy, maxIterations );
902                         row[ output.m_stride * i + x + j ] = result[ 0 ];
903                         if ( j + 1 < CellSize )
904                             row[ output.m_stride * i + x + j + 1 ] = result[ 1 ];
905                     }
906                 }
907             }
908         }
909     }
910 }
911 
912 #endif // defined( HAVE_SSE2 )
913 
914 } // namespace GeneratorCore
915