1"""Provides the `CCompilerOpt` class, used for handling the CPU/hardware
2optimization, starting from parsing the command arguments, to managing the
3relation between the CPU baseline and dispatch-able features,
4also generating the required C headers and ending with compiling
5the sources with proper compiler's flags.
6
7`CCompilerOpt` doesn't provide runtime detection for the CPU features,
8instead only focuses on the compiler side, but it creates abstract C headers
9that can be used later for the final runtime dispatching process."""
10
11import sys, io, os, re, textwrap, pprint, inspect, atexit, subprocess
12
class _Config:
    """An abstract class that holds all configurable attributes of
    `CCompilerOpt`; these class attributes can be overridden to change the
    default behavior of `CCompilerOpt` in order to fit other requirements.

    Attributes
    ----------
    conf_nocache : bool
        Set True to disable memory and file cache.
        Default is False.

    conf_noopt : bool
        Set True to force the optimization to be disabled,
        in this case `CCompilerOpt` tends to generate all
        expected headers in order to 'not' break the build.
        Default is False.

    conf_cache_factors : list
        Add extra factors to the primary caching factors. The caching factors
        are utilized to determine if any change happened that requires
        discarding the cache and re-updating it. The primary factors
        are the arguments of `CCompilerOpt` and `CCompiler`'s properties(type, flags, etc).
        Default is a list of two items, containing the time of last
        modification of `ccompiler_opt` and the value of attribute "conf_noopt".

    conf_tmp_path : str
        The path of temporary directory. Default is auto-created
        temporary directory via ``tempfile.mkdtemp()``.

    conf_check_path : str
        The path of testing files. Each added CPU feature must have a
        **C** source file that contains at least one intrinsic or instruction
        related to this feature, so it can be tested against the compiler.
        Default is ``./distutils/checks``.

    conf_target_groups : dict
        Extra tokens that can be reached from dispatch-able sources through
        the special mark ``@targets``. Default is an empty dictionary.

        **Notes**:
            - case-insensitive for tokens and group names
            - sign '#' must stick in the begin of group name and only within ``@targets``

        **Example**:
            .. code-block:: console

                $ "@targets #avx_group other_tokens" > group_inside.c

            >>> CCompilerOpt.conf_target_groups["avx_group"] = \\
            "$werror $maxopt avx2 avx512f avx512_skx"
            >>> cco = CCompilerOpt(cc_instance)
            >>> cco.try_dispatch(["group_inside.c"])

    conf_c_prefix : str
        The prefix of public C definitions. Default is ``"NPY_"``.

    conf_c_prefix_ : str
        The prefix of internal C definitions. Default is ``"NPY__"``.

    conf_cc_flags : dict
        Nested dictionaries defining several compiler flags
        that are linked to some major functions, the main key
        represents the compiler name and sub-keys represent
        flags names. Default already covers all supported
        **C** compilers.

        Sub-keys explained as follows:

        "native": str or None
            used by argument option `native`, to detect the current
            machine support via the compiler.
        "werror": str or None
            utilized to treat warning as errors during testing CPU features
            against the compiler and also for target's policy `$werror`
            via dispatch-able sources.
        "opt": str or None
            utilized for target's policy '$maxopt' and the value should
            contain the maximum acceptable optimization by the compiler.
            e.g. in gcc `'-O3'`

        **Notes**:
            * case-sensitive for compiler names and flags
            * use space to separate multiple flags
            * any flag will be tested against the compiler and it will be
              skipped if it's not applicable.

    conf_min_features : dict
        A dictionary defines the used CPU features for
        argument option `'min'`, the key represents the CPU architecture
        name e.g. `'x86'`. Default values provide the best effort
        on wide range of users platforms.

        **Note**: case-sensitive for architecture names.

    conf_features : dict
        Nested dictionaries used for identifying the CPU features.
        The primary key is represented as a feature name or group name
        that gathers several features. Default values cover all
        supported features but without the major options like "flags",
        these undefined options are handled by method `conf_features_partial()`.
        Default value covers almost all CPU features for *X86*, *IBM/Power64*
        and *ARM 7/8*.

        Sub-keys explained as follows:

        "implies" : str or list, optional,
            List of CPU feature names to be implied by it,
            the feature name must be defined within `conf_features`.
            Default is None.

        "flags": str or list, optional
            List of compiler flags. Default is None.

        "detect": str or list, optional
            List of CPU feature names that are required to be detected
            in runtime. By default, it's the feature name or features
            in "group" if it's specified.

        "implies_detect": bool, optional
            If True, all "detect" of implied features will be combined.
            Default is True. see `feature_detect()`.

        "group": str or list, optional
            Same as "implies" but doesn't require the feature name to be
            defined within `conf_features`.

        "interest": int, required
            a key for sorting CPU features

        "headers": str or list, optional
            intrinsics C header file

        "disable": str, optional
            force disable feature, the string value should contain the
            reason of disabling.

        "autovec": bool or None, optional
            True or False to declare that CPU feature can be auto-vectorized
            by the compiler.
            By default(None), treated as True if the feature contains at
            least one applicable flag. see `feature_can_autovec()`

        "extra_checks": str or list, optional
            Extra test case names for the CPU feature that need to be tested
            against the compiler.

            Each test case must have a C file named ``extra_xxxx.c``, where
            ``xxxx`` is the case name in lower case, under 'conf_check_path'.
            It should contain at least one intrinsic or function related to the test case.

            If the compiler is able to successfully compile the C file then `CCompilerOpt`
            will add a C ``#define`` for it into the main dispatch header, e.g.
            ``#define {conf_c_prefix}_XXXX`` where ``XXXX`` is the case name in upper case.

        **NOTES**:
            * space can be used as separator with options that supports "str or list"
            * case-sensitive for all values and feature name must be in upper-case.
            * if flags aren't applicable, they will be skipped rather than
              disabling the CPU feature
            * the CPU feature will be disabled if the compiler fails to compile
              the test file
    """
    conf_nocache = False
    conf_noopt = False
    conf_cache_factors = None
    conf_tmp_path = None
    conf_check_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "checks"
    )
    # NOTE: intentionally a shared, class-level dict; users register groups
    # by mutating it on the class (see the docstring example).
    conf_target_groups = {}
    conf_c_prefix = 'NPY_'
    conf_c_prefix_ = 'NPY__'
    conf_cc_flags = dict(
        gcc = dict(
            # native should always fail on arm and ppc64,
            # native usually works only with x86
            native = '-march=native',
            opt = '-O3',
            werror = '-Werror'
        ),
        clang = dict(
            native = '-march=native',
            opt = "-O3",
            werror = '-Werror'
        ),
        icc = dict(
            native = '-xHost',
            opt = '-O3',
            werror = '-Werror'
        ),
        iccw = dict(
            native = '/QxHost',
            opt = '/O3',
            werror = '/Werror'
        ),
        msvc = dict(
            native = None,
            opt = '/O2',
            werror = '/WX'
        )
    )
    conf_min_features = dict(
        x86 = "SSE SSE2",
        x64 = "SSE SSE2 SSE3",
        ppc64 = '', # play it safe
        ppc64le = "VSX VSX2",
        armhf = '', # play it safe
        aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
    )
    conf_features = dict(
        # X86
        SSE = dict(
            interest=1, headers="xmmintrin.h",
            # enabling SSE without SSE2 is useless also
            # it's non-optional for x86_64
            implies="SSE2"
        ),
        SSE2   = dict(interest=2, implies="SSE", headers="emmintrin.h"),
        SSE3   = dict(interest=3, implies="SSE2", headers="pmmintrin.h"),
        SSSE3  = dict(interest=4, implies="SSE3", headers="tmmintrin.h"),
        SSE41  = dict(interest=5, implies="SSSE3", headers="smmintrin.h"),
        POPCNT = dict(interest=6, implies="SSE41", headers="popcntintrin.h"),
        SSE42  = dict(interest=7, implies="POPCNT"),
        AVX    = dict(
            interest=8, implies="SSE42", headers="immintrin.h",
            implies_detect=False
        ),
        XOP    = dict(interest=9, implies="AVX", headers="x86intrin.h"),
        FMA4   = dict(interest=10, implies="AVX", headers="x86intrin.h"),
        F16C   = dict(interest=11, implies="AVX"),
        FMA3   = dict(interest=12, implies="F16C"),
        AVX2   = dict(interest=13, implies="F16C"),
        AVX512F = dict(
            interest=20, implies="FMA3 AVX2", implies_detect=False,
            extra_checks="AVX512F_REDUCE"
        ),
        AVX512CD = dict(interest=21, implies="AVX512F"),
        AVX512_KNL = dict(
            interest=40, implies="AVX512CD", group="AVX512ER AVX512PF",
            detect="AVX512_KNL", implies_detect=False
        ),
        AVX512_KNM = dict(
            interest=41, implies="AVX512_KNL",
            group="AVX5124FMAPS AVX5124VNNIW AVX512VPOPCNTDQ",
            detect="AVX512_KNM", implies_detect=False
        ),
        AVX512_SKX = dict(
            interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ",
            detect="AVX512_SKX", implies_detect=False,
            extra_checks="AVX512BW_MASK AVX512DQ_MASK"
        ),
        AVX512_CLX = dict(
            interest=43, implies="AVX512_SKX", group="AVX512VNNI",
            detect="AVX512_CLX"
        ),
        AVX512_CNL = dict(
            interest=44, implies="AVX512_SKX", group="AVX512IFMA AVX512VBMI",
            detect="AVX512_CNL", implies_detect=False
        ),
        AVX512_ICL = dict(
            interest=45, implies="AVX512_CLX AVX512_CNL",
            group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ",
            detect="AVX512_ICL", implies_detect=False
        ),
        # IBM/Power
        ## Power7/ISA 2.06
        VSX = dict(interest=1, headers="altivec.h"),
        ## Power8/ISA 2.07
        VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
        ## Power9/ISA 3.00
        VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
        # ARM
        NEON  = dict(interest=1, headers="arm_neon.h"),
        NEON_FP16 = dict(interest=2, implies="NEON"),
        ## FMA
        NEON_VFPV4 = dict(interest=3, implies="NEON_FP16"),
        ## Advanced SIMD
        ASIMD = dict(interest=4, implies="NEON_FP16 NEON_VFPV4", implies_detect=False),
        ## ARMv8.2 half-precision & vector arithm
        ASIMDHP = dict(interest=5, implies="ASIMD"),
        ## ARMv8.2 dot product
        ASIMDDP = dict(interest=6, implies="ASIMD"),
        ## ARMv8.2 Single & half-precision Multiply
        ASIMDFHM = dict(interest=7, implies="ASIMDHP"),
    )
    def conf_features_partial(self):
        """Return a dictionary of supported CPU features by the platform,
        and accumulate the rest of undefined options in `conf_features`,
        the returned dict has same rules and notes in
        class attribute `conf_features`, also it overrides
        any options that have been set in 'conf_features'.

        The selection is driven by the ``cc_*`` attributes of the instance
        (``cc_noopt``, ``cc_on_*`` and ``cc_is_*``), which this class does
        not define itself; an empty dict is returned when optimization is
        disabled or when no platform/compiler combination matches.
        """
        if self.cc_noopt:
            # optimization is disabled
            return {}

        on_x86 = self.cc_on_x86 or self.cc_on_x64
        is_unix = self.cc_is_gcc or self.cc_is_clang

        if on_x86 and is_unix: return dict(
            SSE    = dict(flags="-msse"),
            SSE2   = dict(flags="-msse2"),
            SSE3   = dict(flags="-msse3"),
            SSSE3  = dict(flags="-mssse3"),
            SSE41  = dict(flags="-msse4.1"),
            POPCNT = dict(flags="-mpopcnt"),
            SSE42  = dict(flags="-msse4.2"),
            AVX    = dict(flags="-mavx"),
            F16C   = dict(flags="-mf16c"),
            XOP    = dict(flags="-mxop"),
            FMA4   = dict(flags="-mfma4"),
            FMA3   = dict(flags="-mfma"),
            AVX2   = dict(flags="-mavx2"),
            AVX512F = dict(flags="-mavx512f"),
            AVX512CD = dict(flags="-mavx512cd"),
            AVX512_KNL = dict(flags="-mavx512er -mavx512pf"),
            AVX512_KNM = dict(
                flags="-mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq"
            ),
            AVX512_SKX = dict(flags="-mavx512vl -mavx512bw -mavx512dq"),
            AVX512_CLX = dict(flags="-mavx512vnni"),
            AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"),
            AVX512_ICL = dict(
                flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq"
            )
        )
        if on_x86 and self.cc_is_icc: return dict(
            SSE    = dict(flags="-msse"),
            SSE2   = dict(flags="-msse2"),
            SSE3   = dict(flags="-msse3"),
            SSSE3  = dict(flags="-mssse3"),
            SSE41  = dict(flags="-msse4.1"),
            POPCNT = {},
            SSE42  = dict(flags="-msse4.2"),
            AVX    = dict(flags="-mavx"),
            F16C   = {},
            XOP    = dict(disable="Intel Compiler doesn't support it"),
            FMA4   = dict(disable="Intel Compiler doesn't support it"),
            # Intel Compiler doesn't support AVX2 or FMA3 independently
            FMA3 = dict(
                implies="F16C AVX2", flags="-march=core-avx2"
            ),
            AVX2 = dict(implies="FMA3", flags="-march=core-avx2"),
            # Intel Compiler doesn't support AVX512F or AVX512CD independently
            AVX512F = dict(
                implies="AVX2 AVX512CD", flags="-march=common-avx512"
            ),
            AVX512CD = dict(
                implies="AVX2 AVX512F", flags="-march=common-avx512"
            ),
            AVX512_KNL = dict(flags="-xKNL"),
            AVX512_KNM = dict(flags="-xKNM"),
            AVX512_SKX = dict(flags="-xSKYLAKE-AVX512"),
            AVX512_CLX = dict(flags="-xCASCADELAKE"),
            AVX512_CNL = dict(flags="-xCANNONLAKE"),
            AVX512_ICL = dict(flags="-xICELAKE-CLIENT"),
        )
        if on_x86 and self.cc_is_iccw: return dict(
            SSE    = dict(flags="/arch:SSE"),
            SSE2   = dict(flags="/arch:SSE2"),
            SSE3   = dict(flags="/arch:SSE3"),
            SSSE3  = dict(flags="/arch:SSSE3"),
            SSE41  = dict(flags="/arch:SSE4.1"),
            POPCNT = {},
            SSE42  = dict(flags="/arch:SSE4.2"),
            AVX    = dict(flags="/arch:AVX"),
            F16C   = {},
            XOP    = dict(disable="Intel Compiler doesn't support it"),
            FMA4   = dict(disable="Intel Compiler doesn't support it"),
            # Intel Compiler doesn't support FMA3 or AVX2 independently
            FMA3 = dict(
                implies="F16C AVX2", flags="/arch:CORE-AVX2"
            ),
            AVX2 = dict(
                implies="FMA3", flags="/arch:CORE-AVX2"
            ),
            # Intel Compiler doesn't support AVX512F or AVX512CD independently
            AVX512F = dict(
                implies="AVX2 AVX512CD", flags="/Qx:COMMON-AVX512"
            ),
            AVX512CD = dict(
                implies="AVX2 AVX512F", flags="/Qx:COMMON-AVX512"
            ),
            AVX512_KNL = dict(flags="/Qx:KNL"),
            AVX512_KNM = dict(flags="/Qx:KNM"),
            AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"),
            AVX512_CLX = dict(flags="/Qx:CASCADELAKE"),
            AVX512_CNL = dict(flags="/Qx:CANNONLAKE"),
            AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT")
        )
        if on_x86 and self.cc_is_msvc: return dict(
            SSE    = dict(flags="/arch:SSE"),
            SSE2   = dict(flags="/arch:SSE2"),
            SSE3   = {},
            SSSE3  = {},
            SSE41  = {},
            POPCNT = dict(headers="nmmintrin.h"),
            SSE42  = {},
            AVX    = dict(flags="/arch:AVX"),
            F16C   = {},
            XOP    = dict(headers="ammintrin.h"),
            FMA4   = dict(headers="ammintrin.h"),
            # MSVC doesn't support FMA3 or AVX2 independently
            FMA3 = dict(
                implies="F16C AVX2", flags="/arch:AVX2"
            ),
            AVX2 = dict(
                implies="F16C FMA3", flags="/arch:AVX2"
            ),
            # MSVC doesn't support AVX512F or AVX512CD independently,
            # always generate instructions belong to (VL/VW/DQ)
            AVX512F = dict(
                implies="AVX2 AVX512CD AVX512_SKX", flags="/arch:AVX512"
            ),
            AVX512CD = dict(
                implies="AVX512F AVX512_SKX", flags="/arch:AVX512"
            ),
            AVX512_KNL = dict(
                disable="MSVC compiler doesn't support it"
            ),
            AVX512_KNM = dict(
                disable="MSVC compiler doesn't support it"
            ),
            AVX512_SKX = dict(flags="/arch:AVX512"),
            AVX512_CLX = {},
            AVX512_CNL = {},
            AVX512_ICL = {}
        )

        on_power = self.cc_on_ppc64le or self.cc_on_ppc64
        if on_power:
            partial = dict(
                VSX = dict(
                    implies=("VSX2" if self.cc_on_ppc64le else ""),
                    flags="-mvsx"
                ),
                VSX2 = dict(
                    flags="-mcpu=power8", implies_detect=False
                ),
                VSX3 = dict(
                    flags="-mcpu=power9 -mtune=power9", implies_detect=False
                )
            )
            if self.cc_is_clang:
                partial["VSX"]["flags"]  = "-maltivec -mvsx"
                partial["VSX2"]["flags"] = "-mpower8-vector"
                partial["VSX3"]["flags"] = "-mpower9-vector"

            return partial

        if self.cc_on_aarch64 and is_unix: return dict(
            NEON = dict(
                implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
            ),
            NEON_FP16 = dict(
                implies="NEON NEON_VFPV4 ASIMD", autovec=True
            ),
            NEON_VFPV4 = dict(
                implies="NEON NEON_FP16 ASIMD", autovec=True
            ),
            ASIMD = dict(
                implies="NEON NEON_FP16 NEON_VFPV4", autovec=True
            ),
            ASIMDHP = dict(
                flags="-march=armv8.2-a+fp16"
            ),
            ASIMDDP = dict(
                flags="-march=armv8.2-a+dotprod"
            ),
            ASIMDFHM = dict(
                flags="-march=armv8.2-a+fp16fml"
            ),
        )
        if self.cc_on_armhf and is_unix: return dict(
            NEON = dict(
                flags="-mfpu=neon"
            ),
            NEON_FP16 = dict(
                flags="-mfpu=neon-fp16 -mfp16-format=ieee"
            ),
            NEON_VFPV4 = dict(
                flags="-mfpu=neon-vfpv4",
            ),
            ASIMD = dict(
                flags="-mfpu=neon-fp-armv8 -march=armv8-a+simd",
            ),
            ASIMDHP = dict(
                flags="-march=armv8.2-a+fp16"
            ),
            ASIMDDP = dict(
                flags="-march=armv8.2-a+dotprod",
            ),
            ASIMDFHM = dict(
                flags="-march=armv8.2-a+fp16fml"
            )
        )
        # TODO: ARM MSVC
        return {}

    def __init__(self):
        """Fill in the defaults that can only be computed at runtime.

        - ``conf_tmp_path``: when left as None, a temporary directory is
          created and its removal is scheduled at interpreter exit.
        - ``conf_cache_factors``: when left as None, defaults to the
          modification time of this file plus the value of ``conf_noopt``.
        """
        if self.conf_tmp_path is None:
            import shutil
            import tempfile
            tmp = tempfile.mkdtemp()
            def rm_temp():
                # best-effort cleanup, a failure here must not break exit
                try:
                    shutil.rmtree(tmp)
                except OSError:
                    pass
            atexit.register(rm_temp)
            self.conf_tmp_path = tmp

        if self.conf_cache_factors is None:
            # `conf_noopt` (not `conf_nocache`) is the documented factor:
            # `conf_nocache` is always False whenever the cache is consulted,
            # so it could never invalidate anything.
            self.conf_cache_factors = [
                os.path.getmtime(__file__),
                self.conf_noopt
            ]
529
class _Distutils:
    """A helper class that provides a collection of fundamental methods
    implemented on top of Python and NumPy Distutils.

    The idea behind this class is to gather all methods that may need
    to be overridden in case 'CCompilerOpt' is reused in an environment
    different from what NumPy has.

    Parameters
    ----------
    ccompiler : `CCompiler`
        The generate instance that returned from `distutils.ccompiler.new_compiler()`.
    """
    def __init__(self, ccompiler):
        self._ccompiler = ccompiler

    def dist_compile(self, sources, flags, **kwargs):
        """Wrap CCompiler.compile()

        `flags` are appended after any ``extra_postargs`` found in `kwargs`;
        the remaining keyword arguments are forwarded untouched.
        """
        assert(isinstance(sources, list))
        assert(isinstance(flags, list))
        flags = kwargs.pop("extra_postargs", []) + flags
        return self._ccompiler.compile(
            sources, extra_postargs=flags, **kwargs
        )

    def dist_test(self, source, flags):
        """Return True if 'CCompiler.compile()' able to compile
        a source file with certain flags.

        While testing, the compiler's ``spawn`` (when it has one) is
        temporarily replaced by a wrapper that captures the output; the
        original is always put back, even if an unexpected error escapes.
        """
        assert(isinstance(source, str))
        from distutils.errors import CompileError
        cc = self._ccompiler
        bk_spawn = getattr(cc, 'spawn', None)
        if bk_spawn:
            cc_type = getattr(cc, "compiler_type", "")
            if cc_type in ("msvc",):
                setattr(cc, 'spawn', self._dist_test_spawn_paths)
            else:
                setattr(cc, 'spawn', self._dist_test_spawn)
        test = False
        try:
            self.dist_compile(
                [source], flags, output_dir=self.conf_tmp_path
            )
            test = True
        except CompileError as e:
            self.dist_log(str(e), stderr=True)
        finally:
            # never leave the compiler instance patched
            if bk_spawn:
                setattr(cc, 'spawn', bk_spawn)
        return test

    def dist_info(self):
        """
        Return a tuple containing info about (platform, compiler, extra_args),
        required by the abstract class '_CCompiler' for discovering the
        platform environment. This is also used as a cache factor in order
        to detect any changes happening from outside.

        The result is computed once and memoized on the instance.
        """
        if hasattr(self, "_dist_info"):
            return self._dist_info

        cc_type = getattr(self._ccompiler, "compiler_type", '')
        if cc_type in ("intelem", "intelemw"):
            platform = "x86_64"
        elif cc_type in ("intel", "intelw", "intele"):
            platform = "x86"
        else:
            from distutils.util import get_platform
            platform = get_platform()

        cc_info = getattr(self._ccompiler, "compiler", getattr(self._ccompiler, "compiler_so", ''))
        # `str` is iterable too, so it must be excluded explicitly; otherwise
        # a plain string would be indexed/joined character by character.
        is_argv = not isinstance(cc_info, str) and hasattr(cc_info, "__iter__")
        if is_argv:
            cc_info = list(cc_info)

        if not cc_type or cc_type == "unix":
            if is_argv:
                compiler = cc_info[0] if cc_info else ''
            else:
                compiler = str(cc_info)
        else:
            compiler = cc_type

        if is_argv and len(cc_info) > 1:
            extra_args = ' '.join(cc_info[1:])
        else:
            # keep the two variables separated so that their flags don't fuse
            extra_args = ' '.join(filter(None, (
                os.environ.get("CFLAGS", ""),
                os.environ.get("CPPFLAGS", "")
            )))

        self._dist_info = (platform, compiler, extra_args)
        return self._dist_info

    @staticmethod
    def dist_error(*args):
        """Raise a compiler error"""
        from distutils.errors import CompileError
        raise CompileError(_Distutils._dist_str(*args))

    @staticmethod
    def dist_fatal(*args):
        """Raise a distutils error"""
        from distutils.errors import DistutilsError
        raise DistutilsError(_Distutils._dist_str(*args))

    @staticmethod
    def dist_log(*args, stderr=False):
        """Print a console message"""
        from numpy.distutils import log
        out = _Distutils._dist_str(*args)
        if stderr:
            log.warn(out)
        else:
            log.info(out)

    @staticmethod
    def dist_load_module(name, path):
        """Load a module from file, required by the abstract class '_Cache'.

        Return None (after logging the error) if the module can't be loaded.
        """
        from numpy.compat import npy_load_module
        try:
            return npy_load_module(name, path)
        except Exception as e:
            _Distutils.dist_log(e, stderr=True)
        return None

    @staticmethod
    def _dist_str(*args):
        """Return a string to print by log and errors.

        The message is prefixed with the name and line number of the frame
        two levels above this one, i.e. the caller of the public
        ``dist_*`` helper that invoked this method.
        """
        def to_str(arg):
            if not isinstance(arg, str) and hasattr(arg, '__iter__'):
                return '(' + ' '.join(to_str(a) for a in arg) + ')'
            return str(arg)

        stack = inspect.stack()[2]
        start = "CCompilerOpt.%s[%d] : " % (stack.function, stack.lineno)
        out = ' '.join(to_str(a) for a in args)
        return start + out

    def _dist_test_spawn_paths(self, cmd, display=None):
        """
        Fix msvc SDK ENV path same as distutils do
        without it we get c1: fatal error C1356: unable to find mspdbcore.dll
        """
        if not hasattr(self._ccompiler, "_paths"):
            self._dist_test_spawn(cmd)
            return
        old_path = os.getenv("path")
        try:
            os.environ["path"] = self._ccompiler._paths
            self._dist_test_spawn(cmd)
        finally:
            if old_path is None:
                # the variable didn't exist before, assigning None would raise
                os.environ.pop("path", None)
            else:
                os.environ["path"] = old_path

    _dist_warn_regex = re.compile(
        # intel and msvc compilers don't raise
        # fatal errors when flags are wrong or unsupported
        ".*("
        "warning D9002|"  # msvc, it should be work with any language.
        "invalid argument for option" # intel
        ").*"
    )
    @staticmethod
    def _dist_test_spawn(cmd, display=None):
        """Run `cmd` and raise a compile error if it fails, can't be started,
        or if its output contains a known "unsupported flag" warning.
        """
        try:
            o = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                        universal_newlines=True)
        except subprocess.CalledProcessError as exc:
            o = exc.output
            s = exc.returncode
        except OSError as exc:
            # the command couldn't be started, report why
            o = exc
            s = 127
        else:
            # `search` rather than `match`: '.' doesn't cross newlines, hence
            # `match` would only inspect the first line of the output.
            if o and _Distutils._dist_warn_regex.search(o):
                _Distutils.dist_error(
                    "Flags in command", cmd ,"aren't supported by the compiler"
                    ", output -> \n%s" % o
                )
            return None
        _Distutils.dist_error(
            "Command", cmd, "failed with exit status %d output -> \n%s" % (
            s, o
        ))
715
# Module-level registry used for the in-memory cache: `_Cache.__init__` looks
# up an earlier instance by its cache hash here and then stores itself under
# that same hash.
_share_cache = {}
717class _Cache:
718    """An abstract class handles caching functionality, provides two
719    levels of caching, in-memory by share instances attributes among
720    each other and by store attributes into files.
721
722    **Note**:
723        any attributes that start with ``_`` or ``conf_`` will be ignored.
724
725    Parameters
726    ----------
727    cache_path: str or None
728        The path of cache file, if None then cache in file will disabled.
729
730    *factors:
731        The caching factors that need to utilize next to `conf_cache_factors`.
732
733    Attributes
734    ----------
735    cache_private: set
736        Hold the attributes that need be skipped from "in-memory cache".
737
738    cache_infile: bool
739        Utilized during initializing this class, to determine if the cache was able
740        to loaded from the specified cache path in 'cache_path'.
741    """
742
743    # skip attributes from cache
744    _cache_ignore = re.compile("^(_|conf_)")
745
746    def __init__(self, cache_path=None, *factors):
747        self.cache_me = {}
748        self.cache_private = set()
749        self.cache_infile = False
750
751        if self.conf_nocache:
752            self.dist_log("cache is disabled by `Config`")
753            return
754
755        chash = self.cache_hash(*factors, *self.conf_cache_factors)
756        if cache_path:
757            if os.path.exists(cache_path):
758                self.dist_log("load cache from file ->", cache_path)
759                cache_mod = self.dist_load_module("cache", cache_path)
760                if not cache_mod:
761                    self.dist_log(
762                        "unable to load the cache file as a module",
763                        stderr=True
764                    )
765                elif not hasattr(cache_mod, "hash") or \
766                     not hasattr(cache_mod, "data"):
767                    self.dist_log("invalid cache file", stderr=True)
768                elif chash == cache_mod.hash:
769                    self.dist_log("hit the file cache")
770                    for attr, val in cache_mod.data.items():
771                        setattr(self, attr, val)
772                    self.cache_infile = True
773                else:
774                    self.dist_log("miss the file cache")
775
776            atexit.register(self._cache_write, cache_path, chash)
777
778        if not self.cache_infile:
779            other_cache = _share_cache.get(chash)
780            if other_cache:
781                self.dist_log("hit the memory cache")
782                for attr, val in other_cache.__dict__.items():
783                    if attr in other_cache.cache_private or \
784                               re.match(self._cache_ignore, attr):
785                        continue
786                    setattr(self, attr, val)
787
788        _share_cache[chash] = self
789
    def __del__(self):
        """Placeholder finalizer; it currently does nothing."""
        # TODO: remove the cache from share on del
        pass
793
794    def _cache_write(self, cache_path, cache_hash):
795        # TODO: don't write if the cache doesn't change
796        self.dist_log("write cache to path ->", cache_path)
797        for attr in list(self.__dict__.keys()):
798            if re.match(self._cache_ignore, attr):
799                self.__dict__.pop(attr)
800
801        d = os.path.dirname(cache_path)
802        if not os.path.exists(d):
803            os.makedirs(d)
804
805        repr_dict = pprint.pformat(self.__dict__, compact=True)
806        with open(cache_path, "w") as f:
807            f.write(textwrap.dedent("""\
808            # AUTOGENERATED DON'T EDIT
809            # Please make changes to the code generator \
810            (distutils/ccompiler_opt.py)
811            hash = {}
812            data = \\
813            """).format(cache_hash))
814            f.write(repr_dict)
815
816    def cache_hash(self, *factors):
817        # is there a built-in non-crypto hash?
818        # sdbm
819        chash = 0
820        for f in factors:
821            for char in str(f):
822                chash  = ord(char) + (chash << 6) + (chash << 16) - chash
823                chash &= 0xFFFFFFFF
824        return chash
825
826    @staticmethod
827    def me(cb):
828        """
829        A static method that can be treated as a decorator to
830        dynamically cache certain methods.
831        """
832        def cache_wrap_me(self, *args, **kwargs):
833            # good for normal args
834            cache_key = str((
835                cb.__name__, *args, *kwargs.keys(), *kwargs.values()
836            ))
837            if cache_key in self.cache_me:
838                return self.cache_me[cache_key]
839            ccb = cb(self, *args, **kwargs)
840            self.cache_me[cache_key] = ccb
841            return ccb
842        return cache_wrap_me
843
844class _CCompiler(object):
845    """A helper class for `CCompilerOpt` containing all utilities that
846    related to the fundamental compiler's functions.
847
848    Attributes
849    ----------
850    cc_on_x86 : bool
851        True when the target architecture is 32-bit x86
852    cc_on_x64 : bool
853        True when the target architecture is 64-bit x86
854    cc_on_ppc64 : bool
855        True when the target architecture is 64-bit big-endian PowerPC
856    cc_on_armhf : bool
857        True when the target architecture is 32-bit ARMv7+
858    cc_on_aarch64 : bool
859        True when the target architecture is 64-bit Armv8-a+
860    cc_on_noarch : bool
861        True when the target architecture is unknown or not supported
862    cc_is_gcc : bool
863        True if the compiler is GNU or
864        if the compiler is unknown
865    cc_is_clang : bool
866        True if the compiler is Clang
867    cc_is_icc : bool
868        True if the compiler is Intel compiler (unix like)
869    cc_is_iccw : bool
870        True if the compiler is Intel compiler (msvc like)
    cc_is_nocc : bool
        True if the compiler isn't supported directly.
        Note: this causes a fallback to gcc
874    cc_has_debug : bool
875        True if the compiler has debug flags
876    cc_has_native : bool
877        True if the compiler has native flags
878    cc_noopt : bool
879        True if the compiler has definition 'DISABLE_OPT*',
880        or 'cc_on_noarch' is True
881    cc_march : str
882        The target architecture name, or "unknown" if
883        the architecture isn't supported
884    cc_name : str
885        The compiler name, or "unknown" if the compiler isn't supported
886    cc_flags : dict
887        Dictionary containing the initialized flags of `_Config.conf_cc_flags`
888    """
889    def __init__(self):
890        if hasattr(self, "cc_is_cached"):
891            return
892        #      attr                regex
893        detect_arch = (
894            ("cc_on_x64",      ".*(x|x86_|amd)64.*"),
895            ("cc_on_x86",      ".*(win32|x86|i386|i686).*"),
896            ("cc_on_ppc64le",  ".*(powerpc|ppc)64(el|le).*"),
897            ("cc_on_ppc64",    ".*(powerpc|ppc)64.*"),
898            ("cc_on_aarch64",  ".*(aarch64|arm64).*"),
899            ("cc_on_armhf",    ".*arm.*"),
900            # undefined platform
901            ("cc_on_noarch",    ""),
902        )
903        detect_compiler = (
904            ("cc_is_gcc",     r".*(gcc|gnu\-g).*"),
905            ("cc_is_clang",    ".*clang.*"),
906            ("cc_is_iccw",     ".*(intelw|intelemw|iccw).*"), # intel msvc like
907            ("cc_is_icc",      ".*(intel|icc).*"), # intel unix like
908            ("cc_is_msvc",     ".*msvc.*"),
909            # undefined compiler will be treat it as gcc
910            ("cc_is_nocc",     ""),
911        )
912        detect_args = (
913           ("cc_has_debug",  ".*(O0|Od|ggdb|coverage|debug:full).*"),
914           ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"),
915           # in case if the class run with -DNPY_DISABLE_OPTIMIZATION
916           ("cc_noopt", ".*DISABLE_OPT.*"),
917        )
918
919        dist_info = self.dist_info()
920        platform, compiler_info, extra_args = dist_info
921        # set False to all attrs
922        for section in (detect_arch, detect_compiler, detect_args):
923            for attr, rgex in section:
924                setattr(self, attr, False)
925
926        for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)):
927            for attr, rgex in detect:
928                if rgex and not re.match(rgex, searchin, re.IGNORECASE):
929                    continue
930                setattr(self, attr, True)
931                break
932
933        for attr, rgex in detect_args:
934            if rgex and not re.match(rgex, extra_args, re.IGNORECASE):
935                continue
936            setattr(self, attr, True)
937
938        if self.cc_on_noarch:
939            self.dist_log(
940                "unable to detect CPU architecture which lead to disable the optimization. "
941                f"check dist_info:<<\n{dist_info}\n>>",
942                stderr=True
943            )
944            self.cc_noopt = True
945
946        if self.conf_noopt:
947            self.dist_log("Optimization is disabled by the Config", stderr=True)
948            self.cc_noopt = True
949
950        if self.cc_is_nocc:
951            """
952            mingw can be treated as a gcc, and also xlc even if it based on clang,
953            but still has the same gcc optimization flags.
954            """
955            self.dist_log(
956                "unable to detect compiler type which leads to treating it as GCC. "
957                "this is a normal behavior if you're using gcc-like compiler such as MinGW or IBM/XLC."
958                f"check dist_info:<<\n{dist_info}\n>>",
959                stderr=True
960            )
961            self.cc_is_gcc = True
962
963        self.cc_march = "unknown"
964        for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"):
965            if getattr(self, "cc_on_" + arch):
966                self.cc_march = arch
967                break
968
969        self.cc_name = "unknown"
970        for name in ("gcc", "clang", "iccw", "icc", "msvc"):
971            if getattr(self, "cc_is_" + name):
972                self.cc_name = name
973                break
974
975        self.cc_flags = {}
976        compiler_flags = self.conf_cc_flags.get(self.cc_name)
977        if compiler_flags is None:
978            self.dist_fatal(
979                "undefined flag for compiler '%s', "
980                "leave an empty dict instead" % self.cc_name
981            )
982        for name, flags in compiler_flags.items():
983            self.cc_flags[name] = nflags = []
984            if flags:
985                assert(isinstance(flags, str))
986                flags = flags.split()
987                for f in flags:
988                    if self.cc_test_flags([f]):
989                        nflags.append(f)
990
991        self.cc_is_cached = True
992
993    @_Cache.me
994    def cc_test_flags(self, flags):
995        """
996        Returns True if the compiler supports 'flags'.
997        """
998        assert(isinstance(flags, list))
999        self.dist_log("testing flags", flags)
1000        test_path = os.path.join(self.conf_check_path, "test_flags.c")
1001        test = self.dist_test(test_path, flags)
1002        if not test:
1003            self.dist_log("testing failed", stderr=True)
1004        return test
1005
1006    def cc_normalize_flags(self, flags):
1007        """
1008        Remove the conflicts that caused due gathering implied features flags.
1009
1010        Parameters
1011        ----------
1012        'flags' list, compiler flags
1013            flags should be sorted from the lowest to the highest interest.
1014
1015        Returns
1016        -------
1017        list, filtered from any conflicts.
1018
1019        Examples
1020        --------
1021        >>> self.cc_normalize_flags(['-march=armv8.2-a+fp16', '-march=armv8.2-a+dotprod'])
1022        ['armv8.2-a+fp16+dotprod']
1023
1024        >>> self.cc_normalize_flags(
1025            ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-mavx', '-march=core-avx2']
1026        )
1027        ['-march=core-avx2']
1028        """
1029        assert(isinstance(flags, list))
1030        if self.cc_is_gcc or self.cc_is_clang or self.cc_is_icc:
1031            return self._cc_normalize_unix(flags)
1032
1033        if self.cc_is_msvc or self.cc_is_iccw:
1034            return self._cc_normalize_win(flags)
1035        return flags
1036
    # Patterns used by `_cc_normalize_unix()`.

    # Matches a flag that selects the target: one starting with "-mcpu=",
    # "-march=", or "-x" followed by an uppercase letter, a digit or "-".
    _cc_normalize_unix_mrgx = re.compile(
        # 1- to check the highest of
        r"^(-mcpu=|-march=|-x[A-Z0-9\-])"
    )
    # Used with `filter(...search...)`: it only matches (with an empty match
    # at the start) flags that should be KEPT among the flags preceding the
    # selected target flag; target selectors and plain "-m..." options are
    # rejected by the negative lookaheads.
    _cc_normalize_unix_frgx = re.compile(
        # 2- to remove any flags starts with
        # -march, -mcpu, -x(INTEL) and '-m' without '='
        r"^(?!(-mcpu=|-march=|-x[A-Z0-9\-]))(?!-m[a-z0-9\-\.]*.$)"
    )
    # Flags starting with "-mfpu" or "-mtune": only the last occurrence of
    # each prefix survives the normalization.
    _cc_normalize_unix_krgx = re.compile(
        # 3- keep only the highest of
        r"^(-mfpu|-mtune)"
    )
    # Single digits and dots; the matches are joined and parsed as the
    # version number of a flag.
    _cc_normalize_arch_ver = re.compile(
        r"[0-9.]"
    )
1053    def _cc_normalize_unix(self, flags):
1054        def ver_flags(f):
1055            #        arch ver  subflag
1056            # -march=armv8.2-a+fp16fml
1057            tokens = f.split('+')
1058            ver = float('0' + ''.join(
1059                re.findall(self._cc_normalize_arch_ver, tokens[0])
1060            ))
1061            return ver, tokens[0], tokens[1:]
1062
1063        if len(flags) <= 1:
1064            return flags
1065        # get the highest matched flag
1066        for i, cur_flag in enumerate(reversed(flags)):
1067            if not re.match(self._cc_normalize_unix_mrgx, cur_flag):
1068                continue
1069            lower_flags = flags[:-(i+1)]
1070            upper_flags = flags[-i:]
1071            filterd = list(filter(
1072                self._cc_normalize_unix_frgx.search, lower_flags
1073            ))
1074            # gather subflags
1075            ver, arch, subflags = ver_flags(cur_flag)
1076            if ver > 0 and len(subflags) > 0:
1077                for xflag in lower_flags:
1078                    xver, _, xsubflags = ver_flags(xflag)
1079                    if ver == xver:
1080                        subflags = xsubflags + subflags
1081                cur_flag = arch + '+' + '+'.join(subflags)
1082
1083            flags = filterd + [cur_flag]
1084            if i > 0:
1085                flags += upper_flags
1086            break
1087
1088        # to remove overridable flags
1089        final_flags = []
1090        matched = set()
1091        for f in reversed(flags):
1092            match = re.match(self._cc_normalize_unix_krgx, f)
1093            if not match:
1094                pass
1095            elif match[0] in matched:
1096                continue
1097            else:
1098                matched.add(match[0])
1099            final_flags.insert(0, f)
1100        return final_flags
1101
    # Patterns used by `_cc_normalize_win()`.

    # Used with `filter(...search...)`: it only matches flags that do NOT
    # start with "/arch:" or "/Qx:", i.e. the flags to keep.
    _cc_normalize_win_frgx = re.compile(
        r"^(?!(/arch\:|/Qx\:))"
    )
    # Matches a flag starting with "/arch" or "/Qx:".
    _cc_normalize_win_mrgx = re.compile(
        r"^(/arch|/Qx:)"
    )
1108    def _cc_normalize_win(self, flags):
1109        for i, f in enumerate(reversed(flags)):
1110            if not re.match(self._cc_normalize_win_mrgx, f):
1111                continue
1112            i += 1
1113            return list(filter(
1114                self._cc_normalize_win_frgx.search, flags[:-i]
1115            )) + flags[-i:]
1116        return flags
1117
1118class _Feature:
1119    """A helper class for `CCompilerOpt` that managing CPU features.
1120
1121    Attributes
1122    ----------
1123    feature_supported : dict
1124        Dictionary containing all CPU features that supported
1125        by the platform, according to the specified values in attribute
1126        `_Config.conf_features` and `_Config.conf_features_partial()`
1127
1128    feature_min : set
1129        The minimum support of CPU features, according to
1130        the specified values in attribute `_Config.conf_min_features`.
1131    """
1132    def __init__(self):
1133        if hasattr(self, "feature_is_cached"):
1134            return
1135        self.feature_supported = pfeatures = self.conf_features_partial()
1136        for feature_name in list(pfeatures.keys()):
1137            feature  = pfeatures[feature_name]
1138            cfeature = self.conf_features[feature_name]
1139            feature.update({
1140                k:v for k,v in cfeature.items() if k not in feature
1141            })
1142            disabled = feature.get("disable")
1143            if disabled is not None:
1144                pfeatures.pop(feature_name)
1145                self.dist_log(
1146                    "feature '%s' is disabled," % feature_name,
1147                    disabled, stderr=True
1148                )
1149                continue
1150            # list is used internally for these options
1151            for option in (
1152                "implies", "group", "detect", "headers", "flags", "extra_checks"
1153            ) :
1154                oval = feature.get(option)
1155                if isinstance(oval, str):
1156                    feature[option] = oval.split()
1157
1158        self.feature_min = set()
1159        min_f = self.conf_min_features.get(self.cc_march, "")
1160        for F in min_f.upper().split():
1161            if F in self.feature_supported:
1162                self.feature_min.add(F)
1163
1164        self.feature_is_cached = True
1165
1166    def feature_names(self, names=None, force_flags=None):
1167        """
1168        Returns a set of CPU feature names that supported by platform and the **C** compiler.
1169
1170        Parameters
1171        ----------
1172        'names': sequence or None, optional
1173            Specify certain CPU features to test it against the **C** compiler.
1174            if None(default), it will test all current supported features.
1175            **Note**: feature names must be in upper-case.
1176
1177        'force_flags': list or None, optional
1178            If None(default), default compiler flags for every CPU feature will be used
1179            during the test.
1180        """
1181        assert(
1182            names is None or (
1183                not isinstance(names, str) and
1184                hasattr(names, "__iter__")
1185            )
1186        )
1187        assert(force_flags is None or isinstance(force_flags, list))
1188        if names is None:
1189            names = self.feature_supported.keys()
1190        supported_names = set()
1191        for f in names:
1192            if self.feature_is_supported(f, force_flags=force_flags):
1193                supported_names.add(f)
1194        return supported_names
1195
1196    def feature_is_exist(self, name):
1197        """
1198        Returns True if a certain feature is exist and covered within
1199        `_Config.conf_features`.
1200
1201        Parameters
1202        ----------
1203        'name': str
1204            feature name in uppercase.
1205        """
1206        assert(name.isupper())
1207        return name in self.conf_features
1208
1209    def feature_sorted(self, names, reverse=False):
1210        """
1211        Sort a list of CPU features ordered by the lowest interest.
1212
1213        Parameters
1214        ----------
1215        'names': sequence
1216            sequence of supported feature names in uppercase.
1217        'reverse': bool, optional
1218            If true, the sorted features is reversed. (highest interest)
1219
1220        Returns
1221        -------
1222        list, sorted CPU features
1223        """
1224        def sort_cb(k):
1225            if isinstance(k, str):
1226                return self.feature_supported[k]["interest"]
1227            # multiple features
1228            rank = max([self.feature_supported[f]["interest"] for f in k])
1229            # FIXME: that's not a safe way to increase the rank for
1230            # multi targets
1231            rank += len(k) -1
1232            return rank
1233        return sorted(names, reverse=reverse, key=sort_cb)
1234
1235    def feature_implies(self, names, keep_origins=False):
1236        """
1237        Return a set of CPU features that implied by 'names'
1238
1239        Parameters
1240        ----------
1241        names: str or sequence of str
1242            CPU feature name(s) in uppercase.
1243
1244        keep_origins: bool
1245            if False(default) then the returned set will not contain any
1246            features from 'names'. This case happens only when two features
1247            imply each other.
1248
1249        Examples
1250        --------
1251        >>> self.feature_implies("SSE3")
1252        {'SSE', 'SSE2'}
1253        >>> self.feature_implies("SSE2")
1254        {'SSE'}
1255        >>> self.feature_implies("SSE2", keep_origins=True)
1256        # 'SSE2' found here since 'SSE' and 'SSE2' imply each other
1257        {'SSE', 'SSE2'}
1258        """
1259        def get_implies(name, _caller=set()):
1260            implies = set()
1261            d = self.feature_supported[name]
1262            for i in d.get("implies", []):
1263                implies.add(i)
1264                if i in _caller:
1265                    # infinity recursive guard since
1266                    # features can imply each other
1267                    continue
1268                _caller.add(name)
1269                implies = implies.union(get_implies(i, _caller))
1270            return implies
1271
1272        if isinstance(names, str):
1273            implies = get_implies(names)
1274            names = [names]
1275        else:
1276            assert(hasattr(names, "__iter__"))
1277            implies = set()
1278            for n in names:
1279                implies = implies.union(get_implies(n))
1280        if not keep_origins:
1281            implies.difference_update(names)
1282        return implies
1283
1284    def feature_implies_c(self, names):
1285        """same as feature_implies() but combining 'names'"""
1286        if isinstance(names, str):
1287            names = set((names,))
1288        else:
1289            names = set(names)
1290        return names.union(self.feature_implies(names))
1291
1292    def feature_ahead(self, names):
1293        """
1294        Return list of features in 'names' after remove any
1295        implied features and keep the origins.
1296
1297        Parameters
1298        ----------
1299        'names': sequence
1300            sequence of CPU feature names in uppercase.
1301
1302        Returns
1303        -------
1304        list of CPU features sorted as-is 'names'
1305
1306        Examples
1307        --------
1308        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41"])
1309        ["SSE41"]
1310        # assume AVX2 and FMA3 implies each other and AVX2
1311        # is the highest interest
1312        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
1313        ["AVX2"]
1314        # assume AVX2 and FMA3 don't implies each other
1315        >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
1316        ["AVX2", "FMA3"]
1317        """
1318        assert(
1319            not isinstance(names, str)
1320            and hasattr(names, '__iter__')
1321        )
1322        implies = self.feature_implies(names, keep_origins=True)
1323        ahead = [n for n in names if n not in implies]
1324        if len(ahead) == 0:
1325            # return the highest interested feature
1326            # if all features imply each other
1327            ahead = self.feature_sorted(names, reverse=True)[:1]
1328        return ahead
1329
1330    def feature_untied(self, names):
1331        """
1332        same as 'feature_ahead()' but if both features implied each other
1333        and keep the highest interest.
1334
1335        Parameters
1336        ----------
1337        'names': sequence
1338            sequence of CPU feature names in uppercase.
1339
1340        Returns
1341        -------
1342        list of CPU features sorted as-is 'names'
1343
1344        Examples
1345        --------
1346        >>> self.feature_untied(["SSE2", "SSE3", "SSE41"])
1347        ["SSE2", "SSE3", "SSE41"]
1348        # assume AVX2 and FMA3 implies each other
1349        >>> self.feature_untied(["SSE2", "SSE3", "SSE41", "FMA3", "AVX2"])
1350        ["SSE2", "SSE3", "SSE41", "AVX2"]
1351        """
1352        assert(
1353            not isinstance(names, str)
1354            and hasattr(names, '__iter__')
1355        )
1356        final = []
1357        for n in names:
1358            implies = self.feature_implies(n)
1359            tied = [
1360                nn for nn in final
1361                if nn in implies and n in self.feature_implies(nn)
1362            ]
1363            if tied:
1364                tied = self.feature_sorted(tied + [n])
1365                if n not in tied[1:]:
1366                    continue
1367                final.remove(tied[:1][0])
1368            final.append(n)
1369        return final
1370
1371    def feature_get_til(self, names, keyisfalse):
1372        """
1373        same as `feature_implies_c()` but stop collecting implied
1374        features when feature's option that provided through
1375        parameter 'keyisfalse' is False, also sorting the returned
1376        features.
1377        """
1378        def til(tnames):
1379            # sort from highest to lowest interest then cut if "key" is False
1380            tnames = self.feature_implies_c(tnames)
1381            tnames = self.feature_sorted(tnames, reverse=True)
1382            for i, n in enumerate(tnames):
1383                if not self.feature_supported[n].get(keyisfalse, True):
1384                    tnames = tnames[:i+1]
1385                    break
1386            return tnames
1387
1388        if isinstance(names, str) or len(names) <= 1:
1389            names = til(names)
1390            # normalize the sort
1391            names.reverse()
1392            return names
1393
1394        names = self.feature_ahead(names)
1395        names = {t for n in names for t in til(n)}
1396        return self.feature_sorted(names)
1397
1398    def feature_detect(self, names):
1399        """
1400        Return a list of CPU features that required to be detected
1401        sorted from the lowest to highest interest.
1402        """
1403        names = self.feature_get_til(names, "implies_detect")
1404        detect = []
1405        for n in names:
1406            d = self.feature_supported[n]
1407            detect += d.get("detect", d.get("group", [n]))
1408        return detect
1409
1410    @_Cache.me
1411    def feature_flags(self, names):
1412        """
1413        Return a list of CPU features flags sorted from the lowest
1414        to highest interest.
1415        """
1416        names = self.feature_sorted(self.feature_implies_c(names))
1417        flags = []
1418        for n in names:
1419            d = self.feature_supported[n]
1420            f = d.get("flags", [])
1421            if not f or not self.cc_test_flags(f):
1422                continue
1423            flags += f
1424        return self.cc_normalize_flags(flags)
1425
1426    @_Cache.me
1427    def feature_test(self, name, force_flags=None):
1428        """
1429        Test a certain CPU feature against the compiler through its own
1430        check file.
1431
1432        Parameters
1433        ----------
1434        'name': str
1435            Supported CPU feature name.
1436
1437        'force_flags': list or None, optional
1438            If None(default), the returned flags from `feature_flags()`
1439            will be used.
1440       """
1441        if force_flags is None:
1442            force_flags = self.feature_flags(name)
1443
1444        self.dist_log(
1445            "testing feature '%s' with flags (%s)" % (
1446            name, ' '.join(force_flags)
1447        ))
1448        # Each CPU feature must have C source code contains at
1449        # least one intrinsic or instruction related to this feature.
1450        test_path = os.path.join(
1451            self.conf_check_path, "cpu_%s.c" % name.lower()
1452        )
1453        if not os.path.exists(test_path):
1454            self.dist_fatal("feature test file is not exist", test_path)
1455
1456        test = self.dist_test(test_path, force_flags + self.cc_flags["werror"])
1457        if not test:
1458            self.dist_log("testing failed", stderr=True)
1459        return test
1460
1461    @_Cache.me
1462    def feature_is_supported(self, name, force_flags=None):
1463        """
1464        Check if a certain CPU feature is supported by the platform and compiler.
1465
1466        Parameters
1467        ----------
1468        'name': str
1469            CPU feature name in uppercase.
1470
1471        'force_flags': list or None, optional
1472            If None(default), default compiler flags for every CPU feature will be used
1473            during test.
1474        """
1475        assert(name.isupper())
1476        assert(force_flags is None or isinstance(force_flags, list))
1477
1478        supported = name in self.feature_supported
1479        if supported:
1480            for impl in self.feature_implies(name):
1481                if not self.feature_test(impl, force_flags):
1482                    return False
1483            if not self.feature_test(name, force_flags):
1484                return False
1485        return supported
1486
1487    @_Cache.me
1488    def feature_can_autovec(self, name):
1489        """
1490        check if the feature can be auto-vectorized by the compiler
1491        """
1492        assert(isinstance(name, str))
1493        d = self.feature_supported[name]
1494        can = d.get("autovec", None)
1495        if can is None:
1496            valid_flags = [
1497                self.cc_test_flags([f]) for f in d.get("flags", [])
1498            ]
1499            can = valid_flags and any(valid_flags)
1500        return can
1501
1502    @_Cache.me
1503    def feature_extra_checks(self, name):
1504        """
1505        Return a list of supported extra checks after testing them against
1506        the compiler.
1507
1508        Parameters
1509        ----------
1510        names: str
1511            CPU feature name in uppercase.
1512        """
1513        assert isinstance(name, str)
1514        d = self.feature_supported[name]
1515        extra_checks = d.get("extra_checks", [])
1516        if not extra_checks:
1517            return []
1518
1519        self.dist_log("Testing extra checks for feature '%s'" % name, extra_checks)
1520        flags = self.feature_flags(name)
1521        available = []
1522        not_available = []
1523        for chk in extra_checks:
1524            test_path = os.path.join(
1525                self.conf_check_path, "extra_%s.c" % chk.lower()
1526            )
1527            if not os.path.exists(test_path):
1528                self.dist_fatal("extra check file does not exist", test_path)
1529
1530            is_supported = self.dist_test(test_path, flags + self.cc_flags["werror"])
1531            if is_supported:
1532                available.append(chk)
1533            else:
1534                not_available.append(chk)
1535
1536        if not_available:
1537            self.dist_log("testing failed for checks", not_available, stderr=True)
1538        return available
1539
1540
1541    def feature_c_preprocessor(self, feature_name, tabs=0):
1542        """
1543        Generate C preprocessor definitions and include headers of a CPU feature.
1544
1545        Parameters
1546        ----------
1547        'feature_name': str
1548            CPU feature name in uppercase.
1549        'tabs': int
1550            if > 0, align the generated strings to the right depend on number of tabs.
1551
1552        Returns
1553        -------
1554        str, generated C preprocessor
1555
1556        Examples
1557        --------
1558        >>> self.feature_c_preprocessor("SSE3")
1559        /** SSE3 **/
1560        #define NPY_HAVE_SSE3 1
1561        #include <pmmintrin.h>
1562        """
1563        assert(feature_name.isupper())
1564        feature = self.feature_supported.get(feature_name)
1565        assert(feature is not None)
1566
1567        prepr = [
1568            "/** %s **/" % feature_name,
1569            "#define %sHAVE_%s 1" % (self.conf_c_prefix, feature_name)
1570        ]
1571        prepr += [
1572            "#include <%s>" % h for h in feature.get("headers", [])
1573        ]
1574
1575        extra_defs = feature.get("group", [])
1576        extra_defs += self.feature_extra_checks(feature_name)
1577        for edef in extra_defs:
1578            # Guard extra definitions in case of duplicate with
1579            # another feature
1580            prepr += [
1581                "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef),
1582                "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef),
1583                "#endif",
1584            ]
1585
1586        if tabs > 0:
1587            prepr = [('\t'*tabs) + l for l in prepr]
1588        return '\n'.join(prepr)
1589
1590class _Parse:
1591    """A helper class that parsing main arguments of `CCompilerOpt`,
1592    also parsing configuration statements in dispatch-able sources.
1593
1594    Parameters
1595    ----------
1596    cpu_baseline: str or None
1597        minimal set of required CPU features or special options.
1598
1599    cpu_dispatch: str or None
1600        dispatched set of additional CPU features or special options.
1601
1602    Special options can be:
1603        - **MIN**: Enables the minimum CPU features that utilized via `_Config.conf_min_features`
1604        - **MAX**: Enables all supported CPU features by the Compiler and platform.
1605        - **NATIVE**: Enables all CPU features that supported by the current machine.
1606        - **NONE**: Enables nothing
1607        - **Operand +/-**: remove or add features, useful with options **MAX**, **MIN** and **NATIVE**.
1608            NOTE: operand + is only added for nominal reason.
1609
1610    NOTES:
1611        - Case-insensitive among all CPU features and special options.
1612        - Comma or space can be used as a separator.
1613        - If the CPU feature is not supported by the user platform or compiler,
1614          it will be skipped rather than raising a fatal error.
1615        - Any specified CPU features to 'cpu_dispatch' will be skipped if its part of CPU baseline features
1616        - 'cpu_baseline' force enables implied features.
1617
1618    Attributes
1619    ----------
1620    parse_baseline_names : list
1621        Final CPU baseline's feature names(sorted from low to high)
1622    parse_baseline_flags : list
1623        Compiler flags of baseline features
1624    parse_dispatch_names : list
1625        Final CPU dispatch-able feature names(sorted from low to high)
1626    parse_target_groups : dict
1627        Dictionary containing initialized target groups that configured
1628        through class attribute `conf_target_groups`.
1629
1630        The key is represent the group name and value is a tuple
1631        contains three items :
1632            - bool, True if group has the 'baseline' option.
1633            - list, list of CPU features.
1634            - list, list of extra compiler flags.
1635
1636    """
1637    def __init__(self, cpu_baseline, cpu_dispatch):
1638        self._parse_policies = dict(
1639            # POLICY NAME, (HAVE, NOT HAVE, [DEB])
1640            KEEP_BASELINE = (
1641                None, self._parse_policy_not_keepbase,
1642                []
1643            ),
1644            KEEP_SORT = (
1645                self._parse_policy_keepsort,
1646                self._parse_policy_not_keepsort,
1647                []
1648            ),
1649            MAXOPT = (
1650                self._parse_policy_maxopt, None,
1651                []
1652            ),
1653            WERROR = (
1654                self._parse_policy_werror, None,
1655                []
1656            ),
1657            AUTOVEC = (
1658                self._parse_policy_autovec, None,
1659                ["MAXOPT"]
1660            )
1661        )
1662        if hasattr(self, "parse_is_cached"):
1663            return
1664
1665        self.parse_baseline_names = []
1666        self.parse_baseline_flags = []
1667        self.parse_dispatch_names = []
1668        self.parse_target_groups = {}
1669
1670        if self.cc_noopt:
1671            # skip parsing baseline and dispatch args and keep parsing target groups
1672            cpu_baseline = cpu_dispatch = None
1673
1674        self.dist_log("check requested baseline")
1675        if cpu_baseline is not None:
1676            cpu_baseline = self._parse_arg_features("cpu_baseline", cpu_baseline)
1677            baseline_names = self.feature_names(cpu_baseline)
1678            self.parse_baseline_flags = self.feature_flags(baseline_names)
1679            self.parse_baseline_names = self.feature_sorted(
1680                self.feature_implies_c(baseline_names)
1681            )
1682
1683        self.dist_log("check requested dispatch-able features")
1684        if cpu_dispatch is not None:
1685            cpu_dispatch_ = self._parse_arg_features("cpu_dispatch", cpu_dispatch)
1686            cpu_dispatch = {
1687                f for f in cpu_dispatch_
1688                if f not in self.parse_baseline_names
1689            }
1690            conflict_baseline = cpu_dispatch_.difference(cpu_dispatch)
1691            self.parse_dispatch_names = self.feature_sorted(
1692                self.feature_names(cpu_dispatch)
1693            )
1694            if len(conflict_baseline) > 0:
1695                self.dist_log(
1696                    "skip features", conflict_baseline, "since its part of baseline"
1697                )
1698
1699        self.dist_log("initialize targets groups")
1700        for group_name, tokens in self.conf_target_groups.items():
1701            self.dist_log("parse target group", group_name)
1702            GROUP_NAME = group_name.upper()
1703            if not tokens or not tokens.strip():
1704                # allow empty groups, useful in case if there's a need
1705                # to disable certain group since '_parse_target_tokens()'
1706                # requires at least one valid target
1707                self.parse_target_groups[GROUP_NAME] = (
1708                    False, [], []
1709                )
1710                continue
1711            has_baseline, features, extra_flags = \
1712                self._parse_target_tokens(tokens)
1713            self.parse_target_groups[GROUP_NAME] = (
1714                has_baseline, features, extra_flags
1715            )
1716
1717        self.parse_is_cached = True
1718
1719    def parse_targets(self, source):
1720        """
1721        Fetch and parse configuration statements that required for
1722        defining the targeted CPU features, statements should be declared
1723        in the top of source in between **C** comment and start
1724        with a special mark **@targets**.
1725
1726        Configuration statements are sort of keywords representing
1727        CPU features names, group of statements and policies, combined
1728        together to determine the required optimization.
1729
1730        Parameters
1731        ----------
1732        source: str
1733            the path of **C** source file.
1734
1735        Returns
1736        -------
1737        - bool, True if group has the 'baseline' option
1738        - list, list of CPU features
1739        - list, list of extra compiler flags
1740        """
1741        self.dist_log("looking for '@targets' inside -> ", source)
1742        # get lines between /*@targets and */
1743        with open(source) as fd:
1744            tokens = ""
1745            max_to_reach = 1000 # good enough, isn't?
1746            start_with = "@targets"
1747            start_pos = -1
1748            end_with = "*/"
1749            end_pos = -1
1750            for current_line, line in enumerate(fd):
1751                if current_line == max_to_reach:
1752                    self.dist_fatal("reached the max of lines")
1753                    break
1754                if start_pos == -1:
1755                    start_pos = line.find(start_with)
1756                    if start_pos == -1:
1757                        continue
1758                    start_pos += len(start_with)
1759                tokens += line
1760                end_pos = line.find(end_with)
1761                if end_pos != -1:
1762                    end_pos += len(tokens) - len(line)
1763                    break
1764
1765        if start_pos == -1:
1766            self.dist_fatal("expected to find '%s' within a C comment" % start_with)
1767        if end_pos == -1:
1768            self.dist_fatal("expected to end with '%s'" % end_with)
1769
1770        tokens = tokens[start_pos:end_pos]
1771        return self._parse_target_tokens(tokens)
1772
    # Tokenizer for the 'cpu_baseline'/'cpu_dispatch' arguments: splits on
    # whitespace and commas (both dropped) and on '+'/'-', which the capture
    # group keeps as standalone tokens.
    _parse_regex_arg = re.compile(r'\s|[,]|([+-])')
1774    def _parse_arg_features(self, arg_name, req_features):
1775        if not isinstance(req_features, str):
1776            self.dist_fatal("expected a string in '%s'" % arg_name)
1777
1778        final_features = set()
1779        # space and comma can be used as a separator
1780        tokens = list(filter(None, re.split(self._parse_regex_arg, req_features)))
1781        append = True # append is the default
1782        for tok in tokens:
1783            if tok[0] in ("#", "$"):
1784                self.dist_fatal(
1785                    arg_name, "target groups and policies "
1786                    "aren't allowed from arguments, "
1787                    "only from dispatch-able sources"
1788                )
1789            if tok == '+':
1790                append = True
1791                continue
1792            if tok == '-':
1793                append = False
1794                continue
1795
1796            TOK = tok.upper() # we use upper-case internally
1797            features_to = set()
1798            if TOK == "NONE":
1799                pass
1800            elif TOK == "NATIVE":
1801                native = self.cc_flags["native"]
1802                if not native:
1803                    self.dist_fatal(arg_name,
1804                        "native option isn't supported by the compiler"
1805                    )
1806                features_to = self.feature_names(force_flags=native)
1807            elif TOK == "MAX":
1808                features_to = self.feature_supported.keys()
1809            elif TOK == "MIN":
1810                features_to = self.feature_min
1811            else:
1812                if TOK in self.feature_supported:
1813                    features_to.add(TOK)
1814                else:
1815                    if not self.feature_is_exist(TOK):
1816                        self.dist_fatal(arg_name,
1817                            ", '%s' isn't a known feature or option" % tok
1818                        )
1819            if append:
1820                final_features = final_features.union(features_to)
1821            else:
1822                final_features = final_features.difference(features_to)
1823
1824            append = True # back to default
1825
1826        return final_features
1827
    # Tokenizer for target statements: splits on whitespace, '*', ',' and '/'
    # (all dropped) and on '(' / ')', which the capture group keeps as
    # standalone tokens.
    _parse_regex_target = re.compile(r'\s|[*,/]|([()])')
1829    def _parse_target_tokens(self, tokens):
1830        assert(isinstance(tokens, str))
1831        final_targets = [] # to keep it sorted as specified
1832        extra_flags = []
1833        has_baseline = False
1834
1835        skipped  = set()
1836        policies = set()
1837        multi_target = None
1838
1839        tokens = list(filter(None, re.split(self._parse_regex_target, tokens)))
1840        if not tokens:
1841            self.dist_fatal("expected one token at least")
1842
1843        for tok in tokens:
1844            TOK = tok.upper()
1845            ch = tok[0]
1846            if ch in ('+', '-'):
1847                self.dist_fatal(
1848                    "+/- are 'not' allowed from target's groups or @targets, "
1849                    "only from cpu_baseline and cpu_dispatch parms"
1850                )
1851            elif ch == '$':
1852                if multi_target is not None:
1853                    self.dist_fatal(
1854                        "policies aren't allowed inside multi-target '()'"
1855                        ", only CPU features"
1856                    )
1857                policies.add(self._parse_token_policy(TOK))
1858            elif ch == '#':
1859                if multi_target is not None:
1860                    self.dist_fatal(
1861                        "target groups aren't allowed inside multi-target '()'"
1862                        ", only CPU features"
1863                    )
1864                has_baseline, final_targets, extra_flags = \
1865                self._parse_token_group(TOK, has_baseline, final_targets, extra_flags)
1866            elif ch == '(':
1867                if multi_target is not None:
1868                    self.dist_fatal("unclosed multi-target, missing ')'")
1869                multi_target = set()
1870            elif ch == ')':
1871                if multi_target is None:
1872                    self.dist_fatal("multi-target opener '(' wasn't found")
1873                targets = self._parse_multi_target(multi_target)
1874                if targets is None:
1875                    skipped.add(tuple(multi_target))
1876                else:
1877                    if len(targets) == 1:
1878                        targets = targets[0]
1879                    if targets and targets not in final_targets:
1880                        final_targets.append(targets)
1881                multi_target = None # back to default
1882            else:
1883                if TOK == "BASELINE":
1884                    if multi_target is not None:
1885                        self.dist_fatal("baseline isn't allowed inside multi-target '()'")
1886                    has_baseline = True
1887                    continue
1888
1889                if multi_target is not None:
1890                    multi_target.add(TOK)
1891                    continue
1892
1893                if not self.feature_is_exist(TOK):
1894                    self.dist_fatal("invalid target name '%s'" % TOK)
1895
1896                is_enabled = (
1897                    TOK in self.parse_baseline_names or
1898                    TOK in self.parse_dispatch_names
1899                )
1900                if  is_enabled:
1901                    if TOK not in final_targets:
1902                        final_targets.append(TOK)
1903                    continue
1904
1905                skipped.add(TOK)
1906
1907        if multi_target is not None:
1908            self.dist_fatal("unclosed multi-target, missing ')'")
1909        if skipped:
1910            self.dist_log(
1911                "skip targets", skipped,
1912                "not part of baseline or dispatch-able features"
1913            )
1914
1915        final_targets = self.feature_untied(final_targets)
1916
1917        # add polices dependencies
1918        for p in list(policies):
1919            _, _, deps = self._parse_policies[p]
1920            for d in deps:
1921                if d in policies:
1922                    continue
1923                self.dist_log(
1924                    "policy '%s' force enables '%s'" % (
1925                    p, d
1926                ))
1927                policies.add(d)
1928
1929        # release policies filtrations
1930        for p, (have, nhave, _) in self._parse_policies.items():
1931            func = None
1932            if p in policies:
1933                func = have
1934                self.dist_log("policy '%s' is ON" % p)
1935            else:
1936                func = nhave
1937            if not func:
1938                continue
1939            has_baseline, final_targets, extra_flags = func(
1940                has_baseline, final_targets, extra_flags
1941            )
1942
1943        return has_baseline, final_targets, extra_flags
1944
1945    def _parse_token_policy(self, token):
1946        """validate policy token"""
1947        if len(token) <= 1 or token[-1:] == token[0]:
1948            self.dist_fatal("'$' must stuck in the begin of policy name")
1949        token = token[1:]
1950        if token not in self._parse_policies:
1951            self.dist_fatal(
1952                "'%s' is an invalid policy name, available policies are" % token,
1953                self._parse_policies.keys()
1954            )
1955        return token
1956
1957    def _parse_token_group(self, token, has_baseline, final_targets, extra_flags):
1958        """validate group token"""
1959        if len(token) <= 1 or token[-1:] == token[0]:
1960            self.dist_fatal("'#' must stuck in the begin of group name")
1961
1962        token = token[1:]
1963        ghas_baseline, gtargets, gextra_flags = self.parse_target_groups.get(
1964            token, (False, None, [])
1965        )
1966        if gtargets is None:
1967            self.dist_fatal(
1968                "'%s' is an invalid target group name, " % token + \
1969                "available target groups are",
1970                self.parse_target_groups.keys()
1971            )
1972        if ghas_baseline:
1973            has_baseline = True
1974        # always keep sorting as specified
1975        final_targets += [f for f in gtargets if f not in final_targets]
1976        extra_flags += [f for f in gextra_flags if f not in extra_flags]
1977        return has_baseline, final_targets, extra_flags
1978
1979    def _parse_multi_target(self, targets):
1980        """validate multi targets that defined between parentheses()"""
1981        # remove any implied features and keep the origins
1982        if not targets:
1983            self.dist_fatal("empty multi-target '()'")
1984        if not all([
1985            self.feature_is_exist(tar) for tar in targets
1986        ]) :
1987            self.dist_fatal("invalid target name in multi-target", targets)
1988        if not all([
1989            (
1990                tar in self.parse_baseline_names or
1991                tar in self.parse_dispatch_names
1992            )
1993            for tar in targets
1994        ]) :
1995            return None
1996        targets = self.feature_ahead(targets)
1997        if not targets:
1998            return None
1999        # force sort multi targets, so it can be comparable
2000        targets = self.feature_sorted(targets)
2001        targets = tuple(targets) # hashable
2002        return targets
2003
2004    def _parse_policy_not_keepbase(self, has_baseline, final_targets, extra_flags):
2005        """skip all baseline features"""
2006        skipped = []
2007        for tar in final_targets[:]:
2008            is_base = False
2009            if isinstance(tar, str):
2010                is_base = tar in self.parse_baseline_names
2011            else:
2012                # multi targets
2013                is_base = all([
2014                    f in self.parse_baseline_names
2015                    for f in tar
2016                ])
2017            if is_base:
2018                skipped.append(tar)
2019                final_targets.remove(tar)
2020
2021        if skipped:
2022            self.dist_log("skip baseline features", skipped)
2023
2024        return has_baseline, final_targets, extra_flags
2025
2026    def _parse_policy_keepsort(self, has_baseline, final_targets, extra_flags):
2027        """leave a notice that $keep_sort is on"""
2028        self.dist_log(
2029            "policy 'keep_sort' is on, dispatch-able targets", final_targets, "\n"
2030            "are 'not' sorted depend on the highest interest but"
2031            "as specified in the dispatch-able source or the extra group"
2032        )
2033        return has_baseline, final_targets, extra_flags
2034
2035    def _parse_policy_not_keepsort(self, has_baseline, final_targets, extra_flags):
2036        """sorted depend on the highest interest"""
2037        final_targets = self.feature_sorted(final_targets, reverse=True)
2038        return has_baseline, final_targets, extra_flags
2039
2040    def _parse_policy_maxopt(self, has_baseline, final_targets, extra_flags):
2041        """append the compiler optimization flags"""
2042        if self.cc_has_debug:
2043            self.dist_log("debug mode is detected, policy 'maxopt' is skipped.")
2044        elif self.cc_noopt:
2045            self.dist_log("optimization is disabled, policy 'maxopt' is skipped.")
2046        else:
2047            flags = self.cc_flags["opt"]
2048            if not flags:
2049                self.dist_log(
2050                    "current compiler doesn't support optimization flags, "
2051                    "policy 'maxopt' is skipped", stderr=True
2052                )
2053            else:
2054                extra_flags += flags
2055        return has_baseline, final_targets, extra_flags
2056
2057    def _parse_policy_werror(self, has_baseline, final_targets, extra_flags):
2058        """force warnings to treated as errors"""
2059        flags = self.cc_flags["werror"]
2060        if not flags:
2061            self.dist_log(
2062                "current compiler doesn't support werror flags, "
2063                "warnings will 'not' treated as errors", stderr=True
2064            )
2065        else:
2066            self.dist_log("compiler warnings are treated as errors")
2067            extra_flags += flags
2068        return has_baseline, final_targets, extra_flags
2069
2070    def _parse_policy_autovec(self, has_baseline, final_targets, extra_flags):
2071        """skip features that has no auto-vectorized support by compiler"""
2072        skipped = []
2073        for tar in final_targets[:]:
2074            if isinstance(tar, str):
2075                can = self.feature_can_autovec(tar)
2076            else: # multiple target
2077                can = all([
2078                    self.feature_can_autovec(t)
2079                    for t in tar
2080                ])
2081            if not can:
2082                final_targets.remove(tar)
2083                skipped.append(tar)
2084
2085        if skipped:
2086            self.dist_log("skip non auto-vectorized features", skipped)
2087
2088        return has_baseline, final_targets, extra_flags
2089
2090class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
2091    """
2092    A helper class for `CCompiler` aims to provide extra build options
2093    to effectively control of compiler optimizations that are directly
2094    related to CPU features.
2095    """
    def __init__(self, ccompiler, cpu_baseline="min", cpu_dispatch="max", cache_path=None):
        """
        Initialize every base class in turn and record the requested
        baseline/dispatch arguments.

        Parameters
        ----------
        ccompiler : CCompiler
            Handed over to `_Distutils.__init__()`.
        cpu_baseline : str or None
            Requested CPU baseline, see `_Parse`. Replaced by "native" when
            `cc_has_native` is set and `cc_noopt` is not.
        cpu_dispatch : str or None
            Requested dispatch-able features, see `_Parse`.
        cache_path : str or None
            Handed over to `_Cache.__init__()`.
        """
        _Config.__init__(self)
        _Distutils.__init__(self, ccompiler)
        # the cache receives the arguments as they were given by the caller,
        # i.e. before the 'native' override below
        _Cache.__init__(self, cache_path, self.dist_info(), cpu_baseline, cpu_dispatch)
        _CCompiler.__init__(self)
        _Feature.__init__(self)
        if not self.cc_noopt and self.cc_has_native:
            self.dist_log(
                "native flag is specified through environment variables. "
                "force cpu-baseline='native'"
            )
            cpu_baseline = "native"
        _Parse.__init__(self, cpu_baseline, cpu_dispatch)
        # keep the requested features untouched, need it later for report
        # and trace purposes
        self._requested_baseline = cpu_baseline
        self._requested_dispatch = cpu_dispatch
        # key is the dispatch-able source and value is a tuple
        # contains two items (has_baseline[boolean], dispatched-features[list])
        # an already existing attribute is kept as is
        self.sources_status = getattr(self, "sources_status", {})
        # every instance should has a separate one
        self.cache_private.add("sources_status")
        # set it at the end to make sure the cache writing was done after init
        # this class.
        # True only when the attribute already existed before this line
        # NOTE(review): presumably restored by `_Cache` on a cache hit - confirm
        self.hit_cache = hasattr(self, "hit_cache")
2121
2122    def is_cached(self):
2123        """
2124        Returns True if the class loaded from the cache file
2125        """
2126        return self.cache_infile and self.hit_cache
2127
    def cpu_baseline_flags(self):
        """
        Return the compiler flags of the final CPU baseline.

        Returns
        -------
        list
            The attribute `parse_baseline_flags` itself, not a copy.
        """
        return self.parse_baseline_flags
2133
    def cpu_baseline_names(self):
        """
        Return the feature names of the final CPU baseline.

        Returns
        -------
        list
            The attribute `parse_baseline_names` itself, not a copy.
        """
        return self.parse_baseline_names
2139
    def cpu_dispatch_names(self):
        """
        Return the names of the final dispatch-able CPU features.

        Returns
        -------
        list
            The attribute `parse_dispatch_names` itself, not a copy.
        """
        return self.parse_dispatch_names
2145
2146    def try_dispatch(self, sources, src_dir=None, **kwargs):
2147        """
2148        Compile one or more dispatch-able sources and generates object files,
2149        also generates abstract C config headers and macros that
2150        used later for the final runtime dispatching process.
2151
2152        The mechanism behind it is to takes each source file that specified
2153        in 'sources' and branching it into several files depend on
2154        special configuration statements that must be declared in the
2155        top of each source which contains targeted CPU features,
2156        then it compiles every branched source with the proper compiler flags.
2157
2158        Parameters
2159        ----------
2160        sources : list
2161            Must be a list of dispatch-able sources file paths,
2162            and configuration statements must be declared inside
2163            each file.
2164
2165        src_dir : str
2166            Path of parent directory for the generated headers and wrapped sources.
2167            If None(default) the files will generated in-place.
2168
2169        **kwargs : any
2170            Arguments to pass on to the `CCompiler.compile()`
2171
2172        Returns
2173        -------
2174        list : generated object files
2175
2176        Raises
2177        ------
2178        CompileError
2179            Raises by `CCompiler.compile()` on compiling failure.
2180        DistutilsError
2181            Some errors during checking the sanity of configuration statements.
2182
2183        See Also
2184        --------
2185        parse_targets :
2186            Parsing the configuration statements of dispatch-able sources.
2187        """
2188        to_compile = {}
2189        baseline_flags = self.cpu_baseline_flags()
2190        include_dirs = kwargs.setdefault("include_dirs", [])
2191
2192        for src in sources:
2193            output_dir = os.path.dirname(src)
2194            if src_dir:
2195                if not output_dir.startswith(src_dir):
2196                    output_dir = os.path.join(src_dir, output_dir)
2197                if output_dir not in include_dirs:
2198                    # To allow including the generated config header(*.dispatch.h)
2199                    # by the dispatch-able sources
2200                    include_dirs.append(output_dir)
2201
2202            has_baseline, targets, extra_flags = self.parse_targets(src)
2203            nochange = self._generate_config(output_dir, src, targets, has_baseline)
2204            for tar in targets:
2205                tar_src = self._wrap_target(output_dir, src, tar, nochange=nochange)
2206                flags = tuple(extra_flags + self.feature_flags(tar))
2207                to_compile.setdefault(flags, []).append(tar_src)
2208
2209            if has_baseline:
2210                flags = tuple(extra_flags + baseline_flags)
2211                to_compile.setdefault(flags, []).append(src)
2212
2213            self.sources_status[src] = (has_baseline, targets)
2214
2215        # For these reasons, the sources are compiled in a separate loop:
2216        # - Gathering all sources with the same flags to benefit from
2217        #   the parallel compiling as much as possible.
2218        # - To generate all config headers of the dispatchable sources,
2219        #   before the compilation in case if there are dependency relationships
2220        #   among them.
2221        objects = []
2222        for flags, srcs in to_compile.items():
2223            objects += self.dist_compile(srcs, list(flags), **kwargs)
2224        return objects
2225
2226    def generate_dispatch_header(self, header_path):
2227        """
2228        Generate the dispatch header which contains the #definitions and headers
2229        for platform-specific instruction-sets for the enabled CPU baseline and
2230        dispatch-able features.
2231
2232        Its highly recommended to take a look at the generated header
2233        also the generated source files via `try_dispatch()`
2234        in order to get the full picture.
2235        """
2236        self.dist_log("generate CPU dispatch header: (%s)" % header_path)
2237
2238        baseline_names = self.cpu_baseline_names()
2239        dispatch_names = self.cpu_dispatch_names()
2240        baseline_len = len(baseline_names)
2241        dispatch_len = len(dispatch_names)
2242
2243        header_dir = os.path.dirname(header_path)
2244        if not os.path.exists(header_dir):
2245            self.dist_log(
2246                f"dispatch header dir {header_dir} does not exist, creating it",
2247                stderr=True
2248            )
2249            os.makedirs(header_dir)
2250
2251        with open(header_path, 'w') as f:
2252            baseline_calls = ' \\\n'.join([
2253                (
2254                    "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))"
2255                ) % (self.conf_c_prefix, f)
2256                for f in baseline_names
2257            ])
2258            dispatch_calls = ' \\\n'.join([
2259                (
2260                    "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))"
2261                ) % (self.conf_c_prefix, f)
2262                for f in dispatch_names
2263            ])
2264            f.write(textwrap.dedent("""\
2265                /*
2266                 * AUTOGENERATED DON'T EDIT
2267                 * Please make changes to the code generator (distutils/ccompiler_opt.py)
2268                */
2269                #define {pfx}WITH_CPU_BASELINE  "{baseline_str}"
2270                #define {pfx}WITH_CPU_DISPATCH  "{dispatch_str}"
2271                #define {pfx}WITH_CPU_BASELINE_N {baseline_len}
2272                #define {pfx}WITH_CPU_DISPATCH_N {dispatch_len}
2273                #define {pfx}WITH_CPU_EXPAND_(X) X
2274                #define {pfx}WITH_CPU_BASELINE_CALL(MACRO_TO_CALL, ...) \\
2275                {baseline_calls}
2276                #define {pfx}WITH_CPU_DISPATCH_CALL(MACRO_TO_CALL, ...) \\
2277                {dispatch_calls}
2278            """).format(
2279                pfx=self.conf_c_prefix, baseline_str=" ".join(baseline_names),
2280                dispatch_str=" ".join(dispatch_names), baseline_len=baseline_len,
2281                dispatch_len=dispatch_len, baseline_calls=baseline_calls,
2282                dispatch_calls=dispatch_calls
2283            ))
2284            baseline_pre = ''
2285            for name in baseline_names:
2286                baseline_pre += self.feature_c_preprocessor(name, tabs=1) + '\n'
2287
2288            dispatch_pre = ''
2289            for name in dispatch_names:
2290                dispatch_pre += textwrap.dedent("""\
2291                #ifdef {pfx}CPU_TARGET_{name}
2292                {pre}
2293                #endif /*{pfx}CPU_TARGET_{name}*/
2294                """).format(
2295                    pfx=self.conf_c_prefix_, name=name, pre=self.feature_c_preprocessor(
2296                    name, tabs=1
2297                ))
2298
2299            f.write(textwrap.dedent("""\
2300            /******* baseline features *******/
2301            {baseline_pre}
2302            /******* dispatch features *******/
2303            {dispatch_pre}
2304            """).format(
2305                pfx=self.conf_c_prefix_, baseline_pre=baseline_pre,
2306                dispatch_pre=dispatch_pre
2307            ))
2308
2309    def report(self, full=False):
2310        report = []
2311        platform_rows = []
2312        baseline_rows = []
2313        dispatch_rows = []
2314        report.append(("Platform", platform_rows))
2315        report.append(("", ""))
2316        report.append(("CPU baseline", baseline_rows))
2317        report.append(("", ""))
2318        report.append(("CPU dispatch", dispatch_rows))
2319
2320        ########## platform ##########
2321        platform_rows.append(("Architecture", (
2322            "unsupported" if self.cc_on_noarch else self.cc_march)
2323        ))
2324        platform_rows.append(("Compiler", (
2325            "unix-like"   if self.cc_is_nocc   else self.cc_name)
2326        ))
2327        ########## baseline ##########
2328        if self.cc_noopt:
2329            baseline_rows.append(("Requested", "optimization disabled"))
2330        else:
2331            baseline_rows.append(("Requested", repr(self._requested_baseline)))
2332
2333        baseline_names = self.cpu_baseline_names()
2334        baseline_rows.append((
2335            "Enabled", (' '.join(baseline_names) if baseline_names else "none")
2336        ))
2337        baseline_flags = self.cpu_baseline_flags()
2338        baseline_rows.append((
2339            "Flags", (' '.join(baseline_flags) if baseline_flags else "none")
2340        ))
2341        extra_checks = []
2342        for name in baseline_names:
2343            extra_checks += self.feature_extra_checks(name)
2344        baseline_rows.append((
2345            "Extra checks", (' '.join(extra_checks) if extra_checks else "none")
2346        ))
2347
2348        ########## dispatch ##########
2349        if self.cc_noopt:
2350            baseline_rows.append(("Requested", "optimization disabled"))
2351        else:
2352            dispatch_rows.append(("Requested", repr(self._requested_dispatch)))
2353
2354        dispatch_names = self.cpu_dispatch_names()
2355        dispatch_rows.append((
2356            "Enabled", (' '.join(dispatch_names) if dispatch_names else "none")
2357        ))
2358        ########## Generated ##########
2359        # TODO:
2360        # - collect object names from 'try_dispatch()'
2361        #   then get size of each object and printed
2362        # - give more details about the features that not
2363        #   generated due compiler support
2364        # - find a better output's design.
2365        #
2366        target_sources = {}
2367        for source, (_, targets) in self.sources_status.items():
2368            for tar in targets:
2369                target_sources.setdefault(tar, []).append(source)
2370
2371        if not full or not target_sources:
2372            generated = ""
2373            for tar in self.feature_sorted(target_sources):
2374                sources = target_sources[tar]
2375                name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
2376                generated += name + "[%d] " % len(sources)
2377            dispatch_rows.append(("Generated", generated[:-1] if generated else "none"))
2378        else:
2379            dispatch_rows.append(("Generated", ''))
2380            for tar in self.feature_sorted(target_sources):
2381                sources = target_sources[tar]
2382                pretty_name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
2383                flags = ' '.join(self.feature_flags(tar))
2384                implies = ' '.join(self.feature_sorted(self.feature_implies(tar)))
2385                detect = ' '.join(self.feature_detect(tar))
2386                extra_checks = []
2387                for name in ((tar,) if isinstance(tar, str) else tar):
2388                    extra_checks += self.feature_extra_checks(name)
2389                extra_checks = (' '.join(extra_checks) if extra_checks else "none")
2390
2391                dispatch_rows.append(('', ''))
2392                dispatch_rows.append((pretty_name, implies))
2393                dispatch_rows.append(("Flags", flags))
2394                dispatch_rows.append(("Extra checks", extra_checks))
2395                dispatch_rows.append(("Detect", detect))
2396                for src in sources:
2397                    dispatch_rows.append(("", src))
2398
2399        ###############################
2400        # TODO: add support for 'markdown' format
2401        text = []
2402        secs_len = [len(secs) for secs, _ in report]
2403        cols_len = [len(col) for _, rows in report for col, _ in rows]
2404        tab = ' ' * 2
2405        pad =  max(max(secs_len), max(cols_len))
2406        for sec, rows in report:
2407            if not sec:
2408                text.append("") # empty line
2409                continue
2410            sec += ' ' * (pad - len(sec))
2411            text.append(sec + tab + ': ')
2412            for col, val in rows:
2413                col += ' ' * (pad - len(col))
2414                text.append(tab + col + ': ' + val)
2415
2416        return '\n'.join(text)
2417
2418    def _wrap_target(self, output_dir, dispatch_src, target, nochange=False):
2419        assert(isinstance(target, (str, tuple)))
2420        if isinstance(target, str):
2421            ext_name = target_name = target
2422        else:
2423            # multi-target
2424            ext_name = '.'.join(target)
2425            target_name = '__'.join(target)
2426
2427        wrap_path = os.path.join(output_dir, os.path.basename(dispatch_src))
2428        wrap_path = "{0}.{2}{1}".format(*os.path.splitext(wrap_path), ext_name.lower())
2429        if nochange and os.path.exists(wrap_path):
2430            return wrap_path
2431
2432        self.dist_log("wrap dispatch-able target -> ", wrap_path)
2433        # sorting for readability
2434        features = self.feature_sorted(self.feature_implies_c(target))
2435        target_join = "#define %sCPU_TARGET_" % self.conf_c_prefix_
2436        target_defs = [target_join + f for f in features]
2437        target_defs = '\n'.join(target_defs)
2438
2439        with open(wrap_path, "w") as fd:
2440            fd.write(textwrap.dedent("""\
2441            /**
2442             * AUTOGENERATED DON'T EDIT
2443             * Please make changes to the code generator \
2444             (distutils/ccompiler_opt.py)
2445             */
2446            #define {pfx}CPU_TARGET_MODE
2447            #define {pfx}CPU_TARGET_CURRENT {target_name}
2448            {target_defs}
2449            #include "{path}"
2450            """).format(
2451                pfx=self.conf_c_prefix_, target_name=target_name,
2452                path=os.path.abspath(dispatch_src), target_defs=target_defs
2453            ))
2454        return wrap_path
2455
2456    def _generate_config(self, output_dir, dispatch_src, targets, has_baseline=False):
2457        config_path = os.path.basename(dispatch_src).replace(".c", ".h")
2458        config_path = os.path.join(output_dir, config_path)
2459        # check if targets didn't change to avoid recompiling
2460        cache_hash = self.cache_hash(targets, has_baseline)
2461        try:
2462            with open(config_path) as f:
2463                last_hash = f.readline().split("cache_hash:")
2464                if len(last_hash) == 2 and int(last_hash[1]) == cache_hash:
2465                    return True
2466        except IOError:
2467            pass
2468
2469        self.dist_log("generate dispatched config -> ", config_path)
2470        dispatch_calls = []
2471        for tar in targets:
2472            if isinstance(tar, str):
2473                target_name = tar
2474            else: # multi target
2475                target_name = '__'.join([t for t in tar])
2476            req_detect = self.feature_detect(tar)
2477            req_detect = '&&'.join([
2478                "CHK(%s)" % f for f in req_detect
2479            ])
2480            dispatch_calls.append(
2481                "\t%sCPU_DISPATCH_EXPAND_(CB((%s), %s, __VA_ARGS__))" % (
2482                self.conf_c_prefix_, req_detect, target_name
2483            ))
2484        dispatch_calls = ' \\\n'.join(dispatch_calls)
2485
2486        if has_baseline:
2487            baseline_calls = (
2488                "\t%sCPU_DISPATCH_EXPAND_(CB(__VA_ARGS__))"
2489            ) % self.conf_c_prefix_
2490        else:
2491            baseline_calls = ''
2492
2493        with open(config_path, "w") as fd:
2494            fd.write(textwrap.dedent("""\
2495            // cache_hash:{cache_hash}
2496            /**
2497             * AUTOGENERATED DON'T EDIT
2498             * Please make changes to the code generator (distutils/ccompiler_opt.py)
2499             */
2500            #ifndef {pfx}CPU_DISPATCH_EXPAND_
2501                #define {pfx}CPU_DISPATCH_EXPAND_(X) X
2502            #endif
2503            #undef {pfx}CPU_DISPATCH_BASELINE_CALL
2504            #undef {pfx}CPU_DISPATCH_CALL
2505            #define {pfx}CPU_DISPATCH_BASELINE_CALL(CB, ...) \\
2506            {baseline_calls}
2507            #define {pfx}CPU_DISPATCH_CALL(CHK, CB, ...) \\
2508            {dispatch_calls}
2509            """).format(
2510                pfx=self.conf_c_prefix_, baseline_calls=baseline_calls,
2511                dispatch_calls=dispatch_calls, cache_hash=cache_hash
2512            ))
2513        return False
2514
def new_ccompiler_opt(compiler, dispatch_hpath, **kwargs):
    """
    Build a `CCompilerOpt` instance and make sure its dispatch header exists.

    The dispatch header holds the #definitions and headers of the
    platform-specific instruction-sets for the enabled CPU baseline and
    dispatch-able features. It is (re)generated when `dispatch_hpath` does
    not exist or when the new instance reports that it is not cached.

    Parameters
    ----------
    compiler : CCompiler instance
    dispatch_hpath : str
        path of the dispatch header

    **kwargs: passed as-is to `CCompilerOpt(...)`

    Returns
    -------
    new instance of CCompilerOpt
    """
    instance = CCompilerOpt(compiler, **kwargs)
    # an existing header of a cached instance is reused as-is
    if os.path.exists(dispatch_hpath) and instance.is_cached():
        return instance
    instance.generate_dispatch_header(dispatch_hpath)
    return instance
2536