1*671ea119Smrg /* x86/coreibwl gmp-mparam.h -- Compiler/machine parameter header file.
2*671ea119Smrg 
3*671ea119Smrg Copyright 2019 Free Software Foundation, Inc.
4*671ea119Smrg 
5*671ea119Smrg This file is part of the GNU MP Library.
6*671ea119Smrg 
7*671ea119Smrg The GNU MP Library is free software; you can redistribute it and/or modify
8*671ea119Smrg it under the terms of either:
9*671ea119Smrg 
10*671ea119Smrg   * the GNU Lesser General Public License as published by the Free
11*671ea119Smrg     Software Foundation; either version 3 of the License, or (at your
12*671ea119Smrg     option) any later version.
13*671ea119Smrg 
14*671ea119Smrg or
15*671ea119Smrg 
16*671ea119Smrg   * the GNU General Public License as published by the Free Software
17*671ea119Smrg     Foundation; either version 2 of the License, or (at your option) any
18*671ea119Smrg     later version.
19*671ea119Smrg 
20*671ea119Smrg or both in parallel, as here.
21*671ea119Smrg 
22*671ea119Smrg The GNU MP Library is distributed in the hope that it will be useful, but
23*671ea119Smrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24*671ea119Smrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25*671ea119Smrg for more details.
26*671ea119Smrg 
27*671ea119Smrg You should have received copies of the GNU General Public License and the
28*671ea119Smrg GNU Lesser General Public License along with the GNU MP Library.  If not,
29*671ea119Smrg see https://www.gnu.org/licenses/.  */
30*671ea119Smrg 
/* Limb size described by this header: GMP_LIMB_BYTES * 8 == GMP_LIMB_BITS
   (4 bytes, 32 bits).  The two values must be changed together.  */
31*671ea119Smrg #define GMP_LIMB_BITS 32
32*671ea119Smrg #define GMP_LIMB_BYTES 4
33*671ea119Smrg 
34*671ea119Smrg /* 3400-3800 MHz Intel Xeon E3-1285Lv4 Broadwell */
35*671ea119Smrg /* FFT tuning limit = 67,000,000 */
36*671ea119Smrg /* Generated by tuneup.c, 2019-10-20, gcc 8.3 */
37*671ea119Smrg 
/* MOD_1_*, DIV_QR_*, DIVEXACT_1 and BMOD_1 thresholds and method selectors.
   The header comment of this file records that the values were generated by
   tuneup.c; the trailing remarks ("never ...", "always (native)",
   "N% faster than M") are part of that generated output.
   NOTE(review): presumably these should be regenerated by re-running the
   tuner rather than edited by hand -- confirm before changing any value.
   MP_SIZE_T_MAX is not defined in the visible part of this header, so it has
   to be provided by whatever includes this file.  */
38*671ea119Smrg #define MOD_1_NORM_THRESHOLD                15
39*671ea119Smrg #define MOD_1_UNNORM_THRESHOLD              16
40*671ea119Smrg #define MOD_1N_TO_MOD_1_1_THRESHOLD         10
41*671ea119Smrg #define MOD_1U_TO_MOD_1_1_THRESHOLD          8
42*671ea119Smrg #define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
43*671ea119Smrg #define MOD_1_2_TO_MOD_1_4_THRESHOLD        10
44*671ea119Smrg #define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
45*671ea119Smrg #define USE_PREINV_DIVREM_1                  1  /* native */
46*671ea119Smrg #define DIV_QR_1N_PI1_METHOD                 1  /* 21.34% faster than 2 */
47*671ea119Smrg #define DIV_QR_1_NORM_THRESHOLD             14
48*671ea119Smrg #define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
49*671ea119Smrg #define DIV_QR_2_PI2_THRESHOLD              29
50*671ea119Smrg #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
51*671ea119Smrg #define BMOD_1_TO_MOD_1_THRESHOLD           19
52*671ea119Smrg 
/* Expressed as a percentage (per the macro name); 295 here.
   NOTE(review): the exact ratio being measured is not stated in this file --
   confirm against the tuner before relying on its meaning.  */
53*671ea119Smrg #define DIV_1_VS_MUL_1_PERCENT             295
54*671ea119Smrg 
/* Multiplication thresholds.  The values are strictly increasing in the
   order listed: 26 < 97 < 220 < 306 < 454.  */
55*671ea119Smrg #define MUL_TOOM22_THRESHOLD                26
56*671ea119Smrg #define MUL_TOOM33_THRESHOLD                97
57*671ea119Smrg #define MUL_TOOM44_THRESHOLD               220
58*671ea119Smrg #define MUL_TOOM6H_THRESHOLD               306
59*671ea119Smrg #define MUL_TOOM8H_THRESHOLD               454
60*671ea119Smrg 
/* Thresholds named after a pair of Toom variants (X_TO_Y).
   NOTE(review): presumably these apply to unbalanced operand sizes --
   confirm against the multiplication code, which is not part of this file.  */
61*671ea119Smrg #define MUL_TOOM32_TO_TOOM43_THRESHOLD      93
62*671ea119Smrg #define MUL_TOOM32_TO_TOOM53_THRESHOLD     153
63*671ea119Smrg #define MUL_TOOM42_TO_TOOM53_THRESHOLD     154
64*671ea119Smrg #define MUL_TOOM42_TO_TOOM63_THRESHOLD     169
65*671ea119Smrg #define MUL_TOOM43_TO_TOOM54_THRESHOLD     136
66*671ea119Smrg 
/* Squaring thresholds, parallel to the MUL_TOOM* group above them in this
   file.  Values are strictly increasing: 0 < 44 < 134 < 242 < 342 < 502.  */
67*671ea119Smrg #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
68*671ea119Smrg #define SQR_TOOM2_THRESHOLD                 44
69*671ea119Smrg #define SQR_TOOM3_THRESHOLD                134
70*671ea119Smrg #define SQR_TOOM4_THRESHOLD                242
71*671ea119Smrg #define SQR_TOOM6_THRESHOLD                342
72*671ea119Smrg #define SQR_TOOM8_THRESHOLD                502
73*671ea119Smrg 
74*671ea119Smrg #define MULMID_TOOM42_THRESHOLD             98
75*671ea119Smrg 
/* Matching MUL/SQR pair for the *MOD_BNM1 thresholds.  */
76*671ea119Smrg #define MULMOD_BNM1_THRESHOLD               20
77*671ea119Smrg #define SQRMOD_BNM1_THRESHOLD               23
78*671ea119Smrg 
/* FFT parameters for multiplication.

   MUL_FFT_TABLE3 expands to a brace-enclosed initializer list of
   {number, k} pairs, laid out four per row.  There are 43 rows, i.e. the
   172 entries declared by MUL_FFT_TABLE3_SIZE; the two must be kept in sync.
   The first pair, {540, 5}, repeats MUL_FFT_MODF_THRESHOLD and the "k = 5"
   noted beside it.

   NOTE(review): the code that consumes this table is not part of this file.
   The first field of each pair is presumably a size bound associated with
   the given k -- confirm against the FFT implementation before editing.
   The entries are tuner output (see the "Generated by tuneup.c" comment in
   the file header) and are not sorted by either field.  */
79*671ea119Smrg #define MUL_FFT_MODF_THRESHOLD             540  /* k = 5 */
80*671ea119Smrg #define MUL_FFT_TABLE3                                      \
81*671ea119Smrg   { {    540, 5}, {     29, 6}, {     15, 5}, {     31, 6}, \
82*671ea119Smrg     {     16, 5}, {     33, 6}, {     17, 5}, {     36, 6}, \
83*671ea119Smrg     {     25, 7}, {     13, 6}, {     29, 7}, {     15, 6}, \
84*671ea119Smrg     {     33, 7}, {     17, 6}, {     36, 7}, {     19, 6}, \
85*671ea119Smrg     {     39, 7}, {     21, 6}, {     43, 7}, {     23, 6}, \
86*671ea119Smrg     {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
87*671ea119Smrg     {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
88*671ea119Smrg     {     27, 7}, {     55, 9}, {     15, 8}, {     31, 7}, \
89*671ea119Smrg     {     63, 8}, {     43, 9}, {     23, 8}, {     55,10}, \
90*671ea119Smrg     {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
91*671ea119Smrg     {     83, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
92*671ea119Smrg     {     31, 9}, {     79,10}, {     47, 9}, {     95,11}, \
93*671ea119Smrg     {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
94*671ea119Smrg     {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
95*671ea119Smrg     {     63,10}, {    143, 9}, {    287,10}, {    159,11}, \
96*671ea119Smrg     {     95, 7}, {   1599, 8}, {    831, 9}, {    431, 8}, \
97*671ea119Smrg     {    863, 9}, {    447,10}, {    239, 9}, {    479,10}, \
98*671ea119Smrg     {    255, 9}, {    511,10}, {    287,11}, {    159,10}, \
99*671ea119Smrg     {    319, 9}, {    639,10}, {    335, 9}, {    671,11}, \
100*671ea119Smrg     {    191,10}, {    383, 9}, {    767,10}, {    399,11}, \
101*671ea119Smrg     {    223,12}, {    127,11}, {    255,10}, {    511, 9}, \
102*671ea119Smrg     {   1023,11}, {    287,10}, {    607,11}, {    319,10}, \
103*671ea119Smrg     {    671,11}, {    351,12}, {    191,11}, {    383,10}, \
104*671ea119Smrg     {    799,11}, {    415,13}, {    127,12}, {    255,11}, \
105*671ea119Smrg     {    543,10}, {   1119,11}, {    607,12}, {    319,11}, \
106*671ea119Smrg     {    671,10}, {   1343,11}, {    735,12}, {    383,11}, \
107*671ea119Smrg     {    799,10}, {   1599,11}, {    863,12}, {    447,11}, \
108*671ea119Smrg     {    959,13}, {    255,12}, {    511,11}, {   1119,12}, \
109*671ea119Smrg     {    575,11}, {   1215,12}, {    639,11}, {   1343,12}, \
110*671ea119Smrg     {    703,11}, {   1407,13}, {    383,12}, {    767,11}, \
111*671ea119Smrg     {   1599,12}, {    831,11}, {   1727,12}, {    959,14}, \
112*671ea119Smrg     {    255,13}, {    511,12}, {   1215,13}, {    639,12}, \
113*671ea119Smrg     {   1471,13}, {    767,12}, {   1727,13}, {    895,12}, \
114*671ea119Smrg     {   1919,14}, {    511,13}, {   1023,12}, {   2239,13}, \
115*671ea119Smrg     {   1151,12}, {   2431,13}, {   1279,12}, {   2623,13}, \
116*671ea119Smrg     {   1407,12}, {   2815,14}, {    767,13}, {   1535,12}, \
117*671ea119Smrg     {   3135,13}, {   1663,12}, {   3455,13}, {   1919,15}, \
118*671ea119Smrg     {    511,14}, {   1023,13}, {   2175,12}, {   4479,13}, \
119*671ea119Smrg     {   2431,14}, {   1279,13}, {   2943,12}, {   5887,14}, \
120*671ea119Smrg     {   1535,13}, {   3455,14}, {   1791,13}, {   3839,15}, \
121*671ea119Smrg     {   1023,14}, {   2047,13}, {   4479,14}, {   2303,13}, \
122*671ea119Smrg     {   4991,12}, {   9983,14}, {   2559,13}, {   5247,14}, \
123*671ea119Smrg     {   2815,13}, {   5887,15}, {   1535,14}, {   3839,16} }
/* Entry count of MUL_FFT_TABLE3: 43 rows x 4 pairs.  */
124*671ea119Smrg #define MUL_FFT_TABLE3_SIZE 172
125*671ea119Smrg #define MUL_FFT_THRESHOLD                 7424
126*671ea119Smrg 
/* FFT parameters for squaring.

   SQR_FFT_TABLE3 expands to a brace-enclosed initializer list of
   {number, k} pairs: 39 rows of four pairs plus one final pair, i.e. the
   157 entries declared by SQR_FFT_TABLE3_SIZE; the two must be kept in sync.
   The first pair, {472, 5}, repeats SQR_FFT_MODF_THRESHOLD and the "k = 5"
   noted beside it.

   NOTE(review): the code that consumes this table is not part of this file.
   The first field of each pair is presumably a size bound associated with
   the given k -- confirm against the FFT implementation before editing.
   The entries are tuner output (see the "Generated by tuneup.c" comment in
   the file header) and are not sorted by either field.  */
127*671ea119Smrg #define SQR_FFT_MODF_THRESHOLD             472  /* k = 5 */
128*671ea119Smrg #define SQR_FFT_TABLE3                                      \
129*671ea119Smrg   { {    472, 5}, {     29, 6}, {     15, 5}, {     33, 6}, \
130*671ea119Smrg     {     37, 7}, {     19, 6}, {     40, 7}, {     29, 8}, \
131*671ea119Smrg     {     15, 7}, {     35, 8}, {     19, 7}, {     43, 8}, \
132*671ea119Smrg     {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
133*671ea119Smrg     {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
134*671ea119Smrg     {     55,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
135*671ea119Smrg     {     39, 8}, {     83, 9}, {     47, 8}, {     95, 9}, \
136*671ea119Smrg     {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
137*671ea119Smrg     {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
138*671ea119Smrg     {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
139*671ea119Smrg     {    127, 9}, {    255,10}, {    143, 9}, {    287,10}, \
140*671ea119Smrg     {    159,11}, {     95,12}, {     63,11}, {    127,10}, \
141*671ea119Smrg     {    271, 9}, {    543, 6}, {   4479, 7}, {   2431, 8}, \
142*671ea119Smrg     {   1247, 7}, {   2495, 8}, {   1279,10}, {    351,11}, \
143*671ea119Smrg     {    191,10}, {    399, 9}, {    799,10}, {    415,12}, \
144*671ea119Smrg     {    127,11}, {    255,10}, {    543,11}, {    287,10}, \
145*671ea119Smrg     {    607,11}, {    319,10}, {    639,11}, {    351,12}, \
146*671ea119Smrg     {    191,11}, {    383,10}, {    799,11}, {    415,10}, \
147*671ea119Smrg     {    831,13}, {    127,12}, {    255,11}, {    511,10}, \
148*671ea119Smrg     {   1023,11}, {    543,10}, {   1087,11}, {    607,12}, \
149*671ea119Smrg     {    319,11}, {    671,10}, {   1343,11}, {    735,12}, \
150*671ea119Smrg     {    383,11}, {    799,10}, {   1599,11}, {    863,12}, \
151*671ea119Smrg     {    447,11}, {    927,13}, {    255,12}, {    511,11}, \
152*671ea119Smrg     {   1087,12}, {    575,11}, {   1215,12}, {    639,11}, \
153*671ea119Smrg     {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
154*671ea119Smrg     {    767,11}, {   1599,12}, {    831,11}, {   1663,12}, \
155*671ea119Smrg     {    895,11}, {   1855,14}, {    255,13}, {    511,12}, \
156*671ea119Smrg     {   1023,11}, {   2047,12}, {   1087,11}, {   2239,12}, \
157*671ea119Smrg     {   1215,13}, {    639,12}, {   1471,13}, {    767,12}, \
158*671ea119Smrg     {   1663,13}, {    895,12}, {   1983,14}, {    511,13}, \
159*671ea119Smrg     {   1023,12}, {   2239,13}, {   1151,12}, {   2495,13}, \
160*671ea119Smrg     {   1279,12}, {   2623,13}, {   1407,14}, {    767,13}, \
161*671ea119Smrg     {   1535,12}, {   3135,13}, {   1663,12}, {   3455,13}, \
162*671ea119Smrg     {   1919,15}, {    511,14}, {   1023,13}, {   2175,12}, \
163*671ea119Smrg     {   4479,13}, {   2431,14}, {   1279,13}, {   2943,12}, \
164*671ea119Smrg     {   5887,14}, {   1535,13}, {   3455,14}, {   1791,13}, \
165*671ea119Smrg     {   3839,15}, {   1023,14}, {   2047,13}, {   4479,14}, \
166*671ea119Smrg     {   2303,13}, {   4991,12}, {   9983,14}, {   2815,13}, \
167*671ea119Smrg     {   5887,15}, {   1535,14}, {   3327,13}, {   6783,14}, \
168*671ea119Smrg     {   3839,16} }
/* Entry count of SQR_FFT_TABLE3: 39 rows x 4 pairs + 1.  */
169*671ea119Smrg #define SQR_FFT_TABLE3_SIZE 157
170*671ea119Smrg #define SQR_FFT_THRESHOLD                 5568
171*671ea119Smrg 
/* Remaining tuned parameters, grouped by macro-name prefix.  All values are
   plain integer constants except POWM_SEC_TABLE (see the note beside it).
   The file header states the values were generated by tuneup.c.
   NOTE(review): presumably they should be regenerated with the tuner rather
   than edited by hand -- confirm before changing any value.  */

/* MULLO_* and SQRLO_* thresholds.  */
172*671ea119Smrg #define MULLO_BASECASE_THRESHOLD            16
173*671ea119Smrg #define MULLO_DC_THRESHOLD                  37
174*671ea119Smrg #define MULLO_MUL_N_THRESHOLD            14281
175*671ea119Smrg #define SQRLO_BASECASE_THRESHOLD             0  /* always */
176*671ea119Smrg #define SQRLO_DC_THRESHOLD                 137
177*671ea119Smrg #define SQRLO_SQR_THRESHOLD              10821
178*671ea119Smrg 
/* DC_* thresholds for the DIV_QR, DIVAPPR_Q, BDIV_QR and BDIV_Q variants.  */
179*671ea119Smrg #define DC_DIV_QR_THRESHOLD                 54
180*671ea119Smrg #define DC_DIVAPPR_Q_THRESHOLD             146
181*671ea119Smrg #define DC_BDIV_QR_THRESHOLD                98
182*671ea119Smrg #define DC_BDIV_Q_THRESHOLD                218
183*671ea119Smrg 
/* INV_* thresholds.  */
184*671ea119Smrg #define INV_MULMOD_BNM1_THRESHOLD           50
185*671ea119Smrg #define INV_NEWTON_THRESHOLD               173
186*671ea119Smrg #define INV_APPR_THRESHOLD                 165
187*671ea119Smrg 
188*671ea119Smrg #define BINV_NEWTON_THRESHOLD              278
189*671ea119Smrg #define REDC_1_TO_REDC_N_THRESHOLD          79
190*671ea119Smrg 
/* MU_* / MUPI_* thresholds.  MU_DIV_QR and MU_DIVAPPR_Q carry the same
   value (1787) in this tuning run.  */
191*671ea119Smrg #define MU_DIV_QR_THRESHOLD               1787
192*671ea119Smrg #define MU_DIVAPPR_Q_THRESHOLD            1787
193*671ea119Smrg #define MUPI_DIV_QR_THRESHOLD               78
194*671ea119Smrg #define MU_BDIV_QR_THRESHOLD              1589
195*671ea119Smrg #define MU_BDIV_Q_THRESHOLD               1830
196*671ea119Smrg 
/* Unlike the FFT tables, this expands to a bare comma-separated list of five
   integers with no enclosing braces, so the point of use has to supply the
   surrounding syntax.  */
197*671ea119Smrg #define POWM_SEC_TABLE  1,16,126,416,932
198*671ea119Smrg 
/* GET_STR_* / SET_STR_* thresholds.  */
199*671ea119Smrg #define GET_STR_DC_THRESHOLD                11
200*671ea119Smrg #define GET_STR_PRECOMPUTE_THRESHOLD        17
201*671ea119Smrg #define SET_STR_DC_THRESHOLD               306
202*671ea119Smrg #define SET_STR_PRECOMPUTE_THRESHOLD       894
203*671ea119Smrg 
/* FAC_* thresholds.  */
204*671ea119Smrg #define FAC_DSC_THRESHOLD                  141
205*671ea119Smrg #define FAC_ODD_THRESHOLD                   34
206*671ea119Smrg 
/* MATRIX22 / HGCD / GCD / JACOBI parameters.  The two *_METHOD macros are
   selectors rather than sizes; their trailing comments record the measured
   advantage of the chosen method over the named alternative.  */
207*671ea119Smrg #define MATRIX22_STRASSEN_THRESHOLD         20
208*671ea119Smrg #define HGCD2_DIV1_METHOD                    3  /* 5.97% faster than 1 */
209*671ea119Smrg #define HGCD_THRESHOLD                      73
210*671ea119Smrg #define HGCD_APPR_THRESHOLD                123
211*671ea119Smrg #define HGCD_REDUCE_THRESHOLD             3664
212*671ea119Smrg #define GCD_DC_THRESHOLD                   562
213*671ea119Smrg #define GCDEXT_DC_THRESHOLD                465
214*671ea119Smrg #define JACOBI_BASE_METHOD                   1  /* 31.16% faster than 3 */
215*671ea119Smrg 
216*671ea119Smrg /* Tuneup completed successfully, took 35114 seconds */
217