1 /* x86_64 fat binary initializers.
2 
3    Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
4    Torbjorn Granlund (port to x86_64)
5 
6    THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
7    THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
8    COMPLETELY IN FUTURE GNU MP RELEASES.
9 
10 Copyright 2003, 2004, 2009, 2011-2015, 2017 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 #include <stdio.h>    /* for printf */
39 #include <stdlib.h>   /* for getenv */
40 #include <string.h>
41 
42 #include "gmp-impl.h"
43 
44 /* Change this to "#define TRACE(x) x" for some traces. */
45 #define TRACE(x)
46 
47 
48 /* fat_entry.asm */
49 long __gmpn_cpuid (char [12], int);
50 
51 
52 #if WANT_FAKE_CPUID
53 /* The "name"s in the table are values for the GMP_CPU_TYPE environment
54    variable.  Anything can be used, but for now it's the canonical cpu types
55    as per config.guess/config.sub.  */
56 
57 #define __gmpn_cpuid            fake_cpuid
58 
/* Pack a family and model number into a fake cpuid(1) signature word.
   The low nibble of each value goes in the base field (family bits 8-11,
   model bits 4-7) and the remaining bits go in the extended field (family
   from bit 24 up, model bits 16-19).  This is the layout that
   __gmpn_cpuvec_init decodes by adding the base and extended parts back
   together.  The four fields never overlap, so OR-ing them is the same as
   summing them.  */
#define MAKE_FMS(family, model)						\
  (  (((family) & 0x00f) <<  8) | (((family) & 0xff0) << 20)		\
   | (((model)  & 0x00f) <<  4) | (((model)  & 0x0f0) << 12))
62 
63 static struct {
64   const char  *name;
65   const char  *vendor;
66   unsigned    fms;
67 } fake_cpuid_table[] = {
68   { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
69   { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
70   { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
71   { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
72   { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
73   { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
74   { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
75   { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
76   { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
77   { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
78   { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
79   { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
80   { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
81   { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
82   { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
83   { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },
84   { "pentium4",   "GenuineIntel", MAKE_FMS (15, 3) },
85 
86   { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
87   { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
88   { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
89   { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
90   { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
91   { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
92   { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
93   { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
94   { "zen",        "AuthenticAMD", MAKE_FMS (23, 1) },
95 
96   { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
97 };
98 
99 static int
fake_cpuid_lookup(void)100 fake_cpuid_lookup (void)
101 {
102   char  *s;
103   int   i;
104 
105   s = getenv ("GMP_CPU_TYPE");
106   if (s == NULL)
107     {
108       printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
109       abort ();
110     }
111 
112   for (i = 0; i < numberof (fake_cpuid_table); i++)
113     if (strcmp (s, fake_cpuid_table[i].name) == 0)
114       return i;
115 
116   printf ("GMP_CPU_TYPE=%s unknown\n", s);
117   abort ();
118 }
119 
120 static long
fake_cpuid(char dst[12],unsigned int id)121 fake_cpuid (char dst[12], unsigned int id)
122 {
123   int  i = fake_cpuid_lookup();
124 
125   switch (id) {
126   case 0:
127     memcpy (dst, fake_cpuid_table[i].vendor, 12);
128     return 0;
129   case 1:
130     return fake_cpuid_table[i].fms;
131   case 7:
132     dst[0] = 0xff;				/* BMI1, AVX2, etc */
133     dst[1] = 0xff;				/* BMI2, etc */
134     return 0;
135   case 0x80000001:
136     dst[4 + 29 / 8] = (1 << (29 % 8));		/* "long" mode */
137     return 0;
138   default:
139     printf ("fake_cpuid(): oops, unknown id %d\n", id);
140     abort ();
141   }
142 }
143 #endif
144 
145 
/* Function-pointer types for the preinv_divrem_1 and preinv_mod_1 vector
   entries, built from the DECL_* prototype macros (defined elsewhere,
   presumably in gmp-impl.h).  __gmpn_cpuvec_init uses them to cast the plain
   divrem_1 and mod_1 pointers when no preinv variant has been selected.  */
typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
148 
/* The dispatch vector in its start-up state: every function slot is set to
   that routine's *_init entry point (defined elsewhere; presumably each one
   runs __gmpn_cpuvec_init and then continues in the selected routine --
   confirm in fat_entry.asm).  __gmpn_cpuvec_init later overwrites the vector
   with the routines chosen for the detected CPU.

   This is a positional initializer, so the order of the entries must match
   the member order of struct cpuvec_t (declared elsewhere).  The final 0
   initializes the member that follows the function pointers; any members
   after that are implicitly zero.  */
struct cpuvec_t __gmpn_cpuvec = {
  __MPN(add_n_init),
  __MPN(addlsh1_n_init),
  __MPN(addlsh2_n_init),
  __MPN(addmul_1_init),
  __MPN(addmul_2_init),
  __MPN(bdiv_dbm1c_init),
  __MPN(cnd_add_n_init),
  __MPN(cnd_sub_n_init),
  __MPN(com_init),
  __MPN(copyd_init),
  __MPN(copyi_init),
  __MPN(divexact_1_init),
  __MPN(divrem_1_init),
  __MPN(gcd_11_init),
  __MPN(lshift_init),
  __MPN(lshiftc_init),
  __MPN(mod_1_init),
  __MPN(mod_1_1p_init),
  __MPN(mod_1_1p_cps_init),
  __MPN(mod_1s_2p_init),
  __MPN(mod_1s_2p_cps_init),
  __MPN(mod_1s_4p_init),
  __MPN(mod_1s_4p_cps_init),
  __MPN(mod_34lsub1_init),
  __MPN(modexact_1c_odd_init),
  __MPN(mul_1_init),
  __MPN(mul_basecase_init),
  __MPN(mullo_basecase_init),
  __MPN(preinv_divrem_1_init),
  __MPN(preinv_mod_1_init),
  __MPN(redc_1_init),
  __MPN(redc_2_init),
  __MPN(rshift_init),
  __MPN(sqr_basecase_init),
  __MPN(sub_n_init),
  __MPN(sublsh1_n_init),
  __MPN(submul_1_init),
  0
};
189 
/* Zero until __gmpn_cpuvec_init has run to completion; that function stores
   1 here as its last step, after the vector has been installed.  */
int __gmpn_cpuvec_initialized = 0;
191 
192 /* The following setups start with generic x86, then overwrite with
193    specifics for a chip, and higher versions of that chip.
194 
195    The arrangement of the setups here will normally be the same as the $path
196    selections in configure.in for the respective chips.
197 
198    This code is reentrant and thread safe.  We always calculate the same
199    decided_cpuvec, so if two copies of the code are running it doesn't
200    matter which completes first, both write the same to __gmpn_cpuvec.
201 
202    We need to go via decided_cpuvec because if one thread has completed
203    __gmpn_cpuvec then it may be making use of the threshold values in that
204    vector.  If another thread is still running __gmpn_cpuvec_init then we
205    don't want it to write different values to those fields since some of the
206    asm routines only operate correctly up to their own defined threshold,
207    not an arbitrary value.  */
208 
/* Return 1 if the middle 16 bytes of the processor brand string (cpuid leaf
   0x80000003) contain one of the model tags in bad_cpus, otherwise 0.

   __gmpn_cpuvec_init calls this for Skylake/Kabylake models and, on a
   non-zero result, stays with the Sandy Bridge level routines instead of
   installing the Haswell/Broadwell/Skylake ones.  */
static int
gmp_workaround_skylake_cpuid_bug (void)
{
  static const char *const bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ };
  char regs[12];		/* ebx, edx, ecx as stored by __gmpn_cpuid */
  char name_part[17];		/* 16 brand-string bytes plus terminator */
  unsigned int eax, ebx, ecx, edx;
  size_t i;

  /* Example strings:                                   */
  /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz"          */
  /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz"         */
  /*                  ^               ^               ^ */
  /*     0x80000002       0x80000003      0x80000004    */
  /* We match out just the 0x80000003 part here. */

  /* In their infinite wisdom, Intel decided to use one register order for
     the vendor string, and another for the processor name string.  We shuffle
     things about here, rather than write a new variant of our assembly cpuid.
  */

  /* The registers are moved with memcpy rather than by casting the char
     buffers to unsigned int *, which would break the aliasing rules and
     assume an alignment that a char array does not guarantee.  */
  eax = (unsigned int) __gmpn_cpuid (regs, 0x80000003);
  memcpy (&ebx, regs + 0 * sizeof (unsigned int), sizeof ebx);
  memcpy (&edx, regs + 1 * sizeof (unsigned int), sizeof edx);
  memcpy (&ecx, regs + 2 * sizeof (unsigned int), sizeof ecx);

  /* The brand string wants the registers in eax, ebx, ecx, edx order.  */
  memcpy (name_part + 0 * sizeof (unsigned int), &eax, sizeof eax);
  memcpy (name_part + 1 * sizeof (unsigned int), &ebx, sizeof ebx);
  memcpy (name_part + 2 * sizeof (unsigned int), &ecx, sizeof ecx);
  memcpy (name_part + 3 * sizeof (unsigned int), &edx, sizeof edx);

  name_part[16] = 0;

  for (i = 0; i < sizeof (bad_cpus) / sizeof (bad_cpus[0]); i++)
    {
      if (strstr (name_part, bad_cpus[i]) != NULL)
	return 1;
    }
  return 0;
}
249 
/* Bit index of the BMI2 feature flag within the bytes __gmpn_cpuid stores
   for leaf 7.  It is tested as byte BMI2_BIT / 8, bit BMI2_BIT % 8 of the
   output buffer (presumably EBX bit 8; the store order is set by the
   assembly routine, which is not shown here).  */
enum {BMI2_BIT = 8};
251 
252 void
__gmpn_cpuvec_init(void)253 __gmpn_cpuvec_init (void)
254 {
255   struct cpuvec_t  decided_cpuvec;
256   char vendor_string[13];
257   char dummy_string[12];
258   long fms;
259   int family, model;
260 
261   TRACE (printf ("__gmpn_cpuvec_init:\n"));
262 
263   memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
264 
265   CPUVEC_SETUP_x86_64;
266   CPUVEC_SETUP_fat;
267 
268   __gmpn_cpuid (vendor_string, 0);
269   vendor_string[12] = 0;
270 
271   fms = __gmpn_cpuid (dummy_string, 1);
272   family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
273   model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
274 
275   /* Check extended feature flags */
276   __gmpn_cpuid (dummy_string, 0x80000001);
277   if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
278     abort (); /* longmode-capable-bit turned off! */
279 
280   /*********************************************************/
281   /*** WARNING: keep this list in sync with config.guess ***/
282   /*********************************************************/
283   if (strcmp (vendor_string, "GenuineIntel") == 0)
284     {
285       switch (family)
286 	{
287 	case 6:
288 	  switch (model)
289 	    {
290 	    case 0x0f:		/* Conroe Merom Kentsfield Allendale */
291 	    case 0x10:
292 	    case 0x11:
293 	    case 0x12:
294 	    case 0x13:
295 	    case 0x14:
296 	    case 0x15:
297 	    case 0x16:
298 	    case 0x17:		/* PNR Wolfdale Yorkfield */
299 	    case 0x18:
300 	    case 0x19:
301 	    case 0x1d:		/* PNR Dunnington */
302 	      CPUVEC_SETUP_core2;
303 	      break;
304 
305 	    case 0x1c:		/* Atom Silverthorne */
306 	    case 0x26:		/* Atom Lincroft */
307 	    case 0x27:		/* Atom Saltwell? */
308 	    case 0x36:		/* Atom Cedarview/Saltwell */
309 	      CPUVEC_SETUP_atom;
310 	      break;
311 
312 	    case 0x1a:		/* NHM Gainestown */
313 	    case 0x1b:
314 	    case 0x1e:		/* NHM Lynnfield/Jasper */
315 	    case 0x1f:
316 	    case 0x20:
317 	    case 0x21:
318 	    case 0x22:
319 	    case 0x23:
320 	    case 0x24:
321 	    case 0x25:		/* WSM Clarkdale/Arrandale */
322 	    case 0x28:
323 	    case 0x29:
324 	    case 0x2b:
325 	    case 0x2c:		/* WSM Gulftown */
326 	    case 0x2e:		/* NHM Beckton */
327 	    case 0x2f:		/* WSM Eagleton */
328 	      CPUVEC_SETUP_core2;
329 	      CPUVEC_SETUP_coreinhm;
330 	      break;
331 
332 	    case 0x37:		/* Silvermont */
333 	    case 0x4a:		/* Silvermont */
334 	    case 0x4c:		/* Airmont */
335 	    case 0x4d:		/* Silvermont/Avoton */
336 	    case 0x5a:		/* Silvermont */
337 	      CPUVEC_SETUP_atom;
338 	      CPUVEC_SETUP_silvermont;
339 	      break;
340 
341 	    case 0x5c:		/* Goldmont */
342 	    case 0x5f:		/* Goldmont */
343 	    case 0x7a:		/* Goldmont Plus */
344 	      CPUVEC_SETUP_atom;
345 	      CPUVEC_SETUP_silvermont;
346 	      CPUVEC_SETUP_goldmont;
347 	      break;
348 
349 	    case 0x2a:		/* SB */
350 	    case 0x2d:		/* SBC-EP */
351 	    case 0x3a:		/* IBR */
352 	    case 0x3e:		/* IBR Ivytown */
353 	      CPUVEC_SETUP_core2;
354 	      CPUVEC_SETUP_coreinhm;
355 	      CPUVEC_SETUP_coreisbr;
356 	      break;
357 	    case 0x3c:		/* Haswell client */
358 	    case 0x3f:		/* Haswell server */
359 	    case 0x45:		/* Haswell ULT */
360 	    case 0x46:		/* Crystal Well */
361 	      CPUVEC_SETUP_core2;
362 	      CPUVEC_SETUP_coreinhm;
363 	      CPUVEC_SETUP_coreisbr;
364 	      /* Some Haswells lack BMI2.  Let them appear as Sandybridges for
365 		 now.  */
366 	      __gmpn_cpuid (dummy_string, 7);
367 	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
368 		break;
369 	      CPUVEC_SETUP_coreihwl;
370 	      break;
371 	    case 0x3d:		/* Broadwell */
372 	    case 0x47:		/* Broadwell */
373 	    case 0x4f:		/* Broadwell server */
374 	    case 0x56:		/* Broadwell microserver */
375 	      CPUVEC_SETUP_core2;
376 	      CPUVEC_SETUP_coreinhm;
377 	      CPUVEC_SETUP_coreisbr;
378 	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
379 		break;
380 	      CPUVEC_SETUP_coreihwl;
381 	      CPUVEC_SETUP_coreibwl;
382 	      break;
383 	    case 0x4e:		/* Skylake client */
384 	    case 0x55:		/* Skylake server */
385 	    case 0x5e:		/* Skylake */
386 	    case 0x8e:		/* Kabylake */
387 	    case 0x9e:		/* Kabylake */
388 	      CPUVEC_SETUP_core2;
389 	      CPUVEC_SETUP_coreinhm;
390 	      CPUVEC_SETUP_coreisbr;
391 	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
392 		break;
393 	      if (gmp_workaround_skylake_cpuid_bug ())
394 		break;
395 	      CPUVEC_SETUP_coreihwl;
396 	      CPUVEC_SETUP_coreibwl;
397 	      CPUVEC_SETUP_skylake;
398 	      break;
399 	    }
400 	  break;
401 
402 	case 15:
403 	  CPUVEC_SETUP_pentium4;
404 	  break;
405 	}
406     }
407   else if (strcmp (vendor_string, "AuthenticAMD") == 0)
408     {
409       switch (family)
410 	{
411 	case 0x0f:		/* k8 */
412 	case 0x11:		/* "fam 11h", mix of k8 and k10 */
413 	case 0x13:
414 	  CPUVEC_SETUP_k8;
415 	  break;
416 
417 	case 0x10:		/* k10 */
418 	case 0x12:		/* k10 (llano) */
419 	  CPUVEC_SETUP_k8;
420 	  CPUVEC_SETUP_k10;
421 	  break;
422 
423 	case 0x14:		/* bobcat */
424 	  CPUVEC_SETUP_k8;
425 	  CPUVEC_SETUP_k10;
426 	  CPUVEC_SETUP_bt1;
427 	  break;
428 
429 	case 0x16:		/* jaguar */
430 	  CPUVEC_SETUP_k8;
431 	  CPUVEC_SETUP_k10;
432 	  CPUVEC_SETUP_bt1;
433 	  CPUVEC_SETUP_bt2;
434 	  break;
435 
436 	case 0x15:		/* bulldozer, piledriver, steamroller, excavator */
437 	  CPUVEC_SETUP_k8;
438 	  CPUVEC_SETUP_k10;
439 	  CPUVEC_SETUP_bd1;
440 	  break;
441 
442 	case 0x17:		/* zen */
443 	case 0x19:		/* zen3 */
444 	  CPUVEC_SETUP_zen;
445 	  break;
446 	}
447     }
448   else if (strcmp (vendor_string, "CentaurHauls") == 0)
449     {
450       switch (family)
451 	{
452 	case 6:
453 	  if (model >= 15)
454 	    CPUVEC_SETUP_nano;
455 	  break;
456 	}
457     }
458 
459   /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
460      Instead default to the plain versions from whichever CPU we detected.
461      The function arguments are compatible, no need for any glue code.  */
462   if (decided_cpuvec.preinv_divrem_1 == NULL)
463     decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
464   if (decided_cpuvec.preinv_mod_1 == NULL)
465     decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
466 
467   ASSERT_CPUVEC (decided_cpuvec);
468   CPUVEC_INSTALL (decided_cpuvec);
469 
470   /* Set this once the threshold fields are ready.
471      Use volatile to prevent it getting moved.  */
472   *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
473 }
474