1 /* Simple expression parser for GMP-ECM.
2 
3 Copyright 2003, 2004, 2005, 2006, 2007, 2008, 2012 Jim Fougeron, Paul Zimmermann and Alexander Kruppa.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or (at your
8 option) any later version.
9 
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13 more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING.  If not, see
17 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
18 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <time.h>
24 #include "ecm-ecm.h"
25 #include "getprime_r.h"
26 
27 #ifdef HAVE_STRINGS_H
28 # include <strings.h> /* for strncasecmp */
29 #endif
30 
31 #ifdef HAVE_CTYPE_H
32 # include <ctype.h>
33 #endif
34 
35 
36 /*****************************************************************
37  *   Syntax for this VERY simple recursive expression parser:	 *
38  *                                                               *
39  *   ( or [ or { along with ) or ] or } are valid for grouping   *
40  *   Normal "simple" operators:  + - * /  (. can be used for *)  *
41  *   Module:                     n%m    345%11                   *
42  *   Unary minus is supported:   -n     -500                     *
43  *   Exponentation:              n^m    2^500                    *
44  *   Simple factorial:           n!     53!  == 1*2*3*4...*52*53 *
45  *   Multi-factorial:            n!m    15!3 == 15.12.9.6.3      *
46  *   Simple Primorial:           n#     11# == 2*3*5*7*11        *
47  *   Reduced Primorial:          n#m    17#5 == 5.7.11.13.17     *
48  *                                                               *
49  * Adding (working on these at least:				 *
50  *   Phi(x,n)							 *
51  *                                                               *
52  * NOTE Lines ending in a \ character are "joined"               *
53  * NOTE C++ // single line comments (rest of line is a comment)  *
54  *                                                               *
55  ****************************************************************/
56 
57 /* value only used by the expression parser */
58 static mpz_t t, mpOne;
59 static char *expr_str;
60 
61 static void eval_power (mpz_t prior_n, mpz_t n,char op);
62 static void eval_product (mpz_t prior_n, mpz_t n,char op);
63 static void eval_sum (mpz_t prior_n, mpz_t n,char op);
64 static int  eval_Phi (mpz_t prior_n, mpz_t n, int ParamCnt);
65 static int  eval_2 (int bInFuncParams);
66 
67 #if 0 /* strncasecmp is a required function in configure.in */
68 #if defined (_MSC_VER) || defined (__MINGW32__)
69 #define strncasecmp strnicmp
70 #endif
71 #endif
72 
73 /***************************************/
74 /* Main expression evaluation function */
75 /* This is the function that the app   */
76 /* calls to read the expression line   */
77 /***************************************/
eval(mpcandi_t * n,FILE * fd,int primetest)78 int eval (mpcandi_t *n, FILE *fd, int primetest)
79 {
80   int ret;
81   int nMaxSize = 2000, nCurSize = 0;
82   int c;
83   char *expr = (char *) malloc (nMaxSize + 1);
84 
85   ASSERT_ALWAYS (expr != NULL);
86 JoinLinesLoop:
87   c = fgetc (fd);
88   if (0)
89     {
90 ChompLine:
91       do
92         c = fgetc (fd);
93       while (c != EOF && !IS_NEWLINE(c));
94       if (IS_NEWLINE(c))
95         goto JoinLinesLoop;
96     }
97 
98   while (c != EOF && !IS_NEWLINE(c) && c != ';')
99     {
100       if (c == '/')
101 	{
102 	  /* This might be a C++ // comment or it might be a / division operator.
103 	     Check it out, and if it is a comment, then "eat it" */
104 	  int peek_c = fgetc (fd);
105 	  if (peek_c == '/')
106 	    /* Got a C++ single line comment, so Chomp the line */
107 	    goto ChompLine;
108 
109 	  /* Put the char back on the file, then allow the code to add the '/' char to the buffer */
110 	  ungetc (peek_c, fd);
111         }
112 
113       /* strip space and tabs out here, and then we DON'T have to mess with them in the rest of the parser */
114       if (!isspace (c) && c != '"' && c != '\'')
115 	expr[nCurSize++] = (char) c;
116 
117       if (nCurSize == nMaxSize)
118       {
119 	char *cp;
120 	nMaxSize += nMaxSize / 2;
121 	cp = (char *) realloc (expr, nMaxSize + 1);
122         ASSERT_ALWAYS (cp != NULL);
123 	expr = cp;
124       }
125       c = fgetc (fd);
126     }
127   expr[nCurSize] = 0;
128   if (!nCurSize)
129     ret = 0;
130   else
131     {
132       if (c == ';')
133 	ungetc (c, fd);
134       mpz_init (t);
135       expr_str = expr;
136       ret = eval_2 (0);
137       if (ret)
138 	{
139 	  char *s;
140 	  char *cpTmpExpr = expr;
141 	  s = mpz_get_str (NULL, 10, t);
142 	  if (!strcmp(s, cpTmpExpr))
143 	    cpTmpExpr = NULL;
144 	  ret = mpcandi_t_add_candidate (n, t, cpTmpExpr, primetest);
145 	  free (s); /* size strlen (s) + 1 */
146 	}
147       mpz_clear(t);
148     }
149   free(expr);
150   return ret;
151 }
152 
eval_str(mpcandi_t * n,char * cp,int primetest,char ** EndChar)153 int eval_str (mpcandi_t *n, char *cp, int primetest, char **EndChar)
154 {
155   int ret;
156   int nMaxSize=2000, nCurSize=0;
157   char *c;
158   char *expr = (char *) malloc(nMaxSize+1);
159 
160   ASSERT_ALWAYS (expr != NULL);
161   c = cp;
162 JoinLinesLoop:
163   if (*c == '#')
164     {
165       do
166         ++c;
167       while (*c && !IS_NEWLINE(*c));
168       if (IS_NEWLINE(*c))
169         goto JoinLinesLoop;
170     }
171 
172   while (*c && !IS_NEWLINE(*c) && *c != ';')
173     {
174       /* strip space and tabs out here, and then we DON'T have to mess with them in the rest of the parser */
175       if (!isspace((int) *c) && *c != '"' && *c != '\'')
176 	expr[nCurSize++] = *c;
177       if (nCurSize == nMaxSize)
178       {
179 	char *cp;
180 	nMaxSize += nMaxSize / 2;
181 	cp = (char *) realloc (expr, nMaxSize + 1);
182         ASSERT_ALWAYS (cp != NULL);
183 	expr = cp;
184       }
185       ++c;
186     }
187   expr[nCurSize] = 0;
188   if (!nCurSize)
189     ret = 0;
190   else
191     {
192       if (*c != ';')
193 	++c;
194       mpz_init(t);
195       expr_str = expr;
196       ret = eval_2(0);
197       if (ret)
198 	{
199 	  char *s;
200 	  char *cpTmpExpr = expr;
201 	  s = mpz_get_str (NULL, 10, t);
202 	  if (!strcmp(s, cpTmpExpr))
203 	    cpTmpExpr = NULL;
204 	  ret = mpcandi_t_add_candidate(n, t, cpTmpExpr, primetest);
205 	  free (s); /* size strlen (s) + 1 */
206 	}
207       mpz_clear(t);
208     }
209   free(expr);
210   if (EndChar && *EndChar)
211     *EndChar = c;
212   return ret;
213 }
214 
eval_power(mpz_t prior_n,mpz_t n,char op)215 void eval_power (mpz_t prior_n, mpz_t n,char op)
216 {
217 #if defined (DEBUG_EVALUATOR)
218   if ('#'==op || '^'==op || '!'==op || '@'==op || '$'==op)
219     {
220       fprintf (stderr, "eval_power ");
221       mpz_out_str(stderr, 10, prior_n);
222       fprintf (stderr, "%c", op);
223       mpz_out_str(stderr, 10, n);
224       fprintf (stderr, "\n");
225     }
226 #endif
227 
228   if ('^'==op)
229     mpz_pow_ui(n,prior_n,mpz_get_ui(n));
230   else if ('!'==op)	/* simple factorial  (syntax n!    example: 7! == 1*2*3*4*5*6*7) */
231     mpz_fac_ui(n,mpz_get_ui(n));
232   else if ('@'==op)	/* Multi factorial   (syntax n!prior_n
233                            Example: 15!3 == 15*12*9*6*3
234                            Note: 15!3 is substituted into 15@3 by the parser */
235     {
236       long nCur;
237       unsigned long nDecr;
238       nCur = mpz_get_si(prior_n);
239       nDecr = mpz_get_ui(n);
240       mpz_set_ui(n,1);
241       while (nCur > 1)
242 	{
243 	  /* This could be done much more efficiently (bunching mults using smaller "built-ins"), but I am not going to bother for now */
244 	  mpz_mul_ui(n,n,nCur);
245 	  nCur -= nDecr;
246 	}
247     }
248   else if ('#'==op)  /* simple primorial (syntax  n#   example:  11# == 2*3*5*7*11 */
249     {
250       unsigned long nMax;
251       unsigned long p;
252       prime_info_t prime_info;
253 
254       prime_info_init (prime_info);
255       nMax = mpz_get_ui (n);
256       mpz_set_ui (n, 1);
257       for (p = 2; p <= nMax; p = getprime_mt (prime_info))
258 	/* This could be done much more efficiently (bunching mults using smaller "built-ins"), but I am not going to bother for now */
259 	mpz_mul_ui (n, n, p);
260       prime_info_clear (prime_info); /* free the prime table */
261     }
262   else if ('$'==op)  /* reduced primorial (syntax  n#prior_n   example:  13#5 == (5*7*11*13) */
263     {
264       unsigned long p;
265       unsigned long nMax;
266       unsigned long nStart;
267       prime_info_t prime_info;
268 
269       prime_info_init (prime_info);
270       nMax = mpz_get_ui (prior_n);
271       nStart = mpz_get_ui (n);
272       mpz_set_ui (n, 1);
273       /*printf ("Reduced-primorial  %ld#%ld\n", nMax, nStart);*/
274       for (p = 2; p <= nMax; p = getprime_mt (prime_info))
275 	{
276 	  if (p >= nStart)
277 	    /* This could be done much more efficiently (bunching mults using smaller "built-ins"), but I am not going to bother for now */
278 	    mpz_mul_ui (n, n, p);
279 	}
280       prime_info_clear (prime_info); /* free the prime table */
281     }
282 }
283 
284 void
eval_product(mpz_t prior_n,mpz_t n,char op)285 eval_product (mpz_t prior_n, mpz_t n, char op)
286 {
287 #if defined (DEBUG_EVALUATOR)
288   if ('*'==op || '.'==op || '/'==op || '%'==op)
289     {
290       fprintf (stderr, "eval_product ");
291       mpz_out_str(stderr, 10, prior_n);
292       fprintf (stderr, "%c", op);
293       mpz_out_str(stderr, 10, n);
294       fprintf (stderr, "\n");
295     }
296 #endif
297   if ('*' == op || '.' == op)
298     mpz_mul (n, prior_n, n);
299   else if ('/' == op)
300     {
301       mpz_t r;
302       mpz_init (r);
303       mpz_tdiv_qr (n, r, prior_n, n);
304       if (mpz_cmp_ui (r, 0) != 0)
305         {
306           fprintf (stderr, "Parsing Error: inexact division\n");
307           exit (EXIT_FAILURE);
308         }
309       mpz_clear (r);
310     }
311   else if ('%' == op)
312     mpz_tdiv_r (n, prior_n, n);
313 }
314 
eval_sum(mpz_t prior_n,mpz_t n,char op)315 void eval_sum (mpz_t prior_n, mpz_t n,char op)
316 {
317 #if defined (DEBUG_EVALUATOR)
318   if ('+'==op || '-'==op)
319     {
320       fprintf (stderr, "eval_sum ");
321       mpz_out_str(stderr, 10, prior_n);
322       fprintf (stderr, "%c", op);
323       mpz_out_str(stderr, 10, n);
324       fprintf (stderr, "\n");
325     }
326 #endif
327 
328   if ('+' == op)
329     mpz_add(n,prior_n,n);
330   else if ('-' == op)
331     mpz_sub(n,prior_n,n);
332 }
333 
eval_Phi(mpz_t b,mpz_t n,int ParamCnt)334 int eval_Phi (mpz_t b, mpz_t n, int ParamCnt)
335 {
336   int factors[200];
337   unsigned dwFactors=0, dw;
338   unsigned long B;
339   unsigned long p;
340   mpz_t D, T, org_n;
341   prime_info_t prime_info;
342 
343   if (ParamCnt == 0)
344     return 0;
345 
346   if (mpz_cmp_ui (n, 1) == 0)
347     {
348       /* return value is 1 if b is composite, or b if b is prime */
349       int isPrime = mpz_probab_prime_p (b, PROBAB_PRIME_TESTS);
350       if (isPrime)
351 	mpz_set (n, b);
352       else
353 	mpz_set (n, mpOne);
354       return 1;
355     }
356   if (mpz_cmp_si (n, -1) == 0)
357     /* this is actually INVALID, but it is easier to simply */
358     return 0;
359 
360   /* OK parse the Phi out now */
361   if (mpz_cmp_ui (b, 0) <= 0)
362     return 0;
363 
364   if (mpz_cmp_ui (b, 1) == 0)
365     {
366       if (mpz_cmp_ui (n, 1) != 0)
367 	mpz_sub_ui (n, n, 1);
368       return 1;
369     }
370 
371   /* Ok, do the real h_primative work, since we are not one of the trivial case */
372 
373   if (mpz_fits_ulong_p (b) == 0)
374     return 0;
375 
376   B = mpz_get_ui (b);
377 
378   /* Obtain the factors of B */
379   prime_info_init (prime_info);
380   for (p = 2; p <= B; p = getprime_mt (prime_info))
381     {
382       if (B % p == 0)
383 	{
384 	  /* Add the factor one time */
385 	  factors[dwFactors++] = p;
386 	  /* but be sure to totally remove it */
387 	  do { B /= p; } while (B % p == 0);
388         }
389      }
390   prime_info_clear (prime_info); /* free the prime tables */
391   B = mpz_get_si (b);
392 
393   mpz_init_set (org_n, n);
394   mpz_set_ui (n, 1);
395   mpz_init_set_ui (D, 1);
396   mpz_init (T);
397 
398   for(dw=0;(dw<(1U<<dwFactors)); dw++)
399     {
400       /* for all Mobius terms */
401       int iPower=B;
402       int iMobius=0;
403       unsigned dwIndex=0;
404       unsigned dwMask=1;
405 
406       while(dwIndex < dwFactors)
407 	{
408 	  if(dw&dwMask)
409 	    {
410 	      /* printf ("iMobius = %d iPower = %d, dwIndex = %d ", iMobius, iPower, dwIndex); */
411 	      iMobius++;
412 	      iPower/=factors[dwIndex];
413 	      /* printf ("Then iPower = %d\n", iPower);  */
414 	    }
415 	  dwMask<<=1;
416 	  ++dwIndex;
417 	}
418       /*
419       fprintf (stderr, "Taking ");
420       mpz_out_str(stderr, 10, org_n);
421       fprintf (stderr, "^%d-1\n", iPower);
422       */
423       mpz_pow_ui(T, org_n, iPower);
424       mpz_sub_ui(T, T, 1);
425 
426       if(iMobius&1)
427       {
428 	/*
429 	fprintf (stderr, "Muling D=D*T  ");
430 	mpz_out_str(stderr, 10, D);
431 	fprintf (stderr, "*");
432 	mpz_out_str(stderr, 10, T);
433 	fprintf (stderr, "\n");
434 	*/
435 	mpz_mul(D, D, T);
436       }
437       else
438       {
439 	/*
440 	fprintf (stderr, "Muling n=n*T  ");
441 	mpz_out_str(stderr, 10, n);
442 	fprintf (stderr, "*");
443 	mpz_out_str(stderr, 10, T);
444 	fprintf (stderr, "\n");
445 	*/
446 	mpz_mul(n, n, T);
447       }
448   }
449   mpz_divexact(n, n, D);
450   mpz_clear(T);
451   mpz_clear(org_n);
452   mpz_clear(D);
453 
454   return 1;
455 }
456 
457 /* A simple partial-recursive decent parser */
eval_2(int bInFuncParams)458 int eval_2 (int bInFuncParams)
459 {
460   mpz_t n_stack[4];
461   mpz_t n;
462   int i;
463   int num_base;
464   char op_stack[4];
465   char op;
466   char negate;
467   for (i=0;i<4;i++)
468     {
469       op_stack[i]=0;
470       mpz_init(n_stack[i]);
471     }
472   mpz_init(n);
473   op = 0;
474   negate = 0;
475 
476   for (;;)
477     {
478       if ('-'==(*expr_str))
479 	{
480 	  expr_str++;
481 	  negate=1;
482 	}
483       else
484 	negate=0;
485       if ('('==(*expr_str) || '['==(*expr_str) || '{'==(*expr_str))    /* Number is subexpression */
486 	{
487 	  expr_str++;
488 	  eval_2 (bInFuncParams);
489 	  mpz_set(n, t);
490 	}
491       else            /* Number is decimal value */
492 	{
493 	  for (i=0;isdigit((int) expr_str[i]);i++)
494 	    ;
495 	  if (!i)         /* No digits found */
496 	    {
497 	      /* check for a valid "function" */
498 	      if (!strncasecmp (&expr_str[i], "phi(", 4))
499 		{
500 		  /* Process the phi(B,N) function */
501 		  expr_str+=4;
502 		  /* eval the first parameter.  NOTE we pass a 1 since we ARE in parameter mode,
503 		     and this causes the ',' character to act as the end of expression */
504 		  if (eval_2 (1) != 2)
505 		    {
506 		      fprintf (stderr, "Error, Function Phi() requires 2 parameters\n");
507                       exit (EXIT_FAILURE);
508 		    }
509 		  /* Save off the parameter */
510 		  mpz_set(n_stack[3], t);
511 		  /* Now eval the second parameter NOTE we pass a 0 since we are NOT expecting a ','
512 		     character to end the expression, but are expecting a ) character to end the function */
513 		  if (eval_2 (0))
514 		    {
515 		      mpz_set(n, t);
516 		      if (eval_Phi (n_stack[3], n, 1) == 0)
517 			{
518                           fprintf (stderr, "\nParsing Error -  Invalid "
519                                    "parameter passed to the Phi function\n");
520                           exit (EXIT_FAILURE);
521 			}
522 		    }
523 		  goto MONADIC_SUFFIX_LOOP;
524 		}
525 	      else
526 		{
527 		  fprintf (stderr, "\nError - invalid number [%c]\n", expr_str[i]);
528                   exit (EXIT_FAILURE);
529 		}
530 	    }
531 	  /* Now check for a hex number.  If so, handle it as such */
532 	  num_base=10;  /* assume base 10 */
533 	  if (i == 1 && !strncasecmp(expr_str, "0x", 2))
534 	    {
535 		num_base = 16;	/* Kick up to hex */
536 		expr_str += 2;	/* skip the 0x string of the number */
537 		for (i=0;isxdigit((int) expr_str[i]);i++)
538 	          ;
539 	    }
540 	  op=expr_str[i];
541 	  expr_str[i]=0;
542 	  mpz_set_str(n,expr_str,num_base);
543 	  expr_str+=i;
544 	  *expr_str=op;
545       }
546       if (negate)
547 	mpz_neg(n,n);
548 
549 /* This label is needed for "normal" primorials and factorials, since they are evaluated Monadic suffix
550    expressions.  Most of this parser assumes Dyadic operators  with the only exceptino being the
551    Monadic prefix operator of "unary minus" which is handled by simply ignoring it (but remembering),
552    and then fixing the expression up when completed. */
553 /* This is ALSO where functions should be sent.  A function should "act" like a stand alone number.
554    We should NOT start processing, and expecting a number, but we should expect an operator first */
555 MONADIC_SUFFIX_LOOP:
556         op=*expr_str++;
557 
558       if (0==op || ')'==op || ']'==op || '}'==op || (','==op&&bInFuncParams))
559 	{
560 	  eval_power (n_stack[2],n,op_stack[2]);
561 	  eval_product (n_stack[1],n,op_stack[1]);
562 	  eval_sum (n_stack[0],n,op_stack[0]);
563 	  mpz_set(t, n);
564 	  mpz_clear(n);
565 	  for (i=0;i<4;i++)
566 	    mpz_clear(n_stack[i]);
567 	  /* Hurray! a valid expression (or sub-expression) was parsed! */
568 	  return ','==op?2:1;
569 	}
570       else
571 	{
572 	  if ('^' == op)
573 	    {
574 	      eval_power (n_stack[2],n,op_stack[2]);
575 	      mpz_set(n_stack[2], n);
576 	      op_stack[2]='^';
577 	    }
578 	  else if ('!' == op)
579 	    {
580 	      if (!isdigit((int) *expr_str))
581 		{
582 		  /* If the next char is not a digit, then this is a simple factorial, and not a "multi" factorial */
583 		  mpz_set(n_stack[2], n);
584 		  op_stack[2]='!';
585 		  goto MONADIC_SUFFIX_LOOP;
586 		}
587 	      eval_power (n_stack[2],n,op_stack[2]);
588 	      mpz_set(n_stack[2], n);
589 	      op_stack[2]='@';
590 	    }
591 	  else if ('#' == op)
592 	    {
593 	      if (!isdigit((int) *expr_str))
594 		{
595   		  /* If the next char is not a digit, then this is a simple primorial, and not a "reduced" primorial */
596 		  mpz_set(n_stack[2], n);
597 		  op_stack[2]='#';
598 		  goto MONADIC_SUFFIX_LOOP;
599 		}
600 	      eval_power (n_stack[2],n,op_stack[2]);
601 	      mpz_set(n_stack[2], n);
602 	      op_stack[2]='$';
603 	    }
604 	  else
605 	    {
606 	      if ('.'==op || '*'==op || '/'==op || '%'==op)
607 		{
608 		  eval_power (n_stack[2],n,op_stack[2]);
609 		  op_stack[2]=0;
610 		  eval_product (n_stack[1],n,op_stack[1]);
611 		  mpz_set(n_stack[1], n);
612 		  op_stack[1]=op;
613 		}
614 	      else
615 		{
616 		  if ('+'==op || '-'==op)
617 		    {
618 		      eval_power (n_stack[2],n,op_stack[2]);
619 		      op_stack[2]=0;
620 		      eval_product (n_stack[1],n,op_stack[1]);
621 		      op_stack[1]=0;
622 		      eval_sum (n_stack[0],n,op_stack[0]);
623 		      mpz_set(n_stack[0], n);
624 		      op_stack[0]=op;
625 		    }
626 		  else    /* Error - invalid operator */
627 		    {
628 		      fprintf (stderr, "\nError - unknown operator: '%c'\n", op);
629                       exit (EXIT_FAILURE);
630 		     }
631 		}
632 	    }
633 	}
634     }
635 }
636 
init_expr(void)637 void init_expr(void)
638 {
639   mpz_init_set_ui(mpOne, 1);
640 }
641 
free_expr(void)642 void free_expr(void)
643 {
644   mpz_clear(mpOne);
645 }
646