1 // -*- mode:C++ ; compile-command: "g++-3.4 -I.. -I../include -g -c -Wall modpoly.cc -DHAVE_CONFIG_H -DIN_GIAC" -*- 2 // N.B.: compiling with g++-3.4 -O2 -D_I386_ does not work 3 #include "giacPCH.h" 4 /* Univariate dense polynomials including modular arithmetic 5 * Copyright (C) 2000,2014 B. Parisse, Institut Fourier, 38402 St Martin d'Heres 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program. If not, see <http://www.gnu.org/licenses/>. 
19 */ 20 using namespace std; 21 #ifdef HAVE_CONFIG_H 22 #include "config.h" 23 #endif 24 #include "sym2poly.h" 25 #include "modpoly.h" 26 #include "usual.h" 27 #include "prog.h" 28 #include "derive.h" 29 #include "ezgcd.h" 30 #include "cocoa.h" // for memory_usage 31 #include "quater.h" 32 #include "modfactor.h" 33 #include "giacintl.h" 34 #include <stdlib.h> 35 #include <cmath> 36 #include <stdexcept> 37 #include <string.h> 38 #ifdef HAVE_SYS_TIME_H 39 #include <time.h> 40 #else 41 #if !defined BESTA_OS && !defined EMCC 42 #define clock_t int 43 #define CLOCK() 0 44 #endif 45 #endif 46 47 #define GIAC_PRECOND 1 // if multiplying by w mod p, pre-computes (w*2^32)/p 48 //#define GIAC_CACHEW 1 // FFT, cache w^(2^t*p) for t>0 49 #if defined GIAC_CACHEW && GIAC_PRECOND 50 #undef GIAC_PRECOND // incompatible 51 #endif 52 53 // vector class version 1 by Agner Fog https://github.com/vectorclass 54 // this might be faster for CPU with AVX512DQ instruction set 55 // (fast multiplication of Vec4q) 56 #ifdef HAVE_VCL1_VECTORCLASS_H 57 #include <vcl1/vectorclass.h> 58 #endif 59 60 #ifndef NO_NAMESPACE_GIAC 61 namespace giac { 62 #endif // ndef NO_NAMESPACE_GIAC 63 64 const double prec(1.0/(1LL<<51)); // 52? 
65 find_invp(int p)66 double find_invp(int p){ 67 return (1.0/p)*(1.0-prec); // insure that invp is lower than 1/p 68 } 69 70 // Fourier primes that fit in 32 bit int 71 const int p1=2013265921,p2=1811939329,p3=469762049,p4=2113929217; 72 const double invp1=(1.0-prec)/p1,invp2=(1.0-prec)/p2,invp3=(1.0-prec)/p3,invp4=(1.0-prec)/p4; 73 const longlong p1p2=longlong(p1)*p2,p1p2sur2=p1p2/2; 74 _fft_mult_size(const gen & args,GIAC_CONTEXT)75 gen _fft_mult_size(const gen & args,GIAC_CONTEXT){ 76 if (args.type==_VECT && args._VECTptr->empty()) 77 return FFTMUL_SIZE; 78 if (args.type!=_INT_ || args.val<1) 79 return gensizeerr(contextptr); 80 return FFTMUL_SIZE=args.val; 81 } 82 static const char _fft_mult_size_s []="fft_mult_size"; 83 static define_unary_function_eval (__fft_mult_size,&_fft_mult_size,_fft_mult_size_s); 84 define_unary_function_ptr5( at_fft_mult_size ,alias_at_fft_mult_size,&__fft_mult_size,0,true); 85 86 static const char _fft_mult_s []="fft_mult"; 87 static define_unary_function_eval (__fft_mult,&_fft_mult_size,_fft_mult_s); 88 define_unary_function_ptr5( at_fft_mult ,alias_at_fft_mult,&__fft_mult,0,true); 89 _min_proba_time(const gen & args,GIAC_CONTEXT)90 gen _min_proba_time(const gen & args,GIAC_CONTEXT){ 91 if (args.type==_INT_ && args.val>=0) 92 return min_proba_time=args.val; 93 if (args.type==_DOUBLE_ && args._DOUBLE_val>=0) 94 return min_proba_time=args._DOUBLE_val; 95 if (args.type==_VECT && args._VECTptr->empty()) 96 return min_proba_time; 97 return gensizeerr(contextptr); 98 } 99 static const char _min_proba_time_s []="min_proba_time"; 100 static define_unary_function_eval (__min_proba_time,&_min_proba_time,_min_proba_time_s); 101 define_unary_function_ptr5( at_min_proba_time ,alias_at_min_proba_time,&__min_proba_time,0,true); 102 103 // random modular number nrandom(environment * env)104 gen nrandom(environment * env){ 105 if (env->moduloon && is_zero(env->coeff)){ 106 double d=env->modulo.to_int(); 107 int j=(int) 
(d*std_rand()/(RAND_MAX+1.0)); 108 return smod(gen(j),env->modulo); 109 } 110 else { 111 double d=env->pn.to_int(); 112 int j=(int) (d*std_rand()/(RAND_MAX+1.0)); 113 return env->coeff.makegen(j); 114 } 115 } 116 invenv(const gen & g,environment * env)117 gen invenv(const gen & g,environment * env){ 118 if (g.type==_USER) 119 return g._USERptr->inv(); 120 return invmod(g,env->modulo); 121 } 122 123 /* 124 void inpowmod(const gen & a,int n,const gen & m,gen & res){ 125 if (!n){ 126 res=gen(1); 127 return ; 128 } 129 if (n==1){ 130 res=a; 131 return ; 132 } 133 inpowmod(a,n/2,m,res); 134 res=smod((res*res),m); 135 if (n%2) 136 res=smod((res*a),m); 137 } 138 139 gen powmod(const gen & a,int n,const gen & m){ 140 if (!n) 141 return 1; 142 if (n==1) 143 return a; 144 assert(n>1); 145 gen res; 146 inpowmod(a,n,m,res); 147 return res; 148 } 149 */ powmod(unsigned a,unsigned long n,unsigned m)150 unsigned powmod(unsigned a,unsigned long n,unsigned m){ 151 if (!n) 152 return 1; 153 if (n==1) 154 return a; 155 if (n==2) 156 return (a*ulonglong(a))%m; 157 unsigned b=a%m,c=1; 158 while (n>0){ 159 if (n%2) 160 c=(c*ulonglong(b))%m; 161 n /= 2; 162 b=(b*ulonglong(b))%m; 163 } 164 return c; 165 } 166 167 derivative(const modpoly & p)168 modpoly derivative(const modpoly & p){ 169 if (p.empty()) 170 return p; 171 modpoly new_coord; 172 int d=int(p.size())-1; 173 new_coord.reserve(d); 174 modpoly::const_iterator it=p.begin(); // itend=p.end(), 175 for (;d;++it,--d) 176 new_coord.push_back((*it)*gen(d)); 177 return new_coord; 178 } 179 derivative(const modpoly & p,environment * env)180 modpoly derivative(const modpoly & p,environment * env){ 181 if (p.empty()) 182 return p; 183 modpoly new_coord; 184 int d=int(p.size())-1; 185 new_coord.reserve(d); 186 modpoly::const_iterator it=p.begin(); // itend=p.end(), 187 gen n0( 0); 188 for (;d;++it,--d) 189 if ( smod((*it)*gen(d),env->modulo)!=n0 ) 190 break; 191 for (;d;++it,--d) 192 new_coord.push_back( smod((*it)*gen(d),env->modulo) ); 193 
return new_coord; 194 } 195 integrate(const modpoly & p,const gen & shift_coeff)196 modpoly integrate(const modpoly & p,const gen & shift_coeff){ 197 if (p.empty()) 198 return p; 199 modpoly new_coord; 200 new_coord.reserve(p.size()); 201 modpoly::const_iterator itend=p.end(),it=p.begin(); 202 for (int d=0;it!=itend;++it,++d) 203 new_coord.push_back(normal(rdiv((*it),gen(d)+shift_coeff,context0),context0)); 204 return new_coord; 205 } 206 207 is_rational(double d,int & num,int & den,double eps)208 static bool is_rational(double d,int & num,int & den,double eps){ 209 double dcopy(d); 210 // continued fraction expansion 211 vector<int> v; 212 for (int n=1;n<11;++n){ 213 v.push_back(int(d)); 214 d=d-int(d); 215 if (fabs(d)<eps*n) 216 break; 217 d=1/d; 218 } 219 // re_VECTose fraction 220 num=0; 221 den=1; 222 reverse(v.begin(),v.end()); 223 for (vector<int>::const_iterator it=v.begin();it!=v.end();++it){ 224 num=num+den*(*it); 225 swap(num,den); 226 } 227 swap(num,den); 228 return fabs(dcopy-(num*1.0)/den)<eps; 229 } 230 231 232 // return n such that p=phi_n, p is assumed to be irreducible 233 // return 0 if p is not cyclotomic is_cyclotomic(const modpoly & p,double eps)234 int is_cyclotomic(const modpoly & p,double eps){ 235 modpoly q; gen e; 236 modpoly::const_iterator itend=p.end(),it=p.begin(); 237 for (;it!=itend;++it){ 238 if (it->type==_POLY){ 239 if (it->_POLYptr->coord.empty()) 240 e=zero; 241 else { 242 if (Tis_constant<gen>(*it->_POLYptr)) 243 e=it->_POLYptr->coord.front().value; 244 else 245 return 0; 246 } 247 } 248 else 249 e=*it; 250 if (e.type!=_INT_) 251 return 0; 252 q.push_back(e); 253 } 254 // q has integer coeff, q(X) must be = X^n conj(q(1/conj(X))) 255 // if it has all its root over the unit circle 256 // since q has integer coeff, q=X^n*q(1/X) i.e. 
is symmetric 257 modpoly qs(q); 258 reverse(q.begin(),q.end()); 259 if (q!=qs) 260 return 0; 261 // find arg of a root and compare to 2*pi 262 gen r=a_root(qs,0,eps); 263 if (is_undef(r)) return 0; 264 double arg_d=evalf_double(arg(r,context0),1,context0)._DOUBLE_val; 265 if (arg_d<0) 266 arg_d=-arg_d; 267 double d=2*M_PI/ arg_d; 268 // find rational approx of d 269 int num,den; 270 if (!is_rational(d,num,den,eps) || num>100) 271 return 0; 272 if (p==cyclotomic(num)) 273 return num; 274 else 275 return 0; 276 } is_cyclotomic(const modpoly & p,GIAC_CONTEXT)277 int is_cyclotomic(const modpoly & p,GIAC_CONTEXT){ 278 return is_cyclotomic(p,epsilon(contextptr)); 279 } 280 // use 0 for Z, n!=0 for Z/nZ modularize(const polynome & p,const gen & n,environment * env)281 modpoly modularize(const polynome & p,const gen & n,environment * env){ 282 bool ismod; 283 if (env && env->coeff.type!=_USER && !is_zero(n)){ 284 env->modulo=n; 285 env->pn=env->modulo; 286 ismod=true; 287 env->moduloon=true; 288 } 289 else 290 ismod=false; 291 gen n0(0); 292 vecteur v; 293 if (p.dim!=1) 294 return vecteur(1,gensizeerr(gettext("modpoly.cc/modularize"))); 295 if (p.coord.empty()) 296 return v; 297 int deg=p.lexsorted_degree(); 298 int curpow=deg; 299 v.reserve(deg+1); 300 vector< monomial<gen> >::const_iterator it=p.coord.begin(); 301 vector< monomial<gen> >::const_iterator itend=p.coord.end(); 302 for (;it!=itend;++it){ 303 int newpow=it->index.front(); 304 for (;curpow>newpow;--curpow) 305 v.push_back(n0); 306 if (ismod) 307 v.push_back(smod(it->value,env->modulo)); 308 else 309 v.push_back(it->value); 310 --curpow; 311 } 312 for (;curpow>-1;--curpow) 313 v.push_back(n0); 314 return v; 315 } 316 modularize(const dense_POLY1 & p,const gen & n,environment * env)317 modpoly modularize(const dense_POLY1 & p,const gen & n,environment * env){ 318 env->modulo=n; 319 env->pn=env->modulo; 320 env->moduloon=true; 321 if (p.empty()) 322 return p; 323 modpoly v; 324 gen n0( 0); 325 
dense_POLY1::const_iterator it=p.begin(),itend=p.end(); 326 for (;it!=itend;++it){ 327 if (smod(*it,n)!=n0) 328 break; 329 } 330 for (;it!=itend;++it) 331 v.push_back(smod(*it,n)); 332 return v; 333 } 334 unmodularize(const modpoly & a)335 polynome unmodularize(const modpoly & a){ 336 if (a.empty()) 337 return polynome(1); 338 vector< monomial<gen> > v; 339 index_t i; 340 int deg=int(a.size())-1; 341 i.push_back(deg); 342 vecteur::const_iterator it=a.begin(); 343 vecteur::const_iterator itend=a.end(); 344 gen n0( 0); 345 for (;it!=itend;++it,--i[0]){ 346 if (*it!=n0) 347 v.push_back(monomial<gen>(*it,i)); 348 } 349 return polynome(1,v); 350 } 351 352 // random polynomial of degree =i random(int i,environment * env)353 modpoly random(int i,environment * env){ 354 vecteur v; 355 v.reserve(i+1); 356 gen e; 357 do 358 e=nrandom(env); 359 while 360 (is_zero(e)); 361 v.push_back(e); 362 for (int j=1;j<=i;j++) 363 v.push_back(nrandom(env)); 364 return v; 365 } 366 is_one(const modpoly & p)367 bool is_one(const modpoly & p){ 368 if (p.size()!=1) 369 return false; 370 return (is_one(p.front())); 371 } 372 373 // 1 one()374 modpoly one(){ 375 vecteur v; 376 v.push_back(gen(1)); 377 return v; 378 } 379 380 // x=x^1 xpower1()381 modpoly xpower1(){ 382 vecteur v; 383 v.push_back(gen( 1)); 384 v.push_back(gen( 0)); 385 return v; 386 } 387 normalize_env(environment * env)388 bool normalize_env(environment * env){ 389 if ( (env->moduloon && is_zero(env->coeff)) || is_zero(env->pn)){ 390 env->pn=env->modulo; 391 if (env->complexe) 392 env->pn = env->pn * env->pn ; 393 } 394 return (env->pn.type==_INT_); 395 } 396 397 // x^modulo xpowerpn(environment * env)398 modpoly xpowerpn(environment * env){ 399 if (!normalize_env(env)) 400 return vecteur(1,gendimerr(gettext("Field too large"))); 401 int deg=env->pn.val; 402 vecteur v(deg+1); 403 v[0]=1; 404 return v; 405 } 406 407 // x -> x^p (non modular) x_to_xp(const vecteur & v,int p)408 vecteur x_to_xp(const vecteur & v, int p){ 409 if 
(p<=0) 410 return vecteur(1,gensizeerr(gettext("modpoly.cc/x_to_xp"))); 411 if ( (p==1) || v.empty()) 412 return v; 413 const_iterateur it=v.begin(),itend=v.end(); 414 vecteur res; 415 res.reserve(1+(itend-it-1)*p); 416 res.push_back(*it); 417 ++it; 418 for (;it!=itend;++it){ 419 for (int i=1;i<p;++i) 420 res.push_back(zero); 421 res.push_back(*it); 422 } 423 return res; 424 } 425 426 // multiply by x^n shiftmodpoly(modpoly & a,int n)427 void shiftmodpoly(modpoly & a,int n){ 428 a.reserve(a.size()+n); 429 for (int i=0;i<n;i++) 430 a.push_back(0); 431 } 432 433 // high = high*x^n + low, size of low must be < n mergemodpoly(modpoly & high,const modpoly & low,int n)434 void mergemodpoly(modpoly & high,const modpoly & low,int n){ 435 int l=int(low.size()); 436 for (int i=0;i<n-l;i++) 437 high.push_back(0); 438 modpoly::const_iterator it=low.begin(), itend=low.end(); 439 for (;it!=itend;++it) 440 high.push_back(*it); 441 } 442 cstcoeff(const modpoly & q)443 gen cstcoeff(const modpoly & q){ 444 modpoly::const_iterator it=q.end(); 445 --it; 446 return *it; 447 } 448 449 // !! 
Do not call with modpoly slices if new_coord and th/other overlapp Addmodpoly(modpoly::const_iterator th_it,modpoly::const_iterator th_itend,modpoly::const_iterator other_it,modpoly::const_iterator other_itend,environment * env,modpoly & new_coord)450 void Addmodpoly(modpoly::const_iterator th_it,modpoly::const_iterator th_itend,modpoly::const_iterator other_it,modpoly::const_iterator other_itend,environment * env, modpoly & new_coord){ 451 int n=int(th_itend-th_it); 452 int m=int(other_itend-other_it); 453 if (m>n){ // swap th and other in order to have n>=m 454 modpoly::const_iterator tmp=th_it; 455 th_it=other_it; 456 other_it=tmp; 457 tmp=th_itend; 458 th_itend=other_itend; 459 other_itend=tmp; 460 int saven=n; 461 n=m; 462 m=saven; 463 } 464 if (m && other_it==new_coord.begin()){ 465 modpoly temp(new_coord); 466 Addmodpoly(th_it,th_itend,temp.begin(),temp.end(),env,new_coord); 467 return; 468 } 469 if (n && (th_it==new_coord.begin()) ){ 470 modpoly::iterator th=new_coord.begin()+n-m; 471 bool trim=(n==m); 472 // in-place addition 473 if (env && env->moduloon) 474 for (;m;++th,++other_it,--m) 475 *th=smod((*th)+(*other_it), env->modulo); 476 else 477 for (;m;++th,++other_it,--m) 478 *th += (*other_it); 479 if (trim){ 480 for (th=new_coord.begin();th!=th_itend;++th){ 481 if (!is_zero(*th)) 482 break; 483 } 484 new_coord.erase(new_coord.begin(),th); 485 } 486 return; 487 } 488 new_coord.clear(); 489 if ( (n<0) || (m<0) ) 490 return ; 491 new_coord.reserve(n); 492 if (n>m){ // no trimming needed 493 for (;n>m;++th_it,--n) 494 new_coord.push_back(*th_it); 495 } 496 else { // n==m, first remove all 0 terms of the sum 497 if (env && env->moduloon) 498 for (;n && is_zero(smod((*th_it)+(*other_it), env->modulo));++th_it,++other_it,--n) 499 ; 500 else 501 for (;n && is_zero(*th_it+*other_it);++th_it,++other_it,--n) 502 ; 503 } 504 // finish addition 505 if (env && env->moduloon) 506 for (;n;++th_it,++other_it,--n) 507 new_coord.push_back(smod((*th_it)+(*other_it), 
env->modulo)); 508 else 509 for (;n;++th_it,++other_it,--n) 510 new_coord.push_back( *th_it+(*other_it) ); 511 } 512 addmodpoly(const modpoly & th,const modpoly & other,environment * env,modpoly & new_coord)513 void addmodpoly(const modpoly & th, const modpoly & other, environment * env,modpoly & new_coord){ 514 // assert( (&th!=&new_coord) && (&other!=&new_coord) ); 515 modpoly::const_iterator th_it=th.begin(),th_itend=th.end(); 516 modpoly::const_iterator other_it=other.begin(),other_itend=other.end(); 517 Addmodpoly(th_it,th_itend,other_it,other_itend,env,new_coord); 518 } 519 addmodpoly(const modpoly & th,const modpoly & other,modpoly & new_coord)520 void addmodpoly(const modpoly & th, const modpoly & other, modpoly & new_coord){ 521 // assert( (&th!=&new_coord) && (&other!=&new_coord) ); 522 modpoly::const_iterator th_it=th.begin(),th_itend=th.end(); 523 modpoly::const_iterator other_it=other.begin(),other_itend=other.end(); 524 environment * env=new environment; 525 Addmodpoly(th_it,th_itend,other_it,other_itend,env,new_coord); 526 delete env; 527 } 528 529 // modular polynomial arithmetic: gcd, egcd, simplify operator_plus(const modpoly & th,const modpoly & other,environment * env)530 modpoly operator_plus (const modpoly & th,const modpoly & other,environment * env) { 531 #ifdef TIMEOUT 532 control_c(); 533 #endif 534 if (ctrl_c || interrupted) { 535 interrupted = true; ctrl_c=false; 536 return modpoly(1,gensizeerr(gettext("Stopped by user interruption."))); 537 } 538 // Tensor addition 539 if (th.empty()) 540 return other; 541 if (other.empty()) 542 return th; 543 modpoly new_coord; 544 addmodpoly(th,other,env,new_coord); 545 return new_coord; 546 } 547 operator +(const modpoly & th,const modpoly & other)548 modpoly operator + (const modpoly & th,const modpoly & other) { 549 #ifdef TIMEOUT 550 control_c(); 551 #endif 552 if (ctrl_c || interrupted) { 553 interrupted = true; ctrl_c=false; 554 return modpoly(1,gensizeerr(gettext("Stopped by user 
interruption."))); 555 } 556 // Tensor addition 557 if (th.empty()) 558 return other; 559 if (other.empty()) 560 return th; 561 modpoly new_coord; 562 addmodpoly(th,other,new_coord); 563 return new_coord; 564 } 565 566 Submodpoly(modpoly::const_iterator th_it,modpoly::const_iterator th_itend,modpoly::const_iterator other_it,modpoly::const_iterator other_itend,environment * env,modpoly & new_coord)567 void Submodpoly(modpoly::const_iterator th_it,modpoly::const_iterator th_itend,modpoly::const_iterator other_it,modpoly::const_iterator other_itend,environment * env,modpoly & new_coord){ 568 int n=int(th_itend-th_it); 569 if (!n){ 570 new_coord=modpoly(other_it,other_itend); 571 mulmodpoly(new_coord,-1,new_coord); 572 return; 573 } 574 int m=int(other_itend-other_it); 575 if (th_it==new_coord.begin()){ 576 if (n<m){ 577 modpoly temp(new_coord); 578 Submodpoly(temp.begin(),temp.end(),other_it,other_itend,env,new_coord); 579 return; 580 } 581 else { 582 modpoly::iterator th=new_coord.begin()+n-m; 583 bool trim=(n==m); 584 // in-place - 585 if (env && env->moduloon) 586 for (;m;++th,++other_it,--m) 587 *th=smod((*th)-(*other_it), env->modulo); 588 else 589 for (;m;++th,++other_it,--m) 590 *th -= (*other_it); 591 if (trim){ 592 for (th=new_coord.begin();th!=th_itend;++th){ 593 if (!is_zero(*th)) 594 break; 595 } 596 new_coord.erase(new_coord.begin(),th); 597 } 598 } 599 return; 600 } 601 if (m && (other_it==new_coord.begin()) ){ 602 bool inplace=(m>n); 603 if (n==m){ // look if highest coeff vanishes 604 if (env && env->moduloon) 605 inplace=!is_zero(smod((*th_it)-(*other_it), env->modulo)); 606 else 607 inplace=!is_zero((*th_it)-(*other_it)); 608 } 609 if (inplace){ // in-place substraction 610 modpoly::iterator th=new_coord.begin(); 611 if (env && env->moduloon){ 612 for (;m>n;++th,--m) 613 *th=smod(-(*th),env->modulo); 614 for (;m;++th_it,++th,--m) 615 *th=smod((*th_it)-(*th), env->modulo); 616 } 617 else { 618 for (;m>n;++th,--m) 619 *th=-(*th); 620 for 
(;m;++th_it,++th,--m) 621 *th=(*th_it)-(*th); 622 } 623 return; 624 } 625 else { // copy new_coord to a temporary and call again Addmodpoly 626 modpoly temp(new_coord); 627 Submodpoly(th_it,th_itend,temp.begin(),temp.end(),env,new_coord); 628 return; 629 } 630 } 631 if ( (n<0) || (m<0) ) 632 return ; 633 new_coord.clear(); 634 new_coord.reserve(giacmax(n,m)); 635 bool trimming; 636 if (m==n) 637 trimming=true; 638 else 639 trimming=false; 640 if (env && env->moduloon){ 641 for (;m>n;++other_it,--m) 642 new_coord.push_back(smod(-*other_it,env->modulo)); 643 } 644 else { 645 for (;m>n;++other_it,--m) 646 new_coord.push_back(-*other_it); 647 } 648 for (;n>m;++th_it,--n) 649 new_coord.push_back(*th_it); 650 if (env && env->moduloon) 651 for (;n;++th_it,++other_it,--n){ 652 gen tmp=smod((*th_it)-(*other_it), env->modulo); 653 if ( trimming){ 654 if (!is_zero(tmp)){ 655 trimming=false; 656 new_coord.push_back(tmp); 657 } 658 } 659 else 660 new_coord.push_back(tmp); 661 } 662 else 663 for (;n;++th_it,++other_it,--n){ 664 gen tmp=(*th_it)-(*other_it); 665 if ( trimming){ 666 if (!is_zero(tmp)){ 667 trimming=false; 668 new_coord.push_back(tmp); 669 } 670 } 671 else 672 new_coord.push_back(tmp); 673 } 674 } 675 submodpoly(const modpoly & th,const modpoly & other,environment * env,modpoly & new_coord)676 void submodpoly(const modpoly & th, const modpoly & other, environment * env,modpoly & new_coord){ 677 // assert( (&th!=&new_coord) && (&other!=&new_coord) ); 678 modpoly::const_iterator th_it=th.begin(),th_itend=th.end(); 679 modpoly::const_iterator other_it=other.begin(),other_itend=other.end(); 680 Submodpoly(th_it,th_itend,other_it,other_itend,env,new_coord); 681 } 682 submodpoly(const modpoly & th,const modpoly & other,modpoly & new_coord)683 void submodpoly(const modpoly & th, const modpoly & other, modpoly & new_coord){ 684 // assert( (&th!=&new_coord) && (&other!=&new_coord) ); 685 modpoly::const_iterator th_it=th.begin(),th_itend=th.end(); 686 modpoly::const_iterator 
other_it=other.begin(),other_itend=other.end(); 687 environment * env=new environment; 688 Submodpoly(th_it,th_itend,other_it,other_itend,env,new_coord); 689 delete env; 690 } 691 operator_minus(const modpoly & th,const modpoly & other,environment * env)692 modpoly operator_minus (const modpoly & th,const modpoly & other,environment * env) { 693 #ifdef TIMEOUT 694 control_c(); 695 #endif 696 if (ctrl_c || interrupted) { 697 interrupted = true; ctrl_c=false; 698 return modpoly(1,gensizeerr(gettext("Stopped by user interruption."))); 699 } 700 // Tensor sub 701 if (th.empty()) 702 return -other; 703 if (other.empty()) 704 return th; 705 modpoly new_coord; 706 submodpoly(th,other,env,new_coord); 707 return new_coord; 708 } 709 operator -(const modpoly & th,const modpoly & other)710 modpoly operator - (const modpoly & th,const modpoly & other) { 711 #ifdef TIMEOUT 712 control_c(); 713 #endif 714 if (ctrl_c || interrupted) { 715 interrupted = true; ctrl_c=false; 716 return modpoly(1,gensizeerr(gettext("Stopped by user interruption."))); 717 } 718 // Tensor sub 719 if (th.empty()) 720 return -other; 721 if (other.empty()) 722 return th; 723 modpoly new_coord; 724 submodpoly(th,other,new_coord); 725 return new_coord; 726 } 727 mulmodpoly(const modpoly & th,const gen & fact,environment * env,modpoly & new_coord)728 void mulmodpoly(const modpoly & th, const gen & fact,environment * env, modpoly & new_coord){ 729 if (!env || !env->moduloon){ 730 mulmodpoly(th,fact,new_coord); 731 return; 732 } 733 if (is_exactly_zero(fact)){ 734 new_coord.clear(); 735 return ; 736 } 737 if (&th==&new_coord){ 738 if (is_one(fact)) 739 return; 740 modpoly::iterator it=new_coord.begin(),itend=new_coord.end(); 741 if (!env->complexe && (env->modulo.type==_INT_) && (fact.type==_INT_) && (env->modulo.val<smallint) && (fact.val<smallint)){ 742 for (;it!=itend;++it) 743 it->val=smod( (it->val)*fact.val,env->modulo.val ) ; 744 } 745 else { 746 for (;it!=itend;++it) 747 *it=smod( 
(*it)*fact,env->modulo); 748 } 749 } 750 else { // &th!=&new_coord 751 if (is_one(fact)){ 752 new_coord=th; 753 return; 754 } 755 new_coord.clear(); 756 new_coord.reserve(th.size()); 757 modpoly::const_iterator it=th.begin(),itend=th.end(); 758 if (!env->complexe && (env->modulo.type==_INT_) && (fact.type==_INT_) && (env->modulo.val<smallint) && (fact.val<smallint)){ 759 for (;it!=itend;++it) 760 new_coord.push_back(smod( (it->val)*fact.val,env->modulo.val) ); 761 } 762 else { 763 for (;it!=itend;++it) 764 new_coord.push_back(smod( (*it)*fact,env->modulo) ); 765 } 766 } 767 } 768 mulmodpoly(const modpoly & th,const gen & fact,modpoly & new_coord)769 void mulmodpoly(const modpoly & th, const gen & fact, modpoly & new_coord){ 770 if (is_exactly_zero(fact)){ 771 new_coord.clear(); 772 return ; 773 } 774 if (&th==&new_coord){ 775 if (is_one(fact)) 776 return; 777 modpoly::iterator it=new_coord.begin(),itend=new_coord.end(); 778 #ifndef USE_GMP_REPLACEMENTS 779 if (fact.type==_INT_){ 780 for (;it!=itend;++it){ 781 if (it->type==_ZINT && it->ref_count()==1) 782 mpz_mul_si(*it->_ZINTptr,*it->_ZINTptr,fact.val); 783 else 784 *it= (*it)*fact; 785 } 786 return; 787 } 788 if (fact.type==_ZINT){ 789 for (;it!=itend;++it){ 790 if (it->type==_ZINT && it->ref_count()==1) 791 mpz_mul(*it->_ZINTptr,*it->_ZINTptr,*fact._ZINTptr); 792 else 793 *it= (*it)*fact; 794 } 795 return; 796 } 797 #endif 798 for (;it!=itend;++it) 799 type_operator_times(*it,fact,*it); // *it= (*it)*fact; 800 } 801 else { // &th!=&new_coord 802 new_coord.clear(); 803 new_coord.reserve(th.size()); 804 modpoly::const_iterator it=th.begin(),itend=th.end(); 805 for (;it!=itend;++it) 806 new_coord.push_back((*it)*fact); 807 } 808 } 809 operator *(const modpoly & th,const gen & fact)810 modpoly operator * (const modpoly & th, const gen & fact){ 811 #ifdef TIMEOUT 812 control_c(); 813 #endif 814 if (ctrl_c || interrupted) { 815 interrupted = true; ctrl_c=false; 816 return modpoly(1,gensizeerr(gettext("Stopped by user 
interruption."))); 817 } 818 // Tensor constant multiplication 819 if (is_one(fact)) 820 return th; 821 modpoly new_coord; 822 mulmodpoly(th,fact,new_coord); 823 return new_coord; 824 } 825 operator *(const gen & fact,const modpoly & th)826 modpoly operator * (const gen & fact,const modpoly & th){ 827 #ifdef TIMEOUT 828 control_c(); 829 #endif 830 if (ctrl_c || interrupted) { 831 interrupted = true; ctrl_c=false; 832 return modpoly(1,gensizeerr(gettext("Stopped by user interruption."))); 833 } 834 if (is_one(fact)) 835 return th; 836 modpoly new_coord; 837 mulmodpoly(th,fact,new_coord); 838 return new_coord; 839 } 840 operator *(const modpoly & a,const modpoly & b)841 modpoly operator * (const modpoly & a, const modpoly & b) { 842 environment env; 843 modpoly temp(operator_times(a,b,&env)); 844 return temp; 845 } 846 847 operator_times(const modpoly & th,const gen & fact,environment * env)848 modpoly operator_times(const modpoly & th, const gen & fact,environment * env){ 849 #ifdef TIMEOUT 850 control_c(); 851 #endif 852 if (ctrl_c || interrupted) { 853 interrupted = true; ctrl_c=false; 854 return modpoly(1,gensizeerr(gettext("Stopped by user interruption."))); 855 } 856 // Tensor constant multiplication 857 if (is_one(fact)) 858 return th; 859 modpoly new_coord; 860 mulmodpoly(th,fact,env,new_coord); 861 return new_coord; 862 } 863 operator_times(const gen & fact,const modpoly & th,environment * env)864 modpoly operator_times(const gen & fact,const modpoly & th,environment * env){ 865 #ifdef TIMEOUT 866 control_c(); 867 #endif 868 if (ctrl_c || interrupted) { 869 interrupted = true; ctrl_c=false; 870 return modpoly(1,gensizeerr(gettext("Stopped by user interruption."))); 871 } 872 if (is_one(fact)) 873 return th; 874 modpoly new_coord; 875 mulmodpoly(th,fact,env,new_coord); 876 return new_coord; 877 } 878 879 // *res = *res + a*b, *res must not be elsewhere referenced add_mul(mpz_t * res,mpz_t & prod,const gen & a,const gen & b)880 inline void add_mul(mpz_t * 
res,mpz_t & prod,const gen &a,const gen &b){ 881 switch ( (a.type<< _DECALAGE) | b.type) { 882 case _INT___INT_: 883 mpz_set_si(prod,a.val); 884 #ifdef mpz_mul_si 885 mpz_mul_si(prod,prod,b.val); 886 #else 887 if (b.val<0){ 888 mpz_mul_ui(prod,prod,-b.val); 889 mpz_neg(prod,prod); 890 } 891 else 892 mpz_mul_ui(prod,prod,b.val); 893 #endif 894 break; 895 case _ZINT__ZINT: 896 mpz_mul(prod,*a._ZINTptr,*b._ZINTptr); 897 break; 898 case _INT___ZINT: 899 #ifdef mpz_mul_si 900 mpz_mul_si(prod,*b._ZINTptr,a.val); 901 #else 902 if (a.val<0){ 903 mpz_mul_ui(prod,*b._ZINTptr,-a.val); 904 mpz_neg(prod,prod); 905 } 906 else 907 mpz_mul_ui(prod,*b._ZINTptr,a.val); 908 #endif 909 break; 910 case _ZINT__INT_: 911 #ifdef mpz_mul_si 912 mpz_mul_si(prod,*a._ZINTptr,b.val); 913 #else 914 if (b.val<0){ 915 mpz_mul_ui(prod,*a._ZINTptr,-b.val); 916 mpz_neg(prod,prod); 917 } 918 else 919 mpz_mul_ui(prod,*a._ZINTptr,b.val); 920 #endif 921 break; 922 } 923 mpz_add(*res,*res,prod); 924 } 925 926 // *res = *res - a*b, *res must not be referenced elsewhere sub_mul(mpz_t * res,mpz_t & prod,const gen & a,const gen & b)927 inline void sub_mul(mpz_t * res,mpz_t & prod,const gen &a,const gen &b){ 928 switch ( (a.type<< _DECALAGE) | b.type) { 929 case _INT___INT_: 930 mpz_set_si(prod,a.val); 931 #ifdef mpz_mul_si 932 mpz_mul_si(prod,prod,b.val); 933 #else 934 if (b.val<0){ 935 mpz_mul_ui(prod,prod,-b.val); 936 mpz_neg(prod,prod); 937 } 938 else 939 mpz_mul_ui(prod,prod,b.val); 940 #endif 941 break; 942 case _ZINT__ZINT: 943 mpz_mul(prod,*a._ZINTptr,*b._ZINTptr); 944 break; 945 case _INT___ZINT: 946 #ifdef mpz_mul_si 947 mpz_mul_si(prod,*b._ZINTptr,a.val); 948 #else 949 if (a.val<0){ 950 mpz_mul_ui(prod,*b._ZINTptr,-a.val); 951 mpz_neg(prod,prod); 952 } 953 else 954 mpz_mul_ui(prod,*b._ZINTptr,a.val); 955 #endif 956 break; 957 case _ZINT__INT_: 958 #ifdef mpz_mul_si 959 mpz_mul_si(prod,*a._ZINTptr,b.val); 960 #else 961 if (b.val<0){ 962 mpz_mul_ui(prod,*a._ZINTptr,-b.val); 963 mpz_neg(prod,prod); 
964 } 965 else 966 mpz_mul_ui(prod,*a._ZINTptr,b.val); 967 #endif 968 break; 969 } 970 mpz_sub(*res,*res,prod); 971 } 972 973 // set madeg to RAND_MAX if no truncation in degree Muldense_POLY1(const modpoly::const_iterator & ita0,const modpoly::const_iterator & ita_end,const modpoly::const_iterator & itb0,const modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord,int taille,int maxdeg)974 static void Muldense_POLY1(const modpoly::const_iterator & ita0,const modpoly::const_iterator & ita_end,const modpoly::const_iterator & itb0,const modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord,int taille,int maxdeg){ 975 if (ita0==ita_end || itb0==itb_end || maxdeg<0){ 976 new_coord.clear(); 977 return; 978 } 979 mpz_t prod; 980 mpz_init(prod); 981 int newdeg=(ita_end-ita0)+(itb_end-itb0)-2,skip=0; 982 if (maxdeg>=0 && newdeg>maxdeg){ 983 skip=newdeg-maxdeg; 984 newdeg=maxdeg; 985 } 986 new_coord.resize(newdeg+1); 987 modpoly::const_iterator ita_begin=ita0-1,ita=ita0,itb=itb0; 988 gen * target=&new_coord.front(); 989 if (taille<128) 990 taille=0; 991 else { 992 taille=sizeinbase2(taille/128); 993 taille=(128 << taille); 994 } 995 ref_mpz_t * res = new ref_mpz_t(taille?taille:128); 996 for ( ; ita!=ita_end; ++ita ){ 997 if (skip){ 998 --skip; 999 continue; 1000 } 1001 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1002 for (;itb_cur!=itb_end && ita_cur!=ita_begin;--ita_cur,++itb_cur) { 1003 add_mul(&res->z,prod,*ita_cur,*itb_cur); // res = res + (*ita_cur) * (*itb_cur); 1004 } 1005 int oldtaille=mpz_sizeinbase(res->z,2); 1006 if (env && env->moduloon){ 1007 *target=smod(gen(res),env->modulo); 1008 res = new ref_mpz_t(taille?taille:oldtaille+64); 1009 } 1010 else { 1011 // *target=res; 1012 if (ref_mpz_t2gen(res,*target)) 1013 res = new ref_mpz_t(taille?taille:oldtaille+64); 1014 else 1015 mpz_set_si(res->z,0); 1016 } 1017 ++target; 1018 } 1019 --ita; 1020 ++itb; 1021 for ( ; itb!=itb_end;++itb){ 1022 if (skip){ 1023 --skip; 1024 
continue; 1025 } 1026 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1027 for (;itb_cur!=itb_end && ita_cur!=ita_begin;--ita_cur,++itb_cur) { 1028 add_mul(&res->z,prod,*ita_cur,*itb_cur); // res=res+((*ita_cur)) * ((*itb_cur)); 1029 } 1030 int oldtaille=mpz_sizeinbase(res->z,2); 1031 if (env && env->moduloon){ 1032 *target=smod(gen(res),env->modulo); 1033 res = new ref_mpz_t(taille?taille:oldtaille); 1034 } 1035 else { 1036 // *target=res; 1037 if (ref_mpz_t2gen(res,*target)) 1038 res = new ref_mpz_t(taille?taille:oldtaille); 1039 else 1040 mpz_set_si(res->z,0); 1041 } 1042 ++target; 1043 } 1044 delete res; 1045 mpz_clear(prod); 1046 } 1047 1048 // new_coord += a*b, used in gen.cc add_mulmodpoly(const modpoly::const_iterator & ita0,const modpoly::const_iterator & ita_end,const modpoly::const_iterator & itb0,const modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord)1049 void add_mulmodpoly(const modpoly::const_iterator & ita0,const modpoly::const_iterator & ita_end,const modpoly::const_iterator & itb0,const modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord){ 1050 if (ita0==ita_end || itb0==itb_end) 1051 return; 1052 bool same=ita0==itb0 && ita_end==itb_end; 1053 mpz_t prod; 1054 mpz_init(prod); 1055 int ncs=int(new_coord.size()); 1056 int news=int((ita_end-ita0)+(itb_end-itb0)-1); 1057 if (ncs<news) 1058 new_coord=mergevecteur(vecteur(news-ncs,0),new_coord); 1059 modpoly::const_iterator ita_begin=ita0-1,ita=ita0,itb=itb0; 1060 gen * target=&new_coord.front(); 1061 if (ncs>news) 1062 target += (ncs-news); 1063 for ( ; ita!=ita_end; ++ita,++target ){ 1064 if (!env && target->type==_ZINT && target->ref_count()==1){ 1065 mpz_t * resz=target->_ZINTptr; 1066 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1067 for (;itb_cur!=itb_end && ita_cur!=ita_begin;--ita_cur,++itb_cur) { 1068 add_mul(resz,prod,*ita_cur,*itb_cur); // res = res + (*ita_cur) * (*itb_cur); 1069 } 1070 } 1071 else { 1072 ref_mpz_t * res=new ref_mpz_t; 1073 
mpz_t * resz=&res->z; 1074 if (target->type==_INT_) 1075 mpz_set_si(*resz,target->val); 1076 else 1077 mpz_set(*resz,*target->_ZINTptr); 1078 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1079 for (;itb_cur!=itb_end && ita_cur!=ita_begin;--ita_cur,++itb_cur) { 1080 add_mul(resz,prod,*ita_cur,*itb_cur); // res = res + (*ita_cur) * (*itb_cur); 1081 } 1082 if (env && env->moduloon) 1083 *target=smod(gen(res),env->modulo); 1084 else 1085 *target=res; 1086 } 1087 } 1088 --ita; 1089 ++itb; 1090 for ( ; itb!=itb_end;++itb,++target){ 1091 if (!env && target->type==_ZINT && target->ref_count()==1){ 1092 mpz_t * resz=target->_ZINTptr; 1093 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1094 for (;itb_cur!=itb_end && ita_cur!=ita_begin;--ita_cur,++itb_cur) { 1095 add_mul(resz,prod,*ita_cur,*itb_cur); // res = res + (*ita_cur) * (*itb_cur); 1096 } 1097 } 1098 else { 1099 ref_mpz_t * res=new ref_mpz_t; 1100 mpz_t * resz=&res->z; 1101 if (target->type==_INT_) 1102 mpz_set_si(*resz,target->val); 1103 else 1104 mpz_set(*resz,*target->_ZINTptr); 1105 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1106 for (;itb_cur!=itb_end && ita_cur!=ita_begin;--ita_cur,++itb_cur) { 1107 add_mul(resz,prod,*ita_cur,*itb_cur); // res = res + (*ita_cur) * (*itb_cur); 1108 } 1109 if (env && env->moduloon) 1110 *target=smod(gen(res),env->modulo); 1111 else 1112 *target=res; 1113 } 1114 } 1115 mpz_clear(prod); 1116 } 1117 1118 // new_coord memory must be reserved, Mulmodpoly clears new_coord 1119 // set madeg to RAND_MAX if no truncation in degree Mulmodpolymod(modpoly::const_iterator ita,modpoly::const_iterator ita_end,modpoly::const_iterator itb,modpoly::const_iterator itb_end,environment * env,modpoly & new_coord,bool intcoeff,int taille,int seuil_kara,int maxdeg)1120 static void Mulmodpolymod(modpoly::const_iterator ita,modpoly::const_iterator ita_end,modpoly::const_iterator itb,modpoly::const_iterator itb_end,environment * env,modpoly & new_coord,bool intcoeff,int taille,int seuil_kara,int 
maxdeg){ 1121 if (maxdeg<0) 1122 return; 1123 if (ita_end-ita-1>maxdeg) 1124 ita=ita_end-maxdeg-1; 1125 if (itb_end-itb-1>maxdeg) 1126 itb=itb_end-maxdeg-1; 1127 int a=int(ita_end-ita); 1128 int b=int(itb_end-itb); 1129 if (!b) 1130 return ; 1131 if ( ( a <= seuil_kara) || ( b <= seuil_kara) ){ 1132 if (intcoeff) 1133 Muldense_POLY1(ita,ita_end,itb,itb_end,env,new_coord,taille,maxdeg); 1134 else 1135 mulmodpoly_naive(ita,ita_end,itb,itb_end,env,new_coord); 1136 return ; 1137 } 1138 if (a<b){ 1139 Mulmodpolymod(itb,itb_end,ita,ita_end,env,new_coord,intcoeff,taille,seuil_kara,maxdeg); 1140 return; 1141 } 1142 int mid=(a+1)/2; 1143 modpoly::const_iterator ita_mid=ita_end-mid; 1144 if (mid>=b){ // cut A in a/b+1 parts 1145 int nslices=a/b; // number of submultiplications -1 1146 ita_mid=ita+b; 1147 int maxdeg_shift = ita_end-ita_mid; 1148 Mulmodpolymod(itb,itb_end,ita,ita_mid,env,new_coord,intcoeff,taille,seuil_kara,maxdeg-maxdeg_shift); // initialization 1149 modpoly low; 1150 low.reserve(b*b); 1151 for (int i=1;i<nslices;i++){ 1152 ita=ita_mid; 1153 ita_mid=ita_mid+b; 1154 shiftmodpoly(new_coord,b); 1155 maxdeg_shift -= b; 1156 Mulmodpolymod(itb,itb_end,ita,ita_mid,env,low,intcoeff,taille,seuil_kara,maxdeg-maxdeg_shift); 1157 addmodpoly(new_coord,low,env,new_coord); 1158 } 1159 // last multiplication 1160 mid=a%b; 1161 if (mid){ 1162 shiftmodpoly(new_coord,mid); 1163 Mulmodpolymod(itb,itb_end,ita_mid,ita_end,env,low,intcoeff,taille,seuil_kara,maxdeg); 1164 addmodpoly(new_coord,low,env,new_coord); 1165 } 1166 return ; 1167 } 1168 // A and B have comparable sizes. 
1169 bool same=ita==itb && ita_end==itb_end; 1170 // cut A and B in two parts 1171 // A=A_low+x^mid*A_high, B=B_low+x^mid*B_high 1172 // A*B = A_low*B_low + x^[2*mid]* A_high*B_high 1173 // + x^mid* [ (A_low+A_high)*(B_low+B_high)-A_low*B_low-A_high*B_high ] 1174 modpoly lowlow, Aplus, Bplus, lowhigh; 1175 modpoly::const_iterator itb_mid=itb_end-mid; 1176 lowlow.reserve(3*mid); 1177 Mulmodpolymod(ita_mid,ita_end,itb_mid,itb_end,env,lowlow,intcoeff,taille,seuil_kara,RAND_MAX); 1178 // If 2*mid is about maxdeg, 1179 // A*B=A_low*B_low+x^mid*(A_low*B_hig+A_hig*B_low)+x^(2*mid)*A_hig*B_hig 1180 if (mid>=maxdeg/2-4){ 1181 Mulmodpolymod(ita,ita_mid,itb,itb_mid,env,new_coord,intcoeff,taille,seuil_kara,maxdeg-2*mid); 1182 Mulmodpolymod(ita,ita_mid,itb_mid,itb_end,env,Aplus,intcoeff,taille,seuil_kara,maxdeg-mid); 1183 Mulmodpolymod(ita_mid,ita_end,itb,itb_mid,env,Bplus,intcoeff,taille,seuil_kara,maxdeg-mid); 1184 addmodpoly(Aplus,Bplus,env,Aplus); 1185 shiftmodpoly(new_coord,mid); 1186 addmodpoly(new_coord,Aplus,env,new_coord); 1187 shiftmodpoly(new_coord,mid); 1188 addmodpoly(new_coord,lowlow,env,new_coord); 1189 trim_inplace(new_coord); 1190 return; 1191 } 1192 // COUT << "lowlow" << lowlow << '\n'; 1193 // new_coord.reserve(2*mid); 1194 Mulmodpolymod(ita,ita_mid,itb,itb_mid,env,new_coord,intcoeff,taille,seuil_kara,RAND_MAX); 1195 #if 0 1196 if (same){ 1197 // (a+bx)^2=a^2+2*a*b*x+b^2*x^2, slower because a*b is not a square 1198 // a^2+b^2*x^2+((a+b)^2-a^2-b^2)*x is faster 1199 mergemodpoly(new_coord,lowlow,2*mid); 1200 Mulmodpolymod(ita,ita_mid,ita_mid,ita_end,env,lowhigh,intcoeff,taille,seuil_kara,RAND_MAX); 1201 mulmodpoly(lowhigh,2,lowhigh); 1202 shiftmodpoly(lowhigh,mid); 1203 addmodpoly(new_coord,lowhigh,env,new_coord); 1204 return; 1205 } 1206 #endif 1207 // COUT << "new_coord" << new_coord << '\n'; 1208 lowhigh.reserve(3*mid); 1209 Addmodpoly(ita,ita_mid,ita_mid,ita_end,env,Aplus); 1210 modpoly::const_iterator itap=Aplus.begin(),itap_end=Aplus.end(); 1211 if 
(same){ 1212 Mulmodpolymod(itap,itap_end,itap,itap_end,env,lowhigh,intcoeff,taille,seuil_kara,RAND_MAX); 1213 } 1214 else { 1215 Addmodpoly(itb,itb_mid,itb_mid,itb_end,env,Bplus); 1216 modpoly::const_iterator itbp=Bplus.begin(),itbp_end=Bplus.end(); 1217 Mulmodpolymod(itap,itap_end,itbp,itbp_end,env,lowhigh,intcoeff,taille,seuil_kara,RAND_MAX); 1218 } 1219 // COUT << "lowhigh" << lowhigh << '\n'; 1220 submodpoly(lowhigh,new_coord,env,lowhigh); 1221 mergemodpoly(new_coord,lowlow,2*mid); 1222 #if 0 1223 submodpoly(lowhigh,lowlow,env,lowhigh); 1224 shiftmodpoly(lowhigh,mid); 1225 addmodpoly(new_coord,lowhigh,env,new_coord); 1226 #else 1227 submodpoly(lowhigh,lowlow,env,lowlow); 1228 // COUT << "lowh-hh-ll" << lowlow << '\n'; 1229 shiftmodpoly(lowlow,mid); 1230 addmodpoly(new_coord,lowlow,env,new_coord); 1231 #endif 1232 // modpoly verif; 1233 // Muldense_POLY1(ita,ita_end,itb,itb_end,env,verif); 1234 // COUT << "newcoord" << new_coord << "=?" << verif << '\n'; 1235 } 1236 1237 Muldensemodpolysmall(const modpoly::const_iterator & ita0,const modpoly::const_iterator & ita_end,const modpoly::const_iterator & itb0,const modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord)1238 inline void Muldensemodpolysmall(const modpoly::const_iterator & ita0,const modpoly::const_iterator & ita_end,const modpoly::const_iterator & itb0,const modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord){ 1239 new_coord.clear(); 1240 if (ita0==ita_end || itb0==itb_end) return; 1241 modpoly::const_iterator ita_begin=ita0,ita=ita0,itb=itb0; 1242 for ( ; ita!=ita_end; ++ita ){ 1243 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1244 int res=0; 1245 for (;itb_cur!=itb_end;--ita_cur,++itb_cur) { 1246 res += ita_cur->val * itb_cur->val ; 1247 if (ita_cur==ita_begin) 1248 break; 1249 } 1250 if (env && env->moduloon) 1251 new_coord.push_back(smod(res,env->modulo.val)); 1252 else 1253 new_coord.push_back(res); 1254 } 1255 --ita; 1256 ++itb; 1257 for ( ; 
itb!=itb_end;++itb){ 1258 int res= 0; 1259 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1260 for (;;) { 1261 res += ita_cur->val * itb_cur->val ; 1262 if (ita_cur==ita_begin) 1263 break; 1264 --ita_cur; 1265 ++itb_cur; 1266 if (itb_cur==itb_end) 1267 break; 1268 } 1269 if (env && env->moduloon) 1270 new_coord.push_back(smod(res,env->modulo.val)); 1271 else 1272 new_coord.push_back(res); 1273 } 1274 } 1275 Mulmodpolysmall(modpoly::const_iterator & ita,modpoly::const_iterator & ita_end,modpoly::const_iterator & itb,modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord)1276 static void Mulmodpolysmall(modpoly::const_iterator & ita,modpoly::const_iterator & ita_end,modpoly::const_iterator & itb,modpoly::const_iterator & itb_end,environment * env,modpoly & new_coord){ 1277 int a=int(ita_end-ita); 1278 int b=int(itb_end-itb); 1279 if (!b) 1280 return ; 1281 if ( ( a <= INT_KARAMUL_SIZE) || ( b <= INT_KARAMUL_SIZE) ){ 1282 Muldensemodpolysmall(ita,ita_end,itb,itb_end,env,new_coord); 1283 return ; 1284 } 1285 if (a<b){ 1286 Mulmodpolysmall(itb,itb_end,ita,ita_end,env,new_coord); 1287 return; 1288 } 1289 int mid=(a+1)/2; 1290 modpoly::const_iterator ita_mid=ita_end-mid; 1291 if (mid>=b){ // cut A in a/b+1 parts 1292 int nslices=a/b; // number of submultiplications -1 1293 ita_mid=ita+b; 1294 Mulmodpolysmall(itb,itb_end,ita,ita_mid,env,new_coord); // initialization 1295 modpoly low; 1296 low.reserve(2*b); 1297 for (int i=1;i<nslices;i++){ 1298 ita=ita_mid; 1299 ita_mid=ita_mid+b; 1300 shiftmodpoly(new_coord,b); 1301 Mulmodpolysmall(itb,itb_end,ita,ita_mid,env,low); 1302 addmodpoly(new_coord,low,env,new_coord); 1303 } 1304 // last multiplication 1305 mid=a%b; 1306 if (mid){ 1307 shiftmodpoly(new_coord,mid); 1308 Mulmodpolysmall(itb,itb_end,ita_mid,ita_end,env,low); 1309 addmodpoly(new_coord,low,env,new_coord); 1310 } 1311 return ; 1312 } 1313 // cut A and B in two parts 1314 // A=A_low+x^mid*A_high, B=B_low+x^mid*B_high 1315 // A*B = A_low*B_low + 
x^[2*mid]* A_high*B_high 1316 // + x^mid* [ (A_low+A_high)*(B_low+B_high)-A_low*B_low-A_high*B_high ] 1317 modpoly lowlow, Aplus, Bplus, lowhigh; 1318 modpoly::const_iterator itb_mid=itb_end-mid; 1319 lowlow.reserve(3*mid); 1320 Mulmodpolysmall(ita_mid,ita_end,itb_mid,itb_end,env,lowlow); 1321 // COUT << "lowlow" << lowlow << '\n'; 1322 // new_coord.reserve(2*mid); 1323 Mulmodpolysmall(ita,ita_mid,itb,itb_mid,env,new_coord); 1324 // COUT << "new_coord" << new_coord << '\n'; 1325 lowhigh.reserve(2*mid); 1326 Addmodpoly(ita,ita_mid,ita_mid,ita_end,env,Aplus); 1327 Addmodpoly(itb,itb_mid,itb_mid,itb_end,env,Bplus); 1328 modpoly::const_iterator itap=Aplus.begin(),itap_end=Aplus.end(); 1329 modpoly::const_iterator itbp=Bplus.begin(),itbp_end=Bplus.end(); 1330 Mulmodpolysmall(itap,itap_end,itbp,itbp_end,env,lowhigh); 1331 // COUT << "lowhigh" << lowhigh << '\n'; 1332 submodpoly(lowhigh,new_coord,env,lowhigh); 1333 mergemodpoly(new_coord,lowlow,2*mid); 1334 submodpoly(lowhigh,lowlow,env,lowlow); 1335 // COUT << "lowh-hh-ll" << lowlow << '\n'; 1336 shiftmodpoly(lowlow,mid); 1337 addmodpoly(new_coord,lowlow,env,new_coord); 1338 } 1339 1340 // Warning: mulmodpoly assumes that coeff are integers mulmodpoly(const modpoly & a,const modpoly & b,environment * env,modpoly & new_coord,int maxdeg)1341 void mulmodpoly(const modpoly & a, const modpoly & b, environment * env,modpoly & new_coord,int maxdeg){ 1342 if (a.empty() || b.empty()){ 1343 new_coord.clear(); 1344 return; 1345 } 1346 int as=int(a.size())-1; 1347 int bs=int(b.size())-1; 1348 if (!as){ 1349 mulmodpoly(b,a.front(),env,new_coord); 1350 return; 1351 } 1352 if (!bs){ 1353 mulmodpoly(a,b.front(),env,new_coord); 1354 return; 1355 } 1356 int product_deg=as+bs; 1357 if (&a==&new_coord){ 1358 vecteur tmp; 1359 mulmodpoly(a,b,env,tmp,maxdeg); 1360 swap(tmp,new_coord); 1361 return; 1362 // setsizeerr(gettext("modpoly.cc/mulmodpoly")); 1363 } 1364 new_coord.reserve(product_deg+1); 1365 modpoly::const_iterator 
ita=a.begin(),ita_end=a.end(),itb=b.begin(),itb_end=b.end(); // ,ita_begin=a.begin() 1366 if ( env && (env->moduloon) && is_zero(env->coeff) && !env->complexe && (env->modulo.type==_INT_) && (env->modulo.val < smallint) && (product_deg < 65536) ) 1367 Mulmodpolysmall(ita,ita_end,itb,itb_end,env,new_coord); 1368 else { 1369 // test for fft should perhaps take care of the size of env->modulo 1370 if ( (1 || 1371 (!env || !env->moduloon || env->modulo.type==_INT_) 1372 ) 1373 && as>=FFTMUL_SIZE && bs>=FFTMUL_SIZE 1374 ){ 1375 // Check that all coeff are integers 1376 for (;ita!=ita_end;++ita){ 1377 if (!ita->is_integer()) 1378 break; 1379 } 1380 for (;itb!=itb_end;++itb){ 1381 if (!itb->is_integer()) 1382 break; 1383 } 1384 if (ita==ita_end && itb==itb_end){ 1385 //CERR << "// fftmult" << '\n'; 1386 if (fftmult(a,b,new_coord,(env && env->moduloon && is_zero(env->coeff) && env->modulo.type==_INT_)?env->modulo.val:0,RAND_MAX)){ 1387 #if 0 1388 vecteur save=new_coord; 1389 Muldense_POLY1(a.begin(),ita_end,b.begin(),itb_end,env,new_coord,0,maxdeg); 1390 if (save!=new_coord) 1391 CERR << " fft mult error poly1" << a << "*" << b << ";" << (env && env->moduloon && is_zero(env->coeff)?env->modulo:zero) << '\n'; 1392 #endif 1393 if (env && env->moduloon && env->modulo.type!=_INT_) 1394 smod(new_coord,env->modulo,new_coord); 1395 return ; 1396 } 1397 } 1398 ita=a.begin(); 1399 itb=b.begin(); 1400 } 1401 int taille=0;//sizeinbase2(a)+sizeinbase2(b); 1402 if ((as<=KARAMUL_SIZE) && (bs<=KARAMUL_SIZE)) 1403 Muldense_POLY1(ita,ita_end,itb,itb_end,env,new_coord,taille,maxdeg); 1404 else 1405 Mulmodpolymod(ita,ita_end,itb,itb_end,env,new_coord,true,taille,KARAMUL_SIZE,maxdeg); 1406 } 1407 } 1408 1409 operator_times(const modpoly & a,const modpoly & b,environment * env)1410 modpoly operator_times(const modpoly & a, const modpoly & b,environment * env) { 1411 // Multiplication 1412 // COUT << a <<"*" << b << "[" << modulo << "]" << '\n'; 1413 if (a.empty()) 1414 return a; 1415 if 
(b.empty()) 1416 return b; 1417 modpoly new_coord; 1418 operator_times(a,b,env,new_coord); 1419 // COUT << new_coord << '\n'; 1420 return new_coord; 1421 } 1422 unmod(const modpoly & a,const gen & m)1423 modpoly unmod(const modpoly & a,const gen & m){ 1424 modpoly res(a); 1425 iterateur it=res.begin(),itend=res.end(); 1426 for (;it!=itend;++it){ 1427 if (is_integer(*it)) 1428 continue; 1429 if (it->type!=_MOD || *(it->_MODptr+1)!=m) 1430 return modpoly(1,gensizeerr("Can not convert "+it->print(context0)+" mod "+m.print(context0))); 1431 *it=*it->_MODptr; 1432 } 1433 return res; 1434 } 1435 unext(const modpoly & a,const gen & pmin,modpoly & res)1436 bool unext(const modpoly & a,const gen & pmin,modpoly & res){ 1437 res=a; 1438 iterateur it=res.begin(),itend=res.end(); 1439 for (;it!=itend;++it){ 1440 gen g=*it; 1441 if (g.type==_FRAC) 1442 return false; 1443 if (g.type==_EXT){ 1444 if (*(g._EXTptr+1)!=pmin) 1445 return false; 1446 g=*g._EXTptr; 1447 if (g.type==_VECT) 1448 g.subtype=_POLY1__VECT; 1449 *it=g; 1450 } 1451 } 1452 return true; 1453 } 1454 ext(modpoly & res,const gen & pmin)1455 void ext(modpoly & res,const gen & pmin){ 1456 iterateur it=res.begin(),itend=res.end(); 1457 for (;it!=itend;++it){ 1458 *it=ext_reduce(*it,pmin); 1459 } 1460 } 1461 modularize(modpoly & a,const gen & m)1462 void modularize(modpoly & a,const gen & m){ 1463 iterateur it=a.begin(),itend=a.end(); 1464 for (;it!=itend;++it){ 1465 *it=makemod(*it,m); 1466 } 1467 } 1468 mulmodpoly_naive(modpoly::const_iterator ita,modpoly::const_iterator ita_end,modpoly::const_iterator itb,modpoly::const_iterator itb_end,environment * env,modpoly & new_coord)1469 void mulmodpoly_naive(modpoly::const_iterator ita,modpoly::const_iterator ita_end,modpoly::const_iterator itb,modpoly::const_iterator itb_end,environment * env,modpoly & new_coord){ 1470 new_coord.clear(); 1471 if (ita==ita_end || itb==itb_end) 1472 return; 1473 modpoly::const_iterator ita_begin=ita; 1474 if (ita==itb && ita_end==itb_end){ 
1475 // square polynomial 1476 // CERR << "square size " << ita_end-ita << '\n'; 1477 for ( ; ita!=ita_end; ++ita ){ 1478 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1479 gen res; 1480 for (;itb_cur<ita_cur;--ita_cur,++itb_cur) { 1481 type_operator_plus_times(*ita_cur,*itb_cur,res); 1482 } 1483 if (res.type==_VECT && res.ref_count()==1) mulmodpoly(*res._VECTptr,2,*res._VECTptr); else 1484 res = 2*res; 1485 if (itb_cur==ita_cur) 1486 type_operator_plus_times(*ita_cur,*itb_cur,res); 1487 new_coord.push_back(res); 1488 } 1489 --ita; 1490 ++itb; 1491 for ( ; itb!=itb_end;++itb){ 1492 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1493 gen res; 1494 for (;itb_cur<ita_cur;--ita_cur,++itb_cur) { 1495 type_operator_plus_times(*ita_cur,*itb_cur,res); 1496 } 1497 if (res.type==_VECT && res.ref_count()==1) mulmodpoly(*res._VECTptr,2,*res._VECTptr); else 1498 res = 2*res; 1499 if (itb_cur==ita_cur) 1500 type_operator_plus_times(*ita_cur,*itb_cur,res); 1501 new_coord.push_back(res); 1502 } 1503 return; 1504 } 1505 // CERR << "non square size " << ita_end-ita << '\n'; 1506 for ( ; ita!=ita_end; ++ita ){ 1507 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1508 gen res; 1509 for (;;) { 1510 type_operator_plus_times(*ita_cur,*itb_cur,res); 1511 //res += (*ita_cur)*(*itb_cur); // res = res + (*ita_cur) * (*itb_cur); 1512 if (ita_cur==ita_begin) 1513 break; 1514 --ita_cur; 1515 ++itb_cur; 1516 if (itb_cur==itb_end) 1517 break; 1518 } 1519 new_coord.push_back(res); 1520 } 1521 --ita; 1522 ++itb; 1523 for ( ; itb!=itb_end;++itb){ 1524 modpoly::const_iterator ita_cur=ita,itb_cur=itb; 1525 gen res; 1526 for (;;) { 1527 type_operator_plus_times(*ita_cur,*itb_cur,res); 1528 //res += (*ita_cur)*(*itb_cur); 1529 if (ita_cur==ita_begin) 1530 break; 1531 --ita_cur; 1532 ++itb_cur; 1533 if (itb_cur==itb_end) 1534 break; 1535 } 1536 new_coord.push_back(res); 1537 } 1538 } 1539 mulmodpoly_kara_naive(const modpoly & a,const modpoly & b,environment * env,modpoly & new_coord,int 
seuil_kara)1540 void mulmodpoly_kara_naive(const modpoly & a, const modpoly & b,environment * env,modpoly & new_coord,int seuil_kara){ 1541 modpoly::const_iterator ita=a.begin(),ita_end=a.end(),itb=b.begin(),itb_end=b.end(); 1542 Mulmodpolymod(ita,ita_end,itb,itb_end,env,new_coord,false,0,seuil_kara,RAND_MAX); // sizeinbase2(a)+sizeinbase2(b)); 1543 } 1544 1545 // return true if v empty trim(modpoly & v)1546 bool trim(modpoly & v){ 1547 iterateur it=v.begin(),itend=v.end(); 1548 for (;it!=itend;++it){ 1549 if (*it!=0) 1550 break; 1551 } 1552 if (it!=v.begin()) 1553 v.erase(v.begin(),it); 1554 return v.empty(); 1555 } 1556 1557 // exchange outer and inner variable in source reorder(const modpoly & source,modpoly & target)1558 void reorder(const modpoly & source,modpoly & target){ 1559 int ts=0,ss=int(source.size()); 1560 modpoly::const_iterator it=source.begin(),itend=source.end(); 1561 for (;it!=itend;++it) 1562 ts=giacmax(ts,it->type==_VECT?int(it->_VECTptr->size()):1); 1563 target.resize(ts); 1564 for (int i=0;i<ts;++i) 1565 target[i]=gen(vecteur(ss),_POLY1__VECT); 1566 for (int j=0;j<ss;++j){ 1567 gen g=source[j]; 1568 if (g.type!=_VECT){ 1569 (*target[0]._VECTptr)[j]=g; 1570 continue; 1571 } 1572 vecteur & v =*g._VECTptr; 1573 int vs=int(v.size()); 1574 int shift=ts-vs; 1575 for (int i=0;i<vs;++i){ 1576 (*target[i+shift]._VECTptr)[j]=v[i]; 1577 } 1578 } 1579 for (int i=0;i<ts;++i){ 1580 if (trim(*target[i]._VECTptr)) 1581 target[i]=0; 1582 } 1583 } 1584 1585 // recursive 2d to 1d, inner variable must be of degree<n to1d(const modpoly & p,modpoly & q,int n)1586 bool to1d(const modpoly & p,modpoly & q,int n){ 1587 int ps=int(p.size()); 1588 q.reserve(ps*n); 1589 for (int i=0;i<ps;++i){ 1590 gen pi=p[i]; 1591 if (pi.type!=_VECT){ 1592 for (int j=1;j<n;++j) 1593 q.push_back(0); 1594 q.push_back(pi); 1595 continue; 1596 } 1597 vecteur & v = *pi._VECTptr; 1598 int vs=int(v.size()); 1599 if (vs>n) return false; 1600 for (int j=vs;j<n;++j) 1601 q.push_back(0); 1602 for 
(int j=0;j<vs;++j) 1603 q.push_back(v[j]); 1604 } 1605 return true; 1606 } 1607 from1d(const modpoly & p,modpoly & q,int n)1608 void from1d(const modpoly & p,modpoly &q,int n){ 1609 int ps = int(p.size()); 1610 q.clear(); 1611 q.reserve((ps+n-1)/n); 1612 int r=ps%n; 1613 vecteur tmp; 1614 tmp.reserve(n); 1615 const_iterateur it=p.begin(),itend=p.end(); 1616 for (;r>0;++it,--r){ 1617 tmp.push_back(*it); 1618 } 1619 trim(tmp); 1620 if (!tmp.empty()) 1621 q.push_back(tmp); 1622 for (;it!=itend;){ 1623 tmp.clear(); 1624 for (r=n;r>0;++it,--r){ 1625 tmp.push_back(*it); 1626 } 1627 trim(tmp); 1628 q.push_back(tmp.empty()?0:(tmp.size()==1?tmp.front():tmp)); 1629 } 1630 } 1631 1632 // eval p[i] at x in q[i] horner2(const modpoly & p,const gen & x,modpoly & q)1633 void horner2(const modpoly & p,const gen & x,modpoly & q){ 1634 int ps = int(p.size()); 1635 q.resize(ps); 1636 for (int i=0;i<ps;++i){ 1637 gen pi=p[i]; 1638 if (pi.type!=_VECT) 1639 q[i]=pi; 1640 else 1641 q[i]=horner(*pi._VECTptr,x,context0); 1642 } 1643 } 1644 mulmodpoly_interpolate(const modpoly & p,const modpoly & q,int n,modpoly & res)1645 void mulmodpoly_interpolate(const modpoly & p,const modpoly & q,int n,modpoly & res){ 1646 modpoly px,qx,pqx; 1647 vecteur X,Y; 1648 int rs=int(p.size()+q.size()-1); 1649 res.resize(rs); 1650 if (debug_infolevel) 1651 CERR << CLOCK()*1e-6 << " mulmodpoly_interpolate horner " << '\n'; 1652 for (int i=-n;i<=n;++i){ 1653 X.push_back(i); 1654 if (debug_infolevel>1) 1655 CERR << CLOCK()*1e-6 << " mulmodpoly_interpolate horner2 " << i << '\n'; 1656 horner2(p,i,px); 1657 if (debug_infolevel>1) 1658 CERR << CLOCK()*1e-6 << " mulmodpoly_interpolate mult " << '\n'; 1659 if (&p==&q){ 1660 mulmodpoly_kara_naive(px,px,0,pqx,20); 1661 #if 0 1662 vecteur tmp; mulmodpoly(px,px,0,tmp); 1663 if (tmp!=pqx) { 1664 ofstream of("bugfft"); 1665 of << "p:=" << gen(px,_POLY1__VECT) << ":;" << '\n'; 1666 of << "correct p2 " << gen(pqx,_POLY1__VECT) << ":;" << '\n'; 1667 of << "wront p2 " << 
gen(tmp,_POLY1__VECT) << ":;" << '\n'; 1668 tmp=pqx-tmp; 1669 of << "difference" << tmp << '\n'; 1670 } 1671 #endif 1672 } 1673 else { 1674 horner2(q,i,qx); 1675 mulmodpoly_kara_naive(px,qx,0,pqx,20); 1676 } 1677 Y.push_back(pqx); 1678 } 1679 if (debug_infolevel) 1680 CERR << CLOCK()*1e-6 << " mulmodpoly_interpolate reorder " << '\n'; 1681 vecteur Yr; 1682 reorder(Y,Yr); 1683 if (debug_infolevel) 1684 CERR << CLOCK()*1e-6 << " mulmodpoly_interpolate rebuild " << '\n'; 1685 for (int i=0;i<rs;++i){ 1686 vecteur y=gen2vecteur(Yr[i]); 1687 if (y.size()<2*n+1) 1688 y.insert(y.begin(),int(2*n+1-y.size()),0); 1689 interpolate_inplace(X,y,0); 1690 res[i]=y; 1691 } 1692 if (debug_infolevel) 1693 CERR << CLOCK()*1e-6 << " mulmodpoly_interpolate end " << '\n'; 1694 } 1695 operator_times(const modpoly & a,const modpoly & b,environment * env,modpoly & new_coord,int maxdeg)1696 void operator_times (const modpoly & a, const modpoly & b,environment * env,modpoly & new_coord,int maxdeg) { 1697 #ifdef TIMEOUT 1698 control_c(); 1699 #endif 1700 if (ctrl_c || interrupted) { 1701 interrupted = true; ctrl_c=false; 1702 new_coord=modpoly(1,gensizeerr(gettext("Stopped by user interruption."))); 1703 return; 1704 } 1705 if (a.size()==1){ 1706 mulmodpoly(b,a.front(),env,new_coord); 1707 return; 1708 } 1709 if (b.size()==1){ 1710 mulmodpoly(a,b.front(),env,new_coord); 1711 return; 1712 } 1713 if (env && env->moduloon && is_zero(env->coeff)){ 1714 mulmodpoly(a,b,env,new_coord,maxdeg); 1715 return ; 1716 } 1717 bool gf=has_gf_coeff(a) || has_gf_coeff(b); 1718 #if 1 1719 if (gf){ 1720 vector<int> A,B; int M=-1; gen x; 1721 int agf=gf_char2_vecteur2vectorint(a,A,x),bgf=gf_char2_vecteur2vectorint(b,B,x); 1722 if (agf>0){ 1723 if (bgf==0 || agf==bgf) 1724 M=agf; 1725 } 1726 else { 1727 if (agf==0 && bgf>0) 1728 M=bgf; 1729 } 1730 if (M>0){ 1731 vector<int> R; 1732 if (gf_char2_multpoly(A,B,R,M)){ 1733 gf_char2_vectorint2vecteur(R,new_coord,M,x); 1734 return; 1735 } 1736 } 1737 } 1738 if (gf){ 1739 
vector< vector<int> > A, B,R; 1740 vector<int> apmin,bpmin; gen x; 1741 int ac=gf_vecteur2vectorvectorint(a,A,x,apmin); 1742 if (ac){ 1743 int bc=gf_vecteur2vectorvectorint(b,B,x,bpmin); 1744 if (bc==ac && apmin==bpmin){ 1745 if (gf_multpoly(A,B,R,apmin,ac)){ 1746 gf_vectorvectorint2vecteur(R,new_coord,ac,apmin,x); 1747 return; 1748 } 1749 } 1750 } 1751 } 1752 #endif 1753 modpoly::const_iterator ita=a.begin(),ita_end=a.end(),itb=b.begin(),itb_end=b.end(); 1754 #if 1 1755 if (ita->type==_DOUBLE_ || (ita->type==_CPLX && (ita->subtype==3 || ita->_CPLXptr->type==_DOUBLE_ || (ita->_CPLXptr+1)->type==_DOUBLE_) ) ) { 1756 std::vector< complex_double > af,bf; 1757 if (convert(a,af,true) && convert(b,bf,true)){ 1758 bool real=is_real(a,context0) && is_real(b,context0); 1759 int as=int(a.size()),bs=int(b.size()); 1760 int rs=as+bs-1; 1761 int logrs=sizeinbase2(rs); 1762 if (logrs>30) { new_coord=modpoly(1,gensizeerr("Degree too large")); return ;} 1763 int n=(1u<<logrs); double invn=1.0/n; 1764 reverse(af.begin(),af.end()); af.resize(n); 1765 reverse(bf.begin(),bf.end()); bf.resize(n); 1766 fft2(&af.front(),n,2*M_PI/n); 1767 fft2(&bf.front(),n,2*M_PI/n); 1768 for (int i=0;i<n;++i) 1769 af[i] *= bf[i]; 1770 fft2(&af.front(),n,-2*M_PI/n); 1771 af.resize(rs); 1772 reverse(af.begin(),af.end()); 1773 new_coord.clear(); new_coord.reserve(rs); 1774 if (real){ 1775 for (int i=0;i<rs;++i) 1776 new_coord.push_back(invn*af[i].real()); 1777 } 1778 else { 1779 for (int i=0;i<rs;++i) 1780 new_coord.push_back(invn*af[i]); 1781 } 1782 return; 1783 } 1784 } 1785 #endif 1786 // Check that all coeff of a b are integers 1787 for (;ita!=ita_end;++ita){ 1788 if (ita->type==_EXT){ 1789 gen pmin=*(ita->_EXTptr+1); 1790 modpoly aa,bb; 1791 if (&a==&b && unext(a,pmin,aa)){ 1792 #if 0 1793 if (pmin.type==_VECT && to1d(aa,bb,2*pmin._VECTptr->size()-3)){ 1794 aa.clear(); 1795 mulmodpoly_kara_naive(bb,bb,env,aa,KARAMUL_SIZE); 1796 //mulmodpoly(bb,bb,env,aa); 1797 
from1d(aa,new_coord,2*pmin._VECTptr->size()-3); 1798 ext(new_coord,pmin); 1799 return; 1800 } 1801 #endif 1802 int n=-1; 1803 if (pmin.type==_VECT) 1804 n=int(pmin._VECTptr->size())-2; 1805 if (n>0 && aa.size()>=512) 1806 mulmodpoly_interpolate(aa,aa,n,new_coord); 1807 else 1808 mulmodpoly_kara_naive(aa,aa,env,new_coord,10); 1809 ext(new_coord,pmin); 1810 return; 1811 } 1812 if (unext(a,pmin,aa) && unext(b,pmin,bb)){ 1813 if (0 && (aa.size()>=20 || bb.size()>=20)){ 1814 modpoly A,B,C; // it's slower 1815 reorder(aa,A); 1816 reorder(bb,B); 1817 mulmodpoly_kara_naive(A,B,env,C,8); 1818 reorder(C,new_coord); 1819 } 1820 else 1821 mulmodpoly_kara_naive(aa,bb,env,new_coord,10); 1822 ext(new_coord,pmin); 1823 return; 1824 } 1825 } 1826 if (ita->type==_MOD 1827 //&& (ita->_MODptr+1)->type==_INT_ 1828 ){ 1829 environment e; 1830 e.modulo=*(ita->_MODptr+1); 1831 e.moduloon=true; 1832 mulmodpoly(unmod(a,e.modulo),unmod(b,e.modulo),&e,new_coord,maxdeg); 1833 modularize(new_coord,e.modulo); 1834 return; 1835 } 1836 if (!ita->is_integer()) 1837 break; 1838 } 1839 for (;itb!=itb_end;++itb){ 1840 if (itb->type==_MOD 1841 //&& (itb->_MODptr+1)->type==_INT_ 1842 ){ 1843 environment e; 1844 e.modulo=*(itb->_MODptr+1); 1845 e.moduloon=true; 1846 mulmodpoly(unmod(a,e.modulo),unmod(b,e.modulo),&e,new_coord,maxdeg); 1847 modularize(new_coord,e.modulo); 1848 return; 1849 } 1850 if (!itb->is_integer()) 1851 break; 1852 } 1853 if (ita==ita_end && itb==itb_end){ // integer coefficients 1854 mulmodpoly(a,b,env,new_coord,maxdeg); 1855 return; 1856 } 1857 mulmodpoly_kara_naive(a,b,env,new_coord,KARAMUL_SIZE); 1858 } 1859 1860 // res=(*it) * ... 
(*(it_end-1)) mulmodpoly(vector<modpoly>::const_iterator it,vector<modpoly>::const_iterator it_end,environment * env,modpoly & new_coord)1861 void mulmodpoly(vector<modpoly>::const_iterator it,vector<modpoly>::const_iterator it_end,environment * env,modpoly & new_coord){ 1862 int n=int(it_end-it); 1863 if (n>3){ 1864 vector<modpoly>::const_iterator it_mid=it+(it_end-it)/2; 1865 modpoly first,second; 1866 mulmodpoly(it,it_mid,env,first); 1867 mulmodpoly(it_mid,it_end,env,second); 1868 mulmodpoly(first,second,env,new_coord); 1869 return ; 1870 } 1871 switch (n){ 1872 case 0: 1873 return; 1874 case 1: 1875 new_coord=*it; 1876 return; 1877 case 2: 1878 operator_times(*it,*(it+1),env,new_coord); 1879 return; 1880 case 3: 1881 operator_times(*it,*(it+1),env,new_coord); 1882 new_coord=operator_times(*(it+2),new_coord,env); 1883 return ; 1884 } 1885 } 1886 mulmodpoly(vector<modpoly>::const_iterator * it,int debut,int fin,environment * env,modpoly & pi)1887 void mulmodpoly(vector<modpoly>::const_iterator * it,int debut,int fin,environment * env,modpoly & pi){ 1888 // pi = *(it[debut]); 1889 // for (int j=debut+1;j<=fin;j++){ 1890 // modpoly tmp; 1891 // mulmodpoly(pi,*it[j],env,tmp); 1892 // pi=tmp; 1893 // } 1894 //return ; 1895 if (fin-debut>2){ 1896 int milieu=(debut+fin)/2; 1897 modpoly first,second; 1898 mulmodpoly(it,debut,milieu,env,first); 1899 mulmodpoly(it,milieu+1,fin,env,second); 1900 mulmodpoly(first,second,env,pi); 1901 return ; 1902 } 1903 switch (fin-debut){ 1904 case 0: 1905 pi=*(it[debut]); 1906 break; 1907 case 1: 1908 operator_times(*(it[debut]),*(it[debut+1]),env,pi); 1909 break; 1910 case 2: 1911 operator_times(*(it[debut]),*(it[debut+1]),env,pi); 1912 pi=operator_times(pi,(*it[debut+2]),env); 1913 break; 1914 } 1915 } 1916 negmodpoly(const modpoly & th,modpoly & new_coord)1917 void negmodpoly(const modpoly & th, modpoly & new_coord){ 1918 if (&th==&new_coord){ 1919 modpoly::iterator a = new_coord.begin(); 1920 modpoly::const_iterator a_end = 
new_coord.end(); 1921 for (;a!=a_end;++a){ 1922 #ifndef USE_GMP_REPLACEMENTS 1923 if (a->type==_ZINT && a->ref_count()==1) 1924 mpz_neg(*a->_ZINTptr,*a->_ZINTptr); 1925 else 1926 #endif 1927 *a=-(*a); 1928 } 1929 } 1930 else { 1931 new_coord.reserve(th.size()); 1932 modpoly::const_iterator a = th.begin(); 1933 modpoly::const_iterator a_end = th.end(); 1934 for (;a!=a_end;++a) 1935 new_coord.push_back(-(*a)); 1936 } 1937 } 1938 operator -(const modpoly & th)1939 modpoly operator - (const modpoly & th) { 1940 // Negate 1941 modpoly new_coord; 1942 negmodpoly(th,new_coord); 1943 return new_coord; 1944 } 1945 1946 // right redimension poly to degree n rrdm(modpoly & p,int n)1947 void rrdm(modpoly & p, int n){ 1948 int s=int(p.size()); 1949 if (s==n+1) 1950 return; 1951 for (;s>n+1;--s){ // remove trainling coeff 1952 p.pop_back(); 1953 } 1954 for (;s<n+1;++s){ // add zeros coeff 1955 p.push_back(0); 1956 } 1957 } 1958 1959 // reduce mod env->modulo and trim. (T=int or longlong) trim_inplace(vector<longlong> & p,longlong modulo)1960 void trim_inplace(vector<longlong> & p,longlong modulo){ 1961 if (p.empty()) 1962 return ; 1963 vector<longlong>::iterator it=p.begin(),itend=p.end(); 1964 while ( (it!=itend) && (*it % modulo==0) ) 1965 ++it; 1966 vector<longlong>::iterator it1=it; 1967 for (;it1!=itend;++it1){ 1968 *it1=smodll(*it1,modulo); 1969 } 1970 p.erase(p.begin(),it); 1971 } 1972 fast_trim_inplace(vector<longlong> & p,longlong modulo)1973 void fast_trim_inplace(vector<longlong> & p,longlong modulo){ 1974 if (p.empty()) 1975 return ; 1976 vector<longlong>::iterator it=p.begin(),itend=p.end(); 1977 while ( (it!=itend) && (*it==0 || *it % modulo==0) ) 1978 ++it; 1979 p.erase(p.begin(),it); 1980 } 1981 trim_inplace(vector<int> & p,int modulo)1982 void trim_inplace(vector<int> & p,int modulo){ 1983 if (p.empty()) 1984 return ; 1985 vector<int>::iterator it=p.begin(),itend=p.end(); 1986 while ( (it!=itend) && (*it==0 || *it % modulo==0) ) 1987 ++it; 1988 
vector<int>::iterator it1=it; 1989 for (;it1!=itend;++it1){ 1990 *it1=smod(*it1,modulo); 1991 } 1992 p.erase(p.begin(),it); 1993 } 1994 fast_trim_inplace(vector<int> & p,int modulo,int maxsize)1995 void fast_trim_inplace(vector<int> & p,int modulo,int maxsize){ 1996 if (p.empty()) 1997 return ; 1998 vector<int>::iterator it=p.begin(),itend=p.end(); 1999 if (maxsize>=0 && maxsize<itend-it) 2000 it = itend-maxsize; 2001 while ( (it!=itend) && (*it==0 || *it % modulo==0) ) 2002 ++it; 2003 p.erase(p.begin(),it); 2004 } 2005 2006 // reduce mod env->modulo and trim. trim_inplace(modpoly & p,environment * env)2007 void trim_inplace(modpoly & p,environment * env){ 2008 if (p.empty()) 2009 return ; 2010 modpoly::iterator it=p.begin(),itend=p.end(); 2011 if (env && env->moduloon){ 2012 if (env->modulo.type==_ZINT){ 2013 mpz_t &mo=*env->modulo._ZINTptr; 2014 for (;it!=itend;++it){ 2015 if (it->type==_ZINT && it->ref_count()==1){ 2016 mpz_t & m=*it->_ZINTptr; 2017 mpz_mod(m,m,mo); // not smod-ed 2018 if (mpz_cmp_si(m,0)!=0) 2019 break; 2020 } 2021 else { 2022 if (!is_zero(smod(*it,env->modulo))) 2023 break; 2024 } 2025 } 2026 } 2027 else { 2028 while ( (it!=itend) && (is_zero(smod(*it,env->modulo))) ) 2029 ++it; 2030 } 2031 } 2032 else 2033 while ( (it!=itend) && (is_zero(*it)) ) 2034 ++it; 2035 if (env && env->moduloon){ 2036 modpoly::iterator it1=it; 2037 if (env->modulo.type==_ZINT){ 2038 mpz_t &mo=*env->modulo._ZINTptr; 2039 mpz_t mo2; mpz_init_set(mo2,mo); mpz_tdiv_q_2exp(mo2,mo2,1); 2040 for (;it1!=itend;++it1){ 2041 if (it1->type==_ZINT && it1->ref_count()==1){ 2042 mpz_t & m=*it1->_ZINTptr; 2043 mpz_mod(m,m,mo); // not smod-ed 2044 if (mpz_cmp(m,mo2)>0) 2045 mpz_sub(m,m,mo); 2046 if (mpz_sizeinbase(m,2)<32) 2047 *it1=mpz_get_si(m); 2048 } 2049 else 2050 *it1=smod(*it1,env->modulo); 2051 } 2052 mpz_clear(mo2); 2053 } 2054 else { 2055 for (;it1!=itend;++it1){ 2056 *it1=smod(*it1,env->modulo); 2057 } 2058 } 2059 } 2060 p.erase(p.begin(),it); 2061 } 2062 trim(const modpoly 
& p,environment * env)2063 modpoly trim(const modpoly & p,environment * env){ 2064 if (p.empty()) 2065 return p; 2066 modpoly::const_iterator it=p.begin(),itend=p.end(); 2067 if (env && env->moduloon) 2068 while ( (it!=itend) && (is_zero(smod(*it,env->modulo))) ) 2069 ++it; 2070 else 2071 while ( (it!=itend) && (is_zero(*it)) ) 2072 ++it; 2073 modpoly new_coord ; 2074 if (env && env->moduloon) 2075 for (;it!=itend;++it) 2076 new_coord.push_back(smod(*it,env->modulo)); 2077 else 2078 for (;it!=itend;++it) 2079 new_coord.push_back(*it); 2080 return new_coord; 2081 } 2082 trim_inplace(modpoly & p)2083 void trim_inplace(modpoly & p){ 2084 modpoly::iterator it=p.begin(),itend=p.end(); 2085 while ( (it!=itend) && (is_zero(*it)) ) 2086 ++it; 2087 if (it!=p.begin()) 2088 p.erase(p.begin(),it); 2089 } 2090 divmodpoly(const modpoly & th,const gen & fact,modpoly & new_coord)2091 void divmodpoly(const modpoly & th, const gen & fact, modpoly & new_coord){ 2092 if (is_one(fact)){ 2093 if (&th!=&new_coord) 2094 new_coord=th; 2095 return ; 2096 } 2097 if (fact.type==_USER || fact.type==_EXT){ 2098 gen invfact=inv(fact,context0); 2099 mulmodpoly(th,invfact,new_coord); 2100 return; 2101 } 2102 if (&th==&new_coord){ 2103 modpoly::iterator it=new_coord.begin(),itend=new_coord.end(); 2104 for (;it!=itend;++it) 2105 // *it =iquo(*it,fact); 2106 *it=rdiv(*it,fact,context0); 2107 } 2108 else { 2109 modpoly::const_iterator it=th.begin(),itend=th.end(); 2110 for (;it!=itend;++it) 2111 new_coord.push_back(rdiv(*it,fact,context0)); // was iquo 2112 // new_coord.push_back(iquo(*it,fact)); 2113 } 2114 } 2115 iquo(modpoly & th,const gen & fact)2116 void iquo(modpoly & th,const gen & fact){ 2117 modpoly::iterator it=th.begin(),itend=th.end(); 2118 #ifndef USE_GMP_REPLACEMENTS 2119 if (fact.type==_INT_ && fact.val<0){ 2120 iquo(th,-fact); 2121 negmodpoly(th,th); 2122 return; 2123 } 2124 if (fact.type==_INT_ ){ 2125 for (;it!=itend;++it){ 2126 if (it->type==_ZINT && it->ref_count()==1) 2127 
mpz_tdiv_q_ui(*it->_ZINTptr,*it->_ZINTptr,fact.val); 2128 else { 2129 if (it->type==_POLY){ 2130 polynome copie(*it->_POLYptr); 2131 copie /= fact; 2132 *it=copie; 2133 } 2134 else 2135 *it=iquo(*it,fact); 2136 } 2137 } 2138 return; 2139 } 2140 if (fact.type==_ZINT){ 2141 for (;it!=itend;++it){ 2142 if (it->type==_ZINT && it->ref_count()==1) 2143 mpz_tdiv_q(*it->_ZINTptr,*it->_ZINTptr,*fact._ZINTptr); 2144 else 2145 *it=iquo(*it,fact); 2146 } 2147 return; 2148 } 2149 #endif 2150 for (;it!=itend;++it) 2151 *it=iquo(*it,fact); 2152 } 2153 divmodpoly(const modpoly & th,const gen & fact,environment * env,modpoly & new_coord)2154 void divmodpoly(const modpoly & th, const gen & fact, environment * env,modpoly & new_coord){ 2155 if (is_one(fact)){ 2156 if (&th!=&new_coord) 2157 new_coord=th; 2158 return ; 2159 } 2160 if (!env || !env->moduloon || !is_zero(env->coeff)) 2161 divmodpoly(th,fact,new_coord); 2162 else { 2163 gen factinv(invmod(fact,env->modulo)); 2164 mulmodpoly(th,factinv,env,new_coord); 2165 } 2166 } 2167 operator /(const modpoly & th,const gen & fact)2168 modpoly operator / (const modpoly & th,const gen & fact ) { 2169 if (is_one(fact)) 2170 return th; 2171 modpoly new_coord; 2172 divmodpoly(th,fact,new_coord); 2173 return new_coord; 2174 } 2175 operator_div(const modpoly & th,const gen & fact,environment * env)2176 modpoly operator_div (const modpoly & th,const gen & fact,environment * env ) { 2177 if (is_one(fact)) 2178 return th; 2179 modpoly new_coord; 2180 divmodpoly(th,fact,env,new_coord); 2181 return new_coord; 2182 } 2183 2184 // fast div rem http://www.csd.uwo.ca/~moreno/CS424/Lectures/FastDivisionAndGcd.html/node3.html 2185 // fast modular inverse: f*g=1 mod x^l invmod(const modpoly & f,int l,environment * env,modpoly & g)2186 bool invmod(const modpoly & f,int l,environment * env,modpoly & g){ 2187 if (f.empty()) 2188 return false; 2189 gen finv=f.back(); 2190 if (f.back()!=1){ 2191 finv=invenv(finv,env); 2192 if (finv.type==_FRAC) 2193 return 
false; 2194 modpoly F; 2195 mulmodpoly(f,finv,env,F); 2196 if (!invmod(F,l,env,g)) 2197 return false; 2198 mulmodpoly(g,finv,env,g); 2199 return true; 2200 } 2201 g=modpoly(1,1); 2202 for (longlong i=2;;){ 2203 modpoly h,tmp1,tmp2; 2204 operator_times(g,g,env,h); 2205 if (h.size()>i) 2206 h=modpoly(h.end()-i,h.end()); 2207 // g=plus_two*g-f*h; 2208 mulmodpoly(g,plus_two,env,tmp1); 2209 int taille=giacmin(i,l); 2210 if (taille>f.size()) 2211 taille=f.size(); 2212 modpoly F(f.end()-taille,f.end()); 2213 operator_times(F,h,env,tmp2); 2214 #if 0 // debug 2215 int fft_mult_save=FFTMUL_SIZE; 2216 FFTMUL_SIZE=1<<30; 2217 modpoly tmp3; 2218 operator_times(F,h,env,tmp3); 2219 if (tmp3!=tmp2) 2220 CERR << "Divquo/invmod error" << tmp3-tmp2 << '\n'; 2221 FFTMUL_SIZE=fft_mult_save; 2222 #endif 2223 submodpoly(tmp1,tmp2,env,g); 2224 if (g.size()>i) 2225 g=modpoly(g.end()-i,g.end()); 2226 if (g.size()>l) 2227 g=modpoly(g.end()-l,g.end()); 2228 g=trim(g,env); 2229 if (i>l) break; 2230 i=2*i; 2231 } 2232 return true; 2233 } 2234 2235 // euclidean quotient using modular inverse DivQuo(const modpoly & a,const modpoly & b,environment * env,modpoly & q)2236 int DivQuo(const modpoly & a, const modpoly & b, environment * env,modpoly & q){ 2237 q.clear(); 2238 int n=a.size(),m=b.size(); 2239 if (n<m) 2240 return 1; 2241 int s=n-m+1; 2242 if (s>=FFTMUL_SIZE && m>=FFTMUL_SIZE && env && env->modulo.type==_INT_){ 2243 int p=env->modulo.val,l=sizeinbase2(n); 2244 // check if p is a Fourier prime for n 2245 int N=1<<l; 2246 if ( (((p-1)>>l)<<l)==p-1 && is_probab_prime_p(p)){ 2247 int w=nthroot(p,l); 2248 if (w){ 2249 vector<int> A,B,Wp,tmp0; 2250 vecteur2vector_int(a,p,A); 2251 vecteur2vector_int(b,p,B); 2252 to_fft(A,p,w,Wp,N,tmp0,1,false,false); A.swap(tmp0); 2253 to_fft(B,p,w,Wp,N,tmp0,1,false,false); B.swap(tmp0); 2254 fft_aoverb_p(A,B,tmp0,p); 2255 fft_reverse(Wp,p); 2256 from_fft(tmp0,p,Wp,A,true,false); 2257 fast_trim_inplace(A,p); 2258 if (A.size()==s){ 2259 vector_int2vecteur(A,q); 
2260 return 2; 2261 } 2262 } 2263 } 2264 } 2265 modpoly f(b),g; 2266 reverse(f.begin(),f.end()); 2267 if (!invmod(f,n-m+1,env,g)) 2268 return 0; 2269 f=a; 2270 reverse(f.begin(),f.end()); 2271 operator_times(f,g,env,q); 2272 if (q.size()>s) 2273 q=modpoly(q.end()-s,q.end()); 2274 reverse(q.begin(),q.end()); 2275 trim(q,env); 2276 return 1; 2277 } 2278 2279 // for p prime such that p-1 is divisible by 2^N, compute a 2^N-th root of 1 2280 // otherwise return 0 nthroot(unsigned p,unsigned N)2281 unsigned nthroot(unsigned p,unsigned N){ 2282 unsigned expo=(p-1)>>N; 2283 if ( (expo<<N)!=p-1) 2284 return 0; 2285 for (unsigned n=2;;++n){ 2286 int w=powmod(n,expo,p); // w=n^((p-1)/2^N) 2287 ulonglong r=w; 2288 for (unsigned i=1;i<N;++i) 2289 r=(r*r)%p; 2290 if (r==p-1) // r=w^(2^(N-1))=n^((p-1)/2) 2291 return w; 2292 } 2293 } 2294 find_w(vector<int> & Wp,unsigned shift,unsigned p)2295 int find_w(vector<int> & Wp,unsigned shift,unsigned p){ 2296 unsigned n=1<<shift,w=0; 2297 #if defined GIAC_PRECOND || defined GIAC_CACHEW 2298 int ws=Wp.size(); 2299 #else 2300 int ws=2*Wp.size(); 2301 #endif 2302 if (ws/n){ 2303 w=Wp[ws/n]; 2304 int wp=powmod(w,n/2,p); 2305 if (wp!=p-1){ 2306 w=0; Wp.clear(); 2307 } 2308 //CERR << Wp << endl; 2309 } 2310 if (w==0 && p!=p1 && p!=p2 && p!=p3) 2311 w=nthroot(p,shift); 2312 return w; 2313 } 2314 chk_equal_mod(const gen & a,longlong p,int m)2315 bool chk_equal_mod(const gen & a,longlong p,int m){ 2316 if (a.type==_FRAC){ 2317 int n=a._FRACptr->num.type==_ZINT?modulo(*a._FRACptr->num._ZINTptr,m):a._FRACptr->num.val; 2318 int d=a._FRACptr->den.type==_ZINT?modulo(*a._FRACptr->den._ZINTptr,m):a._FRACptr->den.val; 2319 return (n-longlong(p)*d)%m==0; 2320 } 2321 if (a.type==_ZINT) 2322 return (modulo(*a._ZINTptr,m)-p)%m==0; 2323 if (a.type==_INT_) 2324 return (a.val-p)%m==0; 2325 CERR << "Unknow type in reconstruction " << a << '\n'; 2326 return false; 2327 } 2328 chk_equal_mod(const vecteur & a,const vecteur & p,int m)2329 bool chk_equal_mod(const 
vecteur & a,const vecteur & p,int m){ 2330 if (a.size()!=p.size()) 2331 return false; 2332 const_iterateur it=a.begin(),itend=a.end(),jt=p.begin(); 2333 for (;it!=itend;++jt,++it){ 2334 if (it->type==_INT_ && *it==*jt) continue; 2335 if (jt->type!=_INT_ || !chk_equal_mod(*it,jt->val,m)) 2336 return false; 2337 } 2338 return true; 2339 } 2340 chk_equal_mod(const vecteur & a,const vector<int> & p,int m)2341 bool chk_equal_mod(const vecteur & a,const vector<int> & p,int m){ 2342 if (a.size()!=p.size()) 2343 return false; 2344 const_iterateur it=a.begin(),itend=a.end(); 2345 vector<int>::const_iterator jt=p.begin(); 2346 for (;it!=itend;++jt,++it){ 2347 if (it->type==_INT_ && it->val==*jt) continue; 2348 if (!chk_equal_mod(*it,*jt,m)) 2349 return false; 2350 } 2351 return true; 2352 } 2353 2354 precond_mulmod31(int b1,int q1,int p,int q1surp)2355 inline int precond_mulmod31(int b1,int q1,int p,int q1surp){ 2356 b1 += (b1>>31) &p; 2357 int t=longlong(b1)*q1-((longlong(b1)*q1surp)>>31)*p; 2358 t += (t>>31)&p; // t positive (or at least t-p is valid) 2359 return t; 2360 } 2361 2362 // v *= m mod p precond_mulmod31(vector<int> & v,int m,int p,int msurp)2363 void precond_mulmod31(vector<int> & v,int m,int p,int msurp){ 2364 vector<int>::iterator it=v.begin(),itend=v.end(); 2365 for (;it!=itend;++it){ 2366 *it=precond_mulmod31(*it,m,p,msurp); 2367 } 2368 } 2369 precond_mulmod31(vector<int> & v,int m,int p)2370 void precond_mulmod31(vector<int> & v,int m,int p){ 2371 m += (m>>31) &p; 2372 int msurp=((1LL<<31)*m)/p+1; 2373 msurp += (msurp>>31) & p; 2374 precond_mulmod31(v,m,p,msurp); 2375 } 2376 2377 // invp is 1.0/p*(1.0-prec) < evalf(1/p) with a sufficient bias insuring r>=0 amodp(longlong a,int p,double invp)2378 inline int amodp(longlong a,int p,double invp){ 2379 longlong q=a*invp; // q<=a/p, maximal relative error: prec+3*2^-53<2^-50 2380 int r= a-q*p; // max absolute error |a|*2^-50<=2^13, hence 0<=r<=p+2^13 2381 if (0 && a>0 && r<0) 2382 CERR << "err amodp a=" << a << 
" p=" << p << " r=" << r << "\n"; 2383 #ifndef GIAC_PRECOND 2384 r += (r>>31)&p; // this is not required if a>=0 2385 #endif 2386 return r; 2387 if (r>p && r-p>= (1<<10) )//(r-(a%p)) %p!=0) // ((a-r)%p!=0) 2388 CERR << "err amodp a=" << a << " p=" << p << " r=" << r << "\n"; 2389 return r; 2390 return a%p; 2391 } 2392 amodpplus(longlong a,int p,double invp)2393 inline int amodpplus(longlong a,int p,double invp){ 2394 longlong q=a*invp; // q<=a/p, maximal relative error: prec+3*2^-53<2^-50 2395 int r= a-q*p; // max absolute error |a|*2^-50<=2^13, hence 0<=r<=p+2^13 2396 r += (r>>31)&p; // this is not required if a>=0 2397 return r; 2398 } 2399 // using apos_modp fails for n:=8000;a:=randpoly(n,3,[]):; b:=randpoly(n+2,3,[]):; ntl_on(false);time(r:=resultant(a,b)); apos_modp(longlong a,int p,double invp)2400 inline int apos_modp(longlong a,int p,double invp){ 2401 longlong q=a*invp; // q<=a/p, maximal relative error: prec+3*2^-53<2^-50 2402 int r= a-q*p; // max absolute error |a|*2^-50<=2^13, hence 0<=r<=p+2^13 2403 return r; 2404 } 2405 precond_mulmod_double(vector<int> & v,int m,int p,double invp)2406 void precond_mulmod_double(vector<int> & v,int m,int p,double invp){ 2407 vector<int>::iterator it=v.begin(),itend=v.end(); 2408 for (;it!=itend;++it){ 2409 *it=amodp((*it)*longlong(m),p,invp); 2410 } 2411 } 2412 precond_mulmod_double(vector<int> & v,int m,int p)2413 void precond_mulmod_double(vector<int> & v,int m,int p){ 2414 double invp=find_invp(p); 2415 precond_mulmod_double(v,m,p,invp); 2416 } 2417 2418 // Beware, precond_mulmod_double does not work if p is very near from 1ULL<<31 precond_mulmod(vector<int> & v,int m,int p)2419 inline void precond_mulmod(vector<int> & v,int m,int p){ 2420 if (m==1) 2421 return; 2422 #if 1 //def GIAC_PRECOND 2423 precond_mulmod31(v,m,p); 2424 #else 2425 precond_mulmod_double(v,m,p); 2426 #endif 2427 } 2428 2429 // ab=a*b mod m, assumes that m is a Fourier prime for qi and ri 2430 // returns true if fft was used 
operator_times(const std::vector<int> & a,const std::vector<int> & b,int m,std::vector<int> & ab)2431 bool operator_times(const std::vector<int> & a,const std::vector<int> & b,int m,std::vector<int> & ab){ 2432 if (a.size()<FFTMUL_SIZE || b.size()<FFTMUL_SIZE){ 2433 smallmult(a.begin(),a.end(),b.begin(),b.end(),ab,m); 2434 return false; 2435 } 2436 vector<int> A,B,Wp,tmp0; 2437 int l=sizeinbase2(a.size()+b.size()-1); 2438 int n=1<<l; 2439 int w=nthroot(m,l); 2440 if (w==0){ 2441 smallmult(a.begin(),a.end(),b.begin(),b.end(),ab,m); 2442 return false; 2443 } 2444 //smallmult(qi.begin(),qi.end(),bi.begin(),bi.end(),tmp1,m); // debug 2445 to_fft(a,m,w,Wp,n,tmp0,1,false,false); A.swap(tmp0); 2446 if (&a==&b) 2447 fft_ab_p(A,A,tmp0,m); 2448 else { 2449 to_fft(b,m,w,Wp,n,tmp0,1,false,false); B.swap(tmp0); 2450 fft_ab_p(A,B,tmp0,m); 2451 } 2452 fft_reverse(Wp,m); 2453 from_fft(tmp0,m,Wp,ab,true,false); 2454 fast_trim_inplace(ab,m); 2455 return true; 2456 } 2457 2458 // fast modular inverse: f*g=1 mod x^l invmod(const std::vector<int> & f,int l,int p,std::vector<int> & g)2459 bool invmod(const std::vector<int> & f,int l,int p,std::vector<int> & g){ 2460 if (f.empty()) 2461 return false; 2462 int finv=f.back() % p; 2463 finv += (finv>>31) & p; 2464 if (finv!=1){ 2465 finv=invmod(finv,p); 2466 vector<int> F(f); 2467 precond_mulmod(F,finv,p); 2468 if (!invmod(F,l,p,g)) 2469 return false; 2470 precond_mulmod(g,finv,p); 2471 return true; 2472 } 2473 g=vector<int>(1,1); 2474 for (longlong i=2;;){ 2475 vector<int> h,tmp1; 2476 operator_times(g,g,p,h); 2477 if (h.size()>i) 2478 h=vector<int>(h.end()-i,h.end()); 2479 // g=plus_two*g-f*h; 2480 tmp1=g; 2481 precond_mulmod(tmp1,2,p); // tmp1=2*g 2482 int taille=giacmin(i,l); 2483 if (taille>f.size()) 2484 taille=f.size(); 2485 vector<int> F(f.end()-taille,f.end()); 2486 operator_times(F,h,p,g); 2487 submodneg(g,tmp1,p); // g=tmp1-F*h 2488 if (g.size()>i) 2489 g=vector<int>(g.end()-i,g.end()); 2490 if (g.size()>l) 2491 
g=vector<int>(g.end()-l,g.end()); 2492 fast_trim_inplace(g,p); 2493 if (i>l) break; 2494 i=2*i; 2495 } 2496 return true; 2497 } 2498 2499 // euclidean quotient using modular inverse DivQuo(const std::vector<int> & a,const std::vector<int> & b,int p,std::vector<int> & q,bool ck_exactquo)2500 int DivQuo(const std::vector<int> & a, const std::vector<int> & b, int p,std::vector<int> & q,bool ck_exactquo){ 2501 q.clear(); 2502 int n=a.size(),m=b.size(); 2503 if (n<m) 2504 return 1; 2505 int s=n-m+1; 2506 if (ck_exactquo && s>=FFTMUL_SIZE && m>=FFTMUL_SIZE){ 2507 int l=sizeinbase2(n); 2508 // check if p is a Fourier prime for n 2509 int N=1<<l; 2510 if ( (((p-1)>>l)<<l)==p-1 && is_probab_prime_p(p)){ 2511 int w=nthroot(p,l); 2512 if (w){ 2513 vector<int> A,B,Wp,tmp0; 2514 to_fft(a,p,w,Wp,N,tmp0,1,false,false); A.swap(tmp0); 2515 to_fft(b,p,w,Wp,N,tmp0,1,false,false); B.swap(tmp0); 2516 fft_aoverb_p(A,B,tmp0,p); 2517 fft_reverse(Wp,p); 2518 from_fft(tmp0,p,Wp,q,true,false); 2519 fast_trim_inplace(q,p); 2520 if (q.size()==s) 2521 return 2; 2522 } 2523 } 2524 } 2525 vector<int> f(b),g; 2526 reverse(f.begin(),f.end()); 2527 if (!invmod(f,n-m+1,p,g)) 2528 return 0; 2529 f=a; 2530 reverse(f.begin(),f.end()); 2531 operator_times(f,g,p,q); 2532 if (q.size()>s) 2533 q=vector<int>(q.end()-s,q.end()); 2534 reverse(q.begin(),q.end()); 2535 fast_trim_inplace(q,p); 2536 return 1; 2537 } 2538 2539 // reconstruct quo and rem by chinese remaindering 2540 // quo, rem are already computed for env->modulo divremrec(const modpoly & A,const modpoly & B,modpoly & quo,modpoly & rem,environment * env)2541 bool divremrec(const modpoly & A, const modpoly & B, modpoly & quo, modpoly & rem,environment * env){ 2542 gen M=mignotte_bound(A); // works for exact quotient only! 
2543 gen B0=B[0]; 2544 gen pip=env->modulo; 2545 gen p=pip; 2546 vecteur a,b,q,r,quo_,rem_; 2547 vector<int> ai,bi,qi,ri,qbi,tmp0,tmp1,Wp; 2548 unsigned long l=sizeinbase2(A.size())-1; 2549 unsigned long n=1<<(l+1); 2550 while (is_greater(M,pip,context0)){ 2551 bool fourier_prime=false; 2552 for (;;){ 2553 p=p-1; 2554 if (p.type==_INT_ && sizeinbase2(p)>l+8 && n<=(1<<22)){ 2555 p=prevprimep1p2p3(p.val-1,0,n); 2556 fourier_prime=true; 2557 } 2558 else 2559 p=prevprime(p); 2560 if (smod(B0,p)!=0) 2561 break; 2562 } 2563 env->modulo=p; 2564 // check that b*q+r==a before doing division 2565 if (fourier_prime){ 2566 int m=p.val; 2567 #if 1 2568 vecteur2vector_int(A,m,ai); 2569 vecteur2vector_int(B,m,bi); 2570 DivRem(ai,bi,m,qi,ri,rem.empty()); 2571 if (chk_equal_mod(quo,qi,m) && chk_equal_mod(rem,ri,m)){ 2572 operator_times(quo,B,0,rem_); 2573 addmodpoly(rem_,rem,rem_); 2574 // add_mulmodpoly(quo_.begin(),quo_.end(),B.begin(),B.end(),0,rem); 2575 if (rem_==A) 2576 return true; 2577 } 2578 ichinrem_inplace(quo,qi,pip,m); 2579 ichinrem_inplace(rem,ri,pip,m); 2580 pip=pip*p; 2581 continue; 2582 #endif 2583 vecteur2vector_int(quo,m,qi); 2584 vecteur2vector_int(B,m,bi); 2585 operator_times(qi,bi,m,qbi); 2586 // fft_reverse(Wp,m); 2587 vecteur2vector_int(rem,m,ri); 2588 addmod(qbi,ri,m); 2589 vecteur2vector_int(A,m,ai); 2590 #if 0 // debug 2591 tmp0.clear(); 2592 tmp0=tmp1; submod(tmp0,qbi,m); // debug 2593 if (!tmp0.empty()) 2594 CERR << "err\n"; 2595 submod(tmp1,ai,m); // debug 2596 #endif 2597 submod(qbi,ai,m); 2598 if (qbi.empty()){ 2599 operator_times(quo,B,0,rem_); 2600 addmodpoly(rem_,rem,rem_); 2601 // add_mulmodpoly(quo_.begin(),quo_.end(),B.begin(),B.end(),0,rem); 2602 if (rem_==A) 2603 return true; 2604 } 2605 else { 2606 DivRem(ai,bi,m,qi,ri); 2607 //DivRem(A,B,env,q,r,false); // debug 2608 ichinrem_inplace(quo,qi,pip,m); 2609 ichinrem_inplace(rem,ri,pip,m); 2610 } 2611 pip=pip*p; 2612 continue; 2613 } 2614 smod(A,p,a); smod(B,p,b); 2615 smod(quo,p,q); 
smod(rem,p,r); 2616 operator_times(b,q,env,rem_); 2617 addmodpoly(rem_,r,env,rem_); 2618 if (a==rem_){ 2619 quo_=quo; 2620 rem_=rem; 2621 } 2622 else { 2623 DivRem(a,b,env,q,r,false); 2624 quo_=ichinrem(quo,q,pip,p); 2625 rem_=ichinrem(rem,r,pip,p); 2626 } 2627 if (quo==quo_ && rem==rem_){ 2628 operator_times(quo_,B,0,rem_); 2629 addmodpoly(rem_,rem,rem_); 2630 // add_mulmodpoly(quo_.begin(),quo_.end(),B.begin(),B.end(),0,rem); 2631 if (rem_==A) 2632 return true; 2633 } 2634 quo.swap(quo_); 2635 rem.swap(rem_); 2636 pip=pip*p; 2637 } 2638 return false; 2639 } 2640 2641 // modular division DivRemInt(const modpoly & A,const modpoly & B,modpoly & quo,modpoly & rem)2642 bool DivRemInt(const modpoly & A, const modpoly & B, modpoly & quo, modpoly & rem){ 2643 gen B0=B[0]; 2644 // first try for exact quotient modulo a prime 2645 gen p = p1 ; 2646 while (smod(B,p)==0){ 2647 p=prevprime(p-1); 2648 } 2649 vecteur a(A),b(B); 2650 smod(a,p,a); smod(b,p,b); 2651 environment env; env.modulo=p; env.moduloon=true; 2652 DivRem(a,b,&env,quo,rem,false); 2653 if (rem.empty()){ 2654 // it is highly probable that the division is exact, 2655 // reconstruct quo by Chinese remaindering 2656 if (divremrec(A,B,quo,rem,&env)) 2657 return true; 2658 } 2659 // reconstruct both quo and rem of the pseudo-division 2660 gen Bb=pow(B0,A.size()-B.size()+1,context0); 2661 vecteur Apseudo(A); 2662 multvecteur(Bb,Apseudo,Apseudo); 2663 multvecteur(Bb,a,a); 2664 smod(a,p,a); 2665 multvecteur(Bb,quo,quo); 2666 smod(quo,p,quo); 2667 multvecteur(Bb,rem,rem); 2668 smod(rem,p,rem); 2669 if (!divremrec(Apseudo,B,quo,rem,&env)) 2670 return false; 2671 Bb=inv(Bb,context0); 2672 multvecteur(Bb,quo,quo); 2673 multvecteur(Bb,rem,rem); 2674 return true; 2675 } 2676 coefftype(const modpoly & v,gen & coefft)2677 int coefftype(const modpoly & v,gen & coefft){ 2678 int t=0; 2679 const_iterateur it=v.begin(),itend=v.end(); 2680 for (;it!=itend;++it){ 2681 const unsigned char tmp=it->type; 2682 if (tmp==_INT_ || 
tmp==_ZINT) 2683 continue; 2684 t=tmp; 2685 coefft=*it; 2686 if (t==_USER) 2687 return t; 2688 if (t==_MOD) 2689 return t; 2690 if (t==_EXT) 2691 return t; 2692 } 2693 return t; 2694 } 2695 DivRem(const modpoly & th,const modpoly & other,environment * env,modpoly & quo,modpoly & rem,bool allowrational)2696 bool DivRem(const modpoly & th, const modpoly & other, environment * env,modpoly & quo, modpoly & rem,bool allowrational){ 2697 // COUT << "DivRem" << th << "," << other << '\n'; 2698 if (other.empty()){ 2699 #ifndef NO_STDEXCEPT 2700 setsizeerr(gettext("modpoly.cc/DivRem")); 2701 #endif 2702 return false; 2703 } 2704 if (th.empty()){ 2705 quo=th; 2706 rem=th; 2707 return true ; 2708 } 2709 int a=int(th.size())-1; 2710 int b=int(other.size())-1; 2711 if (other.size()==1){ 2712 divmodpoly(th,other.front(),env,quo); 2713 rem.clear(); 2714 return true ; 2715 } 2716 quo.clear(); 2717 if (a<b){ 2718 rem=th; 2719 return true; 2720 } 2721 gen acoeff,bcoeff; 2722 int atype=coefftype(th,acoeff),btype=coefftype(other,bcoeff); 2723 if (atype==_MOD || btype==_MOD){ 2724 environment e; 2725 e.modulo=atype==_MOD?*(acoeff._MODptr+1):*(bcoeff._MODptr+1); 2726 e.moduloon=true; 2727 if (!DivRem(unmod(th,e.modulo),unmod(other,e.modulo),&e,quo,rem,false)) 2728 return false; 2729 modularize(quo,e.modulo); 2730 modularize(rem,e.modulo); 2731 return true; 2732 } 2733 #if 1 2734 int divquores=0; 2735 if (env && env->moduloon && 2736 other.size()>FFTMUL_SIZE && th.size()-other.size()>FFTMUL_SIZE){ 2737 if (debug_infolevel>2) 2738 CERR << CLOCK()*1e-6 << " DivRem mod start" << endl; 2739 int l=sizeinbase2(other.size()),p=env->modulo.val; 2740 if (env->modulo.type==_INT_ && p-1==((p-1)>>l)<<l){ 2741 vector<int> a,b,q,r; 2742 vecteur2vector_int(th,p,a); 2743 vecteur2vector_int(other,p,b); 2744 divquores=DivQuo(a,b,p,q,true); // check for exact quotient 2745 vector_int2vecteur(q,quo); 2746 if (debug_infolevel>2) 2747 CERR << CLOCK()*1e-6 << " DivQuo mod Fourier prime end" << endl; 2748 
rem.clear(); 2749 if (divquores==2) 2750 return true; 2751 if (divquores){ 2752 operator_times(b,q,p,r); 2753 submodneg(r,a,p); 2754 vector_int2vecteur(r,rem); 2755 if (debug_infolevel>2) 2756 CERR << CLOCK()*1e-6 << " DivRem mod Fourier prime end" << endl; 2757 return true; 2758 } 2759 } 2760 if ((divquores=DivQuo(th,other,env,quo))){ 2761 rem.clear(); 2762 if (divquores==2) 2763 return true; 2764 modpoly tmp; 2765 operator_times(other,quo,env,tmp); 2766 submodpoly(th,tmp,env,rem); 2767 return true; 2768 } 2769 } 2770 #endif 2771 if ( (env==0 || env->moduloon==false) && atype==0 && btype==0 && other.size()>FFTMUL_SIZE && th.size()-other.size()>FFTMUL_SIZE && DivRemInt(th,other,quo,rem) ) 2772 return true; 2773 quo.reserve(a-b+1); 2774 // A=BQ+R -> A=(B*invcoeff)*Q+(R*invcoeff), 2775 // make division of A*coeff by B*coeff and multiply R by coeff at the end 2776 gen coeff=other.front(),invcoeff; bool invother=false; 2777 if (coeff.type==_USER){ 2778 invother=true; 2779 invcoeff=inv(coeff,context0); 2780 } 2781 if (coeff.type==_EXT){ 2782 gen coeff0=*coeff._EXTptr; 2783 if (coeff0.type==_VECT){ 2784 for (int i=0;i<coeff0._VECTptr->size();++i){ 2785 if ((*coeff0._VECTptr)[i].type==_USER){ 2786 invcoeff=inv(coeff,context0); 2787 invother=true; 2788 break; 2789 } 2790 } 2791 } 2792 } 2793 if (!invother && env && env->moduloon){ 2794 invcoeff=invmod(coeff,env->modulo); 2795 invother=true; 2796 } 2797 // copy rem to an array 2798 modpoly::const_iterator remit=th.begin(); // ,remend=rem.end(); 2799 gen * tmp=new gen[a+1]; // must use new/delete 2800 gen * tmpend=&tmp[a]; 2801 gen * tmpptr=tmpend; // tmpend points to the highest degree coeff of A 2802 /* 2803 vecteur vtmp(a+1); 2804 iterateur tmp=vtmp.begin(); 2805 iterateur tmpend=vtmp.end()-1; 2806 iterateur tmpptr=tmpend; // tmpend points to the highest degree coeff of A 2807 */ 2808 for (;tmpptr!=tmp-1;--tmpptr,++remit) 2809 *tmpptr=*remit; 2810 modpoly::const_iterator B_beg=other.begin(),B_end=other.end(); 2811 mpz_t 
prod; 2812 mpz_init(prod); 2813 gen n0( 0),q,mod2(env?2*env->modulo:0); 2814 for (;a>=b;--a){ 2815 if (invother){ 2816 if (env && env->moduloon){ 2817 if (tmpend->type==_ZINT && invcoeff.type==_ZINT && env->modulo.type==_ZINT){ 2818 mpz_mul(prod,*tmpend->_ZINTptr,*invcoeff._ZINTptr); 2819 mpz_fdiv_r(prod,prod,*env->modulo._ZINTptr); // prod positive 2820 if (mpz_cmp(prod,*mod2._ZINTptr)>0) 2821 mpz_sub(prod,prod,*env->modulo._ZINTptr); 2822 q=prod; 2823 } 2824 else 2825 q=smod(*tmpend*invcoeff,env->modulo); 2826 } 2827 else 2828 q=*tmpend*invcoeff; 2829 } 2830 else { 2831 q=rdiv(*tmpend,coeff,context0); 2832 if (!allowrational){ 2833 if (q.type==_FRAC){ 2834 delete [] tmp; 2835 return false; 2836 } 2837 } 2838 } 2839 quo.push_back(q); 2840 --tmpend; 2841 bool fast=(env && is_zero(env->coeff) && (env->complexe || !env->moduloon) )?false:(q.type==_INT_) || (q.type==_ZINT); 2842 if (!is_zero(q)) { 2843 // tmp <- tmp - q *B.shifted 2844 tmpptr=tmpend; 2845 modpoly::const_iterator itq=B_beg; 2846 ++itq; // first elements cancel 2847 if (env && (env->moduloon && !env->complexe && is_zero(env->coeff)) && (env->modulo.type==_INT_) && (env->modulo.val<smallint)){ 2848 for (;itq!=B_end;--tmpptr,++itq){ // no mod here to save comput. time 2849 tmpptr->val -= q.val*itq->val ; 2850 } 2851 } 2852 else { 2853 mpz_set_si(prod,0); 2854 for (;itq!=B_end;--tmpptr,++itq){ // no mod here to save comput. 
time 2855 if (fast && (tmpptr->type==_ZINT) && 2856 #ifndef SMARTPTR64 2857 (tmpptr->__ZINTptr->ref_count==1) && 2858 #else 2859 ((ref_mpz_t *) (* (ulonglong *) tmpptr >> 16))->ref_count==1 && 2860 #endif 2861 ( (itq->type==_ZINT) || (itq->type==_INT_) ) ) 2862 sub_mul(tmpptr->_ZINTptr,prod,q,*itq); 2863 else 2864 *tmpptr = (*tmpptr)-q*(*itq) ; 2865 } 2866 } 2867 } 2868 /* 2869 if (env && !env->moduloon) { 2870 CERR << quo << '\n'; 2871 CERR << quo*other << '\n'; 2872 CERR << "["; 2873 for (int i=1;i<a;++i) 2874 CERR << tmp[a-i] << "," ; 2875 CERR << tmp[0] << "]" << '\n'; 2876 CERR << '\n'; 2877 } 2878 */ 2879 } // end for (;;) 2880 // trim rem and multiply by coeff, this will modularize rem as well 2881 rem.clear(); 2882 // bool trimming=true; 2883 if (env && env->moduloon){ 2884 for (;tmpend!=tmp-1;--tmpend){ 2885 if (tmpend->type==_ZINT && env->modulo.type==_ZINT){ 2886 mpz_fdiv_r(prod,*tmpend->_ZINTptr,*env->modulo._ZINTptr); // prod positive 2887 if (mpz_cmp_si(prod,0)) 2888 break; 2889 } 2890 else { 2891 if (!is_zero(smod(*tmpend,env->modulo))) 2892 break; 2893 } 2894 } 2895 for (;tmpend!=tmp-1;--tmpend){ 2896 if (tmpend->type==_ZINT && env->modulo.type==_ZINT){ 2897 mpz_fdiv_r(prod,*tmpend->_ZINTptr,*env->modulo._ZINTptr); // prod positive 2898 if (mpz_cmp(prod,*mod2._ZINTptr)>0) 2899 mpz_sub(prod,prod,*env->modulo._ZINTptr); 2900 rem.push_back(prod); 2901 } 2902 else 2903 rem.push_back(smod(*tmpend,env->modulo)); 2904 } 2905 } 2906 else { 2907 for (;tmpend!=tmp-1;--tmpend){ 2908 if (!is_zero(*tmpend)) 2909 break; 2910 } 2911 for (;tmpend!=tmp-1;--tmpend){ 2912 rem.push_back(*tmpend); 2913 } 2914 } 2915 mpz_clear(prod); 2916 // COUT << "DivRem" << th << "-" << other << "*" << quo << "=" << rem << " " << th-other*quo << '\n'; 2917 delete [] tmp; 2918 return true; 2919 } 2920 DenseDivRem(const modpoly & th,const modpoly & other,modpoly & quo,modpoly & rem,bool fastdivcheck)2921 bool DenseDivRem(const modpoly & th, const modpoly & other,modpoly & quo, modpoly 
& rem,bool fastdivcheck){ 2922 int n=int(th.size()), m=int(other.size()); 2923 gen t=th[n-1], o=other[m-1]; 2924 if (fastdivcheck && n && m ){ 2925 if (is_zero(o)){ 2926 if (!is_zero(t)) 2927 return false; 2928 } 2929 else { 2930 if (!is_zero(t % o)) 2931 return false; 2932 // if ((n>1) && (m>1)) 2933 // COUT << ( th[n-2]-other[m-2]*(t/o) ) % o << '\n'; 2934 } 2935 } 2936 environment env; 2937 if (fastdivcheck){ 2938 env.moduloon=true; 2939 env.modulo=p4; 2940 bool res=DivRem(th,other,&env,quo,rem,false); 2941 if (!res || !rem.empty()) 2942 return false; 2943 } 2944 env.moduloon=false; 2945 bool res=DivRem(th,other,&env,quo,rem,false); 2946 return res; 2947 } 2948 operator /(const modpoly & th,const modpoly & other)2949 modpoly operator / (const modpoly & th,const modpoly & other) { 2950 modpoly rem,quo; 2951 environment env; 2952 DivRem(th,other,&env,quo,rem); 2953 return quo; 2954 } 2955 operator %(const modpoly & th,const modpoly & other)2956 modpoly operator % (const modpoly & th,const modpoly & other) { 2957 modpoly rem,quo; 2958 environment env; 2959 DivRem(th,other,&env,quo,rem); 2960 return rem; 2961 } 2962 operator_div(const modpoly & th,const modpoly & other,environment * env)2963 modpoly operator_div (const modpoly & th,const modpoly & other,environment * env) { 2964 modpoly rem,quo; 2965 DivRem(th,other,env,quo,rem); 2966 return quo; 2967 } 2968 operator_mod(const modpoly & th,const modpoly & other,environment * env)2969 modpoly operator_mod (const modpoly & th,const modpoly & other,environment * env) { 2970 modpoly rem,quo; 2971 DivRem(th,other,env,quo,rem); 2972 return rem; 2973 } 2974 2975 // Pseudo division a*th = other*quo + rem PseudoDivRem(const dense_POLY1 & th,const dense_POLY1 & other,dense_POLY1 & quo,dense_POLY1 & rem,gen & a)2976 void PseudoDivRem(const dense_POLY1 & th, const dense_POLY1 & other, dense_POLY1 & quo, dense_POLY1 & rem, gen & a){ 2977 int ts=int(th.size()); 2978 int os=int(other.size()); 2979 if (ts<os){ 2980 quo.clear(); 
rem=th; // tail of a definition that begins above this chunk, reproduced unchanged
    a=1;
  }
  else {
    gen l(other[0]);
    a=pow(l,ts-os+1);
    DenseDivRem(th*a,other,quo,rem);
  }
}

/*
dense_POLY1 AscPowDivRemModifiable(dense_POLY1 & num, dense_POLY1 & den,int order){
  // reverse and adjust den degree to order
  reverse(den.begin(),den.end());
  rrdm(den,order);
  // reverse and adjust num degree to 2*order
  reverse(num.begin(),num.end());
  rrdm(num,2*order);
  dense_POLY1 quo,rem;
  DenseDivRem(num,den,quo,rem);
  reverse(quo.begin(),quo.end());
  return trim(quo,env);
}

dense_POLY1 AscPowDivRem(const dense_POLY1 & num, const dense_POLY1 & den,int order){
  dense_POLY1 numcopy(num),dencopy(den);
  return AscPowDivRemModifiable(numcopy,dencopy,order);
}
*/

// a-b*q
// Returns a-b*q reduced with respect to p: p is added to a when a is negative,
// the product is taken from precond_mulmod31(b,q,p,qsurp) (defined elsewhere;
// qsurp is forwarded untouched -- presumably the precomputed q*2^31/p, to be
// confirmed against precond_mulmod31), and p is added once more if the
// difference became negative.
int precond_a_bq(int a,int b,int q,int p,int qsurp){
  a += (a>>31)&p;
  a -= precond_mulmod31(b,q,p,qsurp);
  a += (a>>31)&p;
  return a;
}

// a-q1*b1-q2*b2
// Same idea as precond_a_bq for two products at once, with the reduction of
// each product done inline from the precomputed q1surp/q2surp.
// Note that there is no sign correction after the second subtraction, so the
// returned value may be negative.
int precond_a_q1b1_q2b2(int a,int q1,int b1,int q2,int b2,int p,int q1surp,int q2surp){
  a += (a>>31)&p; // insure a is positive
  b1 += (b1>>31)&p; // insure b1 is positive
  int t=longlong(b1)*q1-((longlong(b1)*q1surp)>>31)*p;
  t += (t>>31)&p; // t positive (or at least t-p is valid)
  a -= t;
  a += (a>>31)&p; // insure a is positive
  b2 += (b2>>31)&p; // insure b2 is positive
  t = longlong(b2)*q2-((longlong(b2)*q2surp)>>31)*p;
  t += (t>>31)&p; // t positive
  a -= t;
  return a;
}

// Euclidean division modulo m
// th and other are dense coefficient vectors, leading coefficient first
// (th[0] and other.front() are used as leading terms).
// On return quo holds the quotient and rem the remainder, leading zeros of
// rem being dropped.
// Special cases, in the order they are tested:
//  - other empty: rem=th and quo is cleared
//  - th empty: quo and rem are both set to the empty vector
//  - deg(other) and deg(th)-deg(other) both >= FFTMUL_SIZE: DivQuo is tried
//    first; a non-zero return means quo was computed there (2 meaning that rem
//    is left empty), otherwise the code falls through to the generic paths
//  - deg(other)==0: quo=th*inverse(other[0]) and rem is empty
//  - deg(th)==deg(other)+1: dedicated two-coefficient quotient code
//  - deg(th)<deg(other): quo empty, rem=th
//  - otherwise schoolbook division in a temporary array
// ck_exactquo is only forwarded to DivQuo.
void DivRem(const vector<int> & th, const vector<int> & other,int m,vector<int> & quo, vector<int> & rem,bool ck_exactquo){
  if (other.empty()){
    rem=th;
    quo.clear();
    return;
  }
  if (th.empty()){
    quo=th;
    rem=th;
    return;
  }
  int a=int(th.size())-1;
  int b=int(other.size())-1;
  vector<int> quo_,rem_; // debug
  if (b>=FFTMUL_SIZE && a-b>=FFTMUL_SIZE){
    int divquores=DivQuo(th,other,m,quo,ck_exactquo);
    if (divquores){
      rem.clear();
      if (divquores==2)
        return;
      // rem <- other*quo, then combined with th by submodneg (defined elsewhere)
      operator_times(other,quo,m,rem);
      submodneg(rem,th,m);
      return ;
      // not reached: leftover of a debugging cross-check
      quo_=quo; rem_=rem;
    }
  }
  int coeff=other.front(),invcoeff=invmod(coeff,m);
  if (!b){
    quo=th;
    mulmod(quo,invcoeff,m);
    rem.clear();
    return;
  }
  quo.clear();
  double invm=1.0/m;//find_invp(m); => chk_normalize failure
  if (a==b+1){
    rem.clear();
    // frequent case in euclidean algorithms
    // rem=th-other*q
    vector<int>::const_iterator at=th.begin()+2,bt=other.begin()+1,btend=other.end();
#if 1
    // active variant: the block below always returns, so the GIAC_PRECOND /
    // plain variants after the #endif are not reached while this is enabled
    //vector<int> rem_,quo_;
    {
      longlong q0=amodp(longlong(th[0])*invcoeff,m,invm);
      q0 += (q0>>63)&m;
      longlong q1=amodp(longlong(amodp(th[1]-other[1]*q0,m,invm) )*invcoeff,m,invm);
      q1 += (q1>>63)&m;
      quo.push_back(int(q0));
      quo.push_back(int(q1));
      // first part of the loop, remainder is empty, push r only if non 0
      for (;;++at){
        longlong r=*at-q1*(*bt);
        ++bt;
        if (bt==btend){
          r=amodp(r,m,invm);
          if (r && r!=m && r!=-m)
            rem.push_back(int(r));
          return;
        }
        r -= q0*(*bt);
        r =amodp(r,m,invm);
        if (r && r!=m && r!=-m){
          rem.push_back(int(r));
          break;
        }
      }
      // second part of the loop, remainder is not empty, push r always
      --btend; ++at;
#if 1
      // 4 coefficients per iteration; btend is moved back by 3 so that bt[4]
      // stays inside other, and restored afterwards
      btend-=3;
      int b1,b2=*bt;
      for (;bt<btend;at+=4,bt+=4){
        b1=bt[1];
        rem.push_back( amodp(at[0]-q1*b2-q0*b1,m,invm) );
        b2=bt[2];
        rem.push_back( amodp(at[1]-q1*b1-q0*b2,m,invm) );
        b1=bt[3];
        rem.push_back( amodp(at[2]-q1*b2-q0*b1,m,invm) );
        b2=bt[4];
        rem.push_back( amodp(at[3]-q1*b1-q0*b2,m,invm) );
      }
      btend+=3;
#endif
      for (;bt!=btend;++at,++bt){
        rem.push_back( amodp(*at-q1*(*bt)-q0*bt[1],m,invm) );
      }
      rem.push_back(amodp(*at-q1*(*bt),m,invm));
      return;
      //rem_.swap(rem);quo_.swap(quo);at=th.begin()+2;bt=other.begin()+1;btend=other.end();
    }
#endif
#ifdef GIAC_PRECOND
    invcoeff += (invcoeff>>31)&m;
    int invcoeffinv=(1LL<<31)*invcoeff/m+1;
    int q0=precond_mulmod31(th[0],invcoeff,m,invcoeffinv);
    //if ((q0-longlong(th[0])*invcoeff)%m!=0)
    //CERR << "err\n";
    int q0inv=(1LL<<31)*q0/m+1;
    int q1=precond_a_bq(th[1],other[1],q0,m,q0inv);
    q1=precond_mulmod31(q1,invcoeff,m,invcoeffinv);
    //if ((q1-(( (th[1]-other[1]*q0)%m )*invcoeff))%m!=0)
    //CERR << "err\n";
    int q1inv=(1LL<<31)*q1/m+1;
    quo.push_back(int(q0));
    quo.push_back(int(q1));
    // first part of the loop, remainder is empty, push r only if non 0
    for (--btend;bt!=btend;++at,++bt){
      int r=precond_a_q1b1_q2b2(*at,q1,*bt,q0,bt[1],m,q1inv,q0inv);
      if (r!=0){
        rem.push_back(r);
        ++at;++bt;
        break;
      }
    }
    for (;bt!=btend;++at,++bt){
      int r=precond_a_q1b1_q2b2(*at,q1,*bt,q0,bt[1],m,q1inv,q0inv);
      // int s=(*at-q1*(*bt)-q0*bt[1])%m;
      //if ((longlong(r)-s)%m!=0)
      //CERR << "err\n";
      rem.push_back(r);
    }
    rem.push_back(precond_a_bq(*at,*bt,q1,m,q1inv));
    return;
    //submod(rem_,rem,m); submod(quo_,quo,m);
    //if (!rem_.empty() || !quo_.empty())
    //CERR << "err\n";
#else
    longlong q0=(longlong(th[0])*invcoeff)%m;
    longlong q1= (( (th[1]-other[1]*q0)%m )*invcoeff)%m;
    quo.push_back(int(q0));
    quo.push_back(int(q1));
    // first part of the loop, remainder is empty, push r only if non 0
    for (;;++at){
      longlong r=*at-q1*(*bt);
      ++bt;
      if (bt==btend){
        r %= m;
        if (r)
          rem.push_back(int(r));
        return;
      }
      r -= q0*(*bt);
      r %= m;
      if (r){
        rem.push_back(int(r));
        break;
      }
    }
    // second part of the loop, remainder is not empty, push r always
    --btend;
    for (++at;bt!=btend;++at,++bt){
      rem.push_back( (*at-q1*(*bt)-q0*bt[1])%m );
    }
    rem.push_back((*at-q1*(*bt))%m);
#endif
    return;
  }
  rem=th; // code for a-b>1
  if (a<b)
    return;
  quo.reserve(a-b+1);
  // copy rem to an array
  vector<int>::const_iterator remit=rem.begin();//,remend=rem.end();
  if ((a-b+1)*double(m)*m<9e15){
    // small enough modulus/length: accumulate in longlong without reducing
    // inside the inner loop, reduction is delayed to the final copy into rem
    ALLOCA(longlong, tmp, (a+1)*sizeof(longlong));//longlong * tmp=(longlong *)alloca((a+1)*sizeof(longlong));
    longlong * tmpend=&tmp[a];
    longlong * tmpptr=tmpend; // tmpend points to the highest degree coeff of A
    // tmp is stored lowest degree first (tmp[a] receives th[0])
    for (;tmpptr!=tmp-1;--tmpptr,++remit)
      *tmpptr=*remit;
    vector<int>::const_iterator B_beg=other.begin(),B_end=other.end();
    int q;//n0(0),
    for (;a>=b;--a){
      q= amodp(longlong(invcoeff)*(*tmpend),m,invm);
      quo.push_back(q);
      --tmpend;
      // tmp <- tmp - q *B.shifted (if q!=0)
      if (q) {
        tmpptr=tmpend;
        vector<int>::const_iterator itq=B_beg;
        ++itq; // first elements cancel
        for (;itq!=B_end;--tmpptr,++itq){
          *tmpptr = (*tmpptr -(longlong(q) * (*itq)));
        }
      }
    }
    // trim rem and multiply by coeff, this will modularize rem as well
    rem.clear();
    // bool trimming=true;
    // skip leading coefficients that are 0 modulo m
    for (;tmpend!=tmp-1;--tmpend){
      if (*tmpend && *tmpend % m)
        break;
    }
    for (;tmpend!=tmp-1;--tmpend){
      rem.push_back( amodp(*tmpend,m,invm));
    }
    return;
  }
  // general case: int array, every update is reduced modulo m
#if defined VISUALC || defined BESTA_OS
  int * tmp=new int[a+1];
#else
  int tmp[a+1];
#endif
  int * tmpend=&tmp[a];
  int * tmpptr=tmpend; // tmpend points to the highest degree coeff of A
  for (;tmpptr!=tmp-1;--tmpptr,++remit)
    *tmpptr=*remit;
  vector<int>::const_iterator B_beg=other.begin(),B_end=other.end();
  int q;//n0(0),
  for (;a>=b;--a){
    //q = longlong(invcoeff)*(*tmpend) % m;
    q = amodp(longlong(invcoeff)*(*tmpend),m,invm); //q += (q>>31)&m;
    quo.push_back(q);
    --tmpend;
    // tmp <- tmp - q *B.shifted (if q!=0)
    if (q) {
      tmpptr=tmpend;
      vector<int>::const_iterator itq=B_beg;
      ++itq; // first elements cancel
      for (;itq!=B_end;--tmpptr,++itq){
        *tmpptr = amodp(*tmpptr -(longlong(q) * (*itq)),m,invm);
        //*tmpptr=(*tmpptr -(longlong(q) * (*itq)))%m;
      }
    }
  }
  // trim rem and multiply by coeff, this will modularize rem as well
  rem.clear();
  // bool trimming=true;
  for (;tmpend!=tmp-1;--tmpend){
    if (*tmpend && (*tmpend % m))
      break;
  }
  for (;tmpend!=tmp-1;--tmpend){
    //int r=*tmpend %m;
    int r=amodp(*tmpend,m,invm); //r += (r>>31)&m;
    rem.push_back(r);
  }
#if defined VISUALC || defined BESTA_OS
  delete [] tmp;
#endif
  return;
  // debug
  // not reached: cross-check against the quo_/rem_ copies declared above
  if (quo_.size()){
    submod(quo_,quo,m); submod(rem_,rem,m);
    if (quo_.size() || rem_.size())
      CERR << "err\n";
  }
}

// Conversion from vector<gen> to vector<int> modulo m
// Coefficients of type _INT_ are reduced with %, the others with smod.
// Leading coefficients that reduce to 0 are dropped (nothing is pushed into v
// until a first non-zero residue has been seen).
void modpoly2smallmodpoly(const modpoly & p,vector<int> & v,int m){
  v.clear();
  const_iterateur it=p.begin(),itend=p.end();
  v.reserve(itend-it);
  int g;
  bool trim=true;
  for (;it!=itend;++it){
    if (it->type==_INT_)
      g=it->val % m;
    else
      g=smod(*it,m).val;
    if (g)
      trim=false;
    if (!trim)
      v.push_back(g);
  }
}


// Conversion from vector<int> to vector<gen> using smod
// p is cleared first, then receives smod(v[i],m) for every entry of v
void smallmodpoly2modpoly(const vector<int> & v,modpoly & p,int m){
  vector<int>::const_iterator it=v.begin(),itend=v.end();
  p.clear();
  p.reserve(itend-it);
  for (;it!=itend;++it){
    p.push_back(smod(*it,m));
  }
}

// compute r mod b into r
// r, b must be allocated arrays of int
// compute quotient if quoend!=0
// set exactquo to true if you know that b divides r and only want to compute the quotient
// this will not compute low degree coeff of r during division and spare some time
// [r,rend) and [b,bend) are the coefficient ranges, leading coefficient first.
// On return r has been advanced past the eliminated and the leading zero
// coefficients, so that [r,rend) is the remainder; quo is set to the start of
// the quotient inside the caller's buffer ending at quoend (quo=quoend when
// the quotient is empty).
static void rem(int * & r,int *rend,int * b,int *bend,int m,int * & quo,int *quoend,bool exactquo=false){
  int * i,*j,*rstop,*qcur,k,q,q2,lcoeffinv=1;
  k=int(bend-b);
  if (!k){
    quo=quoend;
    return;
  }
  if (rend-r<k){
    // deg(r)<deg(b): nothing to do
    quo=quoend;
    return;
  }
  quo=quoend-((rend-r)-(k-1));
  qcur=quo;
  // inv leading coeff of b
  if (*b !=1)
    lcoeffinv=invmod(*b,m);
  if (k==1){
    // b is a constant: remainder is empty, quotient is r*lcoeffinv
    if (quoend){
      i=quo;
      for (;r!=rend;++r,++i){
        type_operator_times_reduce(*r,lcoeffinv,*i,m);
        // *i=(*r*lcoeffinv)%m;
      }
    }
    else
      r=rend;
    return;
  }
  if (rend-r==bend-b+1){
    // frequent case: the degree decrease by 1
    // a(x) += b(x)*(q1*x+q2)
    // first compute q1 and q2
    q=-smod(*r*longlong(lcoeffinv),m);
    ++r;
    q2=-smod( ((*r+longlong(q)* *(b+1))%m)*longlong(lcoeffinv),m);
    if (quoend){
      *qcur=-q;
      ++qcur;
      *qcur=-q2;
    }
    ++r;
    // now compute a
    j=r;
    i=b+1;
    if (i!=bend){
      if (m<46340){
        // m*m fits in an int: plain int arithmetic
        for (;;){
          *j += q2* (*i);
          ++i;
          if (i==bend){
            *j %= m;
            break;
          }
          *j += q* (*i);
          *j %= m;
          ++j;
        }
      }
      else {
        for (;;){
          type_operator_plus_times_reduce_nock(q2,*i,*j,m);
          ++i;
          if (i==bend)
            break;
          type_operator_plus_times_reduce_nock(q,*i,*j,m);
          ++j;
        }
      }
    } // end if i!=bend
  }
  else {
    ++b;
    // while degree(r)>=degree(b) do r <- r - r[0]*lcoeffinv*b
    // rend is not used anymore, we make it point k ints before
    rstop = rend-(k-1) ; // if r==rend then deg(r)==deg(b)
    for (;rstop-r>0;){
      type_operator_times_reduce(*r,lcoeffinv,q,m);
      // q=((*r)*longlong(lcoeffinv))%m;
      if (quoend){
        *qcur=q;
        ++qcur;
      }
      ++r;
      if (q){
        q=-q;
        j=r;
        i=b;
        for (;i!=bend;++j,++i){
          type_operator_plus_times_reduce_nock(q,*i,*j,m);
          // *j = (*j + q * *i)%m;
        }
      }
      // exact quotient requested: shorten b so that low degree coefficients
      // of r are not updated anymore
      if (exactquo && rend-r<=2*(k-1))
        --bend;
    }
  }
  // trim answer
  for (;r!=rend;++r){
    if (*r)
      break;
  }
}

/*
void rem_tabint(int * & r,int *rend,int * b,int *bend,int m,int * & quo,int *quoend){
  int * i,*j,*rstop,*qcur,k,q,lcoeffinv=1;
  k=bend-b;
  if (!k){
    quo=quoend;
    return;
  }
  if (rend-r<k){
    quo=quoend;
    return;
  }
  quo=quoend-((rend-r)-(k-1));
  qcur=quo;
  // inv leading coeff of b
  if (*b !=1)
    lcoeffinv=invmod(*b,m);
  if (k==1){
    if (quoend){
      i=quo;
      for (;r!=rend;++r,++i){
        type_operator_times_reduce(*r,lcoeffinv,*i,m);
        // *i=(*r*lcoeffinv)%m;
      }
    }
    else
      r=rend;
    return;
  }
  ++b;
  // while degree(r)>=degree(b) do r <- r - r[0]*lcoeffinv*b
  // rend is not used anymore, we make it point k ints before
  rstop = rend-(k-1) ; // if r==rend then deg(r)==deg(b)
  for (;rstop-r>0;){
    type_operator_times_reduce(*r,lcoeffinv,q,m);
    // q=((*r)*longlong(lcoeffinv))%m;
    if (quoend){
      *qcur=q;
      ++qcur;
    }
    ++r;
    if (q){
      q=-q;
      j=r;
      i=b;
      for (;i!=bend;++j,++i){
        // type_operator_plus_times_reduce_nock(q,*i,*j,m);
        *j = (*j + q * *i)%m;
        // *j = (*j + longlong(q) * *i)%m;
      }
    }
  }
  // trim answer
  for (;r!=rend;++r){
    if (*r)
      break;
  }
}
*/

// Writes the coefficients of p reduced modulo m into the array a, in the same
// order as in p (% for _INT_ coefficients, smod otherwise).
// a must have room for p.size() ints.
static void gcdconvert(const modpoly & p,int m,int * a){
  const_iterateur it=p.begin(),itend=p.end();
  for (;it!=itend;++it,++a){
    if (it->type==_INT_)
      *a=it->val % m;
    else
      *a=smod(*it,m).val;
  }
}

// Same conversion for a sparse polynome: each monomial coefficient is reduced
// modulo m and written to a, the gaps between the exponents of consecutive
// monomials (index.front()) being filled with zeros, down to degree 0.
// Returns false as soon as a coefficient is neither _INT_ nor _ZINT.
// Nothing is written for a polynome without monomial.
static bool gcdconvert(const polynome & p,int m,int * a){
  vector< monomial<gen> >::const_iterator it=p.coord.begin(),itend=p.coord.end();
  int deg;
  for (;it!=itend;){
    if (it->value.type==_INT_)
      *a=it->value.val % m;
    else {
      if (it->value.type==_ZINT)
        *a=smod(it->value,m).val;
      else
        return false;
    }
    deg=it->index.front();
    ++it;
    if (it==itend){
      // last monomial: pad with zeros down to the constant coefficient
      for (++a;deg>0;++a,--deg){
        *a=0;
      }
      return true;
    }
    deg -= it->index.front();
    for (++a,--deg;deg>0;++a,--deg){
      *a=0;
    }
  }
  return true;
}

// Efficient small modular gcd of p and q using vector<int>
// Both inputs are converted to int arrays modulo m, the Euclidean remainder
// sequence is run with rem() (no quotient requested), and the last non-zero
// remainder is multiplied by the inverse of its leading coefficient before
// being stored in d (coefficients normalized with smod).
void gcdsmallmodpoly(const modpoly &p,const modpoly & q,int m,modpoly & d){

  int as=int(p.size()),bs=int(q.size());
#if defined VISUALC || defined BESTA_OS
  int *asave=new int[as], *a=asave,*aend=a+as;
  int *bsave=new int[bs], *b=bsave,*bend=b+bs,*qcur=0;
#else
#ifndef NO_STDEXCEPT
  if (as>1000000 || bs>1000000)
    setdimerr();
#endif
  int asave[as], *a=asave,*aend=a+as;
  int bsave[bs], *b=bsave,*bend=b+bs,*qcur=0;
#endif
  gcdconvert(p,m,a);
  int * t;
  gcdconvert(q,m,b);
  for (;b!=bend;){
    rem(a,aend,b,bend,m,qcur,0);
    // (a,b) <- (b, a mod b): exchange the two ranges
    t=a; a=b; b=t;
    t=aend; aend=bend; bend=t;
  }
  d.clear();
  d.reserve(aend-a);
  int ainv=1;
  if (a!=aend)
    ainv=invmod(*a,m);
  for (;a!=aend;++a){
    d.push_back(smod((*a)*longlong(ainv),m));
  }
#if defined VISUALC || defined BESTA_OS
  delete [] asave;
  delete [] bsave;
#endif
}

dp,polynome & dq,bool compute_cof)3556 bool gcdsmallmodpoly(const polynome &p,const polynome & q,int m,polynome & d,polynome & dp,polynome & dq,bool compute_cof){ 3557 if (p.dim!=1 || q.dim!=1) 3558 return false; 3559 bool promote = m>=46340; 3560 int as=p.lexsorted_degree()+1,bs=q.lexsorted_degree()+1; 3561 if (as>HGCD*4 || bs>HGCD*4) 3562 return false; 3563 #if defined VISUALC || defined BESTA_OS 3564 int *asave = new int[as], *a=asave,*aend=a+as,*qcur=0; 3565 int *Asave = new int[as], *A=Asave,*Aend=A+as; 3566 int *bsave = new int[bs], *b=bsave,*bend=b+bs; 3567 int *Bsave = new int[bs], *B=Bsave,*Bend=B+bs; 3568 #else // this will allocate too much on stack for as+bs large 3569 int asave[as], *a=asave,*aend=a+as,*qcur=0; 3570 int Asave[as], *A=Asave,*Aend=A+as; 3571 int bsave[bs], *b=bsave,*bend=b+bs; 3572 int Bsave[bs], *B=Bsave,*Bend=B+bs; 3573 #endif 3574 int * t; 3575 if (gcdconvert(p,m,a) && gcdconvert(q,m,b) ){ 3576 memcpy(Asave,asave,as*sizeof(int)); 3577 memcpy(Bsave,bsave,bs*sizeof(int)); 3578 for (;b!=bend;){ 3579 rem(a,aend,b,bend,m,qcur,0); 3580 t=a; a=b; b=t; 3581 t=aend; aend=bend; bend=t; 3582 } 3583 d.coord.clear(); 3584 int ainv=1; 3585 int * aa=a; 3586 if (a!=aend) 3587 ainv=invmod(*a,m); 3588 if (promote){ 3589 for (int deg=int(aend-a)-1;a!=aend;++a,--deg){ 3590 if (*a){ 3591 *a=smod((*a)*longlong(ainv),m); 3592 d.coord.push_back(monomial<gen>(*a,deg,1,1)); 3593 } 3594 } 3595 } 3596 else { 3597 for (int deg=int(aend-a)-1;a!=aend;++a,--deg){ 3598 if (*a){ 3599 *a=smod((*a)*ainv,m); 3600 d.coord.push_back(monomial<gen>(*a,deg,1,1)); 3601 } 3602 } 3603 } 3604 if (aa!=aend && compute_cof){ 3605 if (debug_infolevel>20) 3606 CERR << "gcdsmallmodpoly, compute cofactors " << CLOCK() << '\n'; 3607 #if defined VISUALC || defined BESTA_OS 3608 int * qsave=new int[std::max(as,bs)], *qcur=qsave,*qend=qsave+std::max(as,bs); 3609 #else 3610 int qsave[std::max(as,bs)], *qcur=qsave,*qend=qsave+std::max(as,bs); 3611 #endif 3612 // int * qsave=new int[as], 
*qcur=qsave,*qend=qsave+as; 3613 rem(A,Aend,aa,aend,m,qcur,qend); 3614 dp.coord.clear(); 3615 for (int deg=int(qend-qcur)-1;qcur!=qend;++qcur,--deg){ 3616 if (*qcur) 3617 dp.coord.push_back(monomial<gen>(smod(*qcur,m),deg,1,1)); 3618 } 3619 qcur=qsave; 3620 rem(B,Bend,aa,aend,m,qcur,qend); 3621 dq.coord.clear(); 3622 for (int deg=int(qend-qcur)-1;qcur!=qend;++qcur,--deg){ 3623 if (*qcur) 3624 dq.coord.push_back(monomial<gen>(smod(*qcur,m),deg,1,1)); 3625 } 3626 if (debug_infolevel>20) 3627 CERR << "gcdsmallmodpoly, end compute cofactors " << CLOCK() << '\n'; 3628 #if defined VISUALC || defined BESTA_OS 3629 delete [] qsave; 3630 #endif 3631 } 3632 #if defined VISUALC || defined BESTA_OS 3633 delete [] asave; delete [] Asave; delete [] bsave; delete [] Bsave; 3634 #endif 3635 return true; 3636 } 3637 else { 3638 #if defined VISUALC || defined BESTA_OS 3639 delete [] asave; delete [] Asave; delete [] bsave; delete [] Bsave; 3640 #endif 3641 return false; 3642 } 3643 } 3644 3645 // invert a1 mod m invmod(double a1,double A)3646 double invmod(double a1,double A){ 3647 double a(A),a2,u=0,u1=1,u2,q; 3648 for (;a1;){ 3649 q=std::floor(a/a1); 3650 a2=a-q*a1; 3651 u2=u-q*u1; 3652 a=a1; 3653 a1=a2; 3654 u=u1; 3655 u1=u2; 3656 } 3657 if (a==-1){ a=1; u=-u; } 3658 if (a!=1) return 0; 3659 if (u<0) u+=A; 3660 return u; 3661 } 3662 convertdouble(const modpoly & p,double M,vector<double> & v)3663 bool convertdouble(const modpoly & p,double M,vector<double> & v){ 3664 v.clear(); v.reserve(p.size()); 3665 int m=int(M); 3666 const_iterateur it=p.begin(),itend=p.end(); 3667 for (;it!=itend;++it){ 3668 if (it->type==_INT_) 3669 v.push_back(it->val % m); 3670 else { 3671 if (it->type==_ZINT) 3672 v.push_back(smod(*it,m).val); 3673 else 3674 return false; 3675 } 3676 } 3677 return true; 3678 } 3679 convertfromdouble(const vector<double> & A,modpoly & a,double M)3680 bool convertfromdouble(const vector<double> & A,modpoly & a,double M){ 3681 a.clear(); a.reserve(A.size()); 3682 int m( 
(int)M); 3683 vector<double>::const_iterator it=A.begin(),itend=A.end(); 3684 for (;it!=itend;++it){ 3685 double d=*it; 3686 if (d!=int(d)) 3687 return false; 3688 if (d>M/2) 3689 a.push_back(int(d)-m); 3690 else 3691 a.push_back(int(d)); 3692 } 3693 return true; 3694 } 3695 multdoublepoly(double x,vector<double> & v,double m)3696 void multdoublepoly(double x,vector<double> & v,double m){ 3697 if (x==1) 3698 return; 3699 vector<double>::iterator it=v.begin(),itend=v.end(); 3700 for (;it!=itend;++it){ 3701 double t=*it * x; 3702 double q=std::floor(t/m); 3703 *it = t-q*m; 3704 } 3705 } 3706 3707 // A = BQ+R mod m with B leading coeff = 1 quoremdouble(const vector<double> & A,const vector<double> & B,vector<double> & Q,vector<double> & R,double m)3708 void quoremdouble(const vector<double> & A,const vector<double> & B,vector<double> & Q,vector<double> & R,double m){ 3709 Q.clear(); 3710 R=A; 3711 int rs=int(R.size()),bs=int(B.size()); 3712 if (rs<bs) 3713 return; 3714 if (rs==bs+1){ } // possible improvement 3715 vector<double>::iterator it=R.begin(),itend=it+(rs-bs+1); 3716 for (;it!=itend;){ 3717 double q=*it; 3718 Q.push_back(q); 3719 *it=0; 3720 ++it; 3721 vector<double>::iterator kt=it; 3722 vector<double>::const_iterator jt=B.begin()+1,jtend=B.end(); 3723 for (;jt!=jtend;++kt,++jt){ 3724 double d= *kt- q*(*jt); 3725 *kt=d-std::floor(d/m)*m; 3726 } 3727 for (;it!=itend;++it){ 3728 if (*it) 3729 break; 3730 } 3731 } 3732 for (;it!=R.end();++it){ 3733 if (*it) 3734 break; 3735 } 3736 R.erase(R.begin(),it); 3737 } 3738 gcddoublemodpoly(const modpoly & p,const modpoly & q,double m,modpoly & a)3739 bool gcddoublemodpoly(const modpoly &p,const modpoly & q,double m,modpoly &a){ 3740 vector<double> A,B,Q,R; 3741 if (!convertdouble(p,m,A) || !convertdouble(q,m,B)) 3742 return false; 3743 while (!B.empty()){ 3744 multdoublepoly(invmod(B.front(),m),B,m); 3745 quoremdouble(A,B,Q,R,m); 3746 swap(A,B); 3747 swap(B,R); 3748 } 3749 if (!A.empty()) 3750 
multdoublepoly(invmod(A.front(),m),A,m); 3751 return convertfromdouble(A,a,m); 3752 } 3753 reverse_resize(modpoly & a,int N,int reserve)3754 void reverse_resize(modpoly & a,int N,int reserve){ 3755 reverse(a.begin(),a.end()); 3756 // for (int i=a.size();i<N;++i) a.push_back(0); 3757 a.resize(N); 3758 for (int i=0;i<a.size();++i){ 3759 if (a[i].type==_ZINT) 3760 a[i]=*a[i]._ZINTptr; 3761 else 3762 a[i].uncoerce(reserve); 3763 } 3764 } 3765 3766 // a=source mod x^N-1 mod p reverse_assign(const modpoly & source,vector<int> & a,int N,int p)3767 void reverse_assign(const modpoly & source,vector<int> & a,int N,int p){ 3768 a.clear(); a.resize(N); 3769 if (source.empty()) return; 3770 const gen * stop=&*source.begin(),*start=&*source.end()-1; 3771 int i=0; 3772 for (;i<N && start>=stop;i++,--start){ 3773 if (start->type==_INT_) 3774 a[i]=start->val % p; 3775 else 3776 a[i]=modulo(*start->_ZINTptr,p); 3777 } 3778 for (i=0;start>=stop;--start){ 3779 if (start->type==_INT_) 3780 a[i]=(a[i]+longlong(start->val)) %p; 3781 else 3782 a[i]=(a[i]+longlong(modulo(*start->_ZINTptr,p))) % p; 3783 ++i; 3784 if (i==N) 3785 i=0; 3786 } 3787 } 3788 3789 // make f coeffs in [0,p] make_positive(vector<int> & f,int p)3790 void make_positive(vector<int> & f,int p){ 3791 for (vector<int>::iterator it=f.begin();it!=f.end();++it){ 3792 int i=*it; 3793 i += (i>>31)&p; 3794 i -= p; 3795 i += (i>>31)&p; 3796 *it=i; 3797 } 3798 } 3799 reverse_assign(vector<int> & a,int N,int p)3800 void reverse_assign(vector<int> & a,int N,int p){ 3801 if (a.size()>N){ 3802 vector<int>::iterator it=a.begin(),jt=it+N,jtend=a.end(); 3803 for (;it<jt;++it) 3804 *it += (*it>>31)&p; 3805 for (it=a.begin();jt!=jtend;++it,++jt){ 3806 int i=*it,j=*jt; 3807 j += (j>>31)&p; 3808 i += j-p; 3809 i += (i>>31)&p; 3810 *jt=i; 3811 } 3812 a.erase(a.begin(),it); 3813 reverse(a.begin(),a.end()); 3814 } 3815 else { 3816 make_positive(a,p); 3817 reverse(a.begin(),a.end()); 3818 a.resize(N); 3819 } 3820 } 3821 3822 // a=source mod 
// x^N-1 mod p
// a gets N entries, lowest degree first (source is read from its last
// element backwards). p is added to negative coefficients; coefficients of
// degree >= N are added back cyclically, the sum being kept in [0,p) as long
// as the inputs are in (-p,p).
void reverse_assign(const vector<int> & source,vector<int> & a,int N,int p){
  a.clear(); a.resize(N);
  if (source.empty()) return;
  const int * stop=&*source.begin(),*start=&*source.end()-1;
  int i=0;
  for (;i<N && start>=stop;i++,--start){
    int k=*start;
    k += (k>>31)&p; // add p if k is negative
    // if (k<0)
    //CERR << "err\n";
    a[i]=k;
  }
  for (i=0;start>=stop;--start){
    int k=*start;
    k -= (k>>31)*p; // k>>31 is -1 for a negative k: adds p
    k += (a[i]-p);
    k -= (k>>31)*p;
    a[i]= k ;
    // a[i]=(a[i]+longlong(*start)) %p;
    // if ( (a[i]-longlong(k))%p!=0)
    //if (k<0)
    //CERR << "err\n";
    ++i;
    if (i==N)
      i=0;
  }
}

// a=source mod x^N-1
// Multiprecision version: every entry of a is a _ZINT on return (entries that
// are not yet _ZINT are created with uncoerce(reserve)).
// If source and a are the same object, a is only reversed, converted and
// padded up to N entries (no wrap around is done in that case).
// NOTE(review): unlike the int versions there is no early return for an empty
// source before &*source.begin() is taken -- callers presumably never pass an
// empty source here, to be confirmed.
void reverse_assign(const modpoly & source,modpoly & a,int N,int reserve){
  if (&source==&a){
    a.reserve(N);
    reverse(a.begin(),a.end());
    for (int i=0;i<a.size();++i)
      a[i].uncoerce(reserve);
    for (int i=a.size();i<N;++i){
      gen g; g.uncoerce(reserve);
      a.push_back(g);
    }
    return;
  }
  a.resize(N);
  const gen * stop=&*source.begin(),*start=&*source.end()-1;
  int i=0;
  // first N coefficients: plain copy into the mpz of a[i]
  for (;i<N && start>=stop;i++,--start){
    if (a[i].type!=_ZINT){
      a[i]=0;
      a[i].uncoerce(reserve);
    }
    if (start->type==_INT_)
      mpz_set_si(*a[i]._ZINTptr,start->val);
    else
      mpz_set(*a[i]._ZINTptr,*start->_ZINTptr);
  }
  // source shorter than N: remaining entries are set to 0
  for (;i<N ;i++){
    gen & g=a[i];
    if (g.type==_ZINT)
      mpz_set_si(*g._ZINTptr,0);
    else {
      g=0;
      g.uncoerce(reserve);
    }
  }
  // coefficients of degree >= N are added back cyclically
  for (i=0;start>=stop;--start){
    if (start->type==_INT_){
      if (start->val>=0)
        mpz_add_ui(*a[i]._ZINTptr,*a[i]._ZINTptr,start->val);
      else
        mpz_sub_ui(*a[i]._ZINTptr,*a[i]._ZINTptr,-start->val);
    }
    else
      mpz_add(*a[i]._ZINTptr,*a[i]._ZINTptr,*start->_ZINTptr);
    ++i;
    if (i==N)
      i=0;
  }
}

// a[i] <- a[i]*b[i] mod p1 for every index of a (b is indexed up to a.size())
void fft_ab_p1(vector<int> &a,const vector<int> &b){
  size_t s=a.size();
  for (size_t i=0;i<s;++i){
    a[i]=(longlong(a[i])*b[i])%p1;
  }
}

// same as fft_ab_p1 modulo p2
void fft_ab_p2(vector<int> &a,const vector<int> &b){
  size_t s=a.size();
  for (size_t i=0;i<s;++i){
    a[i]=(longlong(a[i])*b[i])%p2;
  }
}

// same as fft_ab_p1 modulo p3
void fft_ab_p3(vector<int> &a,const vector<int> &b){
  size_t s=a.size();
  for (size_t i=0;i<s;++i){
    a[i]=(longlong(a[i])*b[i])%p3;
  }
}

// pointwise product of two fft_rep, component by component (modp1, modp2, modp3)
void fft_ab(fft_rep & a,const fft_rep & b){
  fft_ab_p1(a.modp1,b.modp1);
  fft_ab_p2(a.modp2,b.modp2);
  fft_ab_p3(a.modp3,b.modp3);
}

// ur <- ua - q*ub modulo p, for a quotient q with exactly 2 coefficients
// (q[0]*x+q[1]). ub must not be empty and ua must not be longer than ub
// (itmid=&ub[0]+ub.size()-ua.size() marks where ua starts contributing).
// The coefficients are the raw outputs of % and amodp (see the commented
// make_positive at the end).
void a_minus_qsize2_b(const vector<int> & ua,const vector<int> & q,const vector<int> &ub,vector<int> & ur,int p){
  double invp=find_invp(p);
  longlong q1=-q[0],q0=-q[1];
  ur.clear(); ur.push_back((q1*ub.front())%p);
  const int * it=&ub[0],*itend=it-1+ub.size(),*itmid=it+ub.size()-ua.size(),*jt=&ua[0];
  if (ua.empty()){
    for (;it!=itend;++it){
      ur.push_back(amodp(q0*it[0]+q1*it[1],p,invp));
    }
    ur.push_back(amodp(q0*it[0],p,invp));
  }
  else {
#if 1
    // part where ua does not contribute yet, 4 coefficients per iteration
    itmid-=4;
    int i0=it[0],i1;
    for (;it<itmid;it+=4){
      i1=it[1];
      ur.push_back(amodp(q0*i0+q1*i1,p,invp));
      i0=it[2];
      ur.push_back(amodp(q0*i1+q1*i0,p,invp));
      i1=it[3];
      ur.push_back(amodp(q0*i0+q1*i1,p,invp));
      i0=it[4];
      ur.push_back(amodp(q0*i1+q1*i0,p,invp));
    }
    itmid+=4;
#endif
    for (;it!=itmid;++it){
      ur.push_back(amodp(q0*it[0]+q1*it[1],p,invp));
    }
#if 1
    // part where the coefficients of ua (read through jt) are added
    itend-=4;
    i0=it[0];
    for (;it<itend;it+=4,jt+=4){
      i1=it[1];
      ur.push_back(amodp(q0*i0+q1*i1+jt[0],p,invp));
      i0=it[2];
      ur.push_back(amodp(q0*i1+q1*i0+jt[1],p,invp));
      i1=it[3];
      ur.push_back(amodp(q0*i0+q1*i1+jt[2],p,invp));
      i0=it[4];
      ur.push_back(amodp(q0*i1+q1*i0+jt[3],p,invp));
    }
    itend+=4;
#endif
    for (;it!=itend;++jt,++it){
      ur.push_back(amodp(q0*it[0]+q1*it[1]+*jt,p,invp));
    }
    ur.push_back(amodp(q0*it[0]+*jt,p,invp));
  }
  //make_positive(ur,p);
}

// Variant taking preconditioning values q0inv/q1inv.
// As written it always forwards to the 5 arguments version above (the
// "if (ua.empty())" guard is commented out), hence the code after the return
// is never executed and q0inv/q1inv are left untouched.
void a_minus_qsize2_b(const vector<int> & ua,const vector<int>& q,const vector<int> &ub,vector<int> & ur,int p,int & q0inv,int & q1inv){
  //if (ua.empty())
  return a_minus_qsize2_b(ua,q,ub,ur,p);
  int q1=q[0],q0=q[1];
  if (q0inv==0 || q1inv==0){
    q0 += (q0>>31)&p;
    q1 += (q1>>31)&p;
    q0inv=(1ULL<<31)*unsigned(q0)/unsigned(p)+1;
    q1inv=(1ULL<<31)*unsigned(q1)/unsigned(p)+1;
  }
  ur.clear();
  ur.push_back(precond_mulmod31(-ub.front(),q1,p,q1inv)); // (-q1*ub.front())%p);
  const int * it=&ub[0],*itend=it-1+ub.size(),*itmid=it+ub.size()-ua.size(),*jt=&ua[0];
  for (;it!=itmid;++it){
    ur.push_back(precond_a_q1b1_q2b2(0,q0,it[0],q1,it[1],p,q0inv,q1inv));
  }
  for (;it!=itend;++jt,++it){
    ur.push_back(precond_a_q1b1_q2b2(*jt,q0,it[0],q1,it[1],p,q0inv,q1inv));
  }
  ur.push_back(precond_a_bq(*jt,it[0],q0,p,q0inv));//(*jt-q0*it[0])%p);
}

// Iterative half-gcd style loop modulo p.
// Starting from a=a0i, b=b0i, ua=1, ub=0, va=0, vb=1 (0 being the empty
// vector), Euclidean divisions a=b*q+r are performed with DivRem and the
// cofactors updated by ur=ua-q*ub, vr=va-q*vb, until deg(b)<m. The four
// cofactors are then passed through make_positive and true is returned.
// If degv is not empty on entry, the degree differences and the leading
// coefficients of the successive divisors are appended to degv and coeffv.
// a, b, q, r, ur, vr are working vectors supplied by the caller.
bool hgcd_iter_int(const vector<int> & a0i,const vector<int> & b0i,int m,vector<int> & ua,vector<int> & ub,vector<int> & va,vector<int> &vb,int p,vector<int> & coeffv,vector<int> & degv,vector<int> &a,vector<int> & b,vector<int> & q,vector<int> & r,vector<int> & ur,vector<int> & vr){
  if (debug_infolevel>2)
    CERR << CLOCK()*1e-6 << " halfgcd iter m=" << m << " dega0/a1 " << a0i.size() << "," << b0i.size() << '\n';
  int as=a0i.size(),as2=nextpow2(as);
  a.reserve(as2); b.reserve(as2);
  a.resize(a0i.size());
  b.resize(b0i.size());
  copy(a0i.begin(),a0i.end(),a.begin());
  copy(b0i.begin(),b0i.end(),b.begin());
  r.reserve(as);
  // initializes ua to 1 and ub to 0, the coeff of u in ua*a+va*b=a
  ua.reserve(as2); ua.clear(); ua.push_back(1); ub.clear(); ub.reserve(as2); ur.clear(); ur.reserve(as2);
  va.reserve(as2); va.clear(); vb.clear(); vb.reserve(as2); vb.push_back(1); vr.clear(); vr.reserve(as2);
  vector<int>::iterator it,itend;
  // DivRem: a = bq+r
  // hence ur <- ua-q*ub, vr <- va-q*vb verify
  // ur*a+vr*b=r
  // a <- b, b <- r, ua <- ub and ub<- ur
  for (;;){
    int n=int(b.size())-1;
    if (n<m){ // degree(b) is small enough
      if (debug_infolevel>2)
        CERR << CLOCK()*1e-6 << " halfgcd iter end" << a0i.size() << "," << b0i.size() << '\n';
      make_positive(ua,p);
      make_positive(ub,p);
      make_positive(va,p);
      make_positive(vb,p);
      return true;
    }
    if (!degv.empty()){
      degv.push_back(degv.back()+b.size()-a.size());
      coeffv.push_back(b[0]);
    }
    DivRem(a,b,p,q,r); // division works always
    swap(a,b); swap(b,r); // a=b; b=r;
    // ur=ua-q*ub, ua<-ub, ub<-ur
    if (q.size()==2){ // here ua.size()<ub.size()
      int q1inv=0,q2inv=0;
      if (ub.empty())
        swap(ua,ub);
      else {
        a_minus_qsize2_b(ua,q,ub,ur,p,q1inv,q2inv);
        swap(ua,ub); swap(ub,ur);
      }
      a_minus_qsize2_b(va,q,vb,vr,p,q1inv,q2inv);
      swap(va,vb); swap(vb,vr);
      continue;
    }
    if (ub.empty())
      swap(ua,ub);
    else {
      mulsmall(q.begin(),q.end(),ub.begin(),ub.end(),p,ur);
      submodneg(ur,ua,p);
      swap(ua,ub); swap(ub,ur); // ua=ub; ub=ur;
    }
    // q*vb is q itself when vb==1: q is swapped into vr instead of multiplying
    if (vb.size()==1 && vb.front()==1) vr.swap(q); else mulsmall(q.begin(),q.end(),vb.begin(),vb.end(),p,vr);
    submodneg(vr,va,p);
    swap(va,vb); swap(vb,vr); // va=vb; vb=vr;
  }
  return false; // never reached
}

// Same as the vector<int> version of a_minus_qsize2_b for vecteur arguments:
// appends ua-q*ub mod p to ur (q with exactly 2 coefficients).
// Every coefficient is read through .val, i.e. is assumed to be an _INT_ gen,
// and ur is not cleared here (a_bc clears it before the call).
void a_minus_qsize2_b(const vecteur & ua,const vecteur & q,const vecteur &ub,vecteur & ur,int p){
  longlong q1=-q[0].val,q0=-q[1].val;
  ur.push_back((q1*ub.front().val)%p);
  const gen * it=&ub[0],*itend=it-1+ub.size(),*itmid=it+ub.size()-ua.size(),*jt=&ua[0];
  if (ua.empty()){
    for (;it!=itend;++it){
      ur.push_back((q0*it[0].val+q1*it[1].val)%p);
    }
    ur.push_back((q0*it[0].val)%p);
  }
  else {
    for (;it!=itmid;++it){
      ur.push_back((q0*it[0].val+q1*it[1].val)%p);
    }
    for (;it!=itend;++jt,++it){
      ur.push_back((q0*it[0].val+q1*it[1].val+jt->val)%p);
    }
    ur.push_back((q0*it[0].val+jt->val)%p);
  }
}

// res <- a-b*c modulo p, tmp1 being a working vector (swapped with res at
// the end). The dedicated a_minus_qsize2_b code is used when c has 2
// coefficients and a is not longer than b, otherwise mulsmall followed by
// submodneg (both defined elsewhere).
void a_bc(const vector<int> &a,const vector<int> &b,const vector<int> &c,int p,vector<int> & res,vector<int> & tmp1){
  // res=trim(a-b*c,env); return;
  size_t as=a.size(),bs=b.size();
  if (as<=bs){
    tmp1.clear(); tmp1.reserve(bs);
    if (c.size()==2){
      a_minus_qsize2_b(a,c,b,tmp1,p);
      tmp1.swap(res);
      //make_positive(res,p);
      return;
    }
  }
  mulsmall(b.begin(),b.end(),c.begin(),c.end(),p,tmp1);
  submodneg(tmp1,a,p);
  tmp1.swap(res);
}

// a-b*c
// modpoly version. The fast path requires an active integer modulus in env,
// c of size 2 and a not longer than b. Otherwise the product is computed
// with a default constructed environment (zeroenv), subtracted from a, and
// the result is trimmed with respect to env.
void a_bc(const modpoly &a,const modpoly &b,const modpoly &c,environment * env,modpoly & res,modpoly & tmp1){
  // res=trim(a-b*c,env); return;
  size_t as=a.size(),bs=b.size();
  if (as<=bs && env->moduloon && env->modulo.type==_INT_){
    tmp1.clear(); tmp1.reserve(bs);
    int p=env->modulo.val;
    if (c.size()==2){
      a_minus_qsize2_b(a,c,b,tmp1,p);
      tmp1.swap(res); return;
    }
  }
  environment zeroenv;
  tmp1.clear();
  if (!b.empty() && !c.empty())
    operator_times(b,c,&zeroenv,tmp1);
  submodpoly(a,tmp1,res);
  trim_inplace(res,env);
}

// Reduction of z modulo 2^expoN+1, tmpqz being a scratch variable.
// Writing z=hi*2^expoN+lo (truncated division), z is replaced by lo-hi, which
// is congruent to z since 2^expoN=-1 modulo 2^expoN+1; this is done at most
// twice. If do_smod is true, a last step subtracts t*(2^expoN+1) where t is
// the truncated quotient of z by 2^(expoN-1).
void smod2N(mpz_t & z,unsigned long expoN,mpz_t tmpqz,bool do_smod=false){
  mpz_tdiv_q_2exp(tmpqz,z,expoN);
  if (mpz_cmp_si(tmpqz,0)){
    mpz_tdiv_r_2exp(z,z,expoN);
    mpz_sub(z,z,tmpqz);
    mpz_tdiv_q_2exp(tmpqz,z,expoN);
    if (mpz_cmp_si(tmpqz,0)){
      mpz_tdiv_r_2exp(z,z,expoN);
      mpz_sub(z,z,tmpqz);
    }
  }
  if (!do_smod)
    return;
  mpz_tdiv_q_2exp(tmpqz,z,expoN-1);
  if (mpz_cmp_si(tmpqz,0)){
    mpz_sub(z,z,tmpqz);
    mpz_mul_2exp(tmpqz,tmpqz,expoN);
    mpz_sub(z,z,tmpqz);
  }
}

// Keeps only the last deg+1 entries of a (the low degree part, the leading
// coefficient being stored first); nothing is done if a is short enough.
void trim_deg(modpoly & a,int deg){
  if (a.size()>deg+1)
    a.erase(a.begin(),a.end()-deg-1);
}

// same as above for vector<int>
void trim_deg(vector<int> & a,int deg){
  if (a.size()>deg+1)
    a.erase(a.begin(),a.end()-deg-1);
}

// same as above for vector<longlong>
void trim_deg(vector<longlong> & a,int deg){
  if (a.size()>deg+1)
    a.erase(a.begin(),a.end()-deg-1);
}

#ifdef INT128
#define GIAC_LLPRECOND 1
// A*W mod p from the precomputed Winvp (see preconditionner_ll): the estimated
// quotient (A*Winvp)>>64 times p is subtracted from A*W, and p is added back
// if the 64 bits result is negative.
// The statements after the first return are a disabled debugging check.
inline longlong precond_mulmodll(ulonglong A,ulonglong W,ulonglong Winvp,ulonglong p){
  longlong t = uint128_t(A)*W-((uint128_t(A)*Winvp)>>64)*p;
  t+=((t>>63)&p);
  return t;
  // debug
  if ((uint128_t(A)*W-t)%p!=0)
    CERR << "err\n";
  return t;
}
preconditionner_ll(ulonglong ww,longlong p)4170 inline ulonglong preconditionner_ll(ulonglong ww,longlong p){ 4171 return 1+((uint128_t(1)<<64)*ww)/ulonglong(p); // quotient ceiling 4172 } smodll(int128_t res,longlong m)4173 longlong smodll(int128_t res,longlong m){ 4174 res %= m; 4175 if (res>m/2) 4176 res -= m; 4177 return res; 4178 } 4179 4180 // this does not work for 63 bits primes because long_double 4181 // aka float128 seems to be FPU 80 bits integers with 64 bits of mantissa 4182 // insufficent precision amodpll(int128_t a,longlong p,long_double invp)4183 inline longlong amodpll(int128_t a,longlong p,long_double invp){ 4184 longlong q=long_double(a)*invp; 4185 q=a-int128_t(q)*p; 4186 q+=(q>>63)&p;q+=(q>>63)&p; 4187 q-=p;q+=(q>>63)&p; 4188 return q; 4189 // debug 4190 if (q!=q%p) 4191 CERR << "err amodpll\n"; 4192 return q%p; 4193 } 4194 vecteur2vector_ll(const vecteur & v,longlong m,vector<longlong> & res)4195 void vecteur2vector_ll(const vecteur & v,longlong m,vector<longlong> & res){ 4196 vecteur::const_iterator it=v.begin(),itend=v.end(); 4197 res.clear(); 4198 res.reserve(itend-it); 4199 if (m<0) 4200 m=-m; 4201 for (;it!=itend;++it){ 4202 gen g=*it; 4203 if (it->type==_MOD) 4204 g=*it->_MODptr; 4205 longlong r=it->type==_ZINT?mpz_fdiv_ui(*it->_ZINTptr,m):(it->val % m); 4206 r += (ulonglong(r)>>63)*m; // make positive 4207 // r -= (ulonglong((m>>1)-r)>>31)*m; // smod 4208 res.push_back(r); 4209 } 4210 } 4211 4212 // longlong fft 4213 // exemple of Fourier primes (with 2^53-roots of unity) 4214 // [4719772409484279809,4782822804267466753,4854880398305394689,5071053180419178497,5179139571476070401,5323254759551926273,5395312353589854209,5503398744646746113,5998794703657500673,6151917090988097537,6269010681299730433,6566248256706183169,6782421038819966977,6962565023914786817,7097673012735901697,7557040174727692289,7728176960567771137,7908320945662590977,8295630513616453633,8583860889768165377,8592868089022906369,8691947280825057281,9097271247288401921] 
4215 const longlong p5=9097271247288401921LL; 4216 const long_double invp5=long_double(1)/p5; addmodll(longlong a,longlong b,longlong p)4217 static inline longlong addmodll(longlong a, longlong b, longlong p) { 4218 longlong t=(a-p)+b; 4219 t += (t>>63)&p; 4220 return t; 4221 } 4222 submodll(longlong a,longlong b,longlong p)4223 static inline longlong submodll(longlong a, longlong b, longlong p) { 4224 longlong t=a-b; 4225 t += (t>>63)&p; 4226 return t; 4227 } 4228 mulmodll(longlong a,longlong b,longlong p)4229 static inline longlong mulmodll(longlong a, longlong b, longlong p) { 4230 return (int128_t(a)*b) % p; 4231 } 4232 mulmodll(vector<longlong> & v,longlong b,longlong p)4233 void mulmodll(vector<longlong> & v,longlong b,longlong p){ 4234 vector<longlong>::iterator it=v.begin(),itend=v.end(); 4235 int128_t B=b; 4236 for (;it!=itend;++it){ 4237 *it=(*it*B)%p; 4238 } 4239 } 4240 mulmodll(longlong a,longlong b,longlong p,long_double invp)4241 static inline longlong mulmodll(longlong a, longlong b, longlong p,long_double invp) { 4242 return amodpll(int128_t(a)*b,p,invp); 4243 } 4244 mulmodll(vector<longlong> & v,longlong b,longlong p,long_double invp)4245 void mulmodll(vector<longlong> & v,longlong b,longlong p,long_double invp){ 4246 vector<longlong>::iterator it=v.begin(),itend=v.end(); 4247 for (;it!=itend;++it){ 4248 *it=mulmodll(*it,b,p,invp); 4249 // *it=(*it*int128_t(B))%p; 4250 } 4251 } 4252 precond_mulmodll(vector<longlong> & v,longlong b,longlong bsurp,longlong p)4253 void precond_mulmodll(vector<longlong> & v,longlong b,longlong bsurp,longlong p){ 4254 vector<longlong>::iterator it=v.begin(),itend=v.end(); 4255 for (;it!=itend;++it){ 4256 *it=precond_mulmodll(*it,b,bsurp,p); 4257 } 4258 } 4259 4260 // Euclidean division modulo m DivRem(const vector<longlong> & th,const vector<longlong> & other,longlong m,vector<longlong> & quo,vector<longlong> & rem)4261 void DivRem(const vector<longlong> & th, const vector<longlong> & other,longlong m,vector<longlong> & 
quo, vector<longlong> & rem){ 4262 if (other.empty()){ 4263 rem=th; 4264 quo.clear(); 4265 return; 4266 } 4267 if (th.empty()){ 4268 quo=th; 4269 rem=th; 4270 return; 4271 } 4272 longlong a=longlong(th.size())-1; 4273 longlong b=longlong(other.size())-1; 4274 longlong coeff=other.front(),invcoeff=invmodll(coeff,m); 4275 long_double invm=long_double(1)/m; 4276 if (!b){ 4277 quo=th; 4278 mulmodll(quo,invcoeff,m); 4279 rem.clear(); 4280 return; 4281 } 4282 quo.clear(); 4283 if (a==b+1){ 4284 rem.clear(); 4285 // frequent case in euclidean algorithms 4286 int128_t q0=amodpll(int128_t(th[0])*invcoeff,m,invm); 4287 if (q0<0) q0+=m; 4288 int128_t q1=amodpll(int128_t(amodpll(th[1]-other[1]*q0,m,invm) )*invcoeff,m,invm);// (( (th[1]-other[1]*q0)%m )*invcoeff)%m; 4289 if (q1<0) q1+=m; 4290 quo.push_back(longlong(q0)); 4291 quo.push_back(longlong(q1)); 4292 // rem=th-other*q 4293 vector<longlong>::const_iterator at=th.begin()+2,bt=other.begin()+1,btend=other.end(); 4294 // first part of the loop, remainder is empty, push r only if non 0 4295 for (;;++at){ 4296 int128_t r=*at-q1*(*bt); 4297 ++bt; 4298 if (bt==btend){ 4299 r =amodpll(r,m,invm); 4300 if (r&& r!=m && r!=-m) 4301 rem.push_back(longlong(r)); 4302 return; 4303 } 4304 r -= q0*(*bt); 4305 r = amodpll(r,m,invm); 4306 if (r&& r!=m && r!=-m){ 4307 rem.push_back(longlong(r)); 4308 break; 4309 } 4310 } 4311 // second part of the loop, remainder is not empty, push r always 4312 for (++at;;++at){ 4313 int128_t r=*at-q1*(*bt); 4314 ++bt; 4315 if (bt==btend){ 4316 rem.push_back(amodpll(r,m,invm)); 4317 return; 4318 } 4319 rem.push_back(amodpll(r-q0*(*bt),m,invm));//rem.push_back((r-q0*(*bt))%m); 4320 } 4321 } 4322 rem=th; 4323 if (a<b) 4324 return; 4325 quo.reserve(a-b+1); 4326 // A=BQ+R -> A*invcoeff=(B*invcoeff)*Q+(R*invcoeff), 4327 // make division of A*invcoeff by B*invcoeff and multiply R by coeff at the end 4328 // copy rem to an array 4329 vector<longlong>::const_iterator remit=rem.begin();//,remend=rem.end(); 4330 #if 
defined VISUALC || defined BESTA_OS 4331 longlong * tmp=new longlong[a+1]; 4332 #else 4333 longlong tmp[a+1]; 4334 #endif 4335 longlong * tmpend=&tmp[a]; 4336 longlong * tmpptr=tmpend; // tmpend points to the highest degree coeff of A 4337 for (;tmpptr!=tmp-1;--tmpptr,++remit) 4338 *tmpptr=*remit; 4339 vector<longlong>::const_iterator B_beg=other.begin(),B_end=other.end(); 4340 longlong q;//n0(0), 4341 for (;a>=b;--a){ 4342 q= amodpll(int128_t(invcoeff)*(*tmpend),m,invm); 4343 quo.push_back(q); 4344 --tmpend; 4345 // tmp <- tmp - q *B.shifted (if q!=0) 4346 if (q) { 4347 tmpptr=tmpend; 4348 vector<longlong>::const_iterator itq=B_beg; 4349 ++itq; // first elements cancel 4350 for (;itq!=B_end;--tmpptr,++itq){ 4351 *tmpptr = amodpll(*tmpptr -(int128_t(q) * (*itq)),m,invm); // (*tmpptr -(int128_t(q) * (*itq)))%m; 4352 } 4353 } 4354 } 4355 // trim rem and multiply by coeff, this will modularize rem as well 4356 rem.clear(); 4357 // bool trimming=true; 4358 for (;tmpend!=tmp-1;--tmpend){ 4359 if (*tmpend && *tmpend % m) 4360 break; 4361 } 4362 for (;tmpend!=tmp-1;--tmpend){ 4363 rem.push_back(*tmpend); 4364 } 4365 #if defined VISUALC || defined BESTA_OS 4366 delete [] tmp; 4367 #endif 4368 } 4369 smallmultll(const vector<longlong> & a,const vector<longlong> & b,vector<longlong> & new_coord,longlong modulo)4370 void smallmultll(const vector<longlong> & a,const vector<longlong> & b,vector<longlong> & new_coord,longlong modulo){ 4371 int128_t test=int128_t(modulo)*std::min(a.size(),b.size()); 4372 bool large=test/(1ULL<<63) > (1ULL<<63)/modulo; 4373 new_coord.clear(); 4374 if (a.empty() || b.empty()) return; 4375 vector<longlong>::const_iterator ita_begin=a.begin(),ita=a.begin(),ita_end=a.end(),itb=b.begin(),itb_end=b.end(); 4376 for ( ; ita!=ita_end; ++ita ){ 4377 vector<longlong>::const_iterator ita_cur=ita,itb_cur=itb; 4378 if (large){ 4379 longlong res=0; 4380 for (;itb_cur!=itb_end;--ita_cur,++itb_cur) { 4381 res = (res + *ita_cur * int128_t(*itb_cur))%modulo ; 4382 
if (ita_cur==ita_begin) 4383 break; 4384 } 4385 new_coord.push_back(res % modulo); 4386 } 4387 else { 4388 int128_t res=0; 4389 for (;itb_cur!=itb_end;--ita_cur,++itb_cur) { 4390 res += *ita_cur * int128_t(*itb_cur) ; 4391 if (ita_cur==ita_begin) 4392 break; 4393 } 4394 new_coord.push_back(res % modulo); 4395 } 4396 } 4397 --ita; 4398 ++itb; 4399 for ( ; itb!=itb_end;++itb){ 4400 vector<longlong>::const_iterator ita_cur=ita,itb_cur=itb; 4401 if (large){ 4402 longlong res=0; 4403 for (;;) { 4404 res = (res + *ita_cur * int128_t(*itb_cur))%modulo ; 4405 if (ita_cur==ita_begin) 4406 break; 4407 --ita_cur; 4408 ++itb_cur; 4409 if (itb_cur==itb_end) 4410 break; 4411 } 4412 new_coord.push_back( res % modulo); 4413 } 4414 else { 4415 int128_t res= 0; 4416 for (;;) { 4417 res += *ita_cur * int128_t(*itb_cur) ; 4418 if (ita_cur==ita_begin) 4419 break; 4420 --ita_cur; 4421 ++itb_cur; 4422 if (itb_cur==itb_end) 4423 break; 4424 } 4425 new_coord.push_back(res % modulo); 4426 } 4427 } 4428 } 4429 a_minus_qsize2_b(const vector<longlong> & ua,const vector<longlong> & q,const vector<longlong> & ub,vector<longlong> & ur,longlong p)4430 void a_minus_qsize2_b(const vector<longlong> & ua,const vector<longlong> & q,const vector<longlong> &ub,vector<longlong> & ur,longlong p){ 4431 ur.clear(); 4432 int128_t q1=-q[0],q0=-q[1]; 4433 long_double invp=long_double(1)/p; 4434 ur.push_back(amodpll(q1*ub.front(),p,invp)); 4435 const longlong * it=&ub[0],*itend=it-1+ub.size(),*itmid=it+ub.size()-ua.size(),*jt=&ua[0]; 4436 if (ua.empty()){ 4437 for (;it!=itend;++it){ 4438 ur.push_back(amodpll(q0*it[0]+q1*it[1],p,invp)); 4439 } 4440 ur.push_back(amodpll(q0*it[0],p,invp)); 4441 } 4442 else { 4443 #if 1 4444 itmid-=4; 4445 longlong i0=it[0],i1; 4446 for (;it<itmid;it+=4){ 4447 i1=it[1]; 4448 ur.push_back(amodpll(q0*i0+q1*i1,p,invp)); 4449 i0=it[2]; 4450 ur.push_back(amodpll(q0*i1+q1*i0,p,invp)); 4451 i1=it[3]; 4452 ur.push_back(amodpll(q0*i0+q1*i1,p,invp)); 4453 i0=it[4]; 4454 
ur.push_back(amodpll(q0*i1+q1*i0,p,invp)); 4455 } 4456 itmid+=4; 4457 #endif 4458 for (;it!=itmid;++it){ 4459 ur.push_back(amodpll(q0*it[0]+q1*it[1],p,invp)); 4460 } 4461 #if 1 4462 itend-=4; 4463 i0=it[0]; 4464 for (;it<itend;it+=4,jt+=4){ 4465 i1=it[1]; 4466 ur.push_back(amodpll(q0*i0+q1*i1+jt[0],p,invp)); 4467 i0=it[2]; 4468 ur.push_back(amodpll(q0*i1+q1*i0+jt[1],p,invp)); 4469 i1=it[3]; 4470 ur.push_back(amodpll(q0*i0+q1*i1+jt[2],p,invp)); 4471 i0=it[4]; 4472 ur.push_back(amodpll(q0*i1+q1*i0+jt[3],p,invp)); 4473 } 4474 itend+=4; 4475 #endif 4476 for (;it!=itend;++jt,++it){ 4477 ur.push_back(amodpll(q0*it[0]+q1*it[1]+*jt,p,invp)); 4478 } 4479 ur.push_back(amodpll(q0*it[0]+*jt,p,invp)); 4480 } 4481 } 4482 4483 // v <- w-v % m submodnegll(vector<longlong> & v,const vector<longlong> & w,longlong m)4484 void submodnegll(vector<longlong> & v,const vector<longlong> & w,longlong m){ 4485 vector<longlong>::iterator it=v.begin(),itend=v.end(); 4486 vector<longlong>::const_iterator jt=w.begin(),jtend=w.end(); 4487 longlong addv=longlong(jtend-jt)-longlong(itend-it); 4488 if (addv>0){ 4489 v.insert(v.begin(),addv,0); 4490 it=v.begin(); 4491 itend=v.end(); 4492 } 4493 else { 4494 itend -= jtend-jt; 4495 for (;it!=itend;++it) 4496 *it = -*it; 4497 itend += jtend-jt; 4498 } 4499 for (;it!=itend;++jt,++it){ 4500 longlong a=*it,b=*jt; 4501 a += (a>>63)&m; 4502 b += (b>>63)&m; 4503 *it = b-a; 4504 } 4505 for (it=v.begin();it!=itend;++it){ 4506 if (*it) 4507 break; 4508 } 4509 if (it!=v.begin()) 4510 v.erase(v.begin(),it); 4511 } 4512 hgcd_iter_ll(const vector<longlong> & a0i,const vector<longlong> & b0i,longlong m,vector<longlong> & ua,vector<longlong> & ub,vector<longlong> & va,vector<longlong> & vb,longlong p,vector<longlong> & coeffv,vector<longlong> & degv,vector<longlong> & a,vector<longlong> & b,vector<longlong> & q,vector<longlong> & r,vector<longlong> & ur,vector<longlong> & vr)4513 bool hgcd_iter_ll(const vector<longlong> & a0i,const vector<longlong> & b0i,longlong 
m,vector<longlong> & ua,vector<longlong> & ub,vector<longlong> & va,vector<longlong> &vb,longlong p,vector<longlong> & coeffv,vector<longlong> & degv,vector<longlong> &a,vector<longlong> & b,vector<longlong> & q,vector<longlong> & r,vector<longlong> & ur,vector<longlong> & vr){ 4514 if (debug_infolevel>2) 4515 CERR << CLOCK()*1e-6 << " halfgcd iter m=" << m << " dega0/a1 " << a0i.size() << "," << b0i.size() << '\n'; 4516 longlong as=a0i.size(); 4517 a.resize(a0i.size()); 4518 b.resize(b0i.size()); 4519 copy(a0i.begin(),a0i.end(),a.begin()); 4520 copy(b0i.begin(),b0i.end(),b.begin()); 4521 r.reserve(as); 4522 // initializes ua to 1 and ub to 0, the coeff of u in ua*a+va*b=a 4523 ua.reserve(as); ua.clear(); ua.push_back(1); ub.clear(); ub.reserve(as); ur.clear(); ur.reserve(as); 4524 va.reserve(as); va.clear(); vb.clear(); vb.reserve(as); vb.push_back(1); vr.clear(); vr.reserve(as); 4525 vector<longlong>::iterator it,itend; 4526 // DivRem: a = bq+r 4527 // hence ur <- ua-q*ub, vr <- va-q*vb verify 4528 // ur*a+vr*b=r 4529 // a <- b, b <- r, ua <- ub and ub<- ur 4530 #if 1 4531 for (;;){ 4532 int n=int(b.size())-1; 4533 if (n<m){ // degree(b) is small enough 4534 if (debug_infolevel>2) 4535 CERR << CLOCK()*1e-6 << " halfgcd iter end" << a0i.size() << "," << b0i.size() << '\n'; 4536 return true; 4537 } 4538 if (!degv.empty()){ 4539 degv.push_back(degv.back()+b.size()-a.size()); 4540 coeffv.push_back(b[0]); 4541 } 4542 DivRem(a,b,p,q,r); // division works always 4543 swap(a,b); swap(b,r); // a=b; b=r; 4544 // ur=ua-q*ub, ua<-ub, ub<-ur 4545 if (q.size()==2){ // here ua.size()<ub.size() 4546 if (ub.empty()) 4547 swap(ua,ub); 4548 else { 4549 a_minus_qsize2_b(ua,q,ub,ur,p); 4550 swap(ua,ub); swap(ub,ur); 4551 } 4552 a_minus_qsize2_b(va,q,vb,vr,p); 4553 swap(va,vb); swap(vb,vr); 4554 continue; 4555 } 4556 if (ub.empty()) 4557 swap(ua,ub); 4558 else { 4559 smallmultll(q,ub,ur,p); 4560 submodnegll(ur,ua,p); 4561 swap(ua,ub); swap(ub,ur); // ua=ub; ub=ur; 4562 } 4563 if 
(vb.size()==1 && vb.front()==1) vr.swap(q); else smallmultll(q,vb,vr,p); 4564 submodnegll(vr,va,p); 4565 swap(va,vb); swap(vb,vr); // ua=ub; ub=ur; 4566 } 4567 return false; // never reached 4568 #else 4569 for (;;){ 4570 longlong n=longlong(b.size())-1; 4571 if (n<m){ // degree(b) is small enough 4572 if (debug_infolevel>2) 4573 CERR << CLOCK()*1e-6 << " halfgcd iter compute v " << a0i.size() << "," << b0i.size() << '\n'; 4574 // va=(a-ua*a0i)/b0i 4575 smallmultll(ua,a0i,ur,p); 4576 submodnegll(ur,a,p); 4577 DivRem(ur,b0i,p,va,r); // shoud be va 4578 // vb=(b-ub*a0i)/b0i 4579 smallmultll(ub,a0i,ur,p); 4580 submodnegll(ur,b,p); 4581 DivRem(ur,b0i,p,vb,r); // should be vb 4582 if (debug_infolevel>2) 4583 CERR << CLOCK()*1e-6 << " halfgcd iter end" << a0i.size() << "," << b0i.size() << '\n'; 4584 //CERR << a0 << " " << a1 << " " << A << " " << B << " " << C << " " << D << '\n'; 4585 return true; 4586 } 4587 if (!degv.empty()){ 4588 degv.push_back(degv.back()+b.size()-a.size()); 4589 coeffv.push_back(b[0]); 4590 } 4591 DivRem(a,b,p,q,r); // division works always 4592 swap(a,b); swap(b,r); // a=b; b=r; 4593 // ur=ua-q*ub, ua<-ub, ub<-ur 4594 if (ub.empty()){ 4595 swap(ua,ub); 4596 continue; 4597 } 4598 if (q.size()==2){ // here ua.size()<ub.size() 4599 ur.clear(); 4600 a_minus_qsize2_b(ua,q,ub,ur,p); 4601 swap(ua,ub); swap(ub,ur);continue; 4602 } 4603 smallmultll(q,ub,ur,p); 4604 submodnegll(ur,ua,p); 4605 swap(ua,ub); swap(ub,ur); // ua=ub; ub=ur; 4606 } 4607 return false; // never reached 4608 #endif 4609 } 4610 4611 // reverse *a..*b and neg fft_rev1(longlong * a,longlong * b,int128_t p)4612 void fft_rev1(longlong * a,longlong *b,int128_t p){ 4613 for (;b>a;++a,--b){ 4614 longlong tmp=*a; 4615 *a=p-*b; 4616 *b=p-tmp; 4617 } 4618 if (a==b) 4619 *a=p-*a; 4620 } 4621 fft_reverse(vector<longlong> & W,longlong p)4622 void fft_reverse(vector<longlong> & W,longlong p){ 4623 if (W.size()<2) 4624 return; 4625 longlong * a=&W.front(); 4626 #ifdef GIAC_LLPRECOND 4627 longlong 
N=W.size()/2; 4628 fft_rev1(a+1,a+N-1,p); 4629 fft_rev1(a+N+1,a+2*N-1,1); 4630 #else 4631 fft_rev1(a+1,a+W.size()-1,p); 4632 #endif 4633 } 4634 4635 #ifdef GIAC_LLPRECOND // preconditionned fft2wp(vector<longlong> & W,longlong n,longlong w,longlong p)4636 void fft2wp(vector<longlong> & W,longlong n,longlong w,longlong p){ 4637 W.resize(n); 4638 w=w % p; 4639 if (w<0) w += p; 4640 longlong N=n/2; 4641 ulonglong ww=1; 4642 for (longlong i=0;i<N;++i){ 4643 W[i]=ww; 4644 ulonglong u=preconditionner_ll(ww,p); 4645 W[N+i]=u; 4646 ww=precond_mulmodll(w,ww,u,p); 4647 //ww=(ww*int128_t(w))%p; 4648 } 4649 } 4650 #else fft2wp(vector<longlong> & W,longlong n,longlong w,longlong p)4651 void fft2wp(vector<longlong> & W,longlong n,longlong w,longlong p){ 4652 W.reserve(n/2); 4653 long_double invp=long_double(1)/p; 4654 w=amodpll(w,p,invp);//w % p; 4655 if (w<0) w += p; 4656 longlong N=n/2,ww=1; 4657 for (longlong i=0;i<N;++i){ 4658 W.push_back(ww); 4659 ww=mulmodll(ww,w,p,invp);//(ww*int128_t(w))%p; 4660 ww+=(ww>>63)&p; 4661 } 4662 } 4663 #endif 4664 fft2wp5(vector<longlong> & W,longlong n,longlong w)4665 void fft2wp5(vector<longlong> & W,longlong n,longlong w){ 4666 W.reserve(n/2); 4667 w=w % p5; 4668 if (w<0) w += p5; 4669 longlong N=n/2,ww=1; 4670 for (longlong i=0;i<N;++i){ 4671 W.push_back(ww); 4672 ww=(ww*int128_t(w))%p5; 4673 } 4674 } 4675 4676 #ifdef GIAC_LLPRECOND fft_loop_p(longlong & A,longlong & An2,longlong * W,longlong n2,longlong p,long_double invp)4677 inline void fft_loop_p(longlong & A,longlong & An2,longlong * W,longlong n2,longlong p,long_double invp){ 4678 longlong s=A; 4679 longlong t=precond_mulmodll(An2,*W,*(W+n2),p); 4680 //longlong t1=mulmodll(*W,An2,p,invp); t1+=(t1>>63)&p;if ((t-t1)%p!=0) 4681 // CERR << "err\n"; 4682 A = addmodll(s,t,p); 4683 An2 = submodll(s,t,p); 4684 } fft_loop_p(longlong & A,longlong & An2,longlong W,longlong Winv,longlong p)4685 inline void fft_loop_p(longlong & A,longlong & An2,longlong W,longlong Winv,longlong p){ 4686 longlong 
s=A; 4687 longlong t=precond_mulmodll(An2,W,Winv,p); 4688 //longlong t1=mulmodll(*W,An2,p,invp); t1+=(t1>>63)&p;if ((t-t1)%p!=0) 4689 // CERR << "err\n"; 4690 A = addmodll(s,t,p); 4691 An2 = submodll(s,t,p); 4692 } 4693 #else fft_loop_p(longlong & A,longlong & An2,longlong * W,longlong n2,longlong p,long_double invp)4694 inline void fft_loop_p(longlong & A,longlong & An2,longlong * W,longlong n2,longlong p,long_double invp){ 4695 longlong s=A; 4696 longlong t = mulmodll(*W,An2,p,invp); 4697 A = addmodll(s,t,p); 4698 An2 = submodll(s,t,p); 4699 } 4700 #endif 4701 4702 #if !defined NUMWORKS // !defined VISUALC && !defined USE_GMP_REPLACEMENTS && defined GIAC_LLPRECOND // de-recurse fft2pnopermbefore(longlong * A,longlong n,longlong * W,longlong p,long_double invp,longlong step)4703 static void fft2pnopermbefore( longlong *A, longlong n, longlong *W,longlong p,long_double invp,longlong step) { 4704 if (n==0) 4705 CERR << "bug\n"; 4706 if (n<=1 ) return; 4707 if (n==2){ 4708 longlong f0=A[0],f1=A[1]; 4709 A[0]=addmodll(f0,f1,p); 4710 A[1]=submodll(f0,f1,p); 4711 return; 4712 } 4713 longlong n2s=n/2*step; 4714 // start by groups of 4 4715 step=n2s/2; 4716 longlong w1=W[step],w1surp=W[3*step]; 4717 longlong *Aeff=A; 4718 for (longlong pos=0;pos<n;pos+=4,Aeff+=4){ 4719 longlong f0=Aeff[0],f1=Aeff[1],f2=Aeff[2],f3=Aeff[3], 4720 f01=precond_mulmodll(f1-f3+p,w1,w1surp,p), 4721 f02p=addmodll(f0,f2,p),f02m=submodll(f0,f2,p),f13=addmodll(f1,f3,p); 4722 Aeff[0]=addmodll(f02p,f13,p); 4723 Aeff[1]=addmodll(f02m,f01,p); 4724 Aeff[2]=submodll(f02p,f13,p); 4725 Aeff[3]=submodll(f02m,f01,p); 4726 } 4727 longlong Wstack_[MAX_INTSTACK/2]; 4728 longlong *Wstack=0; 4729 if (n>MAX_INTSTACK/2) 4730 Wstack=(longlong *)malloc(n*sizeof(longlong)); 4731 else 4732 Wstack=Wstack_; 4733 // now by 8, then by 16, etc. 
4734 for (longlong taille=8;taille<=n;taille*=2){ 4735 step /= 2; 4736 Aeff=A; 4737 if (taille==n && step==1){ 4738 longlong *An2=Aeff+n/2,*Aend=An2,*Weff=W+n2s; 4739 for(; Aeff<Aend; ) { 4740 fft_loop_p(Aeff[0],An2[0],W[0],Weff[0],p); 4741 fft_loop_p(Aeff[1],An2[1],W[1],Weff[1],p); 4742 fft_loop_p(Aeff[2],An2[2],W[2],Weff[2],p); 4743 fft_loop_p(Aeff[3],An2[3],W[3],Weff[3],p); 4744 Aeff+=4; An2+=4; W+=4; Weff+=4; 4745 } 4746 break; 4747 } 4748 longlong * end=Wstack+taille,*source=W,*source2=W+n2s; 4749 for (longlong * target=Wstack;target<end;target+=8){ 4750 target[0]=*source; source+=step; 4751 target[1]=*source; source+=step; 4752 target[2]=*source; source+=step; 4753 target[3]=*source; source+=step; 4754 target[4]=*source2; source2+=step; 4755 target[5]=*source2; source2+=step; 4756 target[6]=*source2; source2+=step; 4757 target[7]=*source2; source2+=step; 4758 } 4759 for (longlong pos=0;pos<n;pos+=taille){ 4760 longlong *An2=Aeff+taille/2,*Aend=An2,*Weff=Wstack; 4761 longlong s=*Aeff,t1=*An2; 4762 *Aeff=addmodll(s,t1,p); 4763 *An2=submodll(s,t1,p); 4764 fft_loop_p(Aeff[1],An2[1],Weff[1],Weff[5],p); 4765 fft_loop_p(Aeff[2],An2[2],Weff[2],Weff[6],p); 4766 fft_loop_p(Aeff[3],An2[3],Weff[3],Weff[7],p); 4767 Aeff+=4; An2+=4; Weff+=8; 4768 for (;Aeff<Aend;){ 4769 fft_loop_p(Aeff[0],An2[0],Weff[0],Weff[4],p); 4770 fft_loop_p(Aeff[1],An2[1],Weff[1],Weff[5],p); 4771 fft_loop_p(Aeff[2],An2[2],Weff[2],Weff[6],p); 4772 fft_loop_p(Aeff[3],An2[3],Weff[3],Weff[7],p); 4773 Aeff+=4; An2+=4; Weff+=8; 4774 } 4775 Aeff+=taille/2; 4776 } 4777 } 4778 if (n>MAX_INTSTACK/2) 4779 free(Wstack); 4780 } 4781 4782 #else // de-recurse fft2pnopermbefore(longlong * A,longlong n,longlong * W,longlong p,long_double invp,longlong step)4783 static void fft2pnopermbefore( longlong *A, longlong n, longlong *W,longlong p,long_double invp,longlong step) { 4784 if ( n==1 ) return; 4785 // if p is fixed, the code is about 2* faster 4786 if (n==4){ 4787 longlong w1=W[step]; 4788 longlong 
f0=A[0],f1=A[1],f2=A[2],f3=A[3], 4789 #ifdef GIAC_LLPRECOND 4790 f01=precond_mulmodll(submodll(f1,f3,p),w1,W[3*step],p), 4791 #else 4792 f01=mulmodll(submodll(f1,f3,p),w1,p,invp), 4793 #endif 4794 f02p=addmodll(f0,f2,p),f02m=submodll(f0,f2,p),f13=addmodll(f1,f3,p); 4795 A[0]=addmodll(f02p,f13,p); 4796 A[1]=addmodll(f02m,f01,p); 4797 A[2]=submodll(f02p,f13,p); 4798 A[3]=submodll(f02m,f01,p); 4799 return; 4800 } 4801 if (n==2){ 4802 longlong f0=A[0],f1=A[1]; 4803 A[0]=addmodll(f0,f1,p); 4804 A[1]=submodll(f0,f1,p); 4805 return; 4806 } 4807 fft2pnopermbefore(A, n/2, W,p,invp,2*step); 4808 fft2pnopermbefore(A+n/2, n/2, W,p,invp,2*step); 4809 longlong * An2=A+n/2; 4810 longlong * Aend=A+n/2; 4811 longlong n2s = n/2*step; // n2%4==0 4812 for(; A<Aend; ) { 4813 fft_loop_p(*A,*An2,W,n2s,p,invp); 4814 ++A; ++An2; W +=step ; 4815 fft_loop_p(*A,*An2,W,n2s,p,invp); 4816 ++A; ++An2; W +=step ; 4817 fft_loop_p(*A,*An2,W,n2s,p,invp); 4818 ++A; ++An2; W += step; 4819 fft_loop_p(*A,*An2,W,n2s,p,invp); 4820 ++A; ++An2; W +=step; 4821 } 4822 } 4823 #endif 4824 4825 #ifdef GIAC_LLPRECOND fft_loop_p_(longlong & Acur,longlong & An2cur,longlong * Wcur,longlong n2,longlong p,long_double invp)4826 inline void fft_loop_p_(longlong & Acur,longlong & An2cur,longlong * Wcur,longlong n2,longlong p,long_double invp){ 4827 longlong Ai,An2i; 4828 Ai=Acur; 4829 An2i=An2cur; 4830 Acur = addmodll(Ai,An2i,p); 4831 An2cur=precond_mulmodll((Ai-An2i)+p,*Wcur,*(Wcur+n2),p); 4832 } 4833 #else fft_loop_p_(longlong & Acur,longlong & An2cur,longlong * Wcur,longlong n2,longlong p,long_double invp)4834 inline void fft_loop_p_(longlong & Acur,longlong & An2cur,longlong * Wcur,longlong n2,longlong p,long_double invp){ 4835 longlong Ai,An2i; 4836 Ai=Acur; 4837 An2i=An2cur; 4838 Acur = addmodll(Ai,An2i,p); 4839 An2cur=amodpll((int128_t(Ai)+(p-An2i))* *Wcur,p,invp); 4840 An2cur += (An2cur>>63)&p; 4841 return; 4842 longlong chk=(((int128_t(Ai)+(p-An2i))* *Wcur) % p); 4843 if ( An2cur!=chk) 
//(An2cur-int128_t(chk))%p!=0) 4844 CERR<<"err\n"; 4845 An2cur=chk; 4846 } 4847 #endif 4848 4849 #if !defined NUMWORKS // !defined VISUALC && !defined USE_GMP_REPLACEMENTS && defined GIAC_LLPRECOND // de-recurse fft2pnopermafter(longlong * A,longlong n,longlong * W,longlong p,long_double invp,longlong step)4850 static void fft2pnopermafter( longlong *A, longlong n, longlong *W,longlong p,long_double invp,longlong step) { 4851 if (n==0) 4852 CERR << "bug\n"; 4853 if (n<=1 ) return; 4854 if (n==2){ 4855 longlong f0=A[0],f1=A[1]; 4856 A[0]=addmodll(f0,f1,p); 4857 A[1]=submodll(f0,f1,p); 4858 return; 4859 } 4860 longlong n2s=n/2*step; 4861 // group by decreasing size 4862 longlong Wstack_[MAX_INTSTACK/2]; 4863 longlong *Wstack=0; 4864 if (n>MAX_INTSTACK/2) 4865 Wstack=(longlong *)malloc(n*sizeof(longlong)); 4866 else 4867 Wstack=Wstack_; 4868 longlong * end=Wstack+n,*source=W,*source2=W+n2s; 4869 for (longlong * target=Wstack;target<end;target+=8){ 4870 target[0]=*source; source+=step; 4871 target[1]=*source; source+=step; 4872 target[2]=*source; source+=step; 4873 target[3]=*source; source+=step; 4874 target[4]=*source2; source2+=step; 4875 target[5]=*source2; source2+=step; 4876 target[6]=*source2; source2+=step; 4877 target[7]=*source2; source2+=step; 4878 } 4879 //size_t T=n*sizeof(longlong); 4880 //longlong * Wstack=(longlong*)stack_or_heap_alloc(T);//longlong Wstack[taille]; 4881 for (longlong taille=n;taille>=8;taille/=2){ 4882 longlong * Aeff=A; 4883 for (longlong pos=0;pos<n;pos+=taille){ 4884 longlong *An2=Aeff+taille/2,*Aend=An2,*Weff=Wstack; 4885 longlong s=*Aeff,t1=*An2; 4886 *Aeff=addmodll(s,t1,p); 4887 *An2=submodll(s,t1,p); 4888 fft_loop_p_(Aeff[1],An2[1],&Weff[1],4,p,invp); 4889 fft_loop_p_(Aeff[2],An2[2],&Weff[2],4,p,invp); 4890 fft_loop_p_(Aeff[3],An2[3],&Weff[3],4,p,invp); 4891 Aeff+=4; An2+=4; Weff+=8; 4892 for (;Aeff<Aend;){ 4893 fft_loop_p_(Aeff[0],An2[0],&Weff[0],4,p,invp); 4894 fft_loop_p_(Aeff[1],An2[1],&Weff[1],4,p,invp); 4895 
fft_loop_p_(Aeff[2],An2[2],&Weff[2],4,p,invp); 4896 fft_loop_p_(Aeff[3],An2[3],&Weff[3],4,p,invp); 4897 Aeff+=4; An2+=4; Weff+=8; 4898 } 4899 Aeff+=taille/2; 4900 } 4901 if (taille==8) 4902 break; 4903 longlong * end=Wstack+taille,*source=Wstack; 4904 for (longlong * target=Wstack;source<end;source+=16,target+=8){ 4905 target[0]=source[0]; 4906 target[1]=source[2]; 4907 target[4]=source[4]; 4908 target[5]=source[6]; 4909 target[2]=source[8]; 4910 target[3]=source[10]; 4911 target[6]=source[12]; 4912 target[7]=source[14]; 4913 } 4914 } 4915 if (n>MAX_INTSTACK/2) 4916 free(Wstack); 4917 // finish by groups of 4 4918 step=n2s/2; 4919 longlong w1=W[step],w1surp=W[3*step]; 4920 longlong *Aeff=A; 4921 for (longlong pos=0;pos<n;pos+=4,Aeff+=4){ 4922 longlong f0=Aeff[0],f1=Aeff[1],f2=Aeff[2],f3=Aeff[3], 4923 f01=precond_mulmodll(f1-f3+p,w1,w1surp,p), 4924 f02p=addmodll(f0,f2,p),f02m=submodll(f0,f2,p),f13=addmodll(f1,f3,p); 4925 Aeff[0]=addmodll(f02p,f13,p); 4926 Aeff[1]=addmodll(f02m,f01,p); 4927 Aeff[2]=submodll(f02p,f13,p); 4928 Aeff[3]=submodll(f02m,f01,p); 4929 } 4930 } 4931 4932 #else // de-recurse fft2pnopermafter(longlong * A,longlong n,longlong * W,longlong p,long_double invp,longlong step)4933 static void fft2pnopermafter( longlong *A, longlong n, longlong *W,longlong p,long_double invp,longlong step) { 4934 if (n==1) return; 4935 if (n==4){ 4936 longlong w1=W[step]; 4937 longlong f0=A[0],f1=A[1],f2=A[2],f3=A[3], 4938 #ifdef GIAC_LLPRECOND 4939 f01=precond_mulmodll(submodll(f1,f3,p),w1,W[3*step],p), 4940 #else 4941 f01=mulmodll(submodll(f1,f3,p),w1,p,invp), 4942 #endif 4943 f02p=addmodll(f0,f2,p),f02m=submodll(f0,f2,p),f13=addmodll(f1,f3,p); 4944 A[0]=addmodll(f02p,f13,p); 4945 A[1]=addmodll(f02m,f01,p); 4946 A[2]=submodll(f02p,f13,p); 4947 A[3]=submodll(f02m,f01,p); 4948 return; 4949 } 4950 if (n==2){ 4951 longlong f0=A[0],f1=A[1]; 4952 A[0]=addmodll(f0,f1,p); 4953 A[1]=submodll(f0,f1,p); 4954 return; 4955 } 4956 // Step 1 : arithmetic 4957 longlong *An2=A+n/2; 
4958 longlong * Acur=A,*An2cur=An2,*Wcur=W; 4959 longlong n2=n/2*step; 4960 for (;Acur!=An2;){ 4961 longlong Ai,An2i; 4962 fft_loop_p_(*Acur,*An2cur,Wcur,n2,p,invp); 4963 ++Acur;++An2cur; Wcur +=step; 4964 fft_loop_p_(*Acur,*An2cur,Wcur,n2,p,invp); 4965 ++Acur;++An2cur; Wcur += step; 4966 fft_loop_p_(*Acur,*An2cur,Wcur,n2,p,invp); 4967 ++Acur;++An2cur; Wcur += step; 4968 fft_loop_p_(*Acur,*An2cur,Wcur,n2,p,invp); 4969 ++Acur;++An2cur; Wcur += step; 4970 } 4971 // Step 2 : recursive calls 4972 fft2pnopermafter(A, n/2, W,p,invp,2*step); 4973 fft2pnopermafter(An2, n/2, W,p,invp,2*step); 4974 } 4975 #endif 4976 4977 // a=source mod x^N-1 mod p reverse_assign(const vector<longlong> & source,vector<longlong> & a,longlong N,longlong p)4978 void reverse_assign(const vector<longlong> & source,vector<longlong> & a,longlong N,longlong p){ 4979 a.clear(); a.resize(N); 4980 if (source.empty()) return; 4981 const longlong * stop=&*source.begin(),*start=&*source.end()-1; 4982 longlong i=0; 4983 for (;i<N && start>=stop;i++,--start){ 4984 longlong k=*start; 4985 k += (k>>63)&p; // add p if k is negative 4986 a[i]=k; 4987 } 4988 for (i=0;start>=stop;--start){ 4989 longlong k=*start; 4990 k += (k>>63)&p; 4991 k += (a[i]-p); 4992 k += (k>>63)&p; 4993 a[i]= k ; 4994 ++i; 4995 if (i==N) 4996 i=0; 4997 } 4998 } 4999 makemodulop(longlong * a,longlong as,longlong modulo)5000 void makemodulop(longlong * a,longlong as,longlong modulo){ 5001 longlong *aend=a+as; 5002 for (;a!=aend;++a){ 5003 *a %= modulo; 5004 // if (*a<0) *a += modulo; // *a -= (unsigned(modulo-*a)>>31)*modulo; 5005 } 5006 } 5007 makepositive(longlong * p,longlong n,longlong modulo)5008 void makepositive(longlong * p,longlong n,longlong modulo){ 5009 longlong * pend=p+n; 5010 for (;p!=pend;++p){ 5011 longlong P=*p; 5012 P += (P>>63) & modulo; 5013 P += (P>>63) & modulo; 5014 *p=P; 5015 } 5016 } 5017 to_fft(const std::vector<longlong> & a,longlong modulo,longlong w,std::vector<longlong> & Wp,longlong n,std::vector<longlong> 
& f,bool reverse,bool makeplus,bool makemod=true)5018 void to_fft(const std::vector<longlong> & a,longlong modulo,longlong w,std::vector<longlong> & Wp,longlong n,std::vector<longlong> & f,bool reverse,bool makeplus,bool makemod=true){ 5019 long_double invp=long_double(1)/modulo; 5020 #if defined GIAC_LLPRECOND 5021 longlong nw=n; 5022 #else 5023 longlong nw=n/2; 5024 #endif 5025 longlong s=giacmin(a.size(),n); 5026 longlong logrs=sizeinbase2(n-1); 5027 if (reverse){ 5028 if (&f==&a){ 5029 if (f.size()>n){ 5030 vector<longlong> tmp(n); 5031 reverse_assign(a,tmp,n,modulo); 5032 tmp.swap(f); 5033 } 5034 else { 5035 vector<longlong>::iterator it=f.begin(),itend=f.end(); 5036 for (;it!=itend;++it) 5037 *it += (*it>>63)&modulo; 5038 std::reverse(f.begin(),f.end()); 5039 f.resize(n); 5040 } 5041 } 5042 else { 5043 f.resize(n); 5044 reverse_assign(a,f,n,modulo); 5045 } 5046 } 5047 else { 5048 if (&f!=&a) 5049 f=a; 5050 f.resize(n); 5051 } 5052 if (makemod) 5053 makemodulop(&f.front(),s,modulo); 5054 if (makeplus) makepositive(&f.front(),s,modulo); 5055 if (Wp.size()<nw || Wp[0]==0){ 5056 Wp.clear(); 5057 fft2wp(Wp,n,w,modulo); 5058 } 5059 fft2pnopermafter(&f.front(),n,&Wp.front(),modulo,invp,Wp.size()/nw); 5060 } 5061 from_fft(const std::vector<longlong> & f,longlong p,std::vector<longlong> & Wp,std::vector<longlong> & res,bool reverseatend,bool revw)5062 void from_fft(const std::vector<longlong> & f,longlong p,std::vector<longlong> & Wp,std::vector<longlong> & res,bool reverseatend,bool revw){ 5063 long_double invp=long_double(1)/p; 5064 if (&res!=&f) res=f; 5065 longlong n=res.size(); 5066 #if defined GIAC_LLPRECOND 5067 int nw=n; 5068 #else 5069 int nw=n/2; 5070 #endif 5071 if (revw) fft_reverse(Wp,p); 5072 fft2pnopermbefore(&res.front(),n,&Wp.front(),p,invp,Wp.size()/nw); 5073 if (revw) fft_reverse(Wp,p); 5074 longlong i=invmodll(n,p); 5075 //mulmodll(res,i,p,invp); 5076 i += (i>>63)&p; 5077 precond_mulmodll(res,i,preconditionner_ll(i,p),p); 5078 if (reverseatend) 
5079 reverse(res.begin(),res.end()); 5080 } 5081 fft_ab_cd_p(const vector<longlong> & a,const vector<longlong> & b,const vector<longlong> & c,const vector<longlong> & d,vector<longlong> & res,longlong p)5082 void fft_ab_cd_p(const vector<longlong> &a,const vector<longlong> &b,const vector<longlong> & c,const vector<longlong> &d,vector<longlong> & res,longlong p){ 5083 long_double invp=long_double(1)/p; 5084 longlong s=a.size(); 5085 res.resize(s); 5086 for (longlong i=0;i<s;++i){ 5087 res[i]=amodpll(int128_t(a[i])*b[i]+int128_t(c[i])*d[i],p,invp); 5088 } 5089 } 5090 powmodll(longlong a,ulonglong n,longlong m,long_double invm)5091 longlong powmodll(longlong a,ulonglong n,longlong m,long_double invm){ 5092 if (!n) 5093 return 1; 5094 if (n==1) 5095 return a; 5096 if (n==2) 5097 return amodpll(a*int128_t(a),m,invm); 5098 longlong b=a%m,c=1; 5099 while (n>0){ 5100 if (n%2) 5101 c=amodpll(c*int128_t(b),m,invm); 5102 n /= 2; 5103 b=amodpll(b*int128_t(b),m,invm); 5104 } 5105 return c; 5106 } 5107 powmodll(longlong a,ulonglong n,longlong m)5108 longlong powmodll(longlong a,ulonglong n,longlong m){ 5109 return powmodll(a,n,m,long_double(1)/m); 5110 } 5111 5112 // for p prime such that p-1 is divisible by 2^N, compute a 2^N-th root of 1 5113 // otherwise return 0 nthroot(longlong p,longlong N)5114 longlong nthroot(longlong p,longlong N){ 5115 longlong expo=(p-1)>>N; 5116 if ( (expo<<N)!=p-1) 5117 return 0; 5118 long_double invp=long_double(1)/p; 5119 for (longlong n=2;;++n){ 5120 longlong w=powmodll(n,expo,p,invp); // w=n^((p-1)/2^N) 5121 int128_t r=w; 5122 for (longlong i=1;i<N;++i) 5123 r=amodpll(r*r,p,invp); 5124 if (r==p-1) // r=w^(2^(N-1))=n^((p-1)/2) 5125 return w; 5126 } 5127 } 5128 find_wll(vector<longlong> & Wp,longlong shift,longlong p)5129 longlong find_wll(vector<longlong> & Wp,longlong shift,longlong p){ 5130 longlong n=1<<shift,w=0; 5131 #if defined GIAC_LLPRECOND 5132 longlong ws=Wp.size(); 5133 #else 5134 longlong ws=2*Wp.size(); 5135 #endif 5136 if (ws/n){ 
5137 w=Wp[ws/n]; 5138 longlong wp=powmodll(w,n/2,p); 5139 if (wp!=p-1){ 5140 w=0; Wp.clear(); 5141 } 5142 //CERR << Wp << endl; 5143 } 5144 if (w==0 && p!=p1 && p!=p2 && p!=p3) 5145 w=nthroot(p,shift); 5146 return w; 5147 } 5148 5149 // [[RA,RB],[RC,RD]]*[a0,a1]->[a,b] matrix22lltimesvect(const vector<longlong> & RA,const vector<longlong> & RB,const vector<longlong> & RC,const vector<longlong> & RD,const vector<longlong> & a0,const vector<longlong> & a1,longlong maxadeg,longlong maxbdeg,vector<longlong> & a,vector<longlong> & b,longlong p,vector<longlong> & ra,vector<longlong> & rb,vector<longlong> & rc,vector<longlong> & rd,vector<longlong> & Wp)5150 bool matrix22lltimesvect(const vector<longlong> & RA,const vector<longlong> & RB,const vector<longlong> & RC,const vector<longlong> & RD,const vector<longlong> & a0,const vector<longlong> &a1,longlong maxadeg,longlong maxbdeg,vector<longlong> & a,vector<longlong> &b,longlong p,vector<longlong> & ra,vector<longlong> & rb,vector<longlong> & rc,vector<longlong> & rd,vector<longlong> &Wp){ 5151 longlong dega0=a0.size()-1,m=(dega0+1)/2; 5152 longlong maxabdeg=giacmax(maxadeg,maxbdeg); 5153 longlong bbsize=giacmin(maxabdeg+1,a0.size()); 5154 longlong ddsize=giacmin(maxabdeg+1,a1.size()); 5155 longlong Nreal=giacmax(bbsize+RC.size(),ddsize+RD.size())-2; 5156 int N2=giacmin(maxabdeg,Nreal); 5157 unsigned long l=sizeinbase2(N2)-1; 5158 longlong n=1<<(l+1); 5159 longlong w=find_wll(Wp,l+1,p); 5160 // vector<longlong> adbg,bdbg; 5161 if (!w) 5162 return false; 5163 to_fft(RA,p,w,Wp,n,b,true,false,false);ra.swap(b); 5164 to_fft(RB,p,w,Wp,n,b,true,false,false);rb.swap(b); 5165 to_fft(RC,p,w,Wp,n,b,true,false,false);rc.swap(b); 5166 to_fft(RD,p,w,Wp,n,b,true,false,false);rd.swap(b); 5167 to_fft(a0,p,w,Wp,n,a,true,false,false); 5168 to_fft(a1,p,w,Wp,n,b,true,false,false); 5169 fft_reverse(Wp,p); 5170 fft_ab_cd_p(rc,a,rd,b,rc,p); 5171 from_fft(rc,p,Wp,rc,true,false); 5172 fft_ab_cd_p(ra,a,rb,b,ra,p); 5173 
from_fft(ra,p,Wp,ra,true,false); 5174 a.swap(ra); 5175 b.swap(rc); 5176 trim_deg(a,maxabdeg); 5177 fast_trim_inplace(a,p); 5178 trim_deg(b,maxabdeg); 5179 fast_trim_inplace(b,p); 5180 return true; 5181 } 5182 matrix22ll(vector<longlong> & RA,vector<longlong> & RB,vector<longlong> & RC,vector<longlong> & RD,vector<longlong> & SA,vector<longlong> & SB,vector<longlong> & SC,vector<longlong> & SD,vector<longlong> & A,vector<longlong> & B,vector<longlong> & C,vector<longlong> & D,longlong p,vector<longlong> & tmp,vector<longlong> & Wp)5183 bool matrix22ll(vector<longlong> & RA,vector<longlong> &RB,vector<longlong> & RC,vector<longlong> &RD,vector<longlong> &SA,vector<longlong> &SB,vector<longlong> &SC,vector<longlong> &SD,vector<longlong> &A,vector<longlong> &B,vector<longlong> &C,vector<longlong> &D,longlong p,vector<longlong> & tmp,vector<longlong> & Wp){ 5184 // 2x2 matrix operations 5185 // [[SA,SB],[SC,SD]]*[[RC,RD],[RA,RB]] == [[RA*SB+RC*SA,RB*SB+RD*SA],[RA*SD+RC*SC,RB*SD+RD*SC]] 5186 int Nreal=giacmax(giacmax(RC.size(),RD.size()),giacmax(RA.size(),RB.size()))+giacmax(giacmax(SC.size(),SD.size()),giacmax(SA.size(),SB.size()))-2; 5187 unsigned long l=sizeinbase2(Nreal)-1; // l=gen(Nreal).bindigits()-1; // m=2^l <= Nreal < 2^{l+1} 5188 unsigned long n=1<<(l+1); 5189 longlong w=nthroot(p,l+1); 5190 // vector<longlong> adbg,bdbg; 5191 if (!w) 5192 return false; 5193 // makepositive set to false since reverse_assign should make RA positive 5194 to_fft(SC,p,w,Wp,n,SC,true,false,false); 5195 to_fft(SD,p,w,Wp,n,SD,true,false,false); 5196 to_fft(RA,p,w,Wp,n,RA,true,false,false); 5197 to_fft(RB,p,w,Wp,n,RB,true,false,false); 5198 to_fft(RC,p,w,Wp,n,RC,true,false,false); 5199 to_fft(RD,p,w,Wp,n,RD,true,false,false); 5200 to_fft(SA,p,w,Wp,n,SA,true,false,false); 5201 to_fft(SB,p,w,Wp,n,SB,true,false,false); 5202 fft_reverse(Wp,p); 5203 fft_ab_cd_p(RA,SB,RC,SA,A,p); 5204 from_fft(A,p,Wp,A,true,false); 5205 fft_ab_cd_p(RB,SB,RD,SA,SA,p); SA.swap(B); 5206 
from_fft(B,p,Wp,B,true,false); 5207 fft_ab_cd_p(RA,SD,RC,SC,RA,p); RA.swap(C); 5208 from_fft(C,p,Wp,C,true,false); 5209 fft_ab_cd_p(RB,SD,RD,SC,RB,p); RB.swap(D); 5210 from_fft(D,p,Wp,D,true,false); 5211 // fft_reverse(Wp,p); 5212 fast_trim_inplace(A,p); 5213 fast_trim_inplace(B,p); 5214 fast_trim_inplace(C,p); 5215 fast_trim_inplace(D,p); 5216 return true; 5217 } 5218 a_bc(const vector<longlong> & a,const vector<longlong> & b,const vector<longlong> & c,longlong p,vector<longlong> & res,vector<longlong> & tmp1)5219 void a_bc(const vector<longlong> &a,const vector<longlong> &b,const vector<longlong> &c,longlong p,vector<longlong> & res,vector<longlong> & tmp1){ 5220 // res=trim(a-b*c,env); return; 5221 size_t as=a.size(),bs=b.size(); 5222 if (as<=bs){ 5223 tmp1.clear(); tmp1.reserve(bs); 5224 if (c.size()==2){ 5225 a_minus_qsize2_b(a,c,b,tmp1,p); 5226 tmp1.swap(res); return; 5227 } 5228 } 5229 smallmultll(b,c,tmp1,p); 5230 submodnegll(tmp1,a,p); 5231 tmp1.swap(res); 5232 } 5233 hgcdll(const vector<longlong> & a0,const vector<longlong> & a1,longlong modulo,vector<longlong> & Wp,vector<longlong> & A,vector<longlong> & B,vector<longlong> & C,vector<longlong> & D,vector<longlong> & coeffv,vector<longlong> & degv,vector<longlong> & q,vector<longlong> & f,vector<longlong> & tmp0,vector<longlong> & tmp1,vector<longlong> & tmp2,vector<longlong> & tmp3)5234 bool hgcdll(const vector<longlong> & a0,const vector<longlong> & a1,longlong modulo,vector<longlong> & Wp,vector<longlong> &A,vector<longlong> &B,vector<longlong> &C,vector<longlong> &D,vector<longlong> & coeffv,vector<longlong> & degv,vector<longlong> & q,vector<longlong> & f,vector<longlong> & tmp0,vector<longlong> & tmp1,vector<longlong> & tmp2,vector<longlong> & tmp3){ // a0 is A in Yap, a1 is B 5235 vector<longlong> & g0=tmp2,&g1=tmp3; 5236 longlong dega0=a0.size()-1,dega1=a1.size()-1; 5237 longlong m=(dega0+1)/2; 5238 if (dega1<m){ 5239 D=A=vector<longlong>(1,1); 5240 B.clear(); C.clear(); 5241 return true; 5242 } 
5243 if (m<HGCD/2){ 5244 hgcd_iter_ll(a0,a1,m,A,C,B,D,modulo,coeffv,degv,q,f,tmp0,tmp1,tmp2,tmp3); 5245 return true; 5246 } 5247 vector<longlong> b0(a0.begin(),a0.end()-m); // quo(a0,x^m), A0 in Yap 5248 vector<longlong> b1(a1.begin(),a1.end()-m); // quo(a1,x^m), B0 in Yap 5249 // 1st recursive call 5250 vector<longlong> RA,RB,RC,RD; 5251 if (debug_infolevel>2) 5252 CERR << CLOCK()*1e-6 << " hgcdll 1st recursive call " << dega0 << "," << dega1 << '\n'; 5253 if (!hgcdll(b0,b1,modulo,Wp,RA,RB,RC,RD,coeffv,degv,tmp0,tmp1,A,B,C,D)) 5254 return false; 5255 if (debug_infolevel>2) 5256 CERR << CLOCK()*1e-6 << " hgcdll compute A' B' " << dega0 << "," << dega1 << '\n'; 5257 longlong maxadeg=dega0+1-giacmax(RA.size(),RB.size()),maxbdeg=dega0-m/2; 5258 matrix22lltimesvect(RA,RB,RC,RD,a0,a1,maxadeg,maxbdeg,b0,b1,modulo,tmp0,tmp1,tmp2,tmp3,Wp); 5259 longlong dege=b1.size()-1; 5260 if (dege<m){ 5261 A.swap(RA); B.swap(RB); C.swap(RC); D.swap(RD); 5262 return true; 5263 // A=RA; B=RB; C=RC; D=RD; return true; 5264 } 5265 if (dege>=b0.size()-1) 5266 COUT << "hgcdll error" << '\n'; 5267 if (debug_infolevel>2) 5268 CERR << CLOCK()*1e-6 << " hgcdll euclid div " << dega0 << "," << dega1 << '\n'; 5269 // 1 euclidean step 5270 if (!degv.empty()){ 5271 coeffv.push_back(b1[0]); 5272 degv.push_back(degv.back()+b1.size()-b0.size()); 5273 } 5274 DivRem(b0,b1,modulo,q,f); // q,f are Q,D in Yap 5275 // [[0,1],[1,-q]]*[[RA,RB],[RC,RD]] == [[RC,RD],[-RC*q+RA,-RD*q+RB]] 5276 a_bc(RA,RC,q,modulo,RA,tmp1); // RA=trim(RA-RC*q,&env); 5277 a_bc(RB,RD,q,modulo,RB,tmp1); // RB=trim(RB-RD*q,&env); 5278 longlong l=b1.size()-1,k=2*m-l; 5279 if (f.size()-1<m){ 5280 A.swap(RC); B.swap(RD); C.swap(RA); D.swap(RB); return true; 5281 } 5282 g0.resize(b1.size()-k); 5283 copy(b1.begin(),b1.end()-k,g0.begin()); // vector<int> g0(b1.begin(),b1.end()-k); // quo(b,x^k), C0 in Yap 5284 if (f.size()>k){ 5285 g1.resize(f.size()-k); 5286 copy(f.begin(),f.end()-k,g1.begin()); // quo(f,x^k), D0 in Yap 5287 } 5288 
vector<longlong> &SA=b0,&SB=b1,&SC=q,&SD=f; 5289 if (debug_infolevel>2) 5290 CERR << CLOCK()*1e-6 << " hgcdll 2nd recursive call " << dega0 << "," << dega1 << '\n'; 5291 if (!hgcdll(g0,g1,modulo,Wp,SA,SB,SC,SD,coeffv,degv,tmp0,tmp1,A,B,C,D)) 5292 return false; 5293 if (debug_infolevel>2) 5294 CERR << CLOCK()*1e-6 << " hgcdll end 2nd recursive call " << dega0 << "," << dega1 << '\n'; 5295 matrix22ll(RA,RB,RC,RD,SA,SB,SC,SD,A,B,C,D,modulo,tmp0,Wp); 5296 if (debug_infolevel>2) 5297 CERR << CLOCK()*1e-6 << " hgcdll end " << dega0 << "," << dega1 << '\n'; 5298 return true; 5299 } 5300 mulsmall(vector<longlong> & Q,longlong c,longlong m)5301 void mulsmall(vector<longlong> & Q,longlong c,longlong m){ 5302 if (c==1) return; 5303 //long_double invm=long_double(1)/long_double(m); 5304 longlong * ptr=&Q.front(), * ptrend=ptr+Q.size(); 5305 for (;ptr!=ptrend;++ptr){ 5306 //*ptr =amodpll(int128_t(*ptr)*c,m,invm); 5307 *ptr = (int128_t(*ptr)*c)%m; 5308 } 5309 } 5310 5311 // resultant of P and Q modulo m, modifies P and Q, resultantll(vector<longlong> & P,vector<longlong> & Q,vector<longlong> & tmp1,vector<longlong> & tmp2,longlong m)5312 longlong resultantll(vector<longlong> & P,vector<longlong> & Q,vector<longlong> & tmp1,vector<longlong> & tmp2,longlong m){ 5313 if (P.size()<Q.size()){ 5314 int res=(P.size() % 2==1 || Q.size() % 2==1)?1:-1; // (-1)^deg(P)*deg(Q) 5315 return res*resultantll(Q,P,tmp1,tmp2,m); 5316 } 5317 long_double invm=long_double(1)/m; 5318 if (P.size()==Q.size()){ 5319 longlong coeff=Q[0]; 5320 longlong invcoeff=invmodll(coeff,m); 5321 mulsmall(Q,invcoeff,m); 5322 DivRem(P,Q,m,tmp1,tmp2); 5323 int128_t res=(P.size() % 2==1)?1:-1; 5324 res *= powmodll(Q[0],longlong(P.size()-tmp2.size()),m,invm); 5325 return smodll(res*resultantll(Q,tmp2,P,tmp1,m), m); 5326 } 5327 // now P.size()>Q.size() 5328 int HGCD2=HGCD; 5329 if (Q.size()>=HGCD2){ 5330 vector<longlong> coeffv,degv,A,B,C,D,a,b,b0,b1,b2,b3,b4,b5,b6,b7,Wp; 5331 coeffv.reserve(Q.size()+1); 5332 
degv.reserve(Q.size()+1); 5333 degv.push_back(P.size()-1); 5334 while (Q.size()>=HGCD2){ 5335 int deg1=P.size(),deg2=(3*deg1)/4; 5336 double coeff=nextpow2(deg1/2)*2./deg1; 5337 double coeff2=nextpow2(deg2)/double(deg2); 5338 coeff=0.5*std::min(coeff,coeff2); 5339 if (Wp.empty() && m!=p1 && m!=p2 && m!=p3){ 5340 longlong l=sizeinbase2(int(3*2*coeff/4*deg1-1)); 5341 longlong w=find_wll(Wp,l,m); 5342 fft2wp(Wp,(1<<l),w,m); 5343 } 5344 if (debug_infolevel>2) 5345 CERR << CLOCK()*1e-6 << " deg " << P.size() << " coeff " << coeff << "\n"; 5346 int seuil=1+int(std::ceil((1-coeff)*P.size())); 5347 if (HGCD/4>=Q.size()-seuil){ 5348 coeffv.push_back(Q.front()); 5349 degv.push_back(degv.back()+Q.size()-P.size()); 5350 DivRem(P,Q,m,a,b); 5351 P.swap(Q); 5352 Q.swap(b); 5353 continue; 5354 } 5355 // 1st recursive call 5356 b0.resize(P.size()-seuil); 5357 copy(P.begin(),P.end()-seuil,b0.begin()); // quo(P,x^s), 5358 b1.resize(Q.size()-seuil); 5359 copy(Q.begin(),Q.end()-seuil,b1.begin()); // quo(Q,x^s), 5360 hgcdll(b0,b1,m,Wp,A,B,C,D,coeffv,degv,b2,b3,b4,b5,b6,b7); 5361 longlong maxadeg=P.size()-giacmax(A.size(),B.size()); 5362 matrix22lltimesvect(A,B,C,D,P,Q,maxadeg,maxadeg,a,b,m,b4,b5,b6,b7,Wp); 5363 if (b.size()<HGCD){ 5364 a.swap(P); b.swap(Q); break; 5365 } 5366 coeffv.push_back(b.front()); 5367 degv.push_back(degv.back()+b.size()-a.size()); 5368 DivRem(a,b,m,P,Q); 5369 b.swap(P); 5370 } 5371 degv.push_back(Q.size()-1); 5372 longlong res=resultantll(P,Q,tmp1,tmp2,m); 5373 // adjust 5374 for (longlong i=0;i<coeffv.size();++i){ 5375 if (degv[i]%2==1 && degv[i+1]%2==1) 5376 res=-res; 5377 res=amodpll(int128_t(res)*powmodll(coeffv[i],degv[i]-degv[i+2],m),m,invm); 5378 } 5379 return smodll(res,m); 5380 } 5381 int128_t res=1; 5382 while (Q.size()>1){ 5383 #if 0 5384 longlong coeff=Q[0]; 5385 longlong invcoeff=invmodll(coeff,m); 5386 mulsmall(Q,invcoeff,m); 5387 DivRem(P,Q,m,tmp1,tmp2); 5388 res = (res*powmodll(coeff,ulonglong(P.size()-1),m)) %m; 5389 #else 5390 
DivRem(P,Q,m,tmp1,tmp2); 5391 res = amodpll(res*powmodll(Q[0],P.size()-tmp2.size(),m,invm),m,invm); 5392 #endif 5393 if (P.size()%2==0 && Q.size()%2==0) 5394 res = -res; 5395 P.swap(Q); 5396 Q.swap(tmp2); 5397 } 5398 if (Q.empty()) 5399 return 0; 5400 res = amodpll(res*powmodll(Q[0],ulonglong(P.size()-1),m,invm),m,invm); 5401 return smodll(res,m); 5402 } 5403 int2longlong(const vector<int> & p,vector<longlong> & P,int modulo)5404 void int2longlong(const vector<int> & p,vector<longlong> & P,int modulo){ 5405 longlong m=modulo?modulo:p5; 5406 size_t s=p.size(); 5407 if (P.size()<s) 5408 P.resize(s); 5409 for (size_t i=0;i<s;++i){ 5410 longlong x=p[s-1-i]; 5411 P[i]=x<0?x+m:x; 5412 } 5413 } 5414 fft2p5(const vector<int> & p,const vector<int> & q,vector<longlong> & PQ,vector<longlong> & W,int modulo)5415 bool fft2p5(const vector<int> & p,const vector<int> & q,vector<longlong> & PQ,vector<longlong> & W,int modulo){ 5416 if (debug_infolevel) 5417 CERR << CLOCK()*1e-6 << "fft2p5 begin" << '\n'; 5418 int ps=int(p.size()),qs=int(q.size()),rs=ps+qs-1; 5419 int logrs=sizeinbase2(rs); 5420 if (logrs>54) return false; 5421 int n=(1u<<logrs); 5422 vector<longlong> P(n),Q(n); 5423 int2longlong(p,P,modulo); 5424 int2longlong(q,Q,modulo); 5425 if (W.empty() || W[0]==0){ 5426 //const longlong r=4917923076487504807LL; 5427 longlong w=4917923076487504807LL; 5428 for (int i=0;i<54-logrs;++i) 5429 w=(int128_t(w)*w) % p5; 5430 // longlong w=powmodll(r,(1ul<<(54-logrs)),p5); 5431 fft2wp(W,n,w,p5); 5432 } 5433 fft2pnopermafter(&P.front(),n,&W.front(),p5,invp5,1); 5434 fft2pnopermafter(&Q.front(),n,&W.front(),p5,invp5,1); 5435 for (int i=0;i<n;++i){ 5436 P[i]=mulmodll(P[i],Q[i],p5); 5437 } 5438 fft_reverse(W,p5); 5439 fft2pnopermbefore(&P.front(),n,&W.front(),p5,invp5,1); 5440 fft_reverse(W,p5); 5441 // divide by n 5442 longlong ninv=p5+(1-p5)/n; 5443 for (int i=0;i<rs;++i){ 5444 P[i]=(int128_t(ninv)*P[i]) % p5; //mulmodll(ninv,P[i],p5); 5445 if (modulo){ 5446 P[i]=smodll(P[i],modulo); 5447 
} 5448 else { 5449 if (P[i]>p5/2) 5450 P[i] -= p5; 5451 } 5452 } 5453 reverse(P.begin(),P.end()); 5454 PQ.reserve(rs); 5455 int i; 5456 for (i=0;i<P.size();++i){ 5457 if (P[i]!=0) 5458 break; 5459 } 5460 for (;i<P.size();++i) 5461 PQ.push_back(P[i]); 5462 if (debug_infolevel) 5463 CERR << CLOCK()*1e-6 << "fft2p5 end" << '\n'; 5464 return true; 5465 } 5466 5467 #endif // INT128 5468 5469 // find pseudo remainder of x mod p, 2^nbits>=p>2^(nbits-1) 5470 // assumes invp=2^(2*nbits)/p+1 has been precomputed 5471 // and abs(x)<2^(31+nbits) 5472 // |remainder| <= max(2^nbits,|x|*p/2^(2nbits)), <=2*p if |x|<=p^2 pseudo_mod(longlong x,int p,unsigned invp)5473 inline longlong pseudo_mod(longlong x,int p,unsigned invp){ 5474 return x - (((x>>31)*invp)>>31)*p; 5475 } 5476 fft_ab_p(const vector<int> & a,const vector<int> & b,vector<int> & res,int p)5477 void fft_ab_p(const vector<int> &a,const vector<int> &b,vector<int> & res,int p){ 5478 int s=a.size(); 5479 res.resize(s); 5480 #if 1 5481 double invp=find_invp(p); 5482 for (int i=0;i<s;++i){ 5483 res[i]=amodp(longlong(a[i])*b[i],p,invp); 5484 } 5485 #else 5486 for (int i=0;i<s;++i){ 5487 res[i]=(longlong(a[i])*b[i])%p; 5488 } 5489 #endif 5490 } 5491 fft_aoverb_p(const vector<int> & a,const vector<int> & b,vector<int> & res,int p)5492 void fft_aoverb_p(const vector<int> &a,const vector<int> &b,vector<int> & res,int p){ 5493 int s=a.size(); 5494 res.resize(s); 5495 for (int i=0;i<s;++i){ 5496 int bi=invmod(b[i],p); 5497 bi += (bi>>31)&p; 5498 res[i]=(longlong(a[i])*bi)%p; 5499 } 5500 } 5501 fft_ab_cd_p(const vector<int> & a,const vector<int> & b,const vector<int> & c,const vector<int> & d,vector<int> & res,int p)5502 void fft_ab_cd_p(const vector<int> &a,const vector<int> &b,const vector<int> & c,const vector<int> &d,vector<int> & res,int p){ 5503 int s=a.size(); 5504 res.resize(s); 5505 #if 1 //def __x86_64__ 5506 double invp=find_invp(p); 5507 for (int i=0;i<s;++i){ 5508 longlong l=(longlong(a[i])*b[i]+longlong(c[i])*d[i]); 
5509 double q=l*invp; 5510 l -= longlong(q)*p; 5511 res[i]=l; 5512 } 5513 #else 5514 for (int i=0;i<s;++i){ 5515 res[i]=(longlong(a[i])*b[i]+longlong(c[i])*d[i])%p; 5516 } 5517 #endif 5518 } 5519 fft_ab_cd_p1(const vector<int> & a,const vector<int> & b,const vector<int> & c,const vector<int> & d,vector<int> & res)5520 void fft_ab_cd_p1(const vector<int> &a,const vector<int> &b,const vector<int> & c,const vector<int> &d,vector<int> & res){ 5521 int s=a.size(); 5522 res.resize(s); 5523 for (int i=0;i<s;++i){ 5524 res[i]=(longlong(a[i])*b[i]+longlong(c[i])*d[i])%p1; 5525 } 5526 } 5527 fft_ab_cd_p2(const vector<int> & a,const vector<int> & b,const vector<int> & c,const vector<int> & d,vector<int> & res)5528 void fft_ab_cd_p2(const vector<int> &a,const vector<int> &b,const vector<int> & c,const vector<int> &d,vector<int> & res){ 5529 int s=a.size(); 5530 res.resize(s); 5531 for (int i=0;i<s;++i){ 5532 res[i]=(longlong(a[i])*b[i]+longlong(c[i])*d[i])%p2; 5533 } 5534 } 5535 fft_ab_cd_p3(const vector<int> & a,const vector<int> & b,const vector<int> & c,const vector<int> & d,vector<int> & res)5536 void fft_ab_cd_p3(const vector<int> &a,const vector<int> &b,const vector<int> & c,const vector<int> &d,vector<int> & res){ 5537 int s=a.size(); 5538 res.resize(s); 5539 for (int i=0;i<s;++i){ 5540 res[i]=(longlong(a[i])*b[i]+longlong(c[i])*d[i])%p3; 5541 } 5542 } 5543 fft_ab_cd(const fft_rep & a,const fft_rep & b,const fft_rep & c,const fft_rep & d,fft_rep & res)5544 void fft_ab_cd(const fft_rep & a,const fft_rep & b,const fft_rep & c,const fft_rep & d,fft_rep & res){ 5545 res.modulo=a.modulo; 5546 fft_ab_cd_p1(a.modp1,b.modp1,c.modp1,d.modp1,res.modp1); 5547 fft_ab_cd_p2(a.modp2,b.modp2,c.modp2,d.modp2,res.modp2); 5548 fft_ab_cd_p3(a.modp3,b.modp3,c.modp3,d.modp3,res.modp3); 5549 } 5550 multi_fft_ab_cd(const multi_fft_rep & a,const multi_fft_rep & b,const multi_fft_rep & c,const multi_fft_rep & d,multi_fft_rep & res)5551 void multi_fft_ab_cd(const multi_fft_rep & a,const 
multi_fft_rep & b,const multi_fft_rep & c,const multi_fft_rep & d,multi_fft_rep & res){ 5552 res.modulo=a.modulo; 5553 fft_ab_cd_p1(a.p1p2p3.modp1,b.p1p2p3.modp1,c.p1p2p3.modp1,d.p1p2p3.modp1,res.p1p2p3.modp1); 5554 fft_ab_cd_p2(a.p1p2p3.modp2,b.p1p2p3.modp2,c.p1p2p3.modp2,d.p1p2p3.modp2,res.p1p2p3.modp2); 5555 fft_ab_cd_p3(a.p1p2p3.modp3,b.p1p2p3.modp3,c.p1p2p3.modp3,d.p1p2p3.modp3,res.p1p2p3.modp3); 5556 res.v.resize(a.v.size()); 5557 for (size_t i=0;i<a.v.size();++i){ 5558 res.v[i].modulo=a.v[i].modulo; 5559 fft_ab_cd_p1(a.v[i].modp1,b.v[i].modp1,c.v[i].modp1,d.v[i].modp1,res.v[i].modp1); 5560 fft_ab_cd_p2(a.v[i].modp2,b.v[i].modp2,c.v[i].modp2,d.v[i].modp2,res.v[i].modp2); 5561 fft_ab_cd_p3(a.v[i].modp3,b.v[i].modp3,c.v[i].modp3,d.v[i].modp3,res.v[i].modp3); 5562 } 5563 } 5564 fft_ab_cd(const modpoly & a,const modpoly & b,const modpoly & c,const modpoly & d,unsigned long expoN,modpoly & res,mpz_t & tmp,mpz_t & tmpqz)5565 void fft_ab_cd(const modpoly & a,const modpoly &b,const modpoly &c,const modpoly &d,unsigned long expoN,modpoly & res,mpz_t & tmp,mpz_t &tmpqz){ 5566 int n=a.size(); 5567 for (int i=0;i<n;++i){ 5568 mpz_mul(tmp,*a[i]._ZINTptr,*b[i]._ZINTptr); 5569 mpz_addmul(tmp,*c[i]._ZINTptr,*d[i]._ZINTptr); 5570 smod2N(tmp,expoN,tmpqz); 5571 mpz_set(*res[i]._ZINTptr,tmp); 5572 } 5573 } 5574 5575 // computes a*b+c*d 5576 // set N>=0 to an upper bound of the degree if you know one ab_cd(int N,const modpoly & a,const modpoly & b,const modpoly & c,const modpoly & d,environment * env,modpoly & res,modpoly & tmp1,modpoly & tmp2)5577 void ab_cd(int N,const modpoly &a,const modpoly &b,const modpoly &c,const modpoly & d,environment * env,modpoly & res,modpoly & tmp1,modpoly & tmp2){ 5578 modpoly resdbg; 5579 if (N>=0){ 5580 if (a.size()>=FFTMUL_SIZE/4 && b.size()>=FFTMUL_SIZE/4 && c.size()>=FFTMUL_SIZE/4 && d.size()>=FFTMUL_SIZE/4 && env->moduloon){ 5581 // N is the degree after reduction mod env->modulo 5582 // but not the degree of a*b+c*d 5583 // therefore we make 
computation mod x^n-1 5584 int Nreal=giacmax(a.size()+b.size(),c.size()+d.size())-2; 5585 gen pPQ(Nreal*(2*env->modulo*env->modulo)+1); 5586 unsigned long l=gen(giacmin(N,Nreal)).bindigits()-1; // m=2^l <= Nreal < 2^{l+1} 5587 unsigned long n=1<<(l+1); 5588 if (env->modulo.type==_INT_){ 5589 int p=env->modulo.val; 5590 vector<int> aa; reverse_assign(a,aa,n,p); 5591 vector<int> bb; reverse_assign(b,bb,n,p); 5592 vector<int> cc; reverse_assign(c,cc,n,p); 5593 vector<int> dd; reverse_assign(d,dd,n,p); 5594 vector<int> Wp1,Wp2,Wp3; 5595 fft_rep aaf; 5596 to_fft(aa,p,Wp1,Wp2,Wp3,n,aaf,false,true); 5597 fft_rep bbf; 5598 to_fft(bb,p,Wp1,Wp2,Wp3,n,bbf,false,true); 5599 fft_rep ccf; 5600 to_fft(cc,p,Wp1,Wp2,Wp3,n,ccf,false,true); 5601 fft_rep ddf; 5602 to_fft(dd,p,Wp1,Wp2,Wp3,n,ddf,false,true); 5603 // a*b + c*d FFT size 5604 fft_rep resf; 5605 fft_ab_cd(aaf,bbf,ccf,ddf,resf); 5606 from_fft(resf,Wp1,Wp2,Wp3,dd,aa,bb,cc,true,true); 5607 vector_int2vecteur(dd,res); 5608 if (res.size()>N+1) 5609 res=modpoly(res.end()-N-1,res.end()); 5610 trim_inplace(res,env); 5611 return; 5612 } 5613 unsigned long bound=pPQ.bindigits()+1; // 2^bound=smod bound on coeff of p*q 5614 unsigned long r=(bound >> l)+1; 5615 if (0){ // not checked 5616 vector<int> Wp1,Wp2,Wp3; 5617 multi_fft_rep aaf; to_multi_fft(a,env->modulo,Wp1,Wp2,Wp3,n,aaf,true,true); 5618 multi_fft_rep bbf; to_multi_fft(b,env->modulo,Wp1,Wp2,Wp3,n,bbf,true,true); 5619 multi_fft_rep ccf; to_multi_fft(c,env->modulo,Wp1,Wp2,Wp3,n,ccf,true,true); 5620 multi_fft_rep ddf; to_multi_fft(d,env->modulo,Wp1,Wp2,Wp3,n,ddf,true,true); 5621 multi_fft_rep resf; 5622 multi_fft_ab_cd(aaf,bbf,ccf,ddf,resf); 5623 from_multi_fft(resf,Wp1,Wp2,Wp3,res,true); 5624 trim_inplace(res,env); 5625 return; 5626 } 5627 if (l>=2 && bound>=(1<<(l-2)) ){ 5628 mpz_t tmp,tmpqz; mpz_init(tmp); mpz_init(tmpqz); 5629 gen tmp1,tmp2; tmp1.uncoerce(); tmp2.uncoerce(); 5630 unsigned long expoN=r << l; // r*2^l 5631 modpoly aa; reverse_assign(a,aa,n,expoN+2); 5632 
modpoly work; reverse_resize(work,n,expoN+2); 5633 fft2rl(&aa.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 5634 modpoly bb; reverse_assign(b,bb,n,expoN+2); 5635 fft2rl(&bb.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 5636 modpoly cc; reverse_assign(c,cc,n,expoN+2); 5637 fft2rl(&cc.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 5638 modpoly dd; reverse_assign(d,dd,n,expoN+2); 5639 fft2rl(&dd.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 5640 // a*b+c*d FFT size 5641 reverse_resize(res,n,expoN+2); 5642 fft_ab_cd(aa,bb,cc,dd,expoN,res,tmp,tmpqz); 5643 fft2rl(&res.front(),n,r,l,&work.front(),false,tmp1,tmp2,tmpqz); 5644 // divide by n mod 2^expoN+1 5645 fft2rldiv(res,expoN,expoN-l-1,tmp,tmpqz); 5646 if (res.size()>N+1) 5647 res=modpoly(res.end()-N-1,res.end()); 5648 trim_inplace(res,env); 5649 mpz_clear(tmpqz); mpz_clear(tmp); 5650 return; 5651 resdbg=res; 5652 } 5653 } 5654 if (1 && a.size()>N+1){ 5655 ab_cd(N,modpoly(a.end()-N-1,a.end()),b,c,d,env,res,tmp1,tmp2); 5656 return; 5657 } 5658 if (1 && b.size()>N+1){ 5659 ab_cd(N,a,modpoly(b.end()-N-1,b.end()),c,d,env,res,tmp1,tmp2); 5660 return; 5661 } 5662 if (1 && c.size()>N+1){ 5663 ab_cd(N,a,b,modpoly(c.end()-N-1,c.end()),d,env,res,tmp1,tmp2); 5664 return; 5665 } 5666 if (1 && d.size()>N+1){ 5667 ab_cd(N,a,b,c,modpoly(d.end()-N-1,d.end()),env,res,tmp1,tmp2); 5668 return; 5669 } 5670 } // end if (N>=0) 5671 // res=trim(a*b+c*d,env); return; 5672 if (1 5673 // && env && env->moduloon && env->modulo.type==_INT_ && longlong(env->modulo.val)*env->modulo.val<(1LL<<31) 5674 ){ 5675 // smod at end, faster for small modulo (modulo^2<2^31) 5676 environment zeroenv; 5677 tmp1.clear(); 5678 if (!a.empty() && !b.empty()) 5679 operator_times(a,b,&zeroenv,tmp1,N>=0?N:RAND_MAX); 5680 if (N>=0 && tmp1.size()>N+1) 5681 tmp1=modpoly(tmp1.end()-N-1,tmp1.end()); 5682 #if 0 // debug 5683 tmp2.clear(); 5684 if (!a.empty() && !b.empty()) 5685 operator_times(a,b,&zeroenv,tmp2,RAND_MAX); 5686 if (N>=0 && tmp2.size()>N+1) 
5687 tmp2=modpoly(tmp2.end()-N-1,tmp2.end()); 5688 if (tmp1!=tmp2) 5689 COUT << "error" << tmp1-tmp2 << '\n'; 5690 #endif 5691 tmp2.clear(); 5692 if (!c.empty() && !d.empty()) 5693 operator_times(c,d,&zeroenv,tmp2,N>=0?N:RAND_MAX); 5694 if (N>=0 && tmp2.size()>N+1) 5695 tmp2=modpoly(tmp2.end()-N-1,tmp2.end()); 5696 #if 0 5697 addmodpoly(tmp1,tmp2,res); 5698 #else 5699 if (tmp1.size()>=tmp2.size()){ 5700 if (!tmp2.empty()) 5701 addmodpoly(tmp1,tmp2,tmp1); 5702 res.swap(tmp1); 5703 } 5704 else { 5705 if (!tmp1.empty()) 5706 addmodpoly(tmp2,tmp1,tmp2); 5707 res.swap(tmp2); 5708 } 5709 #endif 5710 trim_inplace(res,env); 5711 if (!resdbg.empty() && res!=resdbg) 5712 COUT << res-resdbg << '\n'; 5713 } 5714 else { 5715 tmp1.clear(); 5716 if (!a.empty() && !b.empty()) 5717 operator_times(a,b,env,tmp1); 5718 tmp2.clear(); 5719 if (!c.empty() && !d.empty()) 5720 operator_times(c,d,env,tmp2); 5721 addmodpoly(tmp1,tmp2,env,res); 5722 trim_inplace(res,env); 5723 } 5724 } 5725 precond_mulmodp(unsigned A,unsigned W,unsigned Winvp,int p)5726 inline int precond_mulmodp(unsigned A,unsigned W,unsigned Winvp,int p){ 5727 #if 1 5728 longlong t = ulonglong(A)*W-((ulonglong(A)*Winvp)>>32)*p; 5729 return t+ ((t>>31)&p); 5730 #else 5731 longlong t = ulonglong(A)*W-((ulonglong(A)*Winvp)>>32)*p; 5732 //return t- (t>>63)*p; 5733 int tt= t- (t>>63)*p; 5734 unsigned s=(ulonglong(A)*W)%p; 5735 if (tt!=s) 5736 CERR << '\n'; 5737 return s; 5738 #endif 5739 } 5740 mulmodp(int a,int b,int p)5741 inline int mulmodp(int a,int b,int p){ 5742 return (longlong(a)*b) % p; 5743 } 5744 mulmodp(int a,int b,int p,double invp)5745 inline int mulmodp(int a,int b,int p,double invp){ 5746 int t=amodp(longlong(a)*b, p,invp); 5747 //t=(longlong(a)*b) % p; 5748 //t += (t>>31)&p; 5749 return t; 5750 } 5751 pos_mulmodp(int a,int b,int p,double invp)5752 inline int pos_mulmodp(int a,int b,int p,double invp){ 5753 int t=apos_modp(longlong(a)*b, p,invp); 5754 return t; 5755 } 5756 5757 // reverse *a..*b and neg 
fft_rev1(int * a,int * b,longlong p)5758 void fft_rev1(int * a,int *b,longlong p){ 5759 for (;b>a;++a,--b){ 5760 int tmp=*a; 5761 *a=p-*b; 5762 *b=p-tmp; 5763 } 5764 if (a==b) 5765 *a=p-*a; 5766 } 5767 5768 #ifdef GIAC_PRECOND // preconditionned fft_reverse(vector<int> & W,int p)5769 void fft_reverse(vector<int> & W,int p){ 5770 if (W.size()<2) 5771 return; 5772 int * a=&W.front(); 5773 int N=W.size()/2; 5774 fft_rev1(a+1,a+N-1,p); 5775 fft_rev1(a+N+1,a+2*N-1,1); 5776 } 5777 fft2wp(vector<int> & W,int n,int w,int p)5778 void fft2wp(vector<int> & W,int n,int w,int p){ 5779 W.resize(n); 5780 w %= p; 5781 if (w<0) w += p; 5782 double invp=double(1ULL<<32)/p; 5783 int N=n/2; 5784 unsigned ww=1; 5785 for (int i=0;i<N;++i){ 5786 W[i]=ww; 5787 #if 0 5788 unsigned u=ww*invp; u++; 5789 // might error by 1 if 2^32*w-n*p==+/-1, because relative prec is 2^-53 5790 // fix with long_double, relative precision of 2^-64 5791 #else 5792 unsigned u=1+((1ULL<<32)*ww)/unsigned(p); // quotient ceiling 5793 #endif 5794 W[N+i]=u; 5795 ww=precond_mulmodp(w,ww,u,p); 5796 // ww=(ww*longlong(w))%p; 5797 // if (www!=ww) 5798 //CERR << '\n'; 5799 } 5800 } fft2wp1(vector<int> & W,int n,int w)5801 void fft2wp1(vector<int> & W,int n,int w){ 5802 W.resize(n); 5803 const int p = p1 ; 5804 w=w % p; 5805 if (w<0) w += p; 5806 int N=n/2; 5807 unsigned ww=1; 5808 for (int i=0;i<N;++i){ 5809 W[i]=ww; 5810 W[N+i]=1+((1ULL<<32)*ww)/unsigned(p); // quotient ceiling 5811 ww=(ww*longlong(w))%p; 5812 } 5813 } fft2wp2(vector<int> & W,int n,int w)5814 void fft2wp2(vector<int> & W,int n,int w){ 5815 W.resize(n); 5816 const int p = p2 ; 5817 w=w % p; 5818 if (w<0) w += p; 5819 int N=n/2; 5820 unsigned ww=1; 5821 for (int i=0;i<N;++i){ 5822 W[i]=ww; 5823 W[N+i]=1+((1LL<<32)*ww)/unsigned(p); // quotient ceiling 5824 ww=(ww*longlong(w))%p; 5825 } 5826 } fft2wp3(vector<int> & W,int n,int w)5827 void fft2wp3(vector<int> & W,int n,int w){ 5828 W.resize(n); 5829 const int p = p3 ; 5830 w=w % p; 5831 if (w<0) w += p; 
5832 int N=n/2; 5833 unsigned ww=1; 5834 for (int i=0;i<N;++i){ 5835 W[i]=ww; 5836 W[N+i]=1+((1ULL<<32)*ww)/unsigned(p); // quotient ceiling 5837 ww=(ww*longlong(w))%p; 5838 } 5839 } 5840 #else fft_reverse(vector<int> & W,int p)5841 void fft_reverse(vector<int> & W,int p){ 5842 if (W.size()<2) 5843 return; 5844 int * a=&W.front(); 5845 #ifdef GIAC_CACHEW 5846 for (int N=(W.size()+1)/2;N>=2;a+=N,N/=2){ 5847 fft_rev1(a+1,a+N-1,p); 5848 } 5849 #else 5850 fft_rev1(a+1,a+W.size()-1,p); 5851 #endif 5852 } 5853 fft2wp_add(vector<int> & W,int N)5854 void fft2wp_add(vector<int> & W,int N){ 5855 int step=1; 5856 for (N/=2;N;N/=2){ 5857 step *= 2; 5858 for (int i=0;i<N;++i){ 5859 W.push_back(W[i*step]); 5860 } 5861 } 5862 W.push_back(1); 5863 } 5864 fft2wp(vector<int> & W,int n,int w,int p)5865 void fft2wp(vector<int> & W,int n,int w,int p){ 5866 #ifdef GIAC_CACHEW 5867 W.reserve(n); 5868 #else 5869 W.reserve(n/2); 5870 #endif 5871 double invp=find_invp(p); 5872 w=amodp(w,p,invp); 5873 if (w<0) w += p; 5874 int N=n/2,ww=1; 5875 for (int i=0;i<N;++i){ 5876 W.push_back(ww); 5877 ww=mulmodp(ww,w,p,invp);//(ww*longlong(w))%p; 5878 } 5879 #ifdef GIAC_CACHEW 5880 fft2wp_add(W,N); 5881 #endif 5882 } fft2wp1(vector<int> & W,int n,int w)5883 void fft2wp1(vector<int> & W,int n,int w){ 5884 #ifdef GIAC_CACHEW 5885 W.reserve(n); 5886 #else 5887 W.reserve(n/2); 5888 #endif 5889 const int p = p1 ; 5890 w=w % p; 5891 if (w<0) w += p; 5892 int N=n/2,ww=1; 5893 for (int i=0;i<N;++i){ 5894 W.push_back(ww); 5895 ww=(ww*longlong(w))%p; 5896 } 5897 #ifdef GIAC_CACHEW 5898 fft2wp_add(W,N); 5899 #endif 5900 } fft2wp2(vector<int> & W,int n,int w)5901 void fft2wp2(vector<int> & W,int n,int w){ 5902 #ifdef GIAC_CACHEW 5903 W.reserve(n); 5904 #else 5905 W.reserve(n/2); 5906 #endif 5907 const int p = p2 ; 5908 w=w % p; 5909 if (w<0) w += p; 5910 int N=n/2,ww=1; 5911 for (int i=0;i<N;++i){ 5912 W.push_back(ww); 5913 ww=(ww*longlong(w))%p; 5914 } 5915 #ifdef GIAC_CACHEW 5916 fft2wp_add(W,N); 5917 #endif 
5918 } fft2wp3(vector<int> & W,int n,int w)5919 void fft2wp3(vector<int> & W,int n,int w){ 5920 #ifdef GIAC_CACHEW 5921 W.reserve(n); 5922 #else 5923 W.reserve(n/2); 5924 #endif 5925 const int p = p3 ; 5926 w=w % p; 5927 if (w<0) w += p; 5928 int N=n/2,ww=1; 5929 for (int i=0;i<N;++i){ 5930 W.push_back(ww); 5931 ww=(ww*longlong(w))%p; 5932 } 5933 #ifdef GIAC_CACHEW 5934 fft2wp_add(W,N); 5935 #endif 5936 } 5937 #endif 5938 fft_rev(vector<int> & W,int p)5939 void fft_rev(vector<int> & W,int p){ 5940 if (p==p1 || p==p2 || p==p3){ 5941 fft_reverse(W,p); 5942 return; 5943 } 5944 if (W.size()<2) 5945 return; 5946 int * a=&W.front(); 5947 for (int N=(W.size()+1)/2;N;a+=N,N/=2){ 5948 fft_rev1(a+1,a+N-1,p); 5949 } 5950 } 5951 5952 //#define DEBUG 1 5953 // [[RA,RB],[RC,RD]]*[a0,a1]->[a,b] matrix22inttimesvect(const vector<int> & RA,const vector<int> & RB,const vector<int> & RC,const vector<int> & RD,const vector<int> & a0,const vector<int> & a1,int maxadeg,int maxbdeg,vector<int> & a,vector<int> & b,int p,vector<int> & ra,vector<int> & rb,vector<int> & rc,vector<int> & rd,vector<int> & Wp)5954 void matrix22inttimesvect(const vector<int> & RA,const vector<int> & RB,const vector<int> & RC,const vector<int> & RD,const vector<int> & a0,const vector<int> &a1,int maxadeg,int maxbdeg,vector<int> & a,vector<int> &b,int p,vector<int> & ra,vector<int> & rb,vector<int> & rc,vector<int> & rd,vector<int> &Wp){ 5955 int dega0=a0.size()-1,m=(dega0+1)/2; 5956 int maxabdeg=giacmax(maxadeg,maxbdeg); 5957 int bbsize=giacmin(maxabdeg+1,a0.size()); 5958 int ddsize=giacmin(maxabdeg+1,a1.size()); 5959 int Nreal=giacmax(bbsize+RC.size(),ddsize+RD.size())-2; 5960 int N2=giacmin(maxabdeg,Nreal); // add 1 if fft is done without reverse 5961 unsigned long l=sizeinbase2(N2)-1; 5962 // l=gen(N2).bindigits()-1; // m=2^l <= Nreal < 2^{l+1} 5963 unsigned long n=1<<(l+1); 5964 if (debug_infolevel>2) 5965 CERR << CLOCK()*1e-6 << " mat22vectint begin n=" << n << " N2=" << N2 << " ra=" << ra.size() << '\n'; 
5966 int w=find_w(Wp,l+1,p); 5967 #ifdef GIAC_CACHEW 5968 //Wp.clear(); 5969 #endif 5970 // vector<int> adbg,bdbg; 5971 if (w){ 5972 if (N2<n){ 5973 // if N2==n, this branch would require moving the last coeff 5974 // of a and b at the front of a and b, because it would not be 0 5975 // but it's easier to run FFT in normal order 5976 to_fft(RA,p,w,Wp,n,ra,2,false,false); 5977 to_fft(RB,p,w,Wp,n,rb,2,false,false); 5978 to_fft(RC,p,w,Wp,n,rc,2,false,false); 5979 to_fft(RD,p,w,Wp,n,rd,2,false,false); 5980 to_fft(a0,p,w,Wp,n,a,2,false,false); 5981 to_fft(a1,p,w,Wp,n,b,2,false,false); 5982 fft_reverse(Wp,p); 5983 fft_ab_cd_p(rc,a,rd,b,rc,p); 5984 from_fft(rc,p,Wp,rc,false,false); 5985 fft_ab_cd_p(ra,a,rb,b,a,p); 5986 from_fft(a,p,Wp,a,false,false); 5987 b.swap(rc); 5988 //fft_reverse(Wp,p); 5989 a.pop_back(); 5990 b.pop_back(); 5991 fast_trim_inplace(a,p,maxabdeg+1); 5992 fast_trim_inplace(b,p,maxabdeg+1); 5993 } 5994 else { 5995 // reverse_assign should have made ra,rb, etc. positive 5996 to_fft(RA,p,w,Wp,n,ra,1,false,false); 5997 to_fft(RB,p,w,Wp,n,rb,1,false,false); 5998 to_fft(RC,p,w,Wp,n,rc,1,false,false); 5999 to_fft(RD,p,w,Wp,n,rd,1,false,false); 6000 to_fft(a0,p,w,Wp,n,a,1,false,false); 6001 to_fft(a1,p,w,Wp,n,b,1,false,false); 6002 fft_reverse(Wp,p); 6003 fft_ab_cd_p(rc,a,rd,b,rc,p); 6004 from_fft(rc,p,Wp,rc,true,false); 6005 fft_ab_cd_p(ra,a,rb,b,a,p); 6006 from_fft(a,p,Wp,a,true,false); 6007 b.swap(rc); 6008 //fft_reverse(Wp,p); 6009 fast_trim_inplace(a,p,maxabdeg+1); 6010 fast_trim_inplace(b,p,maxabdeg+1); 6011 } 6012 } else { 6013 vector<int> a0_,a1_; 6014 reverse_assign(RA,ra,n,p); 6015 reverse_assign(RB,rb,n,p); 6016 reverse_assign(RC,rc,n,p); 6017 reverse_assign(RD,rd,n,p); 6018 reverse_assign(a0,a0_,n,p); 6019 reverse_assign(a1,a1_,n,p); 6020 vector<int> Wp1,Wp2,Wp3; 6021 fft_rep raf; 6022 to_fft(ra,p,Wp1,Wp2,Wp3,n,raf,false,true); 6023 fft_rep rbf; to_fft(rb,p,Wp1,Wp2,Wp3,n,rbf,false,true); 6024 fft_rep rcf; to_fft(rc,p,Wp1,Wp2,Wp3,n,rcf,false,true); 
6025 fft_rep rdf; to_fft(rd,p,Wp1,Wp2,Wp3,n,rdf,false,true); 6026 fft_rep a0f; 6027 to_fft(a0_,p,Wp1,Wp2,Wp3,n,a0f,false,true); 6028 fft_rep a1f; 6029 to_fft(a1_,p,Wp1,Wp2,Wp3,n,a1f,false,true); 6030 fft_rep resf; 6031 fft_ab_cd(raf,a0f,rbf,a1f,resf); 6032 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6033 from_fft(resf,Wp1,Wp2,Wp3,a,ra,rb,rc,true,false); 6034 fft_ab_cd(rcf,a0f,rdf,a1f,resf); 6035 from_fft(resf,Wp1,Wp2,Wp3,b,ra,rb,rc,true,false); 6036 //fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6037 fast_trim_inplace(a,p,maxabdeg+1); 6038 //trim_deg(b,maxabdeg); 6039 fast_trim_inplace(b,p,maxabdeg+1); 6040 } 6041 // if (w && a!=adbg && b!=bdbg) CERR << "err\n"; 6042 //trim_deg(a,maxabdeg); 6043 if (debug_infolevel>2) 6044 CERR << CLOCK()*1e-6 << " mat22vectint end " << n << '\n'; 6045 } 6046 6047 // [[RA,RB],[RC,RD]]*[a0,a1]->[a,b] matrix22timesvect(const modpoly & RA,const modpoly & RB,const modpoly & RC,const modpoly & RD,const modpoly & a0,const modpoly & a1,int maxadeg,int maxbdeg,modpoly & a,modpoly & b,environment & env,modpoly & tmp1,modpoly & tmp2)6048 void matrix22timesvect(const modpoly & RA,const modpoly & RB,const modpoly & RC,const modpoly & RD,const modpoly & a0,const modpoly &a1,int maxadeg,int maxbdeg,modpoly & a,modpoly &b,environment & env,modpoly & tmp1,modpoly & tmp2){ 6049 bool doit=true; 6050 int dega0=a0.size()-1,m=(dega0+1)/2; 6051 int maxabdeg=giacmax(maxadeg,maxbdeg); 6052 if (1&& env.moduloon && a0.size()>=FFTMUL_SIZE/4 && a1.size()>=FFTMUL_SIZE/4 && RA.size()>=FFTMUL_SIZE/4 && RB.size()>=FFTMUL_SIZE/4){ 6053 int bbsize=giacmin(maxabdeg+1,a0.size()); 6054 int ddsize=giacmin(maxabdeg+1,a1.size()); 6055 int Nreal=giacmax(bbsize+RC.size(),ddsize+RD.size())-2; 6056 int N2=giacmin(maxabdeg,Nreal); 6057 gen pPQ(Nreal*(2*env.modulo*env.modulo)+1); 6058 unsigned long l=gen(N2).bindigits()-1; // m=2^l <= Nreal < 2^{l+1} 6059 unsigned long n=1<<(l+1); 6060 unsigned long bound=pPQ.bindigits()+1; // 2^bound=smod 
bound on coeff of p*q 6061 unsigned long r=(bound >> l)+1; 6062 if (env.modulo.type==_INT_){ 6063 doit=false; int p=env.modulo.val; 6064 vector<int> ra; reverse_assign(RA,ra,n,p); 6065 vector<int> rb; reverse_assign(RB,rb,n,p); 6066 vector<int> rc; reverse_assign(RC,rc,n,p); 6067 vector<int> rd; reverse_assign(RD,rd,n,p); 6068 vector<int> a0_; 6069 reverse_assign(a0,a0_,n,p); 6070 vector<int> a1_; 6071 reverse_assign(a1,a1_,n,p); 6072 vector<int> Wp1,Wp2,Wp3; 6073 fft_rep raf; 6074 to_fft(ra,p,Wp1,Wp2,Wp3,n,raf,false,true); 6075 fft_rep rbf; to_fft(rb,p,Wp1,Wp2,Wp3,n,rbf,false,true); 6076 fft_rep rcf; to_fft(rc,p,Wp1,Wp2,Wp3,n,rcf,false,true); 6077 fft_rep rdf; to_fft(rd,p,Wp1,Wp2,Wp3,n,rdf,false,true); 6078 fft_rep a0f; 6079 to_fft(a0_,p,Wp1,Wp2,Wp3,n,a0f,false,true); 6080 fft_rep a1f; 6081 to_fft(a1_,p,Wp1,Wp2,Wp3,n,a1f,false,true); 6082 fft_rep resf; 6083 fft_ab_cd(raf,a0f,rbf,a1f,resf); 6084 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6085 from_fft(resf,Wp1,Wp2,Wp3,a0_,ra,rb,rc,true,false); 6086 fft_ab_cd(rcf,a0f,rdf,a1f,resf); 6087 from_fft(resf,Wp1,Wp2,Wp3,a1_,ra,rb,rc,true,false); 6088 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6089 vector_int2vecteur(a0_,a); 6090 trim_deg(a,maxabdeg); 6091 trim_inplace(a,&env); 6092 vector_int2vecteur(a1_,b); 6093 trim_deg(b,maxabdeg); 6094 trim_inplace(b,&env); 6095 //CERR << n << " " << b << '\n'; 6096 } 6097 if (doit && l>=2 && bound>=(1<<(l-2)) ){ 6098 doit=false; 6099 mpz_t tmp,tmpqz; mpz_init(tmp); mpz_init(tmpqz); 6100 gen tmp1g,tmp2g; tmp1g.uncoerce(); tmp2g.uncoerce(); 6101 unsigned long expoN=r << l; // r*2^l 6102 modpoly aa; reverse_assign(RA,aa,n,expoN+2); 6103 modpoly work; reverse_resize(work,n,expoN+2); 6104 // RA*a0+RB*a1 FFT size 6105 fft2rl(&aa.front(),n,r,l,&work.front(),true,tmp1g,tmp2g,tmpqz); 6106 modpoly & bb=tmp1; 6107 reverse_assign(a0,bb,n,expoN+2);// reverse_resize(bb,n,expoN+2); 6108 fft2rl(&bb.front(),n,r,l,&work.front(),true,tmp1g,tmp2g,tmpqz); 6109 
modpoly cc; reverse_assign(RB,cc,n,expoN+2); 6110 fft2rl(&cc.front(),n,r,l,&work.front(),true,tmp1g,tmp2g,tmpqz); 6111 modpoly & dd=tmp2; 6112 reverse_assign(a1,dd,n,expoN+2); // reverse_resize(dd,n,expoN+2); 6113 fft2rl(&dd.front(),n,r,l,&work.front(),true,tmp1g,tmp2g,tmpqz); 6114 reverse_resize(a,n,expoN+2); 6115 fft_ab_cd(aa,bb,cc,dd,expoN,a,tmp,tmpqz); 6116 fft2rl(&a.front(),n,r,l,&work.front(),false,tmp1g,tmp2g,tmpqz); 6117 // divide by n mod 2^expoN+1 6118 fft2rldiv(a,expoN,expoN-l-1,tmp,tmpqz); 6119 trim_deg(a,maxabdeg); 6120 trim_inplace(a,&env); 6121 reverse_assign(RC,aa,n,expoN+2); 6122 fft2rl(&aa.front(),n,r,l,&work.front(),true,tmp1g,tmp2g,tmpqz); 6123 reverse_assign(RD,cc,n,expoN+2); 6124 fft2rl(&cc.front(),n,r,l,&work.front(),true,tmp1g,tmp2g,tmpqz); 6125 // RC*a0+RD*a1 FFT size 6126 reverse_resize(b,n,expoN+2); 6127 fft_ab_cd(aa,bb,cc,dd,expoN,b,tmp,tmpqz); 6128 fft2rl(&b.front(),n,r,l,&work.front(),false,tmp1g,tmp2g,tmpqz); 6129 // divide by n mod 2^expoN+1 6130 fft2rldiv(b,expoN,expoN-l-1,tmp,tmpqz); 6131 trim_deg(b,maxabdeg); 6132 trim_inplace(b,&env); 6133 mpz_clear(tmpqz); mpz_clear(tmp); 6134 } 6135 if (doit){ 6136 doit=false; 6137 vector<int> Wp1,Wp2,Wp3; 6138 multi_fft_rep raf; 6139 to_multi_fft(RA,env.modulo,Wp1,Wp2,Wp3,n,raf,true,true); 6140 //from_multi_fft(raf,Wp1,Wp2,Wp3,a,true); trim_inplace(a,&env); 6141 multi_fft_rep rbf; to_multi_fft(RB,env.modulo,Wp1,Wp2,Wp3,n,rbf,true,true); 6142 multi_fft_rep rcf; to_multi_fft(RC,env.modulo,Wp1,Wp2,Wp3,n,rcf,true,true); 6143 multi_fft_rep rdf; to_multi_fft(RD,env.modulo,Wp1,Wp2,Wp3,n,rdf,true,true); 6144 multi_fft_rep a0f; to_multi_fft(a0,env.modulo,Wp1,Wp2,Wp3,n,a0f,true,true); 6145 multi_fft_rep a1f; to_multi_fft(a1,env.modulo,Wp1,Wp2,Wp3,n,a1f,true,true); 6146 multi_fft_rep resf; 6147 multi_fft_ab_cd(raf,a0f,rbf,a1f,resf); 6148 from_multi_fft(resf,Wp1,Wp2,Wp3,a,true); 6149 trim_deg(a,maxabdeg); 6150 trim_inplace(a,&env); 6151 multi_fft_ab_cd(rcf,a0f,rdf,a1f,resf); 6152 
from_multi_fft(resf,Wp1,Wp2,Wp3,b,true); 6153 trim_deg(b,maxabdeg); 6154 trim_inplace(b,&env); 6155 //CERR << n << " " << b << '\n'; 6156 } 6157 } 6158 if (doit){ 6159 ab_cd(maxbdeg,RC,a0,RD,a1,&env,b,tmp1,tmp2); // b=trim(RC*a0+RD*a1,&env); // C=B' in Yap 6160 // ab_cd(dega0,RA,a0,RB,a1,&env,a,tmp1,tmp2); // a=trim(RA*a0+RB*a1,&env); // A' in Yap 6161 ab_cd(maxadeg,RA,a0,RB,a1,&env,a,tmp1,tmp2); // a=trim(RA*a0+RB*a1,&env); // A' in Yap 6162 } 6163 } 6164 matrix22int(vector<int> & RA,vector<int> & RB,vector<int> & RC,vector<int> & RD,vector<int> & SA,vector<int> & SB,vector<int> & SC,vector<int> & SD,vector<int> & A,vector<int> & B,vector<int> & C,vector<int> & D,int p,vector<int> & tmp0,vector<int> & Wp)6165 void matrix22int(vector<int> & RA,vector<int> &RB,vector<int> & RC,vector<int> &RD,vector<int> &SA,vector<int> &SB,vector<int> &SC,vector<int> &SD,vector<int> &A,vector<int> &B,vector<int> &C,vector<int> &D,int p,vector<int> & tmp0,vector<int> & Wp){ 6166 // 2x2 matrix operations 6167 // [[SA,SB],[SC,SD]]*[[RC,RD],[RA,RB]] == [[RA*SB+RC*SA,RB*SB+RD*SA],[RA*SD+RC*SC,RB*SD+RD*SC]] 6168 int Nreal=giacmax(giacmax(RC.size(),RD.size()),giacmax(RA.size(),RB.size()))+giacmax(giacmax(SC.size(),SD.size()),giacmax(SA.size(),SB.size()))-2; 6169 // increase Nreal by 1 if fft without reverse 6170 unsigned long l=sizeinbase2(Nreal)-1; // l=gen(Nreal).bindigits()-1; // m=2^l <= Nreal < 2^{l+1} 6171 unsigned long n=1<<(l+1); 6172 if (debug_infolevel>2) 6173 CERR << CLOCK()*1e-6 << " mat22int begin " << n << " " << Nreal << '\n'; 6174 int w=find_w(Wp,l+1,p); 6175 #ifdef GIAC_CACHEW 6176 //Wp.clear(); 6177 #endif 6178 // vector<int> adbg,bdbg; 6179 if (w){ 6180 #if 0 // if set to 1 increase Nreal by 1, if set to 0 decrease Nreal by 1 6181 to_fft(SC,p,w,Wp,n,SC,2,false,false); 6182 to_fft(SD,p,w,Wp,n,SD,2,false,false); 6183 to_fft(RA,p,w,Wp,n,RA,2,false,false); 6184 to_fft(RB,p,w,Wp,n,RB,2,false,false); 6185 to_fft(RC,p,w,Wp,n,RC,2,false,false); 6186 
to_fft(RD,p,w,Wp,n,RD,2,false,false); 6187 to_fft(SA,p,w,Wp,n,SA,2,false,false); 6188 to_fft(SB,p,w,Wp,n,SB,2,false,false); 6189 fft_reverse(Wp,p); 6190 fft_ab_cd_p(RA,SB,RC,SA,A,p); 6191 from_fft(A,p,Wp,A,false,false); 6192 A.pop_back(); 6193 fft_ab_cd_p(RB,SB,RD,SA,SA,p); SA.swap(B); 6194 from_fft(B,p,Wp,B,false,false); 6195 B.pop_back(); 6196 fft_ab_cd_p(RA,SD,RC,SC,RA,p); RA.swap(C); 6197 from_fft(C,p,Wp,C,false,false); 6198 C.pop_back(); 6199 fft_ab_cd_p(RB,SD,RD,SC,RB,p); RB.swap(D); 6200 from_fft(D,p,Wp,D,false,false); 6201 D.pop_back(); 6202 #else 6203 // makepositive set to false since reverse_assign should make RA positive 6204 to_fft(SC,p,w,Wp,n,SC,1,false,false); 6205 to_fft(SD,p,w,Wp,n,SD,1,false,false); 6206 to_fft(RA,p,w,Wp,n,RA,1,false,false); 6207 to_fft(RB,p,w,Wp,n,RB,1,false,false); 6208 to_fft(RC,p,w,Wp,n,RC,1,false,false); 6209 to_fft(RD,p,w,Wp,n,RD,1,false,false); 6210 to_fft(SA,p,w,Wp,n,SA,1,false,false); 6211 to_fft(SB,p,w,Wp,n,SB,1,false,false); 6212 fft_reverse(Wp,p); 6213 fft_ab_cd_p(RA,SB,RC,SA,A,p); 6214 from_fft(A,p,Wp,A,true,false); 6215 fft_ab_cd_p(RB,SB,RD,SA,SA,p); SA.swap(B); 6216 from_fft(B,p,Wp,B,true,false); 6217 fft_ab_cd_p(RA,SD,RC,SC,RA,p); RA.swap(C); 6218 from_fft(C,p,Wp,C,true,false); 6219 fft_ab_cd_p(RB,SD,RD,SC,RB,p); RB.swap(D); 6220 from_fft(D,p,Wp,D,true,false); 6221 #endif 6222 fast_trim_inplace(A,p); 6223 fast_trim_inplace(B,p); 6224 fast_trim_inplace(C,p); 6225 fast_trim_inplace(D,p); 6226 //fft_reverse(Wp,p); 6227 } 6228 else { 6229 reverse_assign(RA,tmp0,n,p); RA.swap(tmp0); 6230 reverse_assign(RB,tmp0,n,p); RB.swap(tmp0); 6231 reverse_assign(RC,tmp0,n,p); RC.swap(tmp0); 6232 reverse_assign(RD,tmp0,n,p); RD.swap(tmp0); 6233 reverse_assign(SA,tmp0,n,p); SA.swap(tmp0); 6234 reverse_assign(SB,tmp0,n,p); SB.swap(tmp0); 6235 reverse_assign(SC,tmp0,n,p); SC.swap(tmp0); 6236 reverse_assign(SD,tmp0,n,p); SD.swap(tmp0); 6237 vector<int> Wp1,Wp2,Wp3,tmp_,tmp__; 6238 fft_rep raf; to_fft(RA,p,Wp1,Wp2,Wp3,n,raf,false,true); 
6239 fft_rep rbf; to_fft(RB,p,Wp1,Wp2,Wp3,n,rbf,false,true); 6240 fft_rep rcf; to_fft(RC,p,Wp1,Wp2,Wp3,n,rcf,false,true); 6241 fft_rep rdf; to_fft(RD,p,Wp1,Wp2,Wp3,n,rdf,false,true); 6242 fft_rep saf; to_fft(SA,p,Wp1,Wp2,Wp3,n,saf,false,true); 6243 fft_rep sbf; to_fft(SB,p,Wp1,Wp2,Wp3,n,sbf,false,true); 6244 fft_rep resf; 6245 fft_ab_cd(raf,sbf,rcf,saf,resf); 6246 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6247 from_fft(resf,Wp1,Wp2,Wp3,A,tmp0,tmp_,tmp__,true,false); 6248 fft_ab_cd(rbf,sbf,rdf,saf,resf); 6249 from_fft(resf,Wp1,Wp2,Wp3,B,tmp0,tmp_,tmp__,true,false); 6250 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6251 to_fft(SC,p,Wp1,Wp2,Wp3,n,saf,false,true); 6252 to_fft(SD,p,Wp1,Wp2,Wp3,n,sbf,false,true); 6253 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6254 fft_ab_cd(raf,sbf,rcf,saf,resf); 6255 from_fft(resf,Wp1,Wp2,Wp3,C,tmp0,tmp_,tmp__,true,false); 6256 fft_ab_cd(rbf,sbf,rdf,saf,resf); 6257 from_fft(resf,Wp1,Wp2,Wp3,D,tmp0,tmp_,tmp__,true,false); 6258 //fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6259 fast_trim_inplace(A,p); 6260 fast_trim_inplace(B,p); 6261 fast_trim_inplace(C,p); 6262 fast_trim_inplace(D,p); 6263 } 6264 if (debug_infolevel>2) 6265 CERR << CLOCK()*1e-6 << " mat22int end " << n << '\n'; 6266 } 6267 matrix22(modpoly & RA,modpoly & RB,modpoly & RC,modpoly & RD,modpoly & SA,modpoly & SB,modpoly & SC,modpoly & SD,modpoly & A,modpoly & B,modpoly & C,modpoly & D,environment & env,modpoly & tmp1,modpoly & tmp2)6268 void matrix22(modpoly & RA,modpoly &RB,modpoly & RC,modpoly &RD,modpoly &SA,modpoly &SB,modpoly &SC,modpoly &SD,modpoly &A,modpoly &B,modpoly &C,modpoly &D,environment & env,modpoly & tmp1,modpoly & tmp2){ 6269 // 2x2 matrix operations 6270 // [[SA,SB],[SC,SD]]*[[RC,RD],[RA,RB]] == [[RA*SB+RC*SA,RB*SB+RD*SA],[RA*SD+RC*SC,RB*SD+RD*SC]] 6271 bool doit=true; 6272 // modpoly Adbg,Bdbg,Cdbg,Ddbg; 6273 if (env.moduloon && RA.size()>=FFTMUL_SIZE/4 && 
SA.size()>=FFTMUL_SIZE/4 && RB.size()>=FFTMUL_SIZE/4 && SB.size()>=FFTMUL_SIZE/4){ 6274 int Nreal=giacmax(giacmax(RC.size(),RD.size()),giacmax(RA.size(),RB.size()))+giacmax(giacmax(SC.size(),SD.size()),giacmax(SA.size(),SB.size()))-2; 6275 gen pPQ(Nreal*(2*env.modulo*env.modulo)+1); 6276 unsigned long l=gen(Nreal).bindigits()-1; // m=2^l <= Nreal < 2^{l+1} 6277 unsigned long bound=pPQ.bindigits()+1; // 2^bound=smod bound on coeff of p*q 6278 unsigned long r=(bound >> l)+1; 6279 unsigned long n=1<<(l+1); 6280 if (env.modulo.type==_INT_){ 6281 doit=false; int p=env.modulo.val; 6282 vector<int> ra; reverse_assign(RA,ra,n,p); 6283 vector<int> rb; reverse_assign(RB,rb,n,p); 6284 vector<int> rc; reverse_assign(RC,rc,n,p); 6285 vector<int> rd; reverse_assign(RD,rd,n,p); 6286 vector<int> sa; reverse_assign(SA,sa,n,p); 6287 vector<int> sb; reverse_assign(SB,sb,n,p); 6288 vector<int> Wp1,Wp2,Wp3; 6289 fft_rep raf; to_fft(ra,p,Wp1,Wp2,Wp3,n,raf,false,true); 6290 fft_rep rbf; to_fft(rb,p,Wp1,Wp2,Wp3,n,rbf,false,true); 6291 fft_rep rcf; to_fft(rc,p,Wp1,Wp2,Wp3,n,rcf,false,true); 6292 fft_rep rdf; to_fft(rd,p,Wp1,Wp2,Wp3,n,rdf,false,true); 6293 fft_rep saf; to_fft(sa,p,Wp1,Wp2,Wp3,n,saf,false,true); 6294 fft_rep sbf; to_fft(sb,p,Wp1,Wp2,Wp3,n,sbf,false,true); 6295 vector<int> tmpres; 6296 fft_rep resf; 6297 fft_ab_cd(raf,sbf,rcf,saf,resf); 6298 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6299 from_fft(resf,Wp1,Wp2,Wp3,tmpres,ra,rb,rc,true,false); 6300 vector_int2vecteur(tmpres,A); trim_inplace(A,&env); 6301 fft_ab_cd(rbf,sbf,rdf,saf,resf); 6302 from_fft(resf,Wp1,Wp2,Wp3,tmpres,ra,rb,rc,true,false); 6303 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6304 vector_int2vecteur(tmpres,B); trim_inplace(B,&env); 6305 reverse_assign(SC,sa,n,p); to_fft(sa,p,Wp1,Wp2,Wp3,n,saf,false,true); 6306 reverse_assign(SD,sb,n,p); to_fft(sb,p,Wp1,Wp2,Wp3,n,sbf,false,true); 6307 fft_ab_cd(raf,sbf,rcf,saf,resf); 6308 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); 
fft_reverse(Wp3,p3); 6309 from_fft(resf,Wp1,Wp2,Wp3,tmpres,ra,rb,rc,true,false); 6310 vector_int2vecteur(tmpres,C); trim_inplace(C,&env); 6311 fft_ab_cd(rbf,sbf,rdf,saf,resf); 6312 from_fft(resf,Wp1,Wp2,Wp3,tmpres,ra,rb,rc,true,false); 6313 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 6314 vector_int2vecteur(tmpres,D); trim_inplace(D,&env); 6315 } 6316 if (doit && l>=2 && bound>=(1<<(l-2)) ){ 6317 doit=false; 6318 mpz_t tmp,tmpqz; mpz_init(tmp); mpz_init(tmpqz); 6319 gen tmp1,tmp2; tmp1.uncoerce(); tmp2.uncoerce(); 6320 unsigned long expoN=r << l; // r*2^l 6321 modpoly work; reverse_resize(work,n,expoN+2); 6322 modpoly &ra=RA; reverse_assign(RA,ra,n,expoN+2); 6323 fft2rl(&ra.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6324 modpoly &rb=RB; reverse_assign(RB,rb,n,expoN+2); 6325 fft2rl(&rb.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6326 modpoly &rc=RC; reverse_assign(RC,rc,n,expoN+2); 6327 fft2rl(&rc.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6328 modpoly &rd=RD; reverse_assign(RD,rd,n,expoN+2); 6329 fft2rl(&rd.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6330 modpoly &sa=SA; reverse_assign(SA,sa,n,expoN+2); 6331 fft2rl(&sa.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6332 modpoly &sb=SB; reverse_assign(SB,sb,n,expoN+2); 6333 fft2rl(&sb.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6334 // A=trim(RA*SB+RC*SA,&env); 6335 reverse_resize(A,n,expoN+2); 6336 fft_ab_cd(ra,sb,rc,sa,expoN,A,tmp,tmpqz); 6337 fft2rl(&A.front(),n,r,l,&work.front(),false,tmp1,tmp2,tmpqz); 6338 fft2rldiv(A,expoN,expoN-l-1,tmp,tmpqz); 6339 trim_inplace(A,&env); 6340 // B=trim(RB*SB+RD*SA,&env); 6341 reverse_resize(B,n,expoN+2); 6342 fft_ab_cd(rb,sb,rd,sa,expoN,B,tmp,tmpqz); 6343 fft2rl(&B.front(),n,r,l,&work.front(),false,tmp1,tmp2,tmpqz); 6344 fft2rldiv(B,expoN,expoN-l-1,tmp,tmpqz); 6345 trim_inplace(B,&env); 6346 // C=trim(RA*SD+RC*SC,&env); 6347 reverse_assign(SC,sa,n,expoN+2); 6348 
fft2rl(&sa.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6349 reverse_assign(SD,sb,n,expoN+2); 6350 fft2rl(&sb.front(),n,r,l,&work.front(),true,tmp1,tmp2,tmpqz); 6351 reverse_resize(C,n,expoN+2); 6352 fft_ab_cd(ra,sb,rc,sa,expoN,C,tmp,tmpqz); 6353 fft2rl(&C.front(),n,r,l,&work.front(),false,tmp1,tmp2,tmpqz); 6354 fft2rldiv(C,expoN,expoN-l-1,tmp,tmpqz); 6355 trim_inplace(C,&env); 6356 // D=trim(RB*SD+RD*SC,&env); 6357 reverse_resize(D,n,expoN+2); 6358 fft_ab_cd(rb,sb,rd,sa,expoN,D,tmp,tmpqz); 6359 fft2rl(&D.front(),n,r,l,&work.front(),false,tmp1,tmp2,tmpqz); 6360 fft2rldiv(D,expoN,expoN-l-1,tmp,tmpqz); 6361 trim_inplace(D,&env); 6362 mpz_clear(tmpqz); mpz_clear(tmp); 6363 doit=false; 6364 //Adbg=A; Bdbg=B; Cdbg=C; Ddbg=D; 6365 } 6366 if (doit){ 6367 vector<int> Wp1,Wp2,Wp3; 6368 multi_fft_rep raf; to_multi_fft(RA,env.modulo,Wp1,Wp2,Wp3,n,raf,true,true); 6369 //from_multi_fft(raf,Wp1,Wp2,Wp3,a,true); trim_inplace(a,&env); 6370 multi_fft_rep rbf; to_multi_fft(RB,env.modulo,Wp1,Wp2,Wp3,n,rbf,true,true); 6371 multi_fft_rep rcf; to_multi_fft(RC,env.modulo,Wp1,Wp2,Wp3,n,rcf,true,true); 6372 multi_fft_rep rdf; to_multi_fft(RD,env.modulo,Wp1,Wp2,Wp3,n,rdf,true,true); 6373 multi_fft_rep saf; to_multi_fft(SA,env.modulo,Wp1,Wp2,Wp3,n,saf,true,true); 6374 multi_fft_rep sbf; to_multi_fft(SB,env.modulo,Wp1,Wp2,Wp3,n,sbf,true,true); 6375 multi_fft_rep resf; 6376 multi_fft_ab_cd(raf,sbf,rcf,saf,resf); 6377 from_multi_fft(resf,Wp1,Wp2,Wp3,A,true); 6378 trim_inplace(A,&env); 6379 multi_fft_ab_cd(rbf,sbf,rdf,saf,resf); 6380 from_multi_fft(resf,Wp1,Wp2,Wp3,B,true); 6381 trim_inplace(B,&env); 6382 to_multi_fft(SC,env.modulo,Wp1,Wp2,Wp3,n,saf,true,true); 6383 to_multi_fft(SD,env.modulo,Wp1,Wp2,Wp3,n,sbf,true,true); 6384 multi_fft_ab_cd(raf,sbf,rcf,saf,resf); 6385 from_multi_fft(resf,Wp1,Wp2,Wp3,C,true); 6386 trim_inplace(C,&env); 6387 multi_fft_ab_cd(rbf,sbf,rdf,saf,resf); 6388 from_multi_fft(resf,Wp1,Wp2,Wp3,D,true); 6389 trim_inplace(D,&env); 6390 doit=false; 6391 } 6392 } 6393 if 
(doit){ 6394 ab_cd(-1,RA,SB,RC,SA,&env,A,tmp1,tmp2); // A=trim(RA*SB+RC*SA,&env); 6395 ab_cd(-1,RB,SB,RD,SA,&env,B,tmp1,tmp2); // B=trim(RB*SB+RD*SA,&env); 6396 ab_cd(-1,RA,SD,RC,SC,&env,C,tmp1,tmp2); // C=trim(RA*SD+RC*SC,&env); 6397 ab_cd(-1,RB,SD,RD,SC,&env,D,tmp1,tmp2); // D=trim(RB*SD+RD*SC,&env); 6398 // if (!doit && (A!=Adbg || B!=Bdbg || C!=Cdbg || D!=Ddbg)) COUT << "error" << '\n'; 6399 } 6400 } 6401 6402 #define HGCD_DIV 1 6403 6404 // coeffv and degv are used by resultant (otherwise they are left empty) 6405 // coeffv is the list of leading coefficients of the remainder sequence 6406 // degv is the list of degrees of the remainder sequence 6407 // q,f, are temporary hgcdint(const vector<int> & a0,const vector<int> & a1,int modulo,vector<int> & Wp,vector<int> & A,vector<int> & B,vector<int> & C,vector<int> & D,vector<int> & coeffv,vector<int> & degv,vector<int> & q,vector<int> & f,vector<int> & tmp0,vector<int> & tmp1,vector<int> & tmp2,vector<int> & tmp3)6408 bool hgcdint(const vector<int> & a0,const vector<int> & a1,int modulo,vector<int> & Wp,vector<int> &A,vector<int> &B,vector<int> &C,vector<int> &D,vector<int> & coeffv,vector<int> & degv,vector<int> & q,vector<int> & f,vector<int> & tmp0,vector<int> & tmp1,vector<int> & tmp2,vector<int> & tmp3){ // a0 is A in Yap, a1 is B 6409 vector<int> & g0=tmp2,&g1=tmp3; 6410 int dega0=a0.size()-1,dega1=a1.size()-1; 6411 int m=(dega0+1)/2; 6412 if (dega1<m){ 6413 D=A=vector<int>(1,1); 6414 B.clear(); C.clear(); 6415 return true; 6416 } 6417 if (m<HGCD){ 6418 hgcd_iter_int(a0,a1,m,A,C,B,D,modulo,coeffv,degv,q,f,tmp0,tmp1,tmp2,tmp3); 6419 return true; 6420 } 6421 //q.reserve(a0.size()); f.reserve(a0.size()); tmp0.reserve(a0.size()); tmp1.reserve(a0.size()); tmp2.reserve(a0.size()); tmp3.reserve(a0.size()); A.reserve(a0.size()); B.reserve(a0.size()); C.reserve(a0.size()); D.reserve(a0.size()); 6422 vector<int> b0,b1; b0.reserve(nextpow2(a0.size()-m)); b1.reserve(nextpow2(a0.size()-m)); 6423 b0.resize(a0.size()-m); 
copy(a0.begin(),a0.end()-m,b0.begin()); // quo(a0,x^m), A0 in Yap 6424 b1.resize(a1.size()-m); copy(a1.begin(),a1.end()-m,b1.begin()); // quo(a1,x^m), B0 in Yap 6425 // 1st recursive call 6426 vector<int> RA,RB,RC,RD; 6427 if (debug_infolevel>2) 6428 CERR << CLOCK()*1e-6 << " hgcdint 1st recursive call " << dega0 << "," << dega1 << '\n'; 6429 if (!hgcdint(b0,b1,modulo,Wp,RA,RB,RC,RD,coeffv,degv,tmp0,tmp1,A,B,C,D)) 6430 return false; 6431 if (debug_infolevel>2) 6432 CERR << CLOCK()*1e-6 << " hgcdint compute A' B' " << dega0 << "," << dega1 << '\n'; 6433 int maxadeg=dega0+1-giacmax(RA.size(),RB.size()),maxbdeg=dega0-m/2; 6434 //if (modulo==2112626689 && RA.size()==62 && RA[0]==390746818) CERR << "debug"; 6435 matrix22inttimesvect(RA,RB,RC,RD,a0,a1,maxadeg,maxbdeg,b0,b1,modulo,tmp0,tmp1,tmp2,tmp3,Wp); 6436 int dege=b1.size()-1; 6437 if (dege<m){ 6438 A.swap(RA); B.swap(RB); C.swap(RC); D.swap(RD); 6439 return true; 6440 // A=RA; B=RB; C=RC; D=RD; return true; 6441 } 6442 if (dege>=b0.size()-1){ 6443 COUT << "hgcdint error modulo " << modulo << '\n'; 6444 return false; 6445 } 6446 if (debug_infolevel>2) 6447 CERR << CLOCK()*1e-6 << " hgcdint euclid div " << dega0 << "," << dega1 << '\n'; 6448 // 1 euclidean step 6449 if (!degv.empty()){ 6450 coeffv.push_back(b1[0]); 6451 degv.push_back(degv.back()+b1.size()-b0.size()); 6452 } 6453 DivRem(b0,b1,modulo,q,f); // q,f are Q,D in Yap 6454 // [[0,1],[1,-q]]*[[RA,RB],[RC,RD]] == [[RC,RD],[-RC*q+RA,-RD*q+RB]] 6455 a_bc(RA,RC,q,modulo,RA,b0); // RA=trim(RA-RC*q,&env); 6456 a_bc(RB,RD,q,modulo,RB,b0); // RB=trim(RB-RD*q,&env); 6457 int l=b1.size()-1,k=2*m-l; 6458 if (f.size()-1<m){ 6459 A.swap(RC); B.swap(RD); C.swap(RA); D.swap(RB); 6460 return true; 6461 } 6462 g0.resize(b1.size()-k); 6463 copy(b1.begin(),b1.end()-k,g0.begin()); // vector<int> g0(b1.begin(),b1.end()-k); // quo(b,x^k), C0 in Yap 6464 if (f.size()>k){ 6465 g1.resize(f.size()-k); 6466 copy(f.begin(),f.end()-k,g1.begin()); // quo(f,x^k), D0 in Yap 6467 } 6468 
vector<int> & SA=b0, &SB=b1,&SC=q,&SD=f; 6469 if (debug_infolevel>2) 6470 CERR << CLOCK()*1e-6 << " hgcdint 2nd recursive call " << dega0 << "," << dega1 << '\n'; 6471 if (!hgcdint(g0,g1,modulo,Wp,SA,SB,SC,SD,coeffv,degv,tmp0,tmp1,A,B,C,D)) 6472 return false; 6473 if (debug_infolevel>2) 6474 CERR << CLOCK()*1e-6 << " hgcdint end 2nd recursive call " << dega0 << "," << dega1 << '\n'; 6475 matrix22int(RA,RB,RC,RD,SA,SB,SC,SD,A,B,C,D,modulo,tmp0,Wp); 6476 if (debug_infolevel>2) 6477 CERR << CLOCK()*1e-6 << " hgcdint end " << dega0 << "," << dega1 << '\n'; 6478 return true; 6479 } 6480 6481 // a0.size() must be > a1.size() 6482 // Computes the coefficient of a transition matrix M=[[A,B],[C,D]] 6483 // such that M*[a0,a1]=[a_k,a_(k+1)] where 6484 // degree(a_k) >= m > degree(a_(k+1)), m=degree(a_0)/2 top rounded 6485 // if degrees are small, use iterative extended Euclide and computes 6486 // a=A*a0+B*a1 and b=C*a0+D*a1 6487 // otherwise a is empty (and b also) 6488 // https://pdfs.semanticscholar.org/a7e7/b01a3dd6ac0ec160b35e513c5efa38c2369e.pdf (Yap half gcd algorithm lecture p.59) hgcd(const modpoly & a0,const modpoly & a1,const gen & modulo,modpoly & A,modpoly & B,modpoly & C,modpoly & D,modpoly & a,modpoly & b,modpoly & tmp1,modpoly & tmp2)6489 bool hgcd(const modpoly & a0,const modpoly & a1,const gen & modulo,modpoly &A,modpoly &B,modpoly &C,modpoly &D,modpoly & a,modpoly & b,modpoly & tmp1,modpoly & tmp2){ // a0 is A in Yap, a1 is B 6490 a.clear(); b.clear(); 6491 int dega0=a0.size()-1,dega1=a1.size()-1; 6492 int m=(dega0+1)/2; 6493 if (dega1<m){ 6494 D=A=makevecteur(1); 6495 B.clear(); C.clear(); 6496 a=a0; b=a1; 6497 return true; 6498 } 6499 if (modulo.type==_INT_ 6500 #ifndef INT128 6501 && dega0*double(modulo.val)*modulo.val<(1ULL<<63) 6502 #endif 6503 ){ 6504 int p=modulo.val; 6505 vector<int> Wp,a0i,a1i,Ai,Bi,Ci,Di,coeffv,degv,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6; 6506 vecteur2vector_int(a0,p,a0i); 6507 vecteur2vector_int(a1,p,a1i); 6508 if 
(hgcdint(a0i,a1i,p,Wp,Ai,Bi,Ci,Di,coeffv,degv,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6)){ 6509 vector_int2vecteur(Ai,A); 6510 vector_int2vecteur(Bi,B); 6511 vector_int2vecteur(Ci,C); 6512 vector_int2vecteur(Di,D); 6513 return true; 6514 } 6515 } 6516 environment env; 6517 env.modulo=modulo; 6518 env.moduloon=true; 6519 if ( 6520 (modulo.type==_INT_ && m<64) || // m<64 seems optimal 6521 m<HGCD 6522 ){ 6523 if (debug_infolevel>2) 6524 CERR << CLOCK()*1e-6 << " halfgcd iter m=" << m << " dega0/a1 " << dega0 << "," << dega1 << '\n'; 6525 if (modulo.type==_INT_ && modulo.val 6526 #ifndef INT128 6527 && dega0*double(modulo.val)*modulo.val<(1ULL<<63) 6528 #endif 6529 ){ 6530 vector<int> a0i,b0i,ua,ub,va,vb,coeffv,degv,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5; 6531 int p=modulo.val; 6532 vecteur2vector_int(a0,p,a0i); 6533 vecteur2vector_int(a1,p,b0i); 6534 hgcd_iter_int(a0i,b0i,m,ua,ub,va,vb,p,coeffv,degv,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5); 6535 vector_int2vecteur(ua,A); 6536 vector_int2vecteur(ub,C); 6537 vector_int2vecteur(va,B); 6538 vector_int2vecteur(vb,D); 6539 return true; 6540 } 6541 if (egcd_mpz(a0,a1,m,modulo,C,D,b,&A,&B,&a)){ 6542 //CERR << a0 << " " << a1 << " " << A << " " << B << " " << C << " " << D << '\n'; 6543 if (debug_infolevel>2) 6544 CERR << CLOCK()*1e-6 << " halfgcd mpz iter end" << dega0 << "," << dega1 << '\n'; 6545 return true; 6546 } 6547 // limit on m depends on modulo (smaller limit is faster for large modulo) 6548 modpoly q,r,tmp; 6549 a=a0; 6550 b=a1; 6551 // initializes ua to 1 and ub to 0, the coeff of u in ua*a+va*b=a 6552 modpoly ua(one()),ub,va,vb(one()),ur,vr; 6553 // DivRem: a = bq+r 6554 // hence ur <- ua-q*ub, vr <- va-q*vb verify 6555 // ur*a+vr*b=r 6556 // a <- b, b <- r, ua <- ub and ub<- ur 6557 for (;;){ 6558 int n=int(b.size())-1; 6559 if (n<m){ // degree(b) is small enough 6560 #ifdef HGCD_DIV 6561 va=operator_div(operator_minus(a,operator_times(ua,a0,&env),&env),a1,&env); // ua*a0+va*b0=a 6562 
vb=operator_div(operator_minus(b,operator_times(ub,a0,&env),&env),a1,&env); // ub*a0+vb*b0=b 6563 #endif 6564 //if (ua!=A) 6565 A.swap(ua); 6566 //if (va!=B) 6567 B.swap(va); 6568 //if (ub!=C) 6569 C.swap(ub); 6570 //if (vb!=D) 6571 D.swap(vb); 6572 if (debug_infolevel>2) 6573 CERR << CLOCK()*1e-6 << " halfgcd iter end" << dega0 << "," << dega1 << '\n'; 6574 //CERR << a0 << " " << a1 << " " << A << " " << B << " " << C << " " << D << '\n'; 6575 return true; 6576 } 6577 DivRem(a,b,&env,q,r); // division works always 6578 operator_times(q,ub,&env,tmp); submodpoly(ua,tmp,&env,ur); // ur=ua-q*ub; 6579 #ifndef HGCD_DIV 6580 operator_times(q,vb,&env,tmp); submodpoly(va,tmp,&env,vr); // vr=va-q*vb; 6581 swap(va,vb); swap(vb,vr); // va=vb; vb=vr; 6582 #endif 6583 swap(a,b); swap(b,r); // a=b; b=r; 6584 swap(ua,ub); swap(ub,ur); // ua=ub; ub=ur; 6585 } 6586 return false; // never reached 6587 } 6588 // 1st recursive call 6589 modpoly b0(a0.begin(),a0.end()-m); // quo(a0,x^m), A0 in Yap 6590 modpoly b1(a1.begin(),a1.end()-m); // quo(a1,x^m), B0 in Yap 6591 modpoly RA,RB,RC,RD,q,f; 6592 if (debug_infolevel>2) 6593 CERR << CLOCK()*1e-6 << " halfgcd 1st recursive call " << dega0 << "," << dega1 << '\n'; 6594 if (!hgcd(b0,b1,modulo,RA,RB,RC,RD,a,b,tmp1,tmp2)) 6595 return false; 6596 if (debug_infolevel>2) 6597 CERR << CLOCK()*1e-6 << " halfgcd compute A' B' " << dega0 << "," << dega1 << '\n'; 6598 int maxadeg=dega0+1-giacmax(RA.size(),RB.size()),maxbdeg=dega0-m/2; 6599 matrix22timesvect(RA,RB,RC,RD,a0,a1,maxadeg,maxbdeg,a,b,env,tmp1,tmp2); 6600 int dege=b.size()-1; 6601 if (dege<m){ 6602 A.swap(RA); B.swap(RB); C.swap(RC); D.swap(RD); 6603 a.clear(); b.clear(); 6604 return true; 6605 // A=RA; B=RB; C=RC; D=RD; return true; 6606 } 6607 if (dege>=a.size()-1) 6608 COUT << "hgcd error" << '\n'; 6609 if (debug_infolevel>2) 6610 CERR << CLOCK()*1e-6 << " halfgcd euclid div " << dega0 << "," << dega1 << '\n'; 6611 // 1 euclidean step 6612 if (!DivRem(a,b,&env,q,f)) // q,f are Q,D in 
Yap 6613 return false; 6614 // [[0,1],[1,-q]]*[[RA,RB],[RC,RD]] == [[RC,RD],[-RC*q+RA,-RD*q+RB]] 6615 a_bc(RA,RC,q,&env,RA,tmp1); // RA=trim(RA-RC*q,&env); 6616 a_bc(RB,RD,q,&env,RB,tmp1); // RB=trim(RB-RD*q,&env); 6617 int l=b.size()-1,k=2*m-l; 6618 if (f.size()-1<m){ 6619 A.swap(RC); B.swap(RD); C.swap(RA); D.swap(RB); return true; 6620 } 6621 modpoly g0(b.begin(),b.end()-k); // quo(b,x^k), C0 in Yap 6622 modpoly g1; 6623 if (f.size()>k) 6624 g1=modpoly(f.begin(),f.end()-k); // quo(f,x^k), D0 in Yap 6625 modpoly SA,SB,SC,SD; 6626 if (debug_infolevel>2) 6627 CERR << CLOCK()*1e-6 << " halfgcd 2nd recursive call " << dega0 << "," << dega1 << '\n'; 6628 if (!hgcd(g0,g1,modulo,SA,SB,SC,SD,b0,b1,tmp1,tmp2)) 6629 return false; 6630 if (debug_infolevel>2) 6631 CERR << CLOCK()*1e-6 << " halfgcd end 2nd recursive call " << dega0 << "," << dega1 << '\n'; 6632 matrix22(RA,RB,RC,RD,SA,SB,SC,SD,A,B,C,D,env,tmp1,tmp2); 6633 if (debug_infolevel>2) 6634 CERR << CLOCK()*1e-6 << " halfgcd end " << dega0 << "," << dega1 << '\n'; 6635 a.clear(); b.clear(); 6636 return true; 6637 } 6638 6639 // assumes degree(q)>degree(rem) halfgcdmodpoly(modpoly & q,modpoly & rem,environment * env,modpoly & a,modpoly & RA,modpoly & RB,modpoly & RC,modpoly & RD,modpoly & b0,modpoly & b1,modpoly & tmp1,modpoly & tmp2)6640 static bool halfgcdmodpoly(modpoly &q,modpoly & rem,environment * env,modpoly & a,modpoly & RA,modpoly &RB,modpoly &RC,modpoly &RD,modpoly &b0,modpoly & b1,modpoly & tmp1,modpoly & tmp2){ 6641 if (rem.size()<HGCD) 6642 return gcdmodpoly(q,rem,env,a); 6643 // now gcd(q,rem) with q.size()>rem.size() 6644 if (hgcd(q,rem,env->modulo,RA,RB,RC,RD,b0,b1,tmp1,tmp2)){ 6645 if (b0.empty() || b1.empty()){ 6646 int maxadeg=q.size()-giacmax(RA.size(),RB.size()),maxbdeg=q.size()/2; 6647 #if 1 6648 matrix22timesvect(RA,RB,RC,RD,q,rem,maxadeg,maxbdeg,b0,b1,*env,tmp1,tmp2); 6649 #else 6650 //ab_cd(q.size()-1,RA,q,RB,rem,env,b0,quo,tmp); // b0=trim(RA*q+RB*rem,env); 6651 
ab_cd(maxadeg,RA,q,RB,rem,env,b0,tmp1,tmp2); // b0=trim(RA*q+RB*rem,env); 6652 ab_cd(maxbdeg,RC,q,RD,rem,env,b1,tmp1,tmp2); // b1=trim(RC*q+RD*rem,env); 6653 #endif 6654 } 6655 else 6656 ;//CERR << b1.size() << '\n'; 6657 if (b1.empty()){ 6658 a=b0; 6659 mulmodpoly(a,invenv(a.front(),env),env,a); 6660 return true; 6661 } 6662 DivRem(b0,b1,env,tmp1,rem); 6663 if (rem.empty()){ 6664 a=b1; 6665 mulmodpoly(a,invenv(a.front(),env),env,a); 6666 return true; 6667 } 6668 return halfgcdmodpoly(b1,rem,env,a,RA,RB,RC,RD,b0,q,tmp1,tmp2); 6669 } 6670 return false; 6671 } 6672 reim(const modpoly & p,modpoly & pr,modpoly & pi)6673 void reim(const modpoly &p,modpoly & pr,modpoly & pi){ 6674 size_t s=p.size(); 6675 pr.reserve(s); pi.reserve(s); 6676 gen R,I; 6677 for (size_t i=0;i<s;++i){ 6678 reim(p[i],R,I,context0); 6679 pr.push_back(R); 6680 pi.push_back(I); 6681 } 6682 } 6683 gcdmodpoly(const modpoly & p,const modpoly & q,environment * env,modpoly & a)6684 bool gcdmodpoly(const modpoly &p,const modpoly & q,environment * env,modpoly &a){ 6685 if (!env){ 6686 #ifndef NO_STDEXCEPT 6687 setsizeerr(); 6688 #endif 6689 return false; 6690 } 6691 #if 1 // DivRem should be fast, Yap p.54 6692 if (p.size()<q.size()) return gcdmodpoly(q,p,env,a); 6693 // run half_gcd(2) to debug 6694 if (env->moduloon && env->complexe && env->modulo.type==_INT_ && env->modulo.val % 4==1){ 6695 // find a sqrt of -1 mod modulo 6696 // replace i by + or - this sqrt 6697 // find gcd_+ and gcd_-, if same degree 6698 // check gcd=1/2*(gcd_+ + gcd_-)+i/2(gcd_+ - gcd_-) 6699 modpoly pr,pi,qr,qi,p1,p2,q1,q2,g1,g2; 6700 reim(p,pr,pi); 6701 reim(q,qr,qi); 6702 int i=modsqrtminus1(env->modulo.val); 6703 gen ig(i); 6704 p1=pr+ig*pi; p2=pr-ig*pi; 6705 q1=qr+ig*qi; q2=qr-ig*qi; 6706 env->complexe=false; 6707 if (gcdmodpoly(p1,q1,env,g1) && gcdmodpoly(p2,q2,env,g2) && g1.size()==g2.size()){ 6708 env->complexe=true; 6709 a=(g1+g2)-cst_i*ig*(g1-g2); 6710 mulmodpoly(a,invmod(2,env->modulo),env,a); 6711 modpoly & q=g1; 
modpoly & r=g2; 6712 DivRem(p,a,env,q,r); 6713 if (r.empty()){ 6714 DivRem(q,a,env,q,r); 6715 if (r.empty()) 6716 return true; 6717 } 6718 } 6719 env->complexe=true; 6720 } 6721 if (env->moduloon && !env->complexe && p.size()>=HGCD && q.size()>=HGCD){ 6722 modpoly rem,quo; 6723 DivRem(p,q,env,quo,rem); 6724 if (rem.empty()){ 6725 a=q; 6726 mulmodpoly(a,invenv(a.front(),env),env,a); 6727 return true; 6728 } 6729 modpoly Q(q),RA,RB,RC,RD,b0,b1,tmp1,tmp2; 6730 if (debug_infolevel) 6731 CERR << CLOCK()*1e-6 <<" halfgcd begin" << '\n'; 6732 bool b=halfgcdmodpoly(Q,rem,env,a,RA,RB,RC,RD,b0,b1,tmp1,tmp2); 6733 if (debug_infolevel) 6734 CERR << CLOCK()*1e-6 <<" halfgcd end" << '\n'; 6735 return b; 6736 } 6737 #endif 6738 #ifndef EMCC 6739 if (env->moduloon && is_zero(env->coeff) && !env->complexe && env->modulo.type==_INT_ && env->modulo.val < (1 << 15) ){ 6740 gcdsmallmodpoly(p,q,env->modulo.val,a); 6741 return true; 6742 } 6743 #endif 6744 #if 0 6745 if (env->moduloon && is_zero(env->coeff) && !env->complexe && env->modulo.type==_INT_ && env->modulo.val < (1 << 26) ){ 6746 if (gcddoublemodpoly(p,q,env->modulo.val,a)) 6747 return true; 6748 } 6749 #endif 6750 a=p; 6751 modpoly b(q); 6752 modpoly quo,rem; 6753 while (!b.empty()){ 6754 gen s=b.front(); 6755 mulmodpoly(b,invenv(s,env),env,b); 6756 DivRem(a,b,env,quo,rem); 6757 // COUT << "a:" << a << "b:" << b << "q:" << quo << "r:" << rem << '\n'; 6758 swap(a,b); // newa=b, 6759 swap(b,rem); // newb=rem 6760 } 6761 if (!a.empty()) 6762 mulmodpoly(a,invenv(a.front(),env),env,a); 6763 return true; 6764 } 6765 6766 // compute gcd of p and q mod m, result in d gcdsmallmodpoly(const vector<int> & p,const vector<int> & q,int m,vector<int> & d)6767 void gcdsmallmodpoly(const vector<int> &p,const vector<int> & q,int m,vector<int> & d){ 6768 gcdsmallmodpoly(p,q,m,d,0,0); 6769 return; 6770 #if 0 6771 int as=int(p.size()),bs=int(q.size()); 6772 if (!as){ d=q; return ; } 6773 if (!bs){ d=p; return ; } 6774 #if defined VISUALC || 
defined BESTA_OS
  // NOTE: everything down to the closing brace of this overload sits after an
  // unconditional return and inside a disabled preprocessor region.
  int *asave=new int[as], *a=asave,*aend=a+as,*qcur=0;
  int *bsave=new int[bs], *b=bsave,*bend=b+bs;
#else
  int asave[as], *a=asave,*aend=a+as,*qcur=0;
  int bsave[bs], *b=bsave,*bend=b+bs;
#endif
  memcpy(a,&*p.begin(),as*sizeof(int));
  memcpy(b,&*q.begin(),bs*sizeof(int));
  int * t;
  for (;b!=bend;){
    rem(a,aend,b,bend,m,qcur,0);
    t=a; a=b; b=t;
    t=aend; aend=bend; bend=t;
  }
  d.clear();
  d.reserve(aend-a);
  int ainv=1;
  if (a!=aend)
    ainv=invmod(*a,m);
  if (m>=46340){
    for (;a!=aend;++a){
      d.push_back(smod((*a)*longlong(ainv),m));
    }
  }
  else {
    for (;a!=aend;++a){
      d.push_back(smod((*a)*ainv,m));
    }
  }
#if defined VISUALC || defined BESTA_OS
  delete [] asave;
  delete [] bsave;
#endif
#endif
}

// Computes d=gcd(p,q) modulo m, scaled by the modular inverse of its first
// coefficient. pcof and qcof may be null.
// If p is empty: d=q, *qcof is set to [1] and *pcof is cleared.
// If q is empty: d=p, *pcof is set to [1] and *qcof is cleared.
// Otherwise, when pcof (resp. qcof) is non-null it receives the range
// [qcur,qend) left by rem(...,true) after dividing p (resp. q) by the gcd,
// presumably the quotient p/d (resp. q/d) -- TODO confirm against rem().
void gcdsmallmodpoly(const vector<int> &p,const vector<int> & q,int m,vector<int> & d,vector<int> * pcof,vector<int> * qcof){
  int as=int(p.size()),bs=int(q.size());
  if (!as){
    // p==0, pcof is undefined
    if (pcof)
      pcof->clear();
    d=q;
    if (qcof){
      qcof->clear();
      qcof->push_back(1);
    }
    return ;
  }
  if (!bs){
    // q==0
    if (qcof)
      qcof->clear();
    d=p;
    if (pcof){
      pcof->clear();
      pcof->push_back(1);
    }
    return ;
  }
  // both work buffers get max(as,bs) entries so that either one can later
  // hold a copy of p or of q for the cofactor divisions
  int ms=std::max(as,bs);
#if defined VISUALC || defined BESTA_OS
  int *asave=new int[ms], *a=asave,*aend=a+as,*qcur=0,*qend;
  int *bsave=new int[ms], *b=bsave,*bend=b+bs;
#else
  int asave[ms], *a=asave,*aend=a+as,*qcur=0,*qend;
  int bsave[ms], *b=bsave,*bend=b+bs;
#endif
  // swapab is toggled at each iteration, together with the exchange of a and b
  bool swapab=false;
  memcpy(a,&*p.begin(),as*sizeof(int));
  memcpy(b,&*q.begin(),bs*sizeof(int));
  int * t;
  // remainder sequence: loop until [b,bend) is empty
  for (;b!=bend;swapab=!swapab){
    rem(a,aend,b,bend,m,qcur,0);
    t=a; a=b; b=t;
    t=aend; aend=bend; bend=t;
  }
  if (a==aend){ // should not happen!
    // d, *pcof and *qcof are left unchanged on this path
#if defined VISUALC || defined BESTA_OS
    delete [] asave;
    delete [] bsave;
#endif
    return;
  }
  // normalize gcd
  int ainv=1;
  ainv=invmod(*a,m);
  if (ainv!=1){
    for (int * acur=a;acur!=aend;++acur)
      *acur = smod((*acur)*longlong(ainv),m);
  }
#if defined VISUALC || defined BESTA_OS
  int * cof=new int[ms];
#else
  int cof[ms];
#endif
  // find p cofactor
  if (pcof){
    qcur=cof;
    qend=cof+ms;
    // after an odd number of exchanges the gcd lies in bsave, otherwise in
    // asave: pick the other buffer as scratch
    b=swapab?asave:bsave;
    bend=b+as;
    memcpy(b,&*p.begin(),as*sizeof(int));
    rem(b,bend,a,aend,m,qcur,qend,true);
    pcof->clear();
    pcof->reserve(qend-qcur);
    for (;qcur!=qend;++qcur)
      pcof->push_back(*qcur);
  }
  // find q cofactor, same method
  if (qcof){
    qcur=cof;
    qend=cof+ms;
    b=swapab?asave:bsave;
    bend=b+bs;
    memcpy(b,&*q.begin(),bs*sizeof(int));
    rem(b,bend,a,aend,m,qcur,qend,true);
    qcof->clear();
    qcof->reserve(qend-qcur);
    for (;qcur!=qend;++qcur)
      qcof->push_back(*qcur);
  }
  // copy the normalized gcd to d
  d.clear();
  d.reserve(aend-a);
  for (;a!=aend;++a)
    d.push_back(*a);
  // CERR << d << " " << pcof << " " << p << '\n';
  // CERR << d << " " << qcof << " " << q << '\n';
#if defined VISUALC || defined BESTA_OS
  delete [] asave;
  delete [] bsave;
  delete [] cof;
#endif
}

// debugging helper: prints a on COUT
static void dbgp(const modpoly & a){
  COUT << a << '\n';
}

// Accumulates in gcd the gcd of gcd with the coefficients of p.
// Stops with gcd=[1] as soon as gcd has size 1 or a coefficient of p is
// not a vector.
static bool content_mod(const polynome & p,vecteur & gcd,environment * env){
  vector< monomial<gen> >::const_iterator it=p.coord.begin(),itend=p.coord.end();
  for (;it!=itend;++it){
    if (gcd.size()==1 || it->value.type!=_VECT){
      gcd=vecteur(1,1);
      break;
    }
    gcdmodpoly(gcd,*it->value._VECTptr,env,gcd);
    if
(is_undef(gcd)) 6922 return false; 6923 } 6924 return true; 6925 } 6926 hornermod(const vecteur & v,const gen & alpha,const gen & modulo)6927 gen hornermod(const vecteur & v,const gen & alpha,const gen & modulo){ 6928 gen res; 6929 const_iterateur it=v.begin(),itend=v.end(); 6930 for (;it!=itend;++it){ 6931 res = smod(res*alpha+*it,modulo); 6932 } 6933 return res; 6934 } 6935 hornermod(const vecteur & v,int alpha,int modulo)6936 int hornermod(const vecteur & v,int alpha,int modulo){ 6937 int res=0; 6938 const_iterateur it=v.begin(),itend=v.end(); 6939 for (;it!=itend;++it){ 6940 /* 6941 if (it->type!=_INT_){ 6942 CERR << v << '\n'; 6943 setsizeerr(gen(v).print(context0)); 6944 } 6945 */ 6946 res = (res*alpha+it->val)%modulo; 6947 } 6948 return smod(res,modulo); 6949 } 6950 6951 // eval p at xn=alpha modulo pevaln(const polynome & p,const gen & alpha,const gen & modulo,index_t * & degptr,bool estreel)6952 static polynome pevaln(const polynome & p,const gen & alpha,const gen & modulo,index_t * & degptr,bool estreel){ 6953 int a=0,m=0,dim=p.dim; 6954 bool nonmod = is_zero(modulo); 6955 bool smallmod = estreel && alpha.type==_INT_ && modulo.type==_INT_ && (m=modulo.val)<46340 && (a=alpha.val)<46340 ; 6956 vector< monomial<gen> >::const_iterator it=p.coord.begin(),itend=p.coord.end(); 6957 polynome res(dim); 6958 res.coord.reserve(itend-it); 6959 gen tmp; 6960 for (;it!=itend;++it){ 6961 if (it->value.type==_VECT) 6962 tmp=nonmod?horner(*it->value._VECTptr,alpha):(smallmod?hornermod(*it->value._VECTptr,a,m):hornermod(*it->value._VECTptr,alpha,modulo)); 6963 else 6964 tmp=it->value; // smod(it->value,modulo); 6965 if (!is_zero(tmp)) 6966 res.coord.push_back(monomial<gen>(tmp,it->index)); 6967 else { 6968 if (degptr){ 6969 // if one of the indices of it->index is the same as *degptr 6970 // the lcoeff with respect to this variable may vanish 6971 for (int i=0;i<dim;++i){ 6972 if ((*degptr)[i]==(it->index)[i]){ 6973 degptr=0; 6974 break; 6975 } 6976 } 6977 } 6978 } 6979 } 
6980 return res; 6981 } 6982 divmod(polynome & p,const vecteur & v,environment * env)6983 static bool divmod(polynome & p,const vecteur & v,environment * env){ 6984 if (v.size()==1){ 6985 if (!is_one(v.front())){ 6986 if (!env || !env->moduloon || !is_zero(env->coeff)) 6987 return false; // setsizeerr(); 6988 p=invmod(v.front(),env->modulo)*p; 6989 } 6990 } 6991 else { 6992 vector< monomial<gen> >::iterator it=p.coord.begin(),itend=p.coord.end(); 6993 vecteur q,r; 6994 for (;it!=itend;++it){ 6995 if (it->value.type!=_VECT) 6996 return false; // setsizeerr(); 6997 DivRem(*it->value._VECTptr,v,env,q,r); 6998 it->value=gen(q,_POLY1__VECT); 6999 } 7000 } 7001 return true; 7002 } 7003 pp_mod(polynome & p,vecteur & v,environment * env)7004 static bool pp_mod(polynome & p,vecteur & v,environment * env){ 7005 content_mod(p,v,env); 7006 return divmod(p,v,env); 7007 } 7008 7009 // extract xn dependency as a modpoly convert_xn(const polynome & p,polynome & res)7010 static void convert_xn(const polynome & p,polynome & res){ 7011 int dim=p.dim; 7012 res.dim=dim-1; 7013 vector< monomial<gen> >::const_iterator it=p.coord.begin(),itend=p.coord.end(); 7014 index_t old(dim,-1); 7015 vecteur cur; 7016 for (;it!=itend;++it){ 7017 const index_t & curi=it->index.iref(); 7018 old[dim-1]=curi[dim-1]; 7019 if (curi==old){ 7020 cur[curi[dim-1]]=it->value; 7021 } 7022 else { 7023 if (!cur.empty()){ 7024 reverse(cur.begin(),cur.end()); 7025 res.coord.push_back(monomial<gen>(gen(cur,_POLY1__VECT),index_t(old.begin(),old.end()-1))); 7026 } 7027 old=curi; 7028 cur=vecteur(curi[dim-1]+1); 7029 cur[curi[dim-1]]=it->value; 7030 } 7031 } 7032 if (!cur.empty()){ 7033 reverse(cur.begin(),cur.end()); 7034 res.coord.push_back(monomial<gen>(gen(cur,_POLY1__VECT),index_t(old.begin(),old.end()-1))); 7035 } 7036 } 7037 7038 // put back xn dependency as a modpoly convert_back_xn(const polynome & p,polynome & res)7039 static void convert_back_xn(const polynome & p,polynome & res){ 7040 res.coord.clear(); 7041 
int dim=p.dim,deg; 7042 res.dim=dim+1; 7043 vector< monomial<gen> >::const_iterator it=p.coord.begin(),itend=p.coord.end(); 7044 for (;it!=itend;++it){ 7045 index_t i(it->index.iref()); 7046 i.push_back(0); 7047 if (it->value.type!=_VECT) 7048 res.coord.push_back(monomial<gen>(it->value,i)); 7049 else { 7050 const_iterateur jt=it->value._VECTptr->begin(),jtend=it->value._VECTptr->end(); 7051 deg=int(jtend-jt)-1; 7052 for (;jt!=jtend;++jt,--deg){ 7053 if (!is_zero(*jt)){ 7054 i[dim]=deg; 7055 res.coord.push_back(monomial<gen>(*jt,i)); 7056 } 7057 } 7058 } 7059 } 7060 } 7061 7062 /* 7063 void modgcd_bi(const polynome &pxn, const polynome & qxn, int modulo,int gcddeg, polynome & d,polynome & pcofactor,polynome & qcofactor){ 7064 d=polynome(dim-1); 7065 // we are now interpolating G=gcd(p,q)*a poly/x1 7066 // such that the leading coeff of G is Delta 7067 int pdeg(pxn.lexsorted_degree()),qdeg(qxn.lexsorted_degree()); 7068 int delta=min(pdeg,qdeg); 7069 int e=0; // number of evaluations 7070 int alpha=0,ps,qs; 7071 if (debug_infolevel>1) 7072 CERR << "gcdmod find alpha dim " << d.dim << " " << CLOCK() << '\n'; 7073 vector<int> palpha,qalpha,pcofactalpha,qcofactalpha,g,g1; 7074 for (;;++alpha){ 7075 if (alpha==modulo) 7076 setsizeerr(gettext("Modgcd: no suitable evaluation point")); 7077 if (debug_infolevel>1) 7078 CERR << "gcdmod eval " << alpha << " dim " << d.dim << " " << CLOCK() << '\n'; 7079 palpha=pevaln(pxn,alpha,modulo); 7080 if (palpha.empty()) 7081 continue; 7082 if ((ps=palpha.size())==1){ 7083 if (pdeg) 7084 continue; 7085 // gcd may only depend on first var 7086 d=cont; 7087 return; 7088 } 7089 qalpha=pevaln(qxn,alpha,modulo); 7090 if (qalpha.empty()) 7091 continue; 7092 if ((qs=qalpha.size())==1){ 7093 if (qdeg) 7094 continue; 7095 d=cont; 7096 return; 7097 } 7098 if ( ps!=pdeg+1 || qs!=qdeg+1 ) 7099 continue; 7100 // palpha and qalpha are p_prim and q_prim evaluated at xn=alpha 7101 if (debug_infolevel>1) 7102 CERR << "gcdmod gcd at " << alpha << " dim " 
<< d.dim << " " << CLOCK() << '\n'; 7103 gcdsmallmodpoly(palpha,qalpha,modulo,g,pcofactalpha,qcofactalpha); 7104 int gdeg(g.size()-1); 7105 int gcd_plus_delta_deg=gcddeg+Delta.size()-1; 7106 if (gdeg==delta){ 7107 if (debug_infolevel>1) 7108 CERR << "gcdmod interp dim " << d.dim << " " << CLOCK() << '\n'; 7109 g1=g; 7110 mulmodpoly(g1,smod(hornermod(Delta,alpha,modulo)*invmod(g.front(),modulo),modulo),modulo); 7111 smallmodpoly2modpoly(g1-pevaln(d,alpha,modulo),modulo,g1); 7112 mulpoly(g1,smod(invmod(hornermod(interp,alpha,modulo),modulo)*gen(interp,_POLY1__VECT),modulo),g1); 7113 smod(d+g1,modulo,d); 7114 interp=operator_times(interp,makevecteur(1,-alpha),&env); 7115 ++e; 7116 if (e>gcddeg 7117 || is_zero(g1) 7118 ){ 7119 if (debug_infolevel) 7120 CERR << "gcdmod pp1mod dim " << d.dim << " " << CLOCK() << '\n'; 7121 polynome pD,QP(dim),QQ(dim),R(d); 7122 vecteur vtmp; 7123 pp_mod(R,vtmp,&env); 7124 convert_back_xn(R,pD); 7125 // This removes the polynomial in x1 that we multiplied by 7126 // (it was necessary to know the lcoeff of the interpolated poly) 7127 if (debug_infolevel) 7128 CERR << "gcdmod check dim " << d.dim << " " << CLOCK() << '\n'; 7129 divremmod(p,pD,modulo,QP,R); 7130 // Now, gcd divides pD for gcddeg+1 values of x1 7131 // degree(pD)<=degree(gcd) 7132 if (R.coord.empty()){ 7133 divremmod(q,pD,modulo,QQ,R); 7134 // If pD divides both P and Q, then the degree wrt variables 7135 // x1,...,xn-1 is the right one (because it is <= since pD 7136 // divides the gcd and >= since pD(xn=one of the try) was a gcd 7137 // The degree in xn is the right one because of the condition 7138 // on the lcoeff 7139 // Note that the division test might be much longer than the 7140 // interpolation itself (e.g. 
if the degree of the gcd is small) 7141 // but it seems unavoidable, for example if 7142 // P=Y-X+X(X-1)(X-2)(X-3) 7143 // Q=Y-X+X(X-1)(X-2)(X-4) 7144 // then gcd(P,Q)=1, but if we take Y=0, Y=1 or Y=2 7145 // we get gcddeg=1 (probably degree 1 for the gcd) 7146 // interpolation at X=0 and X=1 will lead to Y-X as candidate gcd 7147 // and even adding X=2 will not change it 7148 // We might remove division if we compute the cofactors of P and Q 7149 // if P=pD*cofactor is true for degree(P) values of x1 7150 // and same for Q, and the degrees wrt xn of pD and cofactors 7151 // have sum equal to degree of P or Q + lcoeff then pD is the gcd 7152 if (R.coord.empty()){ 7153 pD=pD*cont; 7154 d=smod(pD*invmod(pD.coord.front().value,modulo),modulo); 7155 pcofactor=pcofactor*QP; 7156 pcofactor=smod(p_orig.coord.front().value*invmod(pcofactor.coord.front().value,modulo)*pcofactor,modulo); 7157 qcofactor=qcofactor*QQ; 7158 qcofactor=smod(q_orig.coord.front().value*invmod(qcofactor.coord.front().value,modulo)*qcofactor,modulo); 7159 if (debug_infolevel) 7160 CERR << "gcdmod found dim " << d.dim << " " << CLOCK() << '\n'; 7161 return; 7162 } 7163 } 7164 if (debug_infolevel) 7165 CERR << "Gcdmod bad guess " << '\n'; 7166 continue; 7167 } 7168 else 7169 continue; 7170 } 7171 if (gdeg[0]>delta[0]) 7172 continue; 7173 if (delta[0]>=gdeg[0]){ // restart with g 7174 gcdv=vecteur(1,g); 7175 alphav=vecteur(1,alpha); 7176 delta=gdeg; 7177 d=(g*smod(hornermod(Delta,alpha,modulo),modulo))*invmod(g.coord.front().value,modulo); 7178 e=1; 7179 interp=makevecteur(1,-alpha); 7180 continue; 7181 } 7182 } 7183 */ 7184 degree_xn(const polynome & p)7185 static int degree_xn(const polynome & p){ 7186 int res=1; 7187 vector< monomial<gen> >::const_iterator it=p.coord.begin(),itend=p.coord.end(); 7188 for (;it!=itend;++it){ 7189 if (it->value.type==_VECT) 7190 res=giacmax(res,int(it->value._VECTptr->size())); 7191 } 7192 return res-1; 7193 } 7194 lcoeff(const polynome & p)7195 inline gen lcoeff(const 
polynome & p){ 7196 return p.coord.empty()?0:p.coord.front().value; 7197 } 7198 7199 // Find non zeros coeffs of p find_nonzero(const modpoly & p,index_t & res)7200 static int find_nonzero(const modpoly & p,index_t & res){ 7201 res.clear(); 7202 const_iterateur it=p.begin(),itend=p.end(); 7203 res.reserve(itend-it); 7204 if (it==itend) 7205 return 0; 7206 int nzeros=0; 7207 for (;it!=itend;++it){ 7208 if (is_zero(*it)){ 7209 res.push_back(0); 7210 ++nzeros; 7211 } 7212 else 7213 res.push_back(1); 7214 } 7215 return nzeros; 7216 } 7217 make_modprimitive_xn(polynome & p,const gen & modulo,polynome & content)7218 static void make_modprimitive_xn(polynome & p,const gen & modulo,polynome & content){ 7219 int dim=p.dim,pxns=1; 7220 vector< polynome > pxn(1,polynome(dim)); 7221 polynome d(dim),pcof(dim),qcof(dim); 7222 // fill pxn (not sorted) 7223 vector< monomial<gen> >::const_iterator it=p.coord.begin(),itend=p.coord.end(); 7224 for (;it!=itend;++it){ 7225 if (it->value.type!=_VECT) 7226 pxn[0].coord.push_back(*it); 7227 else { 7228 vecteur & v=*it->value._VECTptr; 7229 int j=int(v.size())-1; 7230 if (j>=0){ 7231 for (;j>=pxns;++pxns) 7232 pxn.push_back(polynome(dim)); 7233 const_iterateur jt=v.begin(); // ,jtend=v.end(); 7234 for (;j>=0;--j,++jt){ 7235 if (!is_zero(*jt)) 7236 pxn[j].coord.push_back(monomial<gen>(*jt,it->index)); 7237 } 7238 } 7239 } 7240 } 7241 content.dim=dim; 7242 content.coord.clear(); 7243 // now for each polynomial in pxn, sort and find gcd with content 7244 for (int j=pxns-1;j>=0;--j){ 7245 pxn[j].tsort(); 7246 modgcd(content,pxn[j],modulo,d,pcof,qcof,false); 7247 content=d; 7248 if (Tis_constant<gen>(content)){ 7249 content.coord.front().value=1; 7250 return; 7251 } 7252 } 7253 polynome q,r; 7254 divremmod(p,content,modulo,q,r); 7255 p=q; 7256 } 7257 convert(const polynome & p_orig,const polynome & q_orig,index_t & d,std::vector<hashgcd_U> & vars,std::vector<T_unsigned<gen,hashgcd_U>> & p,std::vector<T_unsigned<gen,hashgcd_U>> & q)7258 bool 
convert(const polynome &p_orig, const polynome & q_orig,index_t & d,std::vector<hashgcd_U> & vars,std::vector< T_unsigned<gen,hashgcd_U> > & p,std::vector< T_unsigned<gen,hashgcd_U> > & q){ 7259 int dim=p_orig.dim; 7260 index_t pdeg(p_orig.degree()),qdeg(q_orig.degree()),pqdeg(pdeg+qdeg); 7261 // convert p_orig and q_orig to vector< T_unsigned<gen,hashgcd_U> > 7262 // using pqdeg (instead of max(pdeg,qdeg) because of gcd(lcoeff(p),lcoeff(q))) 7263 // additional factor 2 since computing cofactors require more 7264 ulonglong ans=1; 7265 for (int i=0;i<dim;++i){ 7266 d[i]=2*(pdeg[i]+qdeg[i]+1); 7267 int j=1; 7268 // round to next power of 2 7269 for (;;j++){ 7270 if (!(d[i] >>= 1)) 7271 break; 7272 } 7273 d[i] = 1 << j; 7274 ans = ans*unsigned(d[i]); 7275 if (ans/RAND_MAX>=1){ 7276 return false; 7277 } 7278 } 7279 vars[dim-1]=1; 7280 for (int i=dim-2;i>=0;--i){ 7281 vars[i]=d[i+1]*vars[i+1]; 7282 } 7283 convert<gen,hashgcd_U>(p_orig,d,p); 7284 convert<gen,hashgcd_U>(q_orig,d,q); 7285 return true; 7286 } 7287 gcd_modular(const polynome & p_orig,const polynome & q_orig,polynome & pgcd,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors)7288 bool gcd_modular(const polynome &p_orig, const polynome & q_orig, polynome & pgcd,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors){ 7289 if (debug_infolevel>1) 7290 CERR << "gcd modular algo begin " << CLOCK() << '\n'; 7291 int dim=p_orig.dim; 7292 vector< T_unsigned<gen,hashgcd_U> > p,q,g,pcof,qcof; 7293 index_t d(dim); 7294 std::vector<hashgcd_U> vars(dim); 7295 if (dim==1 || p_orig.coord.empty() || is_one(q_orig) || q_orig.coord.empty() || is_one(p_orig) || !convert(p_orig,q_orig,d,vars,p,q) || !gcd(p,q,g,pcof,qcof,vars,compute_cofactors,threads)){ 7296 if (&pcofactor!=&p_orig) pcofactor=p_orig; 7297 if (&qcofactor!=&q_orig) qcofactor=q_orig; 7298 return gcd_modular_algo(pcofactor,qcofactor,pgcd,compute_cofactors); 7299 } 7300 convert_from<gen,hashgcd_U>(g,d,pgcd); 7301 
pgcd.dim=qcofactor.dim=pcofactor.dim=dim; 7302 if (compute_cofactors){ 7303 convert_from<gen,hashgcd_U>(pcof,d,pcofactor); 7304 convert_from<gen,hashgcd_U>(qcof,d,qcofactor); 7305 } 7306 return true; 7307 } 7308 convert(const polynome & p_orig,const polynome & q_orig,index_t & d,std::vector<hashgcd_U> & vars,std::vector<T_unsigned<int,hashgcd_U>> & p,std::vector<T_unsigned<int,hashgcd_U>> & q,int modulo)7309 bool convert(const polynome &p_orig, const polynome & q_orig,index_t & d,std::vector<hashgcd_U> & vars,std::vector< T_unsigned<int,hashgcd_U> > & p,std::vector< T_unsigned<int,hashgcd_U> > & q,int modulo){ 7310 int dim=p_orig.dim; 7311 index_t pdeg(p_orig.degree()),qdeg(q_orig.degree()); 7312 // convert p_orig and q_orig to vector< T_unsigned<int,hashgcd_U> > 7313 // using pqdeg (instead of max(pdeg,qdeg) because of gcd(lcoeff(p),lcoeff(q))) 7314 // additional factor 2 since computing cofactors require more 7315 double ans=1; 7316 d.clear(); 7317 d.reserve(dim); 7318 for (int i=0;i<dim;++i){ 7319 d.push_back(2*(pdeg[i]+qdeg[i]+1)); 7320 if (d[i]<0) 7321 return false; 7322 int j=1; 7323 // round to next power of 2 7324 for (;;j++){ 7325 if (!(d[i] >>= 1)) 7326 break; 7327 } 7328 d[i] = 1 << j; 7329 ans = ans*unsigned(d[i]); 7330 if (ans/RAND_MAX>=(sizeof(hashgcd_U)==4?1:RAND_MAX)) // 1 if hashgcd_U is unsigned int 7331 return false; 7332 } 7333 vars=std::vector<hashgcd_U>(dim); 7334 vars[dim-1]=1; 7335 for (int i=dim-2;i>=0;--i){ 7336 vars[i]=d[i+1]*vars[i+1]; 7337 } 7338 if (!convert_int32(p_orig,d,p,modulo) || !convert_int32(q_orig,d,q,modulo) ) 7339 return false; 7340 return true; 7341 } 7342 mod_gcd(const polynome & p_orig,const polynome & q_orig,const gen & modulo,polynome & pgcd,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors)7343 bool mod_gcd(const polynome &p_orig, const polynome & q_orig, const gen & modulo, polynome & pgcd,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors){ 7344 if (debug_infolevel) 7345 CERR << "modgcd 
begin " << CLOCK()*1e-6 << '\n'; 7346 int dim=p_orig.dim; 7347 if ( dim==1 || p_orig.coord.empty() || is_one(q_orig) || q_orig.coord.empty() || is_one(p_orig) || modulo.type!=_INT_ ){ 7348 return mod_gcd_c(p_orig,q_orig,modulo,pgcd,pcofactor,qcofactor,compute_cofactors); 7349 } 7350 if (debug_infolevel) 7351 CERR << "modgcd begin dim>=2 " << CLOCK()*1e-6 << '\n'; 7352 std::vector<hashgcd_U> vars(dim); 7353 vector< T_unsigned<int,hashgcd_U> > p,q,g,pcof,qcof; 7354 index_t d(dim); 7355 if (!convert(p_orig,q_orig,d,vars,p,q,modulo.val) || !mod_gcd(p,q,modulo.val,g,pcof,qcof,vars,compute_cofactors,threads)) 7356 return mod_gcd_c(p_orig,q_orig,modulo,pgcd,pcofactor,qcofactor,compute_cofactors); 7357 convert_from<int,hashgcd_U>(g,d,pgcd); 7358 pgcd.dim=qcofactor.dim=pcofactor.dim=dim; 7359 if (compute_cofactors){ 7360 convert_from<int,hashgcd_U>(pcof,d,pcofactor); 7361 convert_from<int,hashgcd_U>(qcof,d,qcofactor); 7362 } 7363 return true; 7364 } 7365 modgcd(const polynome & p_orig,const polynome & q_orig,const gen & modulo,polynome & d,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors)7366 bool modgcd(const polynome &p_orig, const polynome & q_orig, const gen & modulo, polynome & d,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors){ 7367 return mod_gcd(p_orig,q_orig,modulo,d,pcofactor,qcofactor,compute_cofactors); 7368 } 7369 mod_gcd_c(const polynome & p_orig,const polynome & q_orig,const gen & modulo,polynome & d,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors)7370 bool mod_gcd_c(const polynome &p_orig, const polynome & q_orig, const gen & modulo, polynome & d,polynome & pcofactor,polynome & qcofactor,bool compute_cofactors){ 7371 if (p_orig.coord.empty() || is_one(q_orig)){ 7372 d=q_orig; 7373 if (compute_cofactors){ 7374 pcofactor=p_orig; 7375 qcofactor=q_orig/d; 7376 } 7377 return true; 7378 } 7379 if (q_orig.coord.empty() || is_one(p_orig)){ 7380 d=p_orig; 7381 if (compute_cofactors){ 7382 qcofactor=q_orig; 7383 
pcofactor=p_orig/d; 7384 } 7385 return true; 7386 } 7387 int dim=p_orig.dim; 7388 d.dim=dim; 7389 pcofactor.dim=dim; 7390 qcofactor.dim=dim; 7391 environment env; 7392 env.modulo=modulo; 7393 env.pn=modulo; 7394 env.moduloon=true; 7395 bool estreel; 7396 if (gcdmod_dim1(p_orig,q_orig,modulo,d,pcofactor,qcofactor,compute_cofactors,estreel)) 7397 return true; 7398 env.complexe=!estreel; 7399 if (debug_infolevel) 7400 CERR << "xn_gcdmod content/x1..xn-1 dim " << dim << " " << CLOCK() << '\n'; 7401 // Make p and q primitive with respect to x1,...,xn-1 7402 // i.e. the coeff of p and q wrt x1,...,xn-1 which are polynomials in xn 7403 // are relative prime 7404 polynome pxn,qxn,ptmp,qtmp,pcofactorxn,qcofactorxn,dxn,cont; 7405 convert_xn(p_orig,pxn); 7406 convert_xn(q_orig,qxn); 7407 vecteur pcont1,qcont1,pqcont1; 7408 if (!pp_mod(pxn,pcont1,&env) || 7409 !pp_mod(qxn,qcont1,&env)) 7410 return false; 7411 gcdmodpoly(pcont1,qcont1,&env,pqcont1); 7412 if (is_undef(pqcont1)) 7413 return false; 7414 // Make p and q primitive with respect to xn 7415 // p(x1,...,xn) q(x1,...,xn) viewed as p(xn) and q(xn) 7416 // with coeff polynomial wrt x1..xn-1 7417 make_modprimitive_xn(pxn,modulo,pcofactorxn); 7418 make_modprimitive_xn(qxn,modulo,qcofactorxn); 7419 modgcd(pcofactorxn,qcofactorxn,modulo,dxn,ptmp,qtmp,false); 7420 mulpoly(dxn,pqcont1,dxn); 7421 convert_back_xn(dxn,cont); 7422 if (compute_cofactors){ 7423 mulpoly(pcofactorxn,pcont1,pcofactorxn); 7424 mulpoly(qcofactorxn,qcont1,qcofactorxn); 7425 convert_back_xn(pcofactorxn,pcofactor); 7426 convert_back_xn(qcofactorxn,qcofactor); 7427 } 7428 // Find degree of gcd with respect to xn, more precisely gcddeg>=degree/xn 7429 // and compute data for the sparse modular algorithm 7430 index_t vzero; // coeff of vzero correspond to zero or non zero 7431 int nzero=1; // Number of zero coeffs 7432 vecteur alphav,gcdv; // Corresponding values of alpha and gcd at alpha 7433 int gcddeg=0; 7434 vecteur b(dim-1),bnext; 7435 int 
pxndeg=degree_xn(pxn),qxndeg=degree_xn(qxn); 7436 for (int essai=0;essai<2;){ 7437 gen pb(peval(pxn,b,modulo)); 7438 gen qb(peval(qxn,b,modulo)); 7439 for (;;){ 7440 bnext=vranm(dim-1,0,0); // find another random point 7441 if (bnext!=b){ b=bnext; break; } 7442 } 7443 if (pb.type==_POLY && !pb._POLYptr->coord.empty()) 7444 pb=pb._POLYptr->coord.front().value; 7445 if (pb.type!=_VECT || int(pb._VECTptr->size())!=pxndeg+1) 7446 continue; 7447 if (qb.type==_POLY && !qb._POLYptr->coord.empty()) 7448 qb=qb._POLYptr->coord.front().value; 7449 if (qb.type!=_VECT || int(qb._VECTptr->size())!=qxndeg+1) 7450 continue; 7451 modpoly db; 7452 gcdmodpoly(*pb._VECTptr,*qb._VECTptr,&env,db); 7453 if (is_undef(db)) 7454 return false; 7455 int dbdeg=int(db.size())-1; 7456 if (!dbdeg){ 7457 gcddeg=0; break; 7458 } 7459 if (!essai){ // 1st gcd test 7460 gcddeg=dbdeg; 7461 nzero=find_nonzero(db,vzero); 7462 ++essai; 7463 continue; 7464 } 7465 // 2nd try 7466 if (dbdeg<gcddeg){ // 1st try was unlucky, restart 1st try 7467 gcddeg=dbdeg; 7468 nzero=find_nonzero(db,vzero); 7469 continue; 7470 } 7471 if (dbdeg!=gcddeg) 7472 continue; 7473 // Same gcd degree for 1st and 2nd try, keep this degree 7474 index_t tmp; 7475 nzero=find_nonzero(db,tmp); 7476 if (nzero){ 7477 vzero = vzero | tmp; 7478 // Recompute nzero, it is the number of 0 coeff of vzero 7479 index_t::const_iterator it=vzero.begin(),itend=vzero.end(); 7480 for (nzero=0;it!=itend;++it){ 7481 if (!*it) ++nzero; 7482 } 7483 } 7484 ++essai; 7485 } 7486 if (!gcddeg){ 7487 d=cont; 7488 return true; 7489 } 7490 vecteur interp(1,1); 7491 // gcd of leading coefficients of p and q viewed as poly in X_1...X_n-1 7492 // with coeff in Z[X_n] 7493 if (debug_infolevel) 7494 CERR << "gcdmod lcoeffn dim " << dim-1 << " " << CLOCK() << '\n'; 7495 gen lp(pxn.coord.front().value),lq(qxn.coord.front().value); 7496 vecteur Delta(1,1),lcoeffp(1,1),lcoeffq(1,1); 7497 if (lp.type==_VECT) 7498 lcoeffp=*lp._VECTptr; 7499 if (lq.type==_VECT) 7500 
lcoeffq=*lq._VECTptr; 7501 if ((lp.type==_VECT) && (lq.type==_VECT) ){ 7502 gcdmodpoly(lcoeffp,lcoeffq,&env,Delta); 7503 if (is_undef(Delta)) 7504 return false; 7505 } 7506 // estimate time for full lift or division try 7507 // size=p_orig.size()+q_orig.size() 7508 // sumdeg=pxndeg+qxndeg 7509 // %age=gcddeg/min(pxndeg,qxndeg) 7510 // %age^dim*(1-%age)^dim*size^2 estimates the time for division try 7511 // gcddeg*size estimates the time for lifting to gcddeg 7512 // sumdeg*size estimates the time for full lifting 7513 // if sumdeg<(gcddeg+%age^dim*(1-%age)^dim*size) do full lifting 7514 int Deltadeg = int(Delta.size())-1,liftdeg=giacmax(pxndeg,qxndeg)+Deltadeg; 7515 int gcddeg_plus_delta=gcddeg+Deltadeg; 7516 int liftdeg0=giacmax(liftdeg-gcddeg,gcddeg_plus_delta); 7517 // once liftdeg0 is reached we can replace g/gp/gq computation 7518 // by a check that d*dp=dxn*lcoeff(d*dp)/Delta at alpha 7519 // and d*dq=dxn*lcoeff(d*dq)/lcoeff(qxn) at alpha 7520 int sumdeg = pxndeg+qxndeg; 7521 double percentage = double(gcddeg)/giacmin(pxndeg,qxndeg); 7522 int sumsize = int(p_orig.coord.size()+q_orig.coord.size()); 7523 double gcdlift=gcddeg+std::pow(percentage,dim)*std::pow(1-percentage,dim)*sumsize; 7524 bool compute_cof = sumdeg<gcdlift/(1+dim); 7525 polynome p(dim),q(dim); 7526 if (!compute_cof){ 7527 convert_back_xn(pxn,p); 7528 convert_back_xn(qxn,q); 7529 } 7530 if (debug_infolevel) 7531 CERR << "dim " << dim << ", full lift:" << sumdeg << " , gcdlift:" << gcdlift/(1+dim) << " compute cofactors=" << compute_cof << '\n'; 7532 d=polynome(dim-1); 7533 polynome dp(dim-1),dq(dim-1),g1(dim-1); 7534 // we are now interpolating G=gcd(p,q)*a poly/xn 7535 // such that the leading coeff of G is Delta 7536 index_t pdeg(pxn.degree()),qdeg(qxn.degree()); 7537 int spdeg=0,sqdeg=0; 7538 for (int i=0;i<dim-1;++i){ 7539 spdeg += pdeg[i]; 7540 sqdeg += qdeg[i]; 7541 } 7542 index_t delta=index_min(pdeg,qdeg); 7543 int e=0; // number of evaluations 7544 int alpha=0; 7545 if 
(debug_infolevel>1) 7546 CERR << "gcdmod find alpha dim " << d.dim << " " << CLOCK() << '\n'; 7547 for (;;++alpha){ 7548 if (alpha==modulo){ 7549 CERR << "Modgcd: no suitable evaluation point" << '\n'; 7550 return false; 7551 } 7552 if (debug_infolevel>1) 7553 CERR << "gcdmod eval " << alpha << " dim " << d.dim << " " << CLOCK() << '\n'; 7554 index_t * pdegptr=&pdeg; 7555 const polynome & palpha=pevaln(pxn,alpha,modulo,pdegptr,estreel); 7556 if (palpha.coord.empty()) 7557 continue; 7558 if (Tis_constant<gen>(palpha)){ 7559 if (spdeg) 7560 continue; 7561 // gcd may only depend on xn 7562 d=cont; 7563 return true; 7564 } 7565 if (!pdegptr) 7566 continue; 7567 index_t * qdegptr=&qdeg; 7568 const polynome & qalpha=pevaln(qxn,alpha,modulo,qdegptr,estreel); 7569 if (qalpha.coord.empty()) 7570 continue; 7571 if (Tis_constant<gen>(qalpha)){ 7572 if (sqdeg) 7573 continue; 7574 d=cont; 7575 return true; 7576 } 7577 if (!qdegptr) 7578 continue; 7579 // palpha/qalpha should have the same degrees than pxn/qxn 7580 // but the test requires checking all monomials of palpha/qalpha 7581 // if (palpha.lexsorted_degree()!=pdeg[0] || qalpha.lexsorted_degree()!=qdeg[0] ) 7582 // continue; 7583 // palpha and qalpha are p_prim and q_prim evaluated at xn=alpha 7584 if (debug_infolevel>1) 7585 CERR << "gcdmod gcd at " << alpha << " dim " << d.dim << " " << CLOCK() << '\n'; 7586 polynome g(dim-1),gp(dim-1),gq(dim-1); 7587 index_t * tmpptr=0; 7588 if (compute_cof && e>liftdeg0 && e<=liftdeg){ 7589 g=pevaln(d,alpha,modulo,tmpptr,estreel); 7590 gp=pevaln(dp,alpha,modulo,tmpptr,estreel); 7591 gq=pevaln(dq,alpha,modulo,tmpptr,estreel); 7592 // check that g*gp=palpha*lcoeff/lcoeff 7593 mulpoly(gp,smod(lcoeff(palpha)*invmod(lcoeff(g)*lcoeff(gp),modulo),modulo),gp); 7594 mulpoly(gq,smod(lcoeff(qalpha)*invmod(lcoeff(g)*lcoeff(gq),modulo),modulo),gq); 7595 gp=smod(g*gp-palpha,modulo); 7596 gq=smod(g*gq-qalpha,modulo); 7597 if (is_zero(gp) && is_zero(gq)){ 7598 ++e; 7599 continue; 7600 } 7601 } 7602 
if (!modgcd(palpha,qalpha,modulo,g,gp,gq,compute_cof)) 7603 return false; 7604 index_t gdeg(g.degree()); 7605 if (gdeg==delta){ 7606 // Try spmod first 7607 if (!compute_cof && nzero){ 7608 // Add alpha,g 7609 alphav.push_back(alpha); 7610 gcdv.push_back(g); 7611 if (gcddeg-nzero==e){ 7612 // We have enough evaluations, let's try SPMOD 7613 #if 1 7614 // Build the matrix, each line has coeffs / vzero 7615 vector< vector<int> > m,minverse; 7616 m.reserve(e+1); 7617 for (int j=0;j<=e;++j){ 7618 index_t::reverse_iterator it=vzero.rbegin(),itend=vzero.rend(); 7619 vector<int> line; 7620 line.reserve(e+1); // overflow if modulo too large 7621 for (int p=alphav[j].val,pp=1;it!=itend;++it,pp=smod(p*pp,modulo.val)){ 7622 if (*it) 7623 line.push_back(pp); 7624 } 7625 reverse(line.begin(),line.end()); 7626 m.push_back(line); 7627 } 7628 // assume gcd is the vector of non zero coeffs of the gcd in x^n 7629 // we have the relation 7630 // m*gcd=gcdv 7631 // invert m (if invertible) 7632 longlong det_mod_p; 7633 if (smallmodinv(m,minverse,modulo.val,det_mod_p) && det_mod_p){ 7634 // hence gcd=minverse*gcdv, where the i-th component of gcd 7635 // must be "multiplied" by xn^degree_corresponding_vzero[i] 7636 vector< polynome > minversegcd(e+1,polynome(dim)); 7637 // find bound of required size 7638 size_t taille=1; 7639 for (int k=0;k<=e;++k){ 7640 if (gcdv[k].type==_POLY) 7641 taille += gcdv[k]._POLYptr->coord.size(); 7642 } 7643 for (int j=0;j<=e;++j) 7644 minversegcd[j].coord.reserve(taille); 7645 polynome tmpadd(dim),tmpmult(dim); 7646 tmpadd.coord.reserve(taille); 7647 size_t taille2=0; 7648 for (int j=0;j<=e;++j){ 7649 for (int k=0;k<=e;++k){ 7650 // smallmult(minverse[j][k],tmpmult,tmpmult,modulo); 7651 int fact=minverse[j][k]; 7652 if (!fact) 7653 continue; 7654 if (gcdv[k].type==_POLY) 7655 tmpmult=*gcdv[k]._POLYptr; 7656 else 7657 tmpmult=polynome(gcdv[k],dim-1); 7658 vector< monomial<gen> >::iterator it=tmpmult.coord.begin(),itend=tmpmult.coord.end(); 7659 for 
(;it!=itend;++it){ 7660 it->value=smod(fact*it->value,modulo); 7661 } 7662 tmpadd.coord.swap(minversegcd[j].coord); 7663 // smalladd(tmpadd,tmpmult,minversegcd[j],modulo); 7664 tmpadd.TAdd(tmpmult,minversegcd[j]); 7665 } 7666 polynome res(minversegcd[j].dim); 7667 res.coord.reserve(minversegcd[j].coord.size()); 7668 res.coord.swap(minversegcd[j].coord); 7669 smod(res,modulo,minversegcd[j]); 7670 taille2 += minversegcd[j].coord.size(); 7671 } 7672 polynome trygcd(dim-1); 7673 index_t::const_iterator it=vzero.begin(),itend=vzero.end(); 7674 int deg=int(itend-it)-1; 7675 for (int pos=0;it!=itend;++it,--deg){ 7676 if (!*it) 7677 continue; 7678 polynome & tmp=minversegcd[pos]; 7679 tmp.untruncn(deg); 7680 polynome tmpxn; 7681 convert_xn(tmp,tmpxn); 7682 trygcd=trygcd+tmpxn; 7683 ++pos; 7684 } 7685 #else 7686 // Build the matrix, each line has coeffs / vzero 7687 if (debug_infolevel>1) 7688 CERR << CLOCK()*1e-6 << " SPMOD start" << '\n'; 7689 matrice m; m.reserve(e+1); 7690 for (int j=0;j<=e;++j){ 7691 index_t::reverse_iterator it=vzero.rbegin(),itend=vzero.rend(); 7692 vecteur line; line.reserve(e+2); 7693 for (gen p=alphav[j],pp=plus_one;it!=itend;++it,pp=smod(p*pp,modulo)){ 7694 if (*it) 7695 line.push_back( pp); 7696 } 7697 reverse(line.begin(),line.end()); 7698 line.push_back(gcdv[j]); 7699 m.push_back(line); 7700 } 7701 // Reduce linear system modulo modulo 7702 gen det; vecteur pivots; matrice mred; 7703 if (debug_infolevel>1) 7704 CERR << CLOCK()*1e-6 << " SPMOD begin rref" << '\n'; 7705 if (!modrref(m,mred,pivots,det,0,int(m.size()),0,int(m.front()._VECTptr->size())-1,true,false,modulo,false,false)) 7706 return false; 7707 if (debug_infolevel>1) 7708 CERR << CLOCK()*1e-6 << " SPMOD end rref" << '\n'; 7709 if (!is_zero(det)){ 7710 // Last column is the solution, it should be polynomials 7711 // that must be untrunced with index = to non-0 coeff of vzero 7712 polynome trygcd(dim); 7713 index_t::const_iterator it=vzero.begin(),itend=vzero.end(); 7714 int 
deg=int(itend-it)-1; 7715 for (int pos=0;it!=itend;++it,--deg){ 7716 if (!*it) 7717 continue; 7718 gen tmp=mred[pos][e+1]; // e+1=#of points -> last col 7719 if (tmp.type==_POLY){ 7720 //*tmp._POLYptr= 7721 tmp._POLYptr->untruncn(deg); 7722 polynome tmpxn; 7723 convert_xn(*tmp._POLYptr,tmpxn); 7724 trygcd=trygcd+tmpxn; 7725 } 7726 else { 7727 if (!is_zero(tmp)){ 7728 vecteur tmpxn(deg+1); 7729 tmpxn.front()=tmp; 7730 trygcd=trygcd+monomial<gen>(gen(tmpxn,_POLY1__VECT),dim-1); 7731 } 7732 } 7733 ++pos; 7734 } 7735 #endif 7736 // Check if trygcd is the gcd! 7737 vecteur tmpv; 7738 if (!pp_mod(trygcd,tmpv,&env)) 7739 return false; 7740 polynome pD,QP(dim),QQ(dim),R(dim); 7741 convert_back_xn(trygcd,pD); 7742 if (debug_infolevel>1) 7743 CERR << CLOCK()*1e-6 << " SPMOD try gcd candidate" << '\n'; 7744 if (pD.coord.size()<=p.coord.size() && pD.coord.size()<=q.coord.size() && divremmod(p,pD,modulo,QP,R) && R.coord.empty()){ 7745 if (divremmod(q,pD,modulo,QQ,R) && R.coord.empty()){ 7746 pD=pD*cont; 7747 d=smod(pD*invmod(pD.coord.front().value,modulo),modulo); 7748 if (compute_cofactors){ 7749 pcofactor=pcofactor*QP; 7750 pcofactor=smod(p_orig.coord.front().value*invmod(pcofactor.coord.front().value,modulo)*pcofactor,modulo); 7751 qcofactor=qcofactor*QQ; 7752 qcofactor=smod(q_orig.coord.front().value*invmod(qcofactor.coord.front().value,modulo)*qcofactor,modulo); 7753 } 7754 return true; 7755 } 7756 } 7757 } 7758 // SPMOD not successful :-( 7759 nzero=0; 7760 } // end if gcddeg-nzero==e 7761 } // end if (nzero) 7762 if (debug_infolevel>1) 7763 CERR << "gcdmod interp dim " << d.dim << " " << CLOCK() << '\n'; 7764 if (compute_cof){ 7765 // interpolate p cofactor 7766 mulpoly(gp,smod(hornermod(lcoeffp,alpha,modulo)*invmod(gp.coord.front().value,modulo),modulo),g1); 7767 smod(g1-pevaln(dp,alpha,modulo,tmpptr,estreel),modulo,g1); 7768 if (!is_zero(g1)){ 7769 mulpoly(g1,smod(invmod(hornermod(interp,alpha,modulo),modulo)*gen(interp,_POLY1__VECT),modulo),g1); 7770 
smod(dp+g1,modulo,dp); 7771 } 7772 // interpolate q cofactor 7773 mulpoly(gq,smod(hornermod(lcoeffq,alpha,modulo)*invmod(gq.coord.front().value,modulo),modulo),g1); 7774 smod(g1-pevaln(dq,alpha,modulo,tmpptr,estreel),modulo,g1); 7775 if (!is_zero(g1)){ 7776 mulpoly(g1,smod(invmod(hornermod(interp,alpha,modulo),modulo)*gen(interp,_POLY1__VECT),modulo),g1); 7777 smod(dq+g1,modulo,dq); 7778 } 7779 } 7780 // interp GCD 7781 mulpoly(g,smod(hornermod(Delta,alpha,modulo)*invmod(g.coord.front().value,modulo),modulo),g1); 7782 smod(g1-pevaln(d,alpha,modulo,tmpptr,estreel),modulo,g1); 7783 if (!is_zero(g1)){ 7784 mulpoly(g1,smod(invmod(hornermod(interp,alpha,modulo),modulo)*gen(interp,_POLY1__VECT),modulo),g1); 7785 smod(d+g1,modulo,d); 7786 } 7787 interp=operator_times(interp,makevecteur(1,-alpha),&env); 7788 ++e; 7789 vecteur vtmp; 7790 if (compute_cof){ 7791 if (e>liftdeg){ 7792 // divide d,dp,dq by their content in xn 7793 if (!pp_mod(d,vtmp,&env) || 7794 !pp_mod(dp,vtmp,&env) || 7795 !pp_mod(dq,vtmp,&env)) 7796 return false; 7797 polynome pD(dim),PP(dim),QQ(dim); 7798 // check xn degrees of d+dp=degree(pxn), d+dq=degree(qxn) 7799 int dxndeg=degree_xn(d),dpxndeg=degree_xn(dp),dqxndeg=degree_xn(dq); 7800 if ( dxndeg+dpxndeg==degree_xn(pxn) && 7801 dxndeg+dqxndeg==degree_xn(qxn) ){ 7802 convert_back_xn(d,pD); 7803 d=pD*cont; 7804 if (compute_cofactors){ 7805 convert_back_xn(dp,PP); 7806 convert_back_xn(dq,QQ); 7807 pcofactor=PP*pcofactor; 7808 qcofactor=QQ*qcofactor; 7809 pcofactor=smod(p_orig.coord.front().value*invmod(pcofactor.coord.front().value,modulo)*pcofactor,modulo); 7810 qcofactor=smod(q_orig.coord.front().value*invmod(qcofactor.coord.front().value,modulo)*qcofactor,modulo); 7811 } 7812 if (debug_infolevel) 7813 CERR << "gcdmod end dim " << dim << " " << CLOCK() << '\n'; 7814 return true; 7815 } 7816 d.coord.clear(); dp.coord.clear(); dq.coord.clear(); 7817 gcdv.clear(); alphav.clear(); 7818 interp.clear(); interp.push_back(1); 7819 e=0; 7820 } 7821 } 7822 else { 
7823 if (e>gcddeg || is_zero(g1)){ 7824 if (debug_infolevel) 7825 CERR << "gcdmod pp1mod dim " << dim << " " << CLOCK() << '\n'; 7826 polynome pD,QP(dim),QQ(dim),R(d); 7827 if (!pp_mod(R,vtmp,&env)) 7828 return false; 7829 convert_back_xn(R,pD); 7830 // This removes the polynomial in xn that we multiplied by 7831 // (it was necessary to know the lcoeff of the interpolated poly) 7832 if (debug_infolevel) 7833 CERR << "gcdmod check dim " << dim << " " << CLOCK() << '\n'; 7834 // Now, gcd divides pD for gcddeg+1 values of x1 7835 // degree(pD)<=degree(gcd) 7836 if (divremmod(p,pD,modulo,QP,R) && R.coord.empty()){ 7837 // If pD divides both P and Q, then the degree wrt variables 7838 // x1,...,xn-1 is the right one (because it is <= since pD 7839 // divides the gcd and >= since pD(xn=one of the try) was a gcd 7840 // The degree in xn is the right one because of the condition 7841 // on the lcoeff 7842 // Note that the division test might be much longer than the 7843 // interpolation itself (e.g. 
if the degree of the gcd is small) 7844 // but it seems unavoidable, for example if 7845 // P=Y-X+X(X-1)(X-2)(X-3) 7846 // Q=Y-X+X(X-1)(X-2)(X-4) 7847 // then gcd(P,Q)=1, but if we take Y=0, Y=1 or Y=2 7848 // we get gcddeg=1 (probably degree 1 for the gcd) 7849 // interpolation at X=0 and X=1 will lead to Y-X as candidate gcd 7850 // and even adding X=2 will not change it 7851 // We might remove division if we compute the cofactors of P and Q 7852 // if P=pD*cofactor is true for degree(P) values of x1 7853 // and same for Q, and the degrees wrt xn of pD and cofactors 7854 // have sum equal to degree of P or Q + lcoeff then pD is the gcd 7855 if (divremmod(q,pD,modulo,QQ,R) &&R.coord.empty()){ 7856 pD=pD*cont; 7857 d=smod(pD*invmod(pD.coord.front().value,modulo),modulo); 7858 if (compute_cofactors){ 7859 pcofactor=pcofactor*QP; 7860 pcofactor=smod(p_orig.coord.front().value*invmod(pcofactor.coord.front().value,modulo)*pcofactor,modulo); 7861 qcofactor=qcofactor*QQ; 7862 qcofactor=smod(q_orig.coord.front().value*invmod(qcofactor.coord.front().value,modulo)*qcofactor,modulo); 7863 } 7864 if (debug_infolevel) 7865 CERR << "gcdmod found dim " << d.dim << " " << CLOCK() << '\n'; 7866 return true; 7867 } 7868 } 7869 if (debug_infolevel) 7870 CERR << "Gcdmod bad guess " << '\n'; 7871 } // end if (e>gcddeg) 7872 } // end else [if (compute_cof)] 7873 continue; 7874 } // end gdeg==delta 7875 // FIXME: the current implementation may break if we are unlucky 7876 // If the degrees of palpha and qalpha are the same than 7877 // those of pxn and qxn, delta <- index_min(gdeg,delta) 7878 // restart with g only if gdeg[j]<=delta[j] for all indices 7879 // stay with d only if delta[j]<=gdeg[j] 7880 if (gdeg[0]>delta[0]) 7881 continue; 7882 if (delta[0]>=gdeg[0]){ // restart with g 7883 gcdv=vecteur(1,g); 7884 alphav=vecteur(1,alpha); 7885 delta=gdeg; 7886 d=(g*smod(hornermod(Delta,alpha,modulo),modulo))*invmod(g.coord.front().value,modulo); 7887 if (compute_cof){ 7888 
dp=(gp*smod(hornermod(lcoeffp,alpha,modulo),modulo))*invmod(gp.coord.front().value,modulo); 7889 dq=(gq*smod(hornermod(lcoeffq,alpha,modulo),modulo))*invmod(gq.coord.front().value,modulo); 7890 } 7891 e=1; 7892 interp=makevecteur(1,-alpha); 7893 continue; 7894 } 7895 } 7896 } 7897 7898 void unmodularize(const modpoly &p,const gen & modulo,modpoly &P){ 7899 P.clear(); P.reserve(p.size()); 7900 const_iterateur it=p.begin(),itend=p.end(); 7901 for (;it!=itend;++it){ 7902 const gen & g=*it; 7903 if (g.type==_MOD) 7904 P.push_back(*g._MODptr); 7905 else 7906 P.push_back(g); 7907 } 7908 } 7909 7910 // Previous Fourier prime, find n-throot of unity 7911 int prevfourier(unsigned p,int fourier_for_n,int & w){ 7912 int l=sizeinbase2(fourier_for_n); 7913 unsigned pdiv=p>>l; 7914 for (--pdiv;pdiv>=(1<<(30-l));--pdiv){ 7915 p=(pdiv<<l)+1; 7916 int p15=p%15; 7917 if (p15==0 || p15==3 || p15==6 || p15==9 || p15==12 || p15==5 || p15==10) 7918 continue; 7919 // find nthroot of 1 and checks Miller-Rabin primality 7920 unsigned char charprimes[]={2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}; 7921 w=0; 7922 for (int i=0;i<sizeof(charprimes);++i){ 7923 ulonglong r=powmod(unsigned(charprimes[i]),pdiv,p); 7924 unsigned w_=r,j; 7925 if (r==1) 7926 continue; 7927 for (j=1;j<l;++j){ 7928 r=(r*r)%p; 7929 if (r==1 || r==p-1) 7930 break; 7931 } 7932 if (r!=p-1) 7933 break; // p is not prime 7934 if (j==l-1) 7935 w=w_; 7936 if (w && i>=20) 7937 return p; 7938 } 7939 } 7940 return 0; 7941 } 7942 7943 // a list of 2048 primes generated by 7944 // p:=2^31-2^16 7945 // L:=[]; for j from 1 to 2^11 do p:=prevprime(p-1); L.append(p); od; L 7946 const unsigned 
primes31[]={2147418083,2147418079,2147418067,2147418043,2147418041,2147418037,2147418011,2147418001,2147417969,2147417941,2147417939,2147417929,2147417911,2147417903,2147417869,2147417819,2147417813,2147417773,2147417759,2147417717,2147417707,2147417693,2147417669,2147417663,2147417653,2147417593,2147417551,2147417539,2147417527,2147417521,2147417453,2147417443,2147417423,2147417383,2147417381,2147417351,2147417329,2147417303,2147417267,2147417179,2147417171,2147417141,2147417117,2147417113,2147417093,2147417053,2147417023,2147416991,2147416987,2147416979,2147416967,2147416963,2147416949,2147416927,2147416907,2147416883,2147416877,2147416867,2147416823,2147416813,2147416807,2147416783,2147416771,2147416769,2147416759,2147416721,2147416709,2147416703,2147416657,2147416637,2147416589,2147416573,2147416561,2147416559,2147416553,2147416519,2147416507,2147416483,2147416477,2147416441,2147416379,2147416357,2147416343,2147416339,2147416301,2147416267,2147416241,2147416223,2147416213,2147416207,2147416189,2147416181,2147416177,2147416163,2147416147,2147416133,2147416121,2147416111,2147416109,2147416079,2147416067,2147416057,2147416031,2147416003,2147415989,2147415943,2147415931,2147415889,2147415883,2147415859,2147415857,2147415839,2147415817,2147415811,2147415773,2147415749,2147415737,2147415719,2147415709,2147415703,2147415659,2147415629,2147415619,2147415617,2147415607,2147415563,2147415559,2147415553,2147415541,2147415533,2147415527,2147415499,2147415463,2147415451,2147415449,2147415443,2147415427,2147415373,2147415343,2147415337,2147415323,2147415311,2147415307,2147415203,2147415181,2147415119,2147415107,2147415059,2147415047,2147415041,2147415029,2147415019,2147415013,2147414999,2147414977,2147414953,2147414947,2147414839,2147414837,2147414807,2147414791,2147414767,2147414723,2147414699,2147414651,2147414641,2147414629,2147414617,2147414603,2147414567,2147414531,2147414519,2147414513,2147414509,2147414497,2147414461,2147414441,2147414411,2147414407,2147414377,21474143
69,2147414363,2147414317,2147414233,2147414231,2147414207,2147414201,2147414189,2147414183,2147414117,2147414057,2147414053,2147413993,2147413981,2147413979,2147413949,2147413913,2147413903,2147413889,2147413867,2147413859,2147413813,2147413781,2147413757,2147413753,2147413739,2147413729,2147413711,2147413661,2147413657,2147413651,2147413603,2147413559,2147413553,2147413529,2147413519,2147413441,2147413381,2147413343,2147413309,2147413297,2147413291,2147413283,2147413277,2147413273,2147413223,2147413211,2147413201,2147413153,2147413139,2147413109,2147413061,2147413049,2147413001,2147412989,2147412979,2147412977,2147412973,2147412961,2147412947,2147412929,2147412899,2147412877,2147412833,2147412797,2147412791,2147412731,2147412721,2147412691,2147412689,2147412671,2147412667,2147412637,2147412629,2147412601,2147412599,2147412593,2147412581,2147412577,2147412539,2147412521,2147412493,2147412479,2147412467,2147412461,2147412451,2147412413,2147412403,2147412361,2147412359,2147412347,2147412301,2147412277,2147412263,2147412257,2147412247,2147412233,2147412229,2147412209,2147412149,2147412143,2147412133,2147412037,2147412017,2147411963,2147411953,2147411911,2147411887,2147411879,2147411857,2147411839,2147411821,2147411789,2147411743,2147411723,2147411713,2147411683,2147411657,2147411639,2147411621,2147411603,2147411579,2147411561,2147411557,2147411551,2147411549,2147411533,2147411527,2147411501,2147411477,2147411473,2147411471,2147411429,2147411359,2147411339,2147411333,2147411297,2147411263,2147411251,2147411221,2147411213,2147411209,2147411183,2147411087,2147411033,2147411011,2147411009,2147411003,2147410973,2147410963,2147410949,2147410891,2147410873,2147410849,2147410829,2147410823,2147410817,2147410813,2147410789,2147410781,2147410757,2147410753,2147410729,2147410717,2147410687,2147410679,2147410673,2147410649,2147410637,2147410621,2147410619,2147410597,2147410567,2147410543,2147410537,2147410523,2147410483,2147410481,2147410451,2147410379,2147410373,2147410351,214741
0339,2147410333,2147410327,2147410313,2147410297,2147410273,2147410271,2147410253,2147410247,2147410217,2147410163,2147410159,2147410127,2147410051,2147410043,2147410037,2147410007,2147409989,2147409977,2147409967,2147409949,2147409907,2147409899,2147409871,2147409841,2147409799,2147409793,2147409787,2147409751,2147409721,2147409713,2147409707,2147409653,2147409647,2147409631,2147409629,2147409623,2147409619,2147409601,2147409577,2147409547,2147409541,2147409499,2147409493,2147409491,2147409457,2147409443,2147409409,2147409403,2147409389,2147409373,2147409361,2147409353,2147409343,2147409337,2147409323,2147409311,2147409301,2147409287,2147409263,2147409239,2147409217,2147409181,2147409167,2147409163,2147409157,2147409137,2147409113,2147409083,2147409067,2147409049,2147409041,2147409031,2147408981,2147408957,2147408911,2147408909,2147408881,2147408827,2147408779,2147408761,2147408749,2147408741,2147408729,2147408717,2147408713,2147408707,2147408699,2147408629,2147408621,2147408609,2147408591,2147408587,2147408563,2147408551,2147408531,2147408513,2147408507,2147408467,2147408441,2147408387,2147408339,2147408323,2147408321,2147408303,2147408299,2147408293,2147408279,2147408273,2147408267,2147408243,2147408233,2147408209,2147408201,2147408171,2147408143,2147408111,2147408093,2147408083,2147408027,2147408017,2147407991,2147407973,2147407939,2147407907,2147407891,2147407877,2147407861,2147407807,2147407799,2147407793,2147407741,2147407699,2147407697,2147407681,2147407667,2147407657,2147407621,2147407609,2147407543,2147407463,2147407439,2147407429,2147407403,2147407337,2147407333,2147407319,2147407279,2147407271,2147407261,2147407253,2147407193,2147407177,2147407153,2147407133,2147407127,2147407117,2147407069,2147407039,2147407033,2147407027,2147407013,2147407001,2147406997,2147406991,2147406979,2147406953,2147406931,2147406917,2147406869,2147406839,2147406827,2147406823,2147406817,2147406773,2147406769,2147406757,2147406739,2147406721,2147406643,2147406631,2147406623,2147
406601,2147406589,2147406553,2147406517,2147406511,2147406497,2147406491,2147406463,2147406451,2147406409,2147406397,2147406379,2147406367,2147406341,2147406319,2147406281,2147406229,2147406223,2147406199,2147406193,2147406167,2147406161,2147406139,2147406101,2147406061,2147406059,2147406047,2147406031,2147405993,2147405987,2147405977,2147405947,2147405921,2147405917,2147405893,2147405801,2147405759,2147405747,2147405693,2147405657,2147405591,2147405573,2147405569,2147405563,2147405531,2147405527,2147405503,2147405471,2147405467,2147405461,2147405417,2147405389,2147405357,2147405353,2147405333,2147405329,2147405291,2147405279,2147405243,2147405237,2147405209,2147405203,2147405173,2147405131,2147405129,2147405119,2147405111,2147405107,2147405063,2147405017,2147405003,2147404991,2147404981,2147404957,2147404943,2147404933,2147404901,2147404891,2147404859,2147404841,2147404829,2147404771,2147404711,2147404639,2147404621,2147404619,2147404601,2147404583,2147404579,2147404577,2147404489,2147404447,2147404439,2147404429,2147404393,2147404381,2147404349,2147404331,2147404327,2147404319,2147404309,2147404247,2147404243,2147404229,2147404211,2147404183,2147404157,2147404153,2147404141,2147404121,2147404099,2147404067,2147404037,2147404031,2147404019,2147404001,2147403983,2147403953,2147403893,2147403889,2147403887,2147403859,2147403857,2147403781,2147403779,2147403737,2147403719,2147403701,2147403673,2147403593,2147403581,2147403563,2147403547,2147403529,2147403509,2147403497,2147403469,2147403457,2147403409,2147403407,2147403403,2147403383,2147403343,2147403341,2147403311,2147403283,2147403217,2147403179,2147403143,2147403121,2147403103,2147403061,2147403007,2147402993,2147402989,2147402977,2147402951,2147402941,2147402927,2147402911,2147402899,2147402869,2147402833,2147402827,2147402791,2147402783,2147402779,2147402771,2147402711,2147402689,2147402687,2147402683,2147402611,2147402603,2147402563,2147402507,2147402489,2147402479,2147402443,2147402437,2147402423,2147402419,21
47402417,2147402407,2147402381,2147402377,2147402371,2147402357,2147402329,2147402297,2147402269,2147402267,2147402239,2147402221,2147402203,2147402177,2147402161,2147402119,2147402099,2147402027,2147402009,2147401973,2147401957,2147401951,2147401943,2147401933,2147401891,2147401807,2147401769,2147401759,2147401747,2147401741,2147401721,2147401709,2147401681,2147401667,2147401661,2147401591,2147401573,2147401567,2147401549,2147401513,2147401499,2147401457,2147401447,2147401441,2147401369,2147401343,2147401327,2147401313,2147401307,2147401303,2147401271,2147401253,2147401241,2147401213,2147401181,2147401171,2147401169,2147401163,2147401129,2147401103,2147401099,2147401021,2147400991,2147400977,2147400973,2147400949,2147400943,2147400889,2147400877,2147400847,2147400821,2147400803,2147400799,2147400769,2147400763,2147400743,2147400683,2147400643,2147400637,2147400623,2147400599,2147400583,2147400571,2147400511,2147400481,2147400469,2147400457,2147400449,2147400433,2147400397,2147400377,2147400361,2147400337,2147400331,2147400329,2147400301,2147400293,2147400239,2147400217,2147400139,2147400127,2147400113,2147400089,2147400071,2147400053,2147400011,2147400001,2147399983,2147399981,2147399959,2147399957,2147399927,2147399923,2147399909,2147399851,2147399843,2147399819,2147399809,2147399803,2147399789,2147399777,2147399767,2147399759,2147399753,2147399731,2147399719,2147399711,2147399701,2147399699,2147399689,2147399671,2147399663,2147399629,2147399603,2147399593,2147399587,2147399581,2147399561,2147399533,2147399521,2147399519,2147399509,2147399491,2147399461,2147399447,2147399431,2147399407,2147399383,2147399381,2147399363,2147399329,2147399263,2147399227,2147399203,2147399197,2147399167,2147399161,2147399153,2147399147,2147399141,2147399123,2147399113,2147399087,2147399069,2147399063,2147399053,2147399021,2147399017,2147398997,2147398969,2147398963,2147398949,2147398919,2147398849,2147398819,2147398783,2147398769,2147398681,2147398679,2147398667,2147398597,2147398577,
2147398559,2147398553,2147398549,2147398531,2147398529,2147398507,2147398501,2147398387,2147398361,2147398321,2147398313,2147398283,2147398277,2147398261,2147398229,2147398219,2147398207,2147398157,2147398109,2147398091,2147398081,2147398079,2147398021,2147398009,2147397997,2147397953,2147397947,2147397943,2147397883,2147397881,2147397877,2147397859,2147397853,2147397827,2147397821,2147397817,2147397809,2147397787,2147397751,2147397743,2147397731,2147397677,2147397643,2147397589,2147397587,2147397569,2147397563,2147397557,2147397541,2147397479,2147397463,2147397443,2147397437,2147397433,2147397409,2147397383,2147397361,2147397359,2147397353,2147397289,2147397283,2147397281,2147397269,2147397257,2147397209,2147397199,2147397193,2147397137,2147397097,2147397071,2147397029,2147397019,2147397011,2147396989,2147396987,2147396963,2147396921,2147396903,2147396897,2147396893,2147396887,2147396869,2147396857,2147396827,2147396819,2147396807,2147396761,2147396759,2147396749,2147396711,2147396687,2147396659,2147396623,2147396621,2147396609,2147396579,2147396569,2147396561,2147396557,2147396533,2147396521,2147396513,2147396441,2147396413,2147396411,2147396401,2147396399,2147396353,2147396351,2147396341,2147396323,2147396309,2147396281,2147396267,2147396243,2147396227,2147396213,2147396203,2147396189,2147396179,2147396159,2147396129,2147396077,2147396063,2147396057,2147396051,2147396039,2147395969,2147395961,2147395949,2147395937,2147395927,2147395907,2147395891,2147395841,2147395829,2147395777,2147395771,2147395729,2147395721,2147395709,2147395703,2147395697,2147395669,2147395661,2147395651,2147395633,2147395631,2147395609,2147395589,2147395553,2147395499,2147395489,2147395487,2147395427,2147395423,2147395421,2147395417,2147395379,2147395373,2147395343,2147395331,2147395309,2147395303,2147395297,2147395291,2147395259,2147395241,2147395193,2147395147,2147395123,2147395043,2147395039,2147394979,2147394973,2147394961,2147394959,2147394947,2147394917,2147394913,2147394889,214739486
9,2147394853,2147394811,2147394761,2147394749,2147394719,2147394631,2147394619,2147394617,2147394589,2147394577,2147394569,2147394553,2147394551,2147394547,2147394539,2147394533,2147394527,2147394479,2147394467,2147394461,2147394427,2147394373,2147394371,2147394341,2147394289,2147394283,2147394281,2147394259,2147394247,2147394239,2147394217,2147394199,2147394187,2147394173,2147394167,2147394131,2147394127,2147394097,2147394089,2147394043,2147394023,2147394017,2147393959,2147393929,2147393921,2147393909,2147393881,2147393867,2147393863,2147393861,2147393767,2147393761,2147393747,2147393701,2147393693,2147393687,2147393683,2147393681,2147393671,2147393659,2147393639,2147393621,2147393609,2147393561,2147393533,2147393489,2147393461,2147393453,2147393449,2147393447,2147393419,2147393401,2147393399,2147393393,2147393377,2147393371,2147393359,2147393351,2147393317,2147393299,2147393291,2147393273,2147393257,2147393249,2147393221,2147393201,2147393179,2147393147,2147393141,2147393099,2147393093,2147393051,2147393009,2147392993,2147392991,2147392981,2147392903,2147392883,2147392879,2147392873,2147392853,2147392843,2147392829,2147392811,2147392789,2147392721,2147392703,2147392697,2147392669,2147392591,2147392571,2147392561,2147392547,2147392543,2147392531,2147392487,2147392469,2147392409,2147392399,2147392391,2147392367,2147392337,2147392301,2147392283,2147392271,2147392253,2147392249,2147392243,2147392201,2147392189,2147392153,2147392151,2147392147,2147392139,2147392129,2147392069,2147392031,2147392021,2147391991,2147391973,2147391931,2147391889,2147391863,2147391859,2147391847,2147391833,2147391811,2147391787,2147391781,2147391773,2147391751,2147391721,2147391713,2147391709,2147391691,2147391683,2147391641,2147391637,2147391629,2147391583,2147391569,2147391563,2147391559,2147391551,2147391511,2147391487,2147391461,2147391409,2147391403,2147391383,2147391353,2147391349,2147391347,2147391331,2147391313,2147391299,2147391283,2147391271,2147391269,2147391247,2147391241,2147391
209,2147391203,2147391187,2147391161,2147391121,2147391107,2147391011,2147390977,2147390939,2147390933,2147390923,2147390897,2147390873,2147390827,2147390789,2147390783,2147390779,2147390737,2147390731,2147390671,2147390657,2147390639,2147390629,2147390627,2147390617,2147390611,2147390599,2147390587,2147390563,2147390489,2147390447,2147390437,2147390419,2147390387,2147390363,2147390341,2147390327,2147390291,2147390281,2147390257,2147390249,2147390233,2147390213,2147390209,2147390183,2147390149,2147390117,2147390111,2147390107,2147389991,2147389967,2147389961,2147389957,2147389939,2147389931,2147389919,2147389903,2147389897,2147389891,2147389877,2147389843,2147389819,2147389781,2147389763,2147389753,2147389691,2147389679,2147389631,2147389619,2147389591,2147389579,2147389577,2147389523,2147389501,2147389483,2147389471,2147389441,2147389421,2147389417,2147389403,2147389399,2147389369,2147389351,2147389333,2147389331,2147389327,2147389267,2147389219,2147389213,2147389183,2147389159,2147389151,2147389147,2147389141,2147389121,2147389109,2147389103,2147389067,2147389033,2147389019,2147388979,2147388961,2147388953,2147388923,2147388871,2147388851,2147388829,2147388809,2147388791,2147388773,2147388769,2147388751,2147388731,2147388709,2147388703,2147388701,2147388679,2147388611,2147388577,2147388563,2147388559,2147388497,2147388487,2147388479,2147388437,2147388431,2147388427,2147388413,2147388409,2147388403,2147388389,2147388377,2147388329,2147388323,2147388319,2147388301,2147388253,2147388251,2147388241,2147388239,2147388233,2147388211,2147388209,2147388179,2147388119,2147388107,2147388101,2147388097,2147388083,2147388049,2147388043,2147388041,2147388007,2147387959,2147387947,2147387933,2147387903,2147387897,2147387857,2147387839,2147387821,2147387791,2147387743,2147387741,2147387729,2147387719,2147387713,2147387699,2147387677,2147387651,2147387647,2147387629,2147387609,2147387573,2147387531,2147387507,2147387503,2147387441,2147387413,2147387393,2147387383,2147387339,21473
87323,2147387299,2147387279,2147387257,2147387227,2147387219,2147387213,2147387153,2147387131,2147387083,2147387063,2147387059,2147387029,2147387017,2147387009,2147387003,2147386987,2147386921,2147386883,2147386853,2147386757,2147386753,2147386733,2147386729,2147386727,2147386711,2147386699,2147386687,2147386667,2147386639,2147386627,2147386607,2147386601,2147386583,2147386561,2147386529,2147386471,2147386463,2147386459,2147386447,2147386387,2147386303,2147386277,2147386273,2147386271,2147386259,2147386217,2147386211,2147386207,2147386187,2147386169,2147386147,2147386123,2147386117,2147386091,2147386051,2147386049,2147385973,2147385967,2147385941,2147385937,2147385931,2147385907,2147385893,2147385887,2147385857,2147385847,2147385829,2147385827,2147385809,2147385781,2147385769,2147385767,2147385763,2147385733,2147385727,2147385697,2147385677,2147385671,2147385659,2147385637,2147385631,2147385607,2147385601,2147385599,2147385589,2147385587,2147385553,2147385479,2147385473,2147385439,2147385431,2147385389,2147385371,2147385367,2147385337,2147385329,2147385313,2147385293,2147385281,2147385223,2147385211,2147385199,2147385197,2147385157,2147385151,2147385113,2147385103,2147385091,2147385089,2147385077,2147385049,2147385001,2147384977,2147384917,2147384909,2147384893,2147384861,2147384843,2147384839,2147384821,2147384797,2147384753,2147384747,2147384737,2147384711,2147384689,2147384683,2147384669,2147384653,2147384633,2147384621,2147384609,2147384599,2147384593,2147384537,2147384527,2147384509,2147384501,2147384497,2147384413,2147384387,2147384333,2147384329,2147384287,2147384263,2147384251,2147384243,2147384227,2147384221,2147384203,2147384201,2147384191,2147384111,2147384089,2147384027,2147384023,2147384021,2147384017,2147384011,2147384003,2147383999,2147383993,2147383991,2147383957,2147383943,2147383939,2147383873,2147383871,2147383867,2147383831,2147383829,2147383807,2147383789,2147383787,2147383783,2147383741,2147383739,2147383729,2147383709,2147383697,2147383681,214
7383649,2147383633,2147383627,2147383573,2147383531,2147383519,2147383507,2147383501,2147383489,2147383481,2147383463,2147383421,2147383379,2147383369,2147383349,2147383327,2147383307,2147383291,2147383283,2147383267,2147383223,2147383193,2147383177,2147383129,2147383123,2147383093,2147383079,2147383019,2147383009,2147383003,2147382997,2147382973,2147382901,2147382899,2147382883,2147382877,2147382871,2147382851,2147382847,2147382827,2147382791,2147382773,2147382767,2147382751,2147382707,2147382697,2147382683,2147382647,2147382637,2147382623,2147382607,2147382599,2147382583,2147382581,2147382551,2147382539,2147382509,2147382469,2147382449,2147382443,2147382439,2147382407,2147382403,2147382383,2147382379,2147382323,2147382301,2147382287,2147382277,2147382253,2147382239,2147382233,2147382221,2147382199,2147382197,2147382179,2147382173,2147382137,2147382043,2147381989,2147381953,2147381927,2147381909,2147381903,2147381897,2147381869,2147381857,2147381851,2147381791,2147381783,2147381701,2147381693,2147381689,2147381669,2147381633,2147381629,2147381627,2147381573,2147381567,2147381527,2147381413,2147381371,2147381363,2147381351,2147381347,2147381323,2147381317,2147381309,2147381279,2147381267,2147381263,2147381251,2147381237,2147381227,2147381221,2147381219,2147381207,2147381153,2147381147,2147381123,2147381113,2147381107,2147381059,2147381029,2147381017,2147380957,2147380919,2147380909,2147380903,2147380883,2147380867,2147380853,2147380849,2147380841,2147380831,2147380811,2147380801,2147380799,2147380787,2147380783,2147380733,2147380727,2147380693,2147380681,2147380673,2147380663,2147380649,2147380639,2147380583,2147380577,2147380553,2147380537,2147380523,2147380451,2147380421,2147380387,2147380379,2147380373,2147380369,2147380357,2147380331,2147380309,2147380273,2147380243,2147380219,2147380189,2147380159,2147380141,2147380129,2147380099,2147380051,2147380033,2147379979,2147379977,2147379937,2147379929,2147379919,2147379917,2147379869,2147379863,2147379859,2147379847,2
147379823,2147379811,2147379809,2147379799,2147379763,2147379761,2147379739,2147379721,2147379673,2147379671,2147379631,2147379571,2147379557,2147379541,2147379539,2147379529,2147379503,2147379499,2147379491,2147379473,2147379431,2147379419,2147379359,2147379319,2147379301,2147379197,2147379133,2147379109,2147379097,2147379077,2147378999,2147378983,2147378971,2147378941,2147378903,2147378887,2147378873,2147378843,2147378839,2147378837,2147378833,2147378819,2147378777,2147378767,2147378759,2147378729,2147378713,2147378671,2147378557,2147378539,2147378533,2147378521,2147378501,2147378479,2147378447,2147378411,2147378393,2147378381,2147378377,2147378353,2147378297,2147378269,2147378267,2147378257,2147378251,2147378227,2147378201,2147378131,2147378099,2147378063,2147378011,2147377987,2147377961,2147377951,2147377943,2147377927,2147377879,2147377871,2147377789,2147377787,2147377753,2147377741,2147377699,2147377697,2147377691,2147377679,2147377667,2147377633,2147377607,2147377601,2147377591,2147377567,2147377541,2147377523,2147377489,2147377487,2147377481,2147377469,2147377459,2147377429,2147377381,2147377343,2147377277,2147377273,2147377247,2147377241,2147377231,2147377213,2147377189,2147377153,2147377019,2147377003,2147376989,2147376983,2147376953,2147376929,2147376919,2147376893,2147376887,2147376883,2147376823,2147376817,2147376719,2147376691,2147376631,2147376617,2147376557,2147376551,2147376547,2147376529,2147376509,2147376503,2147376487,2147376463,2147376457,2147376433,2147376419,2147376401,2147376391,2147376379,2147376377,2147376367,2147376323,2147376241,2147376193,2147376151,2147376149,2147376113,2147376089,2147376073,2147376061,2147376059,2147376053,2147376017,2147376001,2147375999,2147375987,2147375969,2147375939,2147375899,2147375869,2147375849,2147375807,2147375773,2147375761,2147375729,2147375693,2147375683,2147375677,2147375669,2147375641,2147375627,2147375597,2147375591,2147375569,2147375567,2147375561,2147375539,2147375521,2147375509,2147375471,2147375437
,2147375429,2147375413,2147375411,2147375407,2147375389,2147375371,2147375297,2147375291,2147375257,2147375249,2147375207,2147375201,2147375173,2147375171,2147375161,2147375149,2147375141,2147375137,2147375119,2147375099,2147375089,2147375051,2147374997,2147374993,2147374987,2147374969,2147374951,2147374897,2147374847,2147374841,2147374819,2147374787,2147374741,2147374727,2147374711,2147374673,2147374639,2147374633,2147374597,2147374577,2147374531,2147374519,2147374477,2147374459,2147374337,2147374331,2147374301,2147374297,2147374283 7947 }; 7948 const unsigned nprimes31=sizeof(primes31)/sizeof(unsigned); 7949 7950 // Previous Fourier prime 7951 int prevprimep1p2p3(int p,int maxp,int fourier_for_n){ 7952 if (p==p1+2 || p==p1+1) 7953 return p1; 7954 if (p==p1 || p==p1-1 || p==p1-2) 7955 return p2; 7956 if (p==p2 || p==p2-1 || p==p2-2) 7957 return p3; 7958 if (p==p3 || p==p3-1 || p==p3-2) 7959 p=fourier_for_n?(p1-2):maxp; 7960 if (fourier_for_n){ 7961 int l=sizeinbase2(fourier_for_n); 7962 int pdiv=p>>l; 7963 for (--pdiv;pdiv>=(1<<(30-l));--pdiv){ 7964 p=(pdiv<<l)+1; 7965 if (p!=p1 && p!=p2&& p!=p3 && is_probab_prime_p(p)) 7966 return p; 7967 } 7968 } 7969 p=prevprime(p-2).val; 7970 if (p==p1 || p==p2 || p==p3) 7971 p=prevprime(p-2).val; 7972 return p; 7973 } 7974 7975 longlong prevprimell(longlong p,longlong fourier_for_n=0){ 7976 if (fourier_for_n){ 7977 int l=sizeinbase2(fourier_for_n); 7978 longlong pdiv=p>>l; 7979 for (--pdiv;pdiv>=(1<<(62-l));--pdiv){ 7980 p=(pdiv<<l)+1; 7981 if (is_probab_prime_p(p)) 7982 return p; 7983 } 7984 } 7985 gen g=prevprime(p-2); 7986 if (g.type==_ZINT) 7987 return mpz_get_si(*g._ZINTptr); 7988 return g.val; 7989 } 7990 7991 bool gcd_modular_algo(const modpoly &p,const modpoly &q,modpoly &d,modpoly * p_simp,modpoly * q_simp){ 7992 gen tmp; 7993 int pt=coefftype(p,tmp),qt=coefftype(q,tmp); 7994 //if (pt!=0 || qt!=0) return false; 7995 if ( (pt!=_INT_ && pt!=_CPLX) 7996 || (qt!=_INT_ && qt!=_CPLX) ) 7997 return false; 7998 gen 
gcdfirstcoeff(gcd(p.front(),q.front(),context0)); 7999 int gcddeg= giacmin(int(p.size()),int(q.size()))-1; 8000 environment env; 8001 env.modulo=p1+1; env.moduloon=true; 8002 env.complexe=!vect_is_real(p,context0) || !vect_is_real(q,context0); 8003 int maxdeg=giacmax(p.size(),q.size())-1; 8004 int maxp=std::sqrt(p1p2/4./maxdeg); 8005 gen productmodulo(1); 8006 modpoly currentgcd,Q,R; 8007 for (;;){ 8008 env.modulo=prevprimep1p2p3(env.modulo.val,maxp); 8009 while (is_zero(p.front() % env.modulo) || is_zero(q.front() % env.modulo)){ 8010 env.modulo=prevprimep1p2p3(env.modulo.val,maxp); 8011 if (env.complexe){ 8012 while (smod(env.modulo,4)!=1) 8013 env.modulo=prevprimep1p2p3(env.modulo.val,maxp); 8014 } 8015 } 8016 modpoly gcdmod; 8017 gcdmodpoly(p,q,&env,gcdmod); 8018 if (is_undef(gcdmod)) 8019 return false; 8020 gen adjustcoeff=gcdfirstcoeff*invmod(gcdmod.front(),env.modulo); 8021 mulmodpoly(gcdmod,adjustcoeff,&env,gcdmod); 8022 int m=int(gcdmod.size())-1; 8023 if (!m){ 8024 d=makevecteur(1); 8025 return true; 8026 } 8027 if (m>gcddeg) // this prime is bad, just ignore 8028 continue; 8029 // combine step 8030 if (m<gcddeg){ // previous prime was bad 8031 gcddeg=m; 8032 currentgcd.swap(gcdmod); 8033 productmodulo=env.modulo; 8034 } 8035 else { 8036 // m==gcddeg, start combine 8037 if (productmodulo==gen(1)){ // no need to combine primes 8038 currentgcd.swap(gcdmod); 8039 productmodulo=env.modulo; 8040 } 8041 else { 8042 if (productmodulo.type==_INT_) 8043 currentgcd=ichinrem(gcdmod,currentgcd,env.modulo,productmodulo); 8044 else 8045 ichinrem_inplace(currentgcd,gcdmod,productmodulo,env.modulo.val); 8046 productmodulo=productmodulo*env.modulo; 8047 } 8048 } 8049 // check candidate gcd 8050 modpoly dmod(currentgcd); 8051 if (is_undef(dmod)) 8052 return false; 8053 ppz(dmod); 8054 if ( DenseDivRem(p,dmod,Q,R,true) && R.empty()){ 8055 if (p_simp) 8056 p_simp->swap(Q); 8057 if (DenseDivRem(q,dmod,Q,R,true) && R.empty() ){ 8058 if (q_simp) 8059 q_simp->swap(Q); 8060 
d.swap(dmod); 8061 return true; 8062 } 8063 } 8064 } 8065 return false; 8066 } 8067 8068 modpoly gcd(const modpoly & p,const modpoly &q,environment * env,bool call_ntl){ 8069 if (p.empty()) return q; 8070 if (q.empty()) return p; 8071 if (!env){ 8072 if (p.front().type==_MOD){ 8073 // unmodularize, get env, call gcdmodpoly 8074 environment e; 8075 e.modulo=*(p.front()._MODptr+1); 8076 e.moduloon=true; 8077 modpoly P,Q,A; 8078 unmodularize(p,e.modulo,P); 8079 unmodularize(q,e.modulo,Q); 8080 if (call_ntl && ntlgcd(P,Q,e.modulo,A)) 8081 ; 8082 else 8083 gcdmodpoly(P,Q,&e,A); 8084 modularize(A,e.modulo); 8085 return A; 8086 } 8087 else { 8088 if (q.front().type==_MOD) 8089 return gcd(q,p,env); 8090 } 8091 } 8092 if (!env || !env->moduloon || !is_zero(env->coeff)){ 8093 modpoly g; 8094 if (call_ntl && ntlgcd(p,q,0,g)) 8095 return g; 8096 if (gcd_modular_algo(p,q,g,NULL,NULL)) 8097 return g; 8098 polynome r,s; 8099 int dim=giacmax(inner_POLYdim(p),inner_POLYdim(q)); 8100 poly12polynome(p,1,r,dim); 8101 poly12polynome(q,1,s,dim); 8102 return polynome2poly1(gcd(r,s),1); 8103 } 8104 modpoly a; 8105 gcdmodpoly(p,q,env,a); 8106 return a; 8107 // dbgp(a); 8108 // return a; 8109 } 8110 8111 modpoly lcm(const modpoly & p,const modpoly &q,environment * env){ 8112 modpoly g(gcd(p,q,env)); 8113 return operator_times(operator_div(p,g,env),q,env); 8114 } 8115 8116 bool algnorme(const polynome & p_y,const polynome & pmini,polynome & n){ 8117 n=resultant(p_y,pmini).trunc1(); 8118 return true; 8119 matrice S=sylvester(polynome2poly1(pmini,1),polynome2poly1(p_y,1)); 8120 S=mtran(S); 8121 gen g=det_minor(S,vecteur(0),false,context0); 8122 if (g.type!=_POLY) 8123 return false; 8124 n=*g._POLYptr; 8125 return true; 8126 } 8127 8128 #ifdef USE_GMP_REPLACEMENTS 8129 bool egcd_mpz(const modpoly & a,const modpoly &b,int degstop,const gen & m,modpoly & u,modpoly &v,modpoly & d,modpoly * u_ptr,modpoly * v_ptr,modpoly * r_ptr){ 8130 return false; 8131 } 8132 8133 #else 8134 // set B to free mpz 
copies of a 8135 bool assign_mpz(const modpoly & a,modpoly &A,int s=128){ 8136 int n=a.size(); 8137 A.reserve(n); 8138 for (int i=0;i<n;++i){ 8139 gen ai(a[i]); 8140 if (ai.type==_INT_) 8141 ai.uncoerce(s); 8142 else { 8143 if (ai.type!=_ZINT) 8144 return false; 8145 gen b; b.uncoerce(s); 8146 mpz_set(*b._ZINTptr,*ai._ZINTptr); 8147 swapgen(ai,b); 8148 } 8149 A.push_back(ai); 8150 } 8151 return true; 8152 } 8153 8154 void uncoerce(modpoly & R,int s){ 8155 for (int i=0;i<R.size();++i){ 8156 if (R[i].type==_INT_) 8157 R[i].uncoerce(s); 8158 } 8159 } 8160 8161 // A.size()==B.size()+1, A -= (q1*X+q0)*B mod m 8162 // A[i] -= q1*B[i]+q0*B[i-1] mod m 8163 void rem_mpz(modpoly & A,const gen & q1,const gen & q0,const gen & m,modpoly & B,mpz_t & z,int cancel=0){ 8164 int a=A.size()-1,i=1; 8165 if (cancel) 8166 i=cancel; 8167 else { 8168 mpz_set(z,*A.front()._ZINTptr); 8169 mpz_submul(z,*q1._ZINTptr,*B.front()._ZINTptr); 8170 mpz_tdiv_r(*A.front()._ZINTptr,z,*m._ZINTptr); 8171 if (mpz_cmp_si(*A.front()._ZINTptr,0)==0) 8172 cancel=1; 8173 } 8174 for (;i<a;++i){ 8175 mpz_set(z,*A[i]._ZINTptr); 8176 mpz_submul(z,*q1._ZINTptr,*B[i]._ZINTptr); 8177 mpz_submul(z,*q0._ZINTptr,*B[i-1]._ZINTptr); 8178 mpz_tdiv_r(*A[i]._ZINTptr,z,*m._ZINTptr); 8179 if (cancel==i && mpz_cmp_si(*A[i]._ZINTptr,0)==0) 8180 ++cancel; 8181 } 8182 mpz_set(z,*A.back()._ZINTptr); 8183 mpz_submul(z,*q0._ZINTptr,*B.back()._ZINTptr); 8184 mpz_tdiv_r(*A.back()._ZINTptr,z,*m._ZINTptr); 8185 if (cancel) 8186 A.erase(A.begin(),A.begin()+cancel); 8187 } 8188 8189 bool egcd_mpz(const modpoly & a,const modpoly &b,int degstop,const gen & m,modpoly & u,modpoly &v,modpoly & d,modpoly * u_ptr,modpoly * v_ptr,modpoly * r_ptr){ 8190 if (m.type!=_ZINT) 8191 return false; 8192 environment env; 8193 env.modulo=m; 8194 env.moduloon=true; 8195 int s=mpz_sizeinbase(*m._ZINTptr,2)+1; 8196 modpoly A,B,U0(1,1),U1,Q,R; 8197 U0[0].uncoerce(s); 8198 assign_mpz(a,A,s); 8199 assign_mpz(b,B,s); 8200 bool swapped=A.size()<B.size(); 8201 if 
(swapped) 8202 A.swap(B); 8203 gen q1,q0; q1.uncoerce(s); q0.uncoerce(s); 8204 gen Z; Z.uncoerce(2*s); 8205 mpz_t & z=*Z._ZINTptr; 8206 int bs,niter=0; 8207 for (;(bs=B.size())-1>=degstop;++niter){ 8208 // gen B0=invmod(B.front(),m); B0.uncoerce(s); 8209 mpz_invert(*q0._ZINTptr,*B.front()._ZINTptr,*m._ZINTptr); 8210 mpz_mul(z,*A.front()._ZINTptr,*q0._ZINTptr); // B0 stored in q0 8211 mpz_tdiv_r(*q1._ZINTptr,z,*m._ZINTptr); // A.front()*B0 modulo m 8212 if (A.size()==bs){ // may happen only at first iteration 8213 // quotient is a constant, A0*B0, replace A by A-quotient*B 8214 int cancel=1; 8215 for (int i=1;i<bs;++i){ 8216 mpz_set(z,*A[i]._ZINTptr); 8217 mpz_submul(z,*q1._ZINTptr,*B[i]._ZINTptr); 8218 mpz_tdiv_r(*A[i]._ZINTptr,z,*m._ZINTptr); 8219 if (i==cancel && mpz_cmp_si(*A[i]._ZINTptr,0)==0) 8220 ++cancel; 8221 } 8222 // no change for U0 since U1 is 0 at 1st iteration 8223 A.erase(A.begin(),A.begin()+cancel); 8224 if (A.empty()) 8225 break; 8226 } 8227 else { 8228 if (A.size()==bs+1){ // generic iteration, compute quotient=q1*X+q0 8229 mpz_set(z,*A[1]._ZINTptr); 8230 mpz_submul(z,*q1._ZINTptr,*B[1]._ZINTptr); 8231 mpz_mul(z,z,*q0._ZINTptr); // B0 stored in q0 8232 mpz_tdiv_r(*q0._ZINTptr,z,*m._ZINTptr); // (A[1]-q1*B[1])*B0 modulo m 8233 // A -= (q1*X+q0)*B (2 leading terms cancel, maybe more) 8234 rem_mpz(A,q1,q0,m,B,z,2); 8235 if (A.empty()) 8236 break; 8237 // U0=U0-(q1*X+q0)*U1 (U1.size()>U0.size(), no leading term cancel) 8238 if (!U1.empty()){ 8239 U0.insert(U0.begin(),2,0); 8240 U0[0].uncoerce(s); 8241 U0[1].uncoerce(s); 8242 if (U0.size()==2 && U1.size()==1 && mpz_cmp_si(*U1.front()._ZINTptr,1)==0){ 8243 mpz_neg(*U0[0]._ZINTptr,*q1._ZINTptr); 8244 mpz_neg(*U0[1]._ZINTptr,*q0._ZINTptr); 8245 } 8246 else { 8247 rem_mpz(U0,q1,q0,m,U1,z,0); 8248 } 8249 } 8250 } 8251 else { 8252 // call divrem 8253 DivRem(A,B,&env,Q,R,false); 8254 uncoerce(R,s); 8255 A.swap(R); 8256 if (A.empty()) 8257 break; // B is the gcd 8258 // U0=U0-Q*U1 8259 
operator_times(Q,U1,&env,R); submodpoly(U0,R,&env,U0); // ur=ua-q*ub; 8260 uncoerce(U0,s); 8261 } 8262 } 8263 // next iteration A is the remainder (non zero) and U0 the coeff, swap 8264 A.swap(B); 8265 U0.swap(U1); 8266 } 8267 if (niter==0){ 8268 if (swapped){ 8269 u=vecteur(1,1); 8270 v=vecteur(0); 8271 d=a; 8272 if (r_ptr) 8273 *r_ptr=b; 8274 } 8275 else { 8276 u=vecteur(0); 8277 v=vecteur(1,1); 8278 d=b; 8279 if (r_ptr) 8280 *r_ptr=a; 8281 } 8282 if (u_ptr){ 8283 *u_ptr=v; 8284 *v_ptr=u; 8285 } 8286 return true; 8287 } 8288 // B is the gcd, U1 is the coeff of a unless swapped is true 8289 trim_inplace(B,&env); 8290 d.swap(B); 8291 trim_inplace(U1,&env); 8292 u.swap(U1); 8293 if (r_ptr){ 8294 trim_inplace(A,&env); 8295 r_ptr->swap(A); 8296 } 8297 if (u_ptr){ 8298 trim_inplace(U0,&env); 8299 u_ptr->swap(U0); 8300 } 8301 #if 0 8302 q1=invmod(d.front(),m); 8303 mulmodpoly(u,q1,&env,u); 8304 mulmodpoly(d,q1,&env,d); 8305 #endif 8306 modpoly tmp1,tmp2; 8307 operator_times(u,swapped?b:a,&env,tmp1); 8308 // next step is not required because degree(d)<degree(smallest of a and b) 8309 // since we made at least 1 iteration without breaking 8310 if (0 && (d.size()==swapped?a.size():b.size())){ 8311 submodpoly(tmp1,d,&env,tmp2); tmp1.swap(tmp2); // tmp1=u*a-d 8312 } 8313 DivRem(tmp1,swapped?a:b,&env,v,R); // R would be 0 if step above was taken, DivQuo might be called if degree are large, but egcd_mpz should not be called... 
8314 negmodpoly(v,v); 8315 if (swapped) 8316 u.swap(v); 8317 if (u_ptr && v_ptr){ 8318 if (niter==1){ 8319 // breaked at 2nd iteration, at 1st iteration we have u=0 and v=1 8320 u_ptr->clear(); 8321 *v_ptr=vecteur(1,1); 8322 } 8323 else { 8324 operator_times(*u_ptr,swapped?b:a,&env,tmp1); 8325 DivRem(tmp1,swapped?a:b,&env,*v_ptr,R); 8326 negmodpoly(*v_ptr,*v_ptr); 8327 } 8328 if (swapped) 8329 u_ptr->swap(*v_ptr); 8330 } 8331 return true; 8332 } 8333 #endif // USE_GMP_REPLACEMENTS 8334 8335 // returns [[A,B],[C,D]] and d such that [[A,B],[C,D]]*[a,b]=[d,0] 8336 bool half_egcd(const modpoly &a,const modpoly &b,const gen & modulo,modpoly &A,modpoly &B,modpoly &C,modpoly &D,modpoly & d){ 8337 if (a.size()<b.size()) 8338 return half_egcd(b,a,modulo,B,A,D,C,d); 8339 environment env; 8340 env.modulo=modulo; 8341 env.moduloon=true; 8342 modpoly q,r,tmp1; 8343 if (a.size()==b.size()){ // requires an additional euclidean division step 8344 DivRem(a,b,&env,q,r); 8345 if (!half_egcd(b,r,modulo,A,B,C,D,d)) 8346 return false; 8347 // [[A,B],[C,D]]*[b,r]=[d,0], where r=a-b*q 8348 // A*b+B*(a-b*q)=d i.e. B*a+(A-q*B)*b=d 8349 // C*b+D*(a-b*q)=0 i.e. 
D*a+(C-q*D)*b=0 8350 a_bc(A,B,q,&env,A,tmp1); 8351 a_bc(C,D,q,&env,C,tmp1); 8352 // now [[B,A],[D,C]]*[a,b]=[d,0] 8353 A.swap(B); C.swap(D); 8354 return true; 8355 } 8356 modpoly RA,RB,RC,RD,tmp0,tmp2; 8357 if (!hgcd(a,b,modulo,RA,RB,RC,RD,d,tmp0,tmp1,tmp2)) 8358 return false; 8359 int maxadeg=a.size()+1-giacmax(RA.size(),RB.size()); 8360 matrix22timesvect(RA,RB,RC,RD,a,b,maxadeg,maxadeg,d,tmp0,env,tmp1,tmp2); 8361 if (tmp0.empty()){ 8362 A.swap(RA); B.swap(RB); C.swap(RC); D.swap(RD); 8363 return true; 8364 } 8365 modpoly & P2=d; modpoly & P3=tmp0; modpoly & P4=r; // Yap notations 8366 // [[RA,RB],[RC,RD]]*[a,b]=[P2,P3] 8367 DivRem(P2,P3,&env,q,P4); // P4=P2-q*P3=RA*a+RB*b-q*(RC*a+RD*b) 8368 // [[0,1],[1,-q]]*[[RA,RB],[RC,RD]] == [[RC,RD],[-RC*q+RA,-RD*q+RB]] 8369 a_bc(RA,RC,q,&env,RA,tmp1); 8370 a_bc(RB,RD,q,&env,RB,tmp1); // [[RC,RD],[RA,RB]]*[a,b]=[P3,P4] 8371 if (P4.empty()){ 8372 A.swap(RC); B.swap(RD); C.swap(RA); D.swap(RB); d.swap(tmp0); return true; 8373 } 8374 modpoly SA,SB,SC,SD; 8375 if (!half_egcd(P3,P4,modulo,SA,SB,SC,SD,d)) 8376 return false; 8377 matrix22(RA,RB,RC,RD,SA,SB,SC,SD,A,B,C,D,env,tmp1,tmp2); 8378 return true; 8379 } 8380 8381 void neg(vector<int> & v){ 8382 vector<int>::iterator it=v.begin(),itend=v.end(); 8383 for (;it!=itend;++it) 8384 *it=-*it; 8385 } 8386 8387 int adjust_resultant(int &res, const vector<int> & coeffv,const vector<int> & degv,int m){ 8388 // adjust 8389 for (int i=0;i<coeffv.size();++i){ 8390 if (degv[i]%2==1 && degv[i+1]%2==1) 8391 res=-res; 8392 res=(longlong(res)*powmod(coeffv[i],degv[i]-degv[i+2],m))%m; 8393 } 8394 return res; 8395 } 8396 8397 #define EGCD_INT 1 8398 #ifdef EGCD_INT 8399 // for a and b co-prime mod p 8400 // returns [[A,B],[C,D]] and d such that [[A,B],[C,D]]*[a,b]=[d,0] 8401 bool in_egcd_int(const vector<int> &a,const vector<int> &b,int p,vector<int> &A,vector<int> &B,vector<int> &C,vector<int> &D,vector<int> & coeffv,vector<int> & degv,int & d){ 8402 d=1; 8403 vector<int> 
RA,RB,RC,RD,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,Wp; 8404 if (b.size()<HGCD){ 8405 hgcd_iter_int(a,b,0,A,C,B,D,p,coeffv,degv,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5); 8406 if (tmp0.size()>1) 8407 d=0; 8408 else { 8409 d=invmod(tmp0[0],p); 8410 degv.push_back(0); 8411 mulmod(A,d,p); 8412 mulmod(B,d,p); 8413 } 8414 return true; 8415 } 8416 if (!hgcdint(a,b,p,Wp,RA,RB,RC,RD,coeffv,degv,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5)) 8417 return false; 8418 int maxadeg=a.size()+1-giacmax(RA.size(),RB.size()); 8419 matrix22inttimesvect(RA,RB,RC,RD,a,b,maxadeg,maxadeg,tmp0,tmp1,p,tmp2,tmp3,tmp4,tmp5,Wp); 8420 if (tmp1.empty()){ 8421 A.swap(RA); B.swap(RB); C.swap(RC); D.swap(RD); 8422 return true; 8423 } 8424 vector<int> & P2=tmp0,& P3=tmp1,&q=tmp2,&r=tmp3; // Yap notations 8425 degv.push_back(degv.back()+P3.size()-P2.size()); 8426 coeffv.push_back(P3[0]); 8427 // [[RA,RB],[RC,RD]]*[a,b]=[P2,P3] 8428 DivRem(P2,P3,p,q,r); // P4=P2-q*P3=RA*a+RB*b-q*(RC*a+RD*b) 8429 // [[0,1],[1,-q]]*[[RA,RB],[RC,RD]] == [[RC,RD],[-RC*q+RA,-RD*q+RB]] 8430 a_bc(RA,RC,q,p,RA,tmp4); 8431 a_bc(RB,RD,q,p,RB,tmp4); // [[RC,RD],[RA,RB]]*[a,b]=[P3,P4] 8432 if (r.empty()){ 8433 A.swap(RC); B.swap(RD); C.swap(RA); D.swap(RB); 8434 return true; 8435 } 8436 vector<int> SA,SB,SC,SD; 8437 if (!in_egcd_int(P3,r,p,SA,SB,SC,SD,coeffv,degv,d)) 8438 return false; 8439 matrix22int(RA,RB,RC,RD,SA,SB,SC,SD,A,B,C,D,p,tmp0,Wp); 8440 return true; 8441 } 8442 8443 bool egcd_int(const vector<int> &a,const vector<int> &b,int p,vector<int> &A,vector<int> &B,vector<int> &C,vector<int> &D,int & d){ 8444 if (a.size()<b.size()){ 8445 bool res=egcd_int(b,a,p,B,A,D,C,d); 8446 if (a.size()%2==0 && b.size()%2==0){ 8447 d=-d; 8448 neg(A); neg(B); 8449 } 8450 return res; 8451 } 8452 vector<int> q,r; 8453 if (a.size()==b.size()){ // requires an additional euclidean division step 8454 DivRem(a,b,p,q,r); 8455 if (!egcd_int(b,r,p,A,B,C,D,d)) 8456 return false; 8457 // [[A,B],[C,D]]*[b,r]=[d,0], where r=a-b*q 8458 // A*b+B*(a-b*q)=d i.e. 
B*a+(A-q*B)*b=d 8459 // C*b+D*(a-b*q)=0 i.e. D*a+(C-q*D)*b=0 8460 a_bc(A,B,q,p,A,r); 8461 a_bc(C,D,q,p,C,r); 8462 // now [[B,A],[D,C]]*[a,b]=[d,0] 8463 A.swap(B); C.swap(D); 8464 d=(d*longlong(powmod(b[0],a.size()-r.size(),p)))%p; 8465 if (a.size()%2==0){ 8466 d=-d; 8467 neg(A); neg(B); 8468 } 8469 return true; 8470 } 8471 vector<int> coeffv,degv; 8472 coeffv.reserve(b.size()+1); 8473 degv.reserve(b.size()+2); 8474 degv.push_back(a.size()); 8475 if (!in_egcd_int(a,b,p,A,B,C,D,coeffv,degv,d)) 8476 return false; 8477 adjust_resultant(d,coeffv,degv,p); 8478 mulmod(A,d,p); 8479 mulmod(B,d,p); 8480 return true; 8481 } 8482 8483 #endif 8484 8485 // modular extended Euclide algorithm with rational reconstruction 8486 // this would become faster only for very large degrees 8487 bool egcd_z(const modpoly &a, const modpoly & b, modpoly & u,modpoly & v,modpoly & d,bool deterministic){ 8488 d=gcd(a,b,0); 8489 if (d.size()>1){ 8490 modpoly D; 8491 bool b=egcd_z(a/d,b/d,u,v,D,deterministic); 8492 if (!b) return false; 8493 u=u*d; 8494 v=v*d; 8495 d=D*d; 8496 return true; 8497 } 8498 if (a.size()>=NTL_XGCD && b.size()>=NTL_XGCD && ntlxgcd(a,b,0,u,v,d)) 8499 return true; 8500 if (a.size()<HGCD || b.size()<HGCD) 8501 return false; 8502 environment env; 8503 env.moduloon=true; 8504 env.modulo=p1+1; gen pip=1; 8505 int gcddeg=giacmin(a.size(),b.size())-1; 8506 int maxdeg=giacmax(a.size(),b.size())-1; 8507 int maxp=std::sqrt(p1p2/4./maxdeg),iter; 8508 modpoly urec,vrec,drec,ucur,vcur,dcur; 8509 gen borne=pow(norm(a,context0),(int)b.size()-1)*pow(norm(b,context0),(int)a.size()-1); 8510 borne=2*pow(b.size(),a.size()-1)*pow(a.size(),b.size()-1)*borne*borne; 8511 #if 1 // compute resultant first 8512 #ifdef EGCD_INT 8513 gen R; 8514 vector<int> ai,bi,A,B,C,D; int di; 8515 #else 8516 gen R=mod_resultant(a,b,0.0); // deterministic?0.0:1e-80); // deterministic 8517 #endif 8518 bool stable=false; 8519 int mem_reserve=0;//(sizeinbase2(borne)+1)/2+64; 8520 for 
(iter=0;is_greater(borne,pip*pip,context0);++iter){ 8521 env.modulo=prevprimep1p2p3(env.modulo.val,maxp,maxdeg); 8522 while (is_zero(a.front() % env.modulo) || is_zero(b.front() % env.modulo)) 8523 env.modulo=prevprimep1p2p3(env.modulo.val,maxp,maxdeg); 8524 int p=env.modulo.val; 8525 #ifdef EGCD_INT 8526 vecteur2vector_int(a,p,ai); 8527 vecteur2vector_int(b,p,bi); 8528 egcd_int(ai,bi,p,A,B,C,D,di); 8529 if (di==0) 8530 continue; 8531 if (urec.empty()){ 8532 vector_int2vecteur(A,urec); 8533 vector_int2vecteur(B,vrec); 8534 R=di; 8535 pip=env.modulo; 8536 continue; 8537 } 8538 if (pip.type==_INT_){ 8539 ichinrem_inplace(urec,A,pip,p,mem_reserve); 8540 ichinrem_inplace(vrec,B,pip,p,mem_reserve); 8541 R=ichinrem(R,di,pip,p); 8542 } 8543 else { 8544 bool b0,b1,b2; 8545 if (!(b0=chk_equal_mod(R,di,p))) 8546 R=ichinrem(R,di,pip,p); 8547 if (!(b1=chk_equal_mod(urec,A,p))) 8548 ichinrem_inplace(urec,A,pip,p); 8549 if (!(b2=chk_equal_mod(vrec,B,p))) 8550 ichinrem_inplace(vrec,B,pip,p); 8551 if (b0 && b1 && b2 && !deterministic){ 8552 if (stable) 8553 break; 8554 stable=true; // make a last run for more confidence 8555 } 8556 else 8557 stable=false; 8558 } 8559 pip=pip*env.modulo; 8560 #else 8561 egcd(a,b,&env,ucur,vcur,dcur); 8562 int m=dcur.size(); 8563 if (m>gcddeg) 8564 continue; 8565 int r=R.type==_ZINT?modulo(*R._ZINTptr,env.modulo.val):R.val; 8566 r=(longlong(r)*invmod(dcur[0].val,env.modulo.val))%env.modulo.val; 8567 mulmodpoly(ucur,gen(r),&env,ucur); 8568 mulmodpoly(vcur,gen(r),&env,vcur); 8569 if (m<gcddeg || pip==1){ // 1st run or previous primes were bad 8570 gcddeg=m; 8571 pip=env.modulo; 8572 urec.swap(ucur); 8573 vrec.swap(vcur); 8574 } 8575 else { 8576 // chinese remainder 8577 if (pip.type==_INT_){ 8578 urec=ichinrem(urec,ucur,pip,env.modulo); 8579 vrec=ichinrem(vrec,vcur,pip,env.modulo); 8580 } 8581 else { 8582 bool b1,b2; 8583 if (!(b1=chk_equal_mod(urec,ucur,env.modulo.val))) 8584 ichinrem_inplace(urec,ucur,pip,env.modulo.val); 8585 if 
(!(b2=chk_equal_mod(vrec,vcur,env.modulo.val))) 8586 ichinrem_inplace(vrec,vcur,pip,env.modulo.val); 8587 if (b1 && b2 && !deterministic){ 8588 if (stable) 8589 break; 8590 stable=true; // make a last run for more confidence 8591 } 8592 else 8593 stable=false; 8594 } 8595 pip=pip*env.modulo; 8596 } 8597 #endif 8598 } 8599 if (debug_infolevel) 8600 CERR << CLOCK()*1e-6 << " #primes used " << iter << "\n"; 8601 u.swap(urec); 8602 v.swap(vrec); 8603 d=makevecteur(R); 8604 return true; 8605 #else 8606 // computing u,v and d simultaneously could be 2* efficient 8607 // if dcur is the resultant of ucur and vcur 8608 // test would be borne>=pip and no fractional reconstruction 8609 for (iter=0;is_greater(borne,pip,context0);++iter){ 8610 env.modulo=prevprimep1p2p3(env.modulo.val,maxp,maxdeg); 8611 while (is_zero(a.front() % env.modulo) || is_zero(b.front() % env.modulo)) 8612 env.modulo=prevprimep1p2p3(env.modulo.val,maxp,maxdeg); 8613 egcd(a,b,&env,ucur,vcur,dcur); 8614 int m=dcur.size(); 8615 if (m>gcddeg) 8616 continue; 8617 if (m<gcddeg || pip==1){ // 1st run or previous primes were bad 8618 gcddeg=m; 8619 pip=env.modulo; 8620 urec.swap(ucur); 8621 vrec.swap(vcur); 8622 drec.swap(dcur); 8623 } 8624 else { 8625 // chinese remainder 8626 if (pip.type==_INT_){ 8627 urec=ichinrem(urec,ucur,pip,env.modulo); 8628 vrec=ichinrem(vrec,vcur,pip,env.modulo); 8629 drec=ichinrem(drec,dcur,pip,env.modulo); 8630 } 8631 else { 8632 ichinrem_inplace(urec,ucur,pip,env.modulo.val); 8633 ichinrem_inplace(vrec,vcur,pip,env.modulo.val); 8634 ichinrem_inplace(drec,dcur,pip,env.modulo.val); 8635 } 8636 pip=pip*env.modulo; 8637 } 8638 } 8639 // rational reconstruction 8640 if (debug_infolevel) 8641 CERR << CLOCK()*1e-6 << " fracmod begin\n" ; 8642 gen den(drec); 8643 d=fracmod(drec,pip,&den); 8644 u=fracmod(urec,pip,&den); 8645 v=fracmod(vrec,pip,&den); 8646 mulmodpoly(d,den,d); 8647 mulmodpoly(u,den,u); 8648 mulmodpoly(v,den,v); 8649 if (debug_infolevel) 8650 CERR << CLOCK()*1e-6 << " 
fracmod end, #primes used " << iter << "\n"; 8651 return true; 8652 #endif 8653 } 8654 8655 // p1*u+p2*v=d 8656 void egcd(const modpoly &p1, const modpoly & p2, environment * env,modpoly & u,modpoly & v,modpoly & d,bool deterministic){ 8657 #if 1 8658 if (!p1.empty() && !p2.empty() && 8659 (!env || !env->moduloon)){ 8660 bool p1mod=p1.front().type==_MOD,p2mod=p1.front().type==_MOD; 8661 if (p1mod || p2mod){ 8662 environment e; 8663 e.modulo=*((p1mod?p1:p2).front()._MODptr+1); 8664 e.moduloon=true; 8665 egcd(unmod(p1,e.modulo),unmod(p2,e.modulo),&e,u,v,d); 8666 modularize(u,e.modulo); 8667 modularize(v,e.modulo); 8668 modularize(d,e.modulo); 8669 return; 8670 } 8671 } 8672 if (env && env->moduloon){ 8673 modpoly C,D; 8674 if (p1.size()>=HGCD && p2.size()>=HGCD && half_egcd(p1,p2,env->modulo,u,v,C,D,d)){ 8675 if (!is_one(d.front())){ 8676 gen d0=invmod(d.front(),env->modulo); 8677 mulmodpoly(u,d0,env,u); 8678 mulmodpoly(v,d0,env,v); 8679 mulmodpoly(d,d0,env,d); 8680 } 8681 return; 8682 } 8683 if (egcd_mpz(p1,p2,1,env->modulo,u,v,d,0,0,0)) 8684 return; 8685 } 8686 #endif 8687 if ( (!env || !env->moduloon || !is_zero(env->coeff))){ 8688 gen p1g,p2g; 8689 int p1t=coefftype(p1,p1g); 8690 int p2t=coefftype(p2,p2g); 8691 if (p1t==0 && p2t==0 && egcd_z(p1,p2,u,v,d,deterministic)) 8692 return; 8693 int dim=giacmax(inner_POLYdim(p1),inner_POLYdim(p2)); 8694 polynome pp1(dim),pp2(dim),pu(dim),pv(dim),pd(dim); 8695 gen den1(1),den2(1); 8696 poly12polynome(p1,1,pp1,dim); 8697 lcmdeno(pp1,den1); 8698 if (!is_one(pp1)) pp1=den1*pp1; 8699 poly12polynome(p2,1,pp2,dim); 8700 lcmdeno(pp2,den2); 8701 if (!is_one(pp2)) pp2=den2*pp2; 8702 if (p1t==0 && p2t==0 8703 && p1.size()>=GIAC_PADIC/2 && p2.size()>=GIAC_PADIC/2 8704 ){ 8705 polynome2poly1(gcd(pp1,pp2),1,d); 8706 if (d.size()==1){ 8707 // solve sylvester matrix * []=d 8708 matrice S=sylvester(p1,p2); 8709 S=mtran(S); 8710 int add=int(p1.size()+p2.size()-d.size()-2); 8711 v=mergevecteur(vecteur(add,0),d); 8712 
u=linsolve(S,v,context0); 8713 gen D; 8714 lcmdeno(u,D,context0); 8715 d=multvecteur(D,d); 8716 v=vecteur(u.begin()+p2.size()-1,u.end()); 8717 u=vecteur(u.begin(),u.begin()+p2.size()-1); 8718 if (!is_one(den1)) 8719 u=den1*u; 8720 if (!is_one(den2)) 8721 v=den2*v; 8722 return; 8723 } 8724 } 8725 if (p1t==_EXT && p2t==_EXT && p1g.type==_EXT && p2g.type==_EXT && *(p1g._EXTptr+1)==*(p2g._EXTptr+1) && (p1g._EXTptr+1)->type==_VECT){ 8726 polynome2poly1(gcd(pp1,pp2),1,d); 8727 if (d.size()==1){ 8728 polynome P1,P2; 8729 if (algext_convert(pp1,p1g,P1) && algext_convert(pp2,p1g,P2)){ 8730 polynome pmini(P1.dim),P1n(P1.dim-1),P2n(P1.dim-1); 8731 algext_vmin2pmin(*(p1g._EXTptr+1)->_VECTptr,pmini); 8732 if (algnorme(P1,pmini,P1n) && algnorme(P2,pmini,P2n) ){ 8733 // first solve norme(p1)*un+norme(p2)*vn=d 8734 // then norme(p1)/p1*un*p1+norme(p2)/p2*vn*p2=d 8735 // hence u=norme(p1)/p1*un and v=norme(p2)/p2*vn 8736 int p1t=coefftype(P1n,p1g); 8737 int p2t=coefftype(P2n,p2g); 8738 polynome P12g=gcd(P1n,P2n); 8739 if (p1t==0 && p2t==0 && P12g.lexsorted_degree()==0){ 8740 //CERR << P1n % pp1 << '\n'; 8741 //CERR << P2n % pp2 << '\n'; 8742 P1=P1n/pp1; 8743 P2=P2n/pp2; 8744 // solve sylvester matrix * []=d 8745 matrice S=sylvester(polynome2poly1(P1n,1),polynome2poly1(P2n,1)); 8746 S=mtran(S); 8747 v=vecteur(S.size()); 8748 v[S.size()-1]=d[0]; 8749 u=linsolve(S,v,context0); 8750 gen D; 8751 lcmdeno(u,D,context0); 8752 d=multvecteur(D,d); 8753 int p2s=P2n.lexsorted_degree(); 8754 v=vecteur(u.begin()+p2s,u.end()); 8755 v=operator_times(v,polynome2poly1(P2,1),0); 8756 v=operator_mod(v,p1,0); 8757 u=vecteur(u.begin(),u.begin()+p2s); 8758 u=operator_times(u,polynome2poly1(P1,1),0); 8759 u=operator_mod(u,p2,0); 8760 if (!is_one(den1)) 8761 u=den1*u; 8762 if (!is_one(den2)) 8763 v=den2*v; 8764 //CERR << (operator_times(u,p1,0)+operator_times(v,p2,0))/D << '\n'; 8765 return; 8766 } 8767 } 8768 } 8769 } 8770 } 8771 if (0 && p1t==_EXT && p2t==0 && p1g.type==_EXT && 
(p1g._EXTptr+1)->type==_VECT){ 8772 polynome2poly1(gcd(pp1,pp2),1,d); 8773 if (d.size()==1){ 8774 polynome P1; 8775 if (algext_convert(pp1,p1g,P1)){ 8776 polynome pmini(P1.dim),P1n(P1.dim-1); 8777 algext_vmin2pmin(*(p1g._EXTptr+1)->_VECTptr,pmini); 8778 if (algnorme(P1,pmini,P1n)){ 8779 // first solve norme(p1)*un+p2*v=d 8780 // then norme(p1)/p1*un*p1+v*p2=d 8781 // hence u=norme(p1)/p1*un 8782 int p1t=coefftype(P1n,p1g); 8783 if (p1t==0){ 8784 P1=P1n/pp1; 8785 // solve sylvester matrix * []=d 8786 matrice S=sylvester(polynome2poly1(P1n,1),p2); 8787 S=mtran(S); 8788 v=vecteur(S.size()); 8789 v[S.size()-1]=d[0]; 8790 u=linsolve(S,v,context0); 8791 gen D; 8792 lcmdeno(u,D,context0); 8793 d=multvecteur(D,d); 8794 int p2s=int(p2.size()-1); 8795 v=vecteur(u.begin()+p2s,u.end()); 8796 u=vecteur(u.begin(),u.begin()+p2s); 8797 u=operator_times(u,polynome2poly1(P1,1),0); 8798 if (!is_one(den1)) 8799 u=den1*u; 8800 if (!is_one(den2)) 8801 v=den2*v; 8802 //CERR << (operator_times(u,p1,0)+operator_times(v,p2,0))/D << '\n'; 8803 return; 8804 } 8805 } 8806 } 8807 } 8808 } 8809 egcd(pp1,pp2,pu,pv,pd); 8810 polynome2poly1(pu,1,u); 8811 polynome2poly1(pv,1,v); 8812 polynome2poly1(pd,1,d); 8813 if (is_minus_one(d)){ 8814 d=-d; u=-u; v=-v; 8815 } 8816 if (!is_one(den1)) 8817 u=den1*u; 8818 if (!is_one(den2)) 8819 v=den2*v; 8820 return; 8821 } // end if modular env does not apply 8822 if (p2.empty()){ 8823 u=one(); 8824 v.clear(); 8825 d=p1; 8826 return ; 8827 } 8828 if (p1.empty()){ 8829 v=one(); 8830 u.clear(); 8831 d=p2; 8832 return ; 8833 } 8834 modpoly a,b,q,r,tmp; 8835 bool swapped=false; 8836 // change feb 2017, add p2.size()==1 check because I prefer u!=0 if p1 and p2 are csts (and this is required in polynomial Smith normal form) 8837 if (p1.size()<p2.size() || p1.size()==1) 8838 swapped=true; 8839 // initializes a and b to p1, p2 8840 if (swapped){ 8841 a=p2; 8842 b=p1; 8843 } 8844 else { 8845 a=p1; 8846 b=p2; 8847 } 8848 // initializes ua to 1 and ub to 0, the coeff of u 
in ua*a+va*b=a 8849 modpoly ua(one()),ub,ur; 8850 // TDivRem: a = bq+r 8851 // hence ur <- ua-q*ub verifies 8852 // ur*a+vr*b=r 8853 // a <- b, b <- r, ua <- ub and ub<- ur 8854 for (;;){ 8855 int n=int(b.size()); 8856 if (n==1){ // b is cst !=0 hence is the gcd, ub is valid 8857 break; 8858 } 8859 DivRem(a,b,env,q,r); // division works always 8860 // if r is 0 then b is the gcd and ub the coeff 8861 if (r.empty()) 8862 break; 8863 operator_times(q,ub,env,tmp); submodpoly(ua,tmp,env,ur); // ur=ua-q*ub; 8864 swap(a,b); swap(b,r); // a=b; b=r; 8865 swap(ua,ub); swap(ub,ur); // ua=ub; ub=ur; 8866 } 8867 // ub is valid and b is the gcd, vb=(b-ub*p1)/p2 if not swapped 8868 gen s=invmod(b.front(),env->modulo); 8869 mulmodpoly(b,s,env,d); // d=b*s; 8870 if (swapped){ 8871 mulmodpoly(ub,s,env,v); 8872 // COUT << ub << "*" << s << "=" << v << '\n'; 8873 // COUT << "swapped" << d << "-" << v << "*" << p2 << "/" << p1 << '\n'; 8874 u=operator_div(operator_minus(d,operator_times(v,p2,env),env),p1,env); 8875 } 8876 else { 8877 mulmodpoly(ub,s,env,u); 8878 // COUT << d << "-" << u << "*" << p1 << "/" << p2 << '\n'; 8879 v=operator_div(operator_minus(d,operator_times(u,p1,env),env),p2,env); 8880 } 8881 // COUT << "Verif " << p1 << "*" << u << "+" << p2 << "*" << v << "=" << p1*u+p2*v << " " << d << '\n'; 8882 } 8883 8884 // Solve a=b*x modulo the polynomial n 8885 // with degree(a)<l and degree(b)<=degree(n)-l 8886 // Assume degree(x)<degree(n) 8887 bool egcd_pade(const modpoly & n,const modpoly & x,int l,modpoly & a,modpoly &b,environment * env,bool psron){ 8888 l=absint(l); 8889 modpoly r1(n); 8890 modpoly r2(x); 8891 modpoly v1,v2(one()),q,r(x),v(1,1); 8892 gen g(1),h(1),r20,r2pow,hpow; 8893 for (;;){ 8894 // During the loop, v1*x+not_computed*n=r1 and v2*x+not_computed*n=r2 8895 int deg2=int(r2.size())-1; 8896 if (deg2<l){ 8897 break; 8898 } 8899 int deg1=int(r1.size())-1,ddeg=deg1-deg2; 8900 if (!env || !env->moduloon || !is_zero(env->coeff)){ 8901 r20=r2.front(); 8902 
r2pow=pow(r2.front(),ddeg+1); 8903 DivRem(r2pow*r1,r2,env,q,r); 8904 } 8905 else 8906 DivRem(r1,r2,env,q,r); 8907 v=operator_minus(r2pow*v1,operator_times(q,v2,env),env); 8908 if (!psron){ 8909 gen tmp=gcd(lgcd(r),lgcd(v),context0); 8910 r=operator_div(r,tmp,env); 8911 v=operator_div(v,tmp,env); 8912 } 8913 else { 8914 if (!env || !env->moduloon || !is_zero(env->coeff)){ 8915 hpow=pow(h,ddeg); 8916 r=operator_div(r,hpow*g,env); 8917 v=operator_div(v,hpow*g,env); 8918 if (ddeg==1) 8919 h=r20; 8920 else 8921 h=(pow(r20,ddeg)*h)/hpow; 8922 g=r20; 8923 } 8924 } 8925 r1=r2; 8926 r2=r; 8927 v1=v2; 8928 v2=v; 8929 } 8930 a=r; 8931 b=v; 8932 // If a and b are not prime together, we may have a failure 8933 q=gcd(a,b,env); 8934 if (q.size()>1) 8935 return false; 8936 return true; 8937 } 8938 8939 // Given [v_0 ... v_(2n-1)] (begin of the recurrence sequence) 8940 // return [b_n...b_0] such that b_n*v_{n+k}+...+b_0*v_k=0 8941 // Example [1,-1,3,3] -> [1,-3,-6] 8942 // -> the recurrence relation is v_{n+2}=3v_{n+1}+6v_n 8943 // Algo: B*V=A with deg(A)< n and deg(B)=n -> B*V_truncated=A mod x^(2n) 8944 // psron=true by default to use the PSR Euclidean algorithm 8945 vecteur reverse_rsolve(const vecteur & v_orig,bool psron){ 8946 if (v_orig.size()%2) 8947 return vecteur(1,gensizeerr(gettext("Argument must be a vector of even size")+gen(v_orig).print(context0))); 8948 vecteur v(v_orig); 8949 reverse(v.begin(),v.end()); 8950 int n=int(v.size()/2); 8951 vecteur x2n(2*n+1),A,B; 8952 x2n[0]=1; 8953 egcd_pade(x2n,v,n,A,B,0,psron); 8954 vecteur G=gcd(A,B,0); 8955 v=B/G; 8956 reverse(v.begin(),v.end()); 8957 v=trim(v,0); 8958 return v; 8959 } 8960 8961 //*************************************************************** 8962 // Fonctions independent on the actual implementation of modpoly 8963 //*************************************************************** 8964 8965 // given a, find u such that 8966 // a[0]*...a[n-1]*u[n]+a[0]*...*a[n-2]*a[n]*u[n-1]+...+a[1]*...*a[n-1]*u[0]=1 8967 bool 
egcd(const vector<modpoly> & a,environment * env,vector<modpoly> & u){ 8968 int n=int(a.size()); 8969 if (n==0) return false; // setsizeerr(gettext("modpoly.cc/egcd")); 8970 // first compute the sequence of products 8971 // pi[0]=a[n-1], pi[k]=pi[k-1]*a[n-k-1], ... pi[n-2]=pi[n-3]*a[1] 8972 u.clear(); 8973 u.reserve(n); 8974 vector<modpoly> pi; 8975 pi.reserve(n); 8976 pi.push_back(a[n-1]); 8977 modpoly tmp; 8978 for (int k=1;k<=n-2;k++){ 8979 operator_times(pi[k-1],a[n-k-1],env,tmp); 8980 pi.push_back(tmp); 8981 } 8982 // COUT << "a:" << a << '\n'; 8983 // COUT << "pi:" << pi << '\n'; 8984 modpoly c(1,plus_one),U(1),v(1),d(1),q,r; 8985 // compute u[0] using egcd(a[0],p[n-2]) 8986 // since a[0]*()+p[n-2]*u[0]=c 8987 // then solve ()=v[0] 8988 for (int k=0;k<=n-2;k++){ 8989 egcd(a[k],pi[n-k-2],env,v,U,d); 8990 if (d.size()==1 && !is_one(d.front())){ 8991 divmodpoly(v,d.front(),v); 8992 divmodpoly(U,d.front(),U); 8993 d.front()=1; 8994 } 8995 if (!is_one(d)) return false; // setsizeerr(gettext("modpoly.cc/egcd")); 8996 // multiply by v and U by c, compute new c, push u[] 8997 operator_times(U,c,env,tmp); DivRem(tmp,a[k],env,q,r); // r= U*c % a[k] 8998 u.push_back(r); 8999 operator_times(v,c,env,tmp); DivRem(tmp,pi[n-k-2],env,q,c); // c=(v*c) % pi[n-k-2]; 9000 } 9001 u.push_back(c); 9002 // COUT << "u:" << u << '\n'; 9003 return true; 9004 } 9005 9006 // same as above 9007 /* 9008 vector<modpoly> egcd(const vector<modpoly> & a,environment * env){ 9009 vector<modpoly> u; 9010 egcd(a,env,u); 9011 return u; 9012 } 9013 */ 9014 9015 modpoly simplify(modpoly & a, modpoly & b,environment * env){ 9016 modpoly g; 9017 gcdmodpoly(a,b,env,g); 9018 a=operator_div(a,g,env); 9019 b=operator_div(b,g,env); 9020 return g; 9021 } 9022 9023 static void inpowmod(const modpoly & p,const gen & n,const modpoly & pmod,environment * env,modpoly & res){ 9024 if (is_zero(n)){ 9025 res=one(); 9026 return ; 9027 } 9028 if (is_one(n)){ 9029 res=p; 9030 return; 9031 } 9032 #if 1 9033 modpoly 
p2k(p),tmp,tmpq; 9034 res=one(); 9035 gen N(n),q,r; 9036 while (!is_zero(N)){ 9037 r=irem(N,2,q); 9038 N=iquo(N,2); // not sure q can be used because of inplace operations 9039 if (is_one(r)){ 9040 operator_times(res,p2k,env,tmp); 9041 if (env) 9042 DivRem(tmp,pmod,env,tmpq,res); 9043 else 9044 swap(res,tmp); // res=tmp 9045 } 9046 operator_times(p2k,p2k,env,tmp); 9047 if (env) 9048 DivRem(tmp,pmod,env,tmpq,p2k); 9049 else 9050 swap(p2k,tmp); // res=tmp 9051 } 9052 #else 9053 inpowmod(p,iquo(n,2),pmod,env,res); 9054 modpoly tmp,q; 9055 operator_times(res,res,env,tmp); 9056 if (env) 9057 DivRem(tmp,pmod,env,q,res); 9058 else 9059 res=tmp; // res=(res*res) % pmod ; 9060 if (!is_zero(smod(n,2))){ 9061 operator_times(res,p,env,tmp); 9062 if (env) 9063 DivRem(tmp,pmod,env,q,res); // res=(res*p)%pmod; 9064 else 9065 res=tmp; 9066 } 9067 #endif 9068 } 9069 9070 modpoly powmod(const modpoly & p,const gen & n,const modpoly & pmod,environment * env){ 9071 if (!ck_is_positive(n,0)){ 9072 return vecteur(1,gensizeerr(gettext("modpoly.cc/powmod"))); 9073 } 9074 modpoly res; 9075 inpowmod( (env?operator_mod(p,pmod,env):p) ,n,pmod,env,res); 9076 return res; 9077 } 9078 9079 void hornerfrac(const modpoly & p,const gen &num, const gen &den,gen & res,gen & d){ 9080 d=1; 9081 if (p.empty()) 9082 res=0; 9083 else { 9084 modpoly::const_iterator it=p.begin(),itend=p.end(); 9085 res=*it; 9086 ++it; 9087 if (it==itend){ 9088 return; 9089 } 9090 d=den; 9091 for (;;){ 9092 res=res*num+(*it)*d; 9093 ++it; 9094 if (it==itend) 9095 break; 9096 d=d*den; 9097 } 9098 } 9099 } 9100 9101 gen hornerint(const modpoly & p,const gen & num,const gen & den,bool simp){ 9102 mpz_t resz,dz,numz,denz; 9103 if (num.type==_INT_) 9104 mpz_init_set_si(numz,num.val); 9105 else 9106 mpz_init_set(numz,*num._ZINTptr); 9107 if (den.type==_INT_) 9108 mpz_init_set_si(denz,den.val); 9109 else 9110 mpz_init_set(denz,*den._ZINTptr); 9111 mpz_init_set(dz,denz); 9112 mpz_init(resz); 9113 modpoly::const_iterator 
it=p.begin(),itend=p.end(); 9114 if (it->type==_INT_) 9115 mpz_set_si(resz,it->val); 9116 else 9117 mpz_set(resz,*it->_ZINTptr); 9118 ++it; 9119 for (;;){ 9120 // res=res*num+(*it)*d; 9121 mpz_mul(resz,resz,numz); 9122 if (it->type==_INT_){ 9123 if (it->val>0) 9124 mpz_addmul_ui(resz,dz,it->val); 9125 else 9126 mpz_submul_ui(resz,dz,-it->val); 9127 } 9128 else 9129 mpz_addmul(resz,dz,*it->_ZINTptr); 9130 ++it; 9131 if (it==itend) 9132 break; 9133 mpz_mul(dz,dz,denz); // d=d*den; 9134 } 9135 gen res; 9136 if (simp) 9137 res=rdiv(gen(resz),gen(dz),context0); 9138 else 9139 res=fraction(gen(resz),gen(dz)); 9140 mpz_clear(resz); 9141 mpz_clear(dz); 9142 mpz_clear(denz); 9143 mpz_clear(numz); 9144 return res; 9145 } 9146 9147 void cint2mpz(const gen & num,mpz_t & numr,mpz_t & numi){ 9148 if (num.type==_INT_){ 9149 mpz_set_si(numr,num.val); 9150 mpz_set_si(numi,0); 9151 } 9152 else { 9153 if (num.type==_ZINT){ 9154 mpz_set(numr,*num._ZINTptr); 9155 mpz_set_si(numi,0); 9156 } 9157 else { 9158 if (num._CPLXptr->type==_INT_) 9159 mpz_set_si(numr,num._CPLXptr->val); 9160 else 9161 mpz_set(numr,*num._CPLXptr->_ZINTptr); 9162 if ((num._CPLXptr+1)->type==_INT_) 9163 mpz_set_si(numi,(num._CPLXptr+1)->val); 9164 else 9165 mpz_set(numi,*(num._CPLXptr+1)->_ZINTptr); 9166 } 9167 } 9168 } 9169 9170 gen hornercint(const modpoly & p,const gen & num,const gen & den,bool simp){ 9171 mpz_t resr,resi,dz,numr,numi,denz,tmp1,tmp2,tmp3,tmp4; 9172 mpz_init(numr); mpz_init(numi); 9173 cint2mpz(num,numr,numi); 9174 if (den.type==_INT_) 9175 mpz_init_set_si(denz,den.val); 9176 else 9177 mpz_init_set(denz,*den._ZINTptr); 9178 mpz_init_set(dz,denz); 9179 mpz_init(resr); 9180 mpz_init(resi); 9181 mpz_init(tmp1); 9182 mpz_init(tmp2); 9183 mpz_init(tmp3); 9184 mpz_init(tmp4); 9185 modpoly::const_iterator it=p.begin(),itend=p.end(); 9186 cint2mpz(*it,resr,resi); 9187 ++it; 9188 for (;;){ 9189 // res=res*num+(*it)*d; 9190 mpz_mul(tmp1,resr,numr); 9191 mpz_mul(tmp2,resi,numi); 9192 
mpz_mul(tmp3,resr,numi); 9193 mpz_mul(tmp4,resi,numr); 9194 mpz_sub(resr,tmp1,tmp2); 9195 mpz_add(resi,tmp3,tmp4); 9196 if (it->type==_INT_){ 9197 if (it->val>0) 9198 mpz_addmul_ui(resr,dz,it->val); 9199 else 9200 mpz_submul_ui(resr,dz,-it->val); 9201 } 9202 else { 9203 if (it->type==_ZINT) 9204 mpz_addmul(resr,dz,*it->_ZINTptr); 9205 else { 9206 cint2mpz(*it,tmp1,tmp2); 9207 mpz_mul(tmp1,tmp1,dz); 9208 mpz_mul(tmp2,tmp2,dz); 9209 mpz_add(resr,resr,tmp1); 9210 mpz_add(resi,resi,tmp2); 9211 } 9212 } 9213 ++it; 9214 if (it==itend) 9215 break; 9216 mpz_mul(dz,dz,denz); // d=d*den; 9217 } 9218 gen res; 9219 if (simp) 9220 res=rdiv(gen(gen(resr),gen(resi)),gen(dz)); 9221 else 9222 res=fraction(gen(gen(resr),gen(resi)),gen(dz)); 9223 mpz_clear(tmp4); 9224 mpz_clear(tmp3); 9225 mpz_clear(tmp2); 9226 mpz_clear(tmp1); 9227 mpz_clear(resr); 9228 mpz_clear(resi); 9229 mpz_clear(dz); 9230 mpz_clear(denz); 9231 mpz_clear(numr); 9232 mpz_clear(numi); 9233 return res; 9234 } 9235 9236 gen horner(const modpoly & p,const fraction & f,bool simp){ 9237 if (p.empty()) 9238 return 0; 9239 gen num=f.num,den=f.den,d=den; 9240 modpoly::const_iterator it=p.begin(),itend=p.end(); 9241 if (itend-it>2 && is_integer(num) && is_integer(den)){ 9242 for (;it!=itend;++it){ 9243 if (!is_integer(*it)) 9244 break; 9245 } 9246 if (it==itend) 9247 return hornerint(p,num,den,simp); 9248 } 9249 if (itend-it>2 && is_cinteger(num) && is_integer(den)){ 9250 for (;it!=itend;++it){ 9251 if (!is_cinteger(*it)) 9252 break; 9253 } 9254 if (it==itend) 9255 return hornercint(p,num,den,simp); 9256 } 9257 it=p.begin(); 9258 gen res(*it); 9259 ++it; 9260 if (it==itend) 9261 return res; 9262 for (;;){ 9263 res=res*num+(*it)*d; 9264 ++it; 9265 if (it==itend) 9266 break; 9267 d=d*den; 9268 } 9269 return rdiv(res,d,context0); 9270 } 9271 9272 // n=d-1-e, d=degree(Sd), e=degree(Sd1), Se=(lc(Sd1)^n*Sd1)/lc(Sd)^n 9273 void ducos_e(const modpoly & Sd,const gen & sd,const modpoly & Sd1,modpoly &Se){ 9274 int 
n=int(Sd.size()-Sd1.size()-1); 9275 if (!n){ 9276 Se=Sd1; 9277 return; 9278 } 9279 if (n==1){ 9280 Se=Sd1.front()*Sd1/sd; 9281 return; 9282 } 9283 // n>=2 9284 gen sd1(Sd1.front()),s((sd1*sd1)/sd); 9285 for (int j=2;j<n;++j){ 9286 s=(s*sd1)/sd; 9287 } 9288 Se=(s*Sd1)/sd; 9289 } 9290 9291 // compute S_{e-1} 9292 void ducos_e1(const modpoly & A,const modpoly & Sd1,const modpoly & Se,const gen & sd,modpoly & res){ 9293 int d=int(A.size())-1,e=int(Sd1.size())-1,dim=1; 9294 if (debug_infolevel>3) 9295 CERR << CLOCK()*1e-6 << " ducos_e1 begin d=" << d << '\n'; 9296 gen cd1(Sd1.front()),se(Se.front()); 9297 vector< modpoly > Hv(e); 9298 Hv.reserve(d); 9299 if (Se.size()>1 && Se[1]!=0){ 9300 Hv.push_back(modpoly(Se.begin()+1,Se.end())); 9301 negmodpoly(Hv.back(),Hv.back()); 9302 } 9303 else { 9304 modpoly tmp(e+1); 9305 tmp[0]=se; 9306 Hv.push_back(tmp-Se); // in fact it's -Se without first element 9307 } 9308 for (int j=e+1;j<d;++j){ 9309 modpoly XHj1(Hv.back()); 9310 XHj1.push_back(0); // X*H_{j-1} 9311 gen piXHj1; 9312 if (int(XHj1.size())-1-e>=0){ 9313 piXHj1=XHj1[XHj1.size()-1-e]; 9314 XHj1=XHj1-(piXHj1*Sd1)/cd1; 9315 } 9316 Hv.push_back(XHj1); 9317 } 9318 modpoly D,tmpv; // sum_{j<d} pi_j(A)*H_j/lc(A) 9319 D.reserve(d); 9320 // split next loop in 2 parts, because Hv indexes lower than e are straightforward 9321 if (debug_infolevel>3) 9322 CERR << CLOCK()*1e-6 << " ducos_e1 D begin" << '\n'; 9323 for (int j=e-1;j>=0;--j){ 9324 D.push_back(A[A.size()-1-j]*se); 9325 } 9326 if (debug_infolevel>3) 9327 CERR << CLOCK()*1e-6 << " ducos_e1 D j=e " << e << "<" << d << '\n'; 9328 for (int j=e;j<d;++j){ 9329 D = D + A[A.size()-1-j]*Hv[j]; 9330 } 9331 if (debug_infolevel>3) 9332 CERR << CLOCK()*1e-6 << " ducos_e1 D end, start division" << '\n'; 9333 if (is_integer(A.front())) 9334 iquo(D,A.front()); 9335 else 9336 D = D/A.front(); 9337 if (debug_infolevel>3) 9338 CERR << CLOCK()*1e-6 << " ducos_e1 D ready" << '\n'; 9339 modpoly & Hd1=Hv.back(); 9340 Hd1.push_back(0); // X*Hd1 
// Multiply every entry of Q by c modulo m, in place.
// The product is formed in 64 bits before the reduction, so it cannot
// overflow; the remainder keeps the sign of the product (C++ % semantics).
// An empty Q is left untouched (the previous pointer-based loop took the
// address of Q.front(), which is undefined for an empty vector).
void mulsmall(std::vector<int> & Q,int c,int m){
  for (std::vector<int>::iterator it=Q.begin(),itend=Q.end();it!=itend;++it){
    *it = static_cast<int>((static_cast<long long>(*it)*c)%m);
  }
}
// target = quotient of source by x^n, coefficients stored leading first:
// the n trailing (lowest degree) coefficients are dropped.
//   n > source.size()  -> target is cleared
//   n == 0             -> target is a copy of source
//   n < 0              -> shift the other way (multiply by x^(-n)), as the NTL
//                         routine this helper is adapted from does; the zero
//                         polynomial (empty vector) stays empty
// target may be the same object as source.
void RightShift(std::vector<int> & target,const std::vector<int> & source,long n){
  typedef std::vector<int>::size_type size_type;
  if (n<0){
    std::vector<int> shifted(source);
    if (!shifted.empty()) // -(n+1)+1 avoids negating the most negative long
      shifted.resize(shifted.size()+static_cast<size_type>(-(n+1))+1,0);
    target.swap(shifted);
    return;
  }
  const size_type drop=static_cast<size_type>(n);
  if (source.size()<drop){
    target.clear(); return;
  }
  const size_type keep=source.size()-drop;
  if (&target==&source){
    // in-place shift: truncating is enough and never reads freed elements
    target.resize(keep);
    return;
  }
  target.assign(source.begin(),source.begin()+keep);
}
9431 tmp1.clear(); tmp2.clear(); tmp3.clear(); tmp4.clear(); 9432 return; 9433 } 9434 9435 long d1 = (d_red + 1)/2; 9436 if (d1 < 1) d1 = 1; 9437 if (d1 >= d_red) d1 = d_red - 1; 9438 9439 vector<int> A1,B1,C1,D1,Wp; 9440 9441 ResHalfGCD(U1, V1, d1, cvec, dvec,A1,B1,C1,D1,p,a,b,tmp1,tmp2,tmp3,tmp4); 9442 int maxdeg=U1.size()-giacmax(A1.size(),B1.size()); 9443 matrix22inttimesvect(A1,B1,C1,D1,U1,V1,maxdeg,maxdeg,a,b,p,tmp1,tmp2,tmp3,tmp4,Wp); 9444 a.swap(U1); b.swap(V1); 9445 tmp1.clear(); tmp2.clear(); tmp3.clear(); tmp4.clear(); 9446 9447 long d2 = deg(V1) - deg(U) + n + d_red; 9448 9449 if (IsZero(V1) || d2 <= 0) { 9450 A.swap(A1); B.swap(B1); C.swap(C1); D.swap(D1); 9451 return; 9452 } 9453 9454 cvec.push_back( LeadCoeff(V1)); 9455 dvec.push_back( dvec.back()-deg(U1)+deg(V1)); 9456 DivRem(U1,V1,p,tmp2, tmp1); 9457 U1.swap(V1); V1.swap(tmp1); 9458 a_bc(A1,C1,tmp2,p,A1,tmp1); 9459 a_bc(B1,D1,tmp2,p,B1,tmp1); 9460 9461 ResHalfGCD(U1, V1, d2, cvec, dvec,A,B,C,D,p,a,b,tmp1,tmp2,tmp3,tmp4); 9462 matrix22int(A1,B1,C1,D1,A,B,C,D,a,b,U1,V1,p,tmp1,Wp); 9463 A.swap(a); B.swap(b); C.swap(U1); D.swap(V1); 9464 9465 } 9466 9467 void ResHalfGCD(vector<int>& U, vector<int> & V, vector<int>& cvec, vector<int>& dvec,int p,vector<int>& A1,vector<int>& B1,vector<int>& C1,vector<int>& D1,vector<int>& a,vector<int>& b,vector<int>& tmp1,vector<int>& tmp2,vector<int>& tmp3,vector<int>& tmp4){ 9468 long d_red = (deg(U)+1)/2; 9469 9470 if (IsZero(V) || deg(V) <= deg(U) - d_red) { 9471 tmp1.clear(); tmp2.clear(); tmp3.clear(); tmp4.clear(); 9472 return; 9473 } 9474 9475 long du = deg(U); 9476 9477 9478 long d1 = (d_red + 1)/2; 9479 if (d1 < 1) d1 = 1; 9480 if (d1 >= d_red) d1 = d_red - 1; 9481 9482 ResHalfGCD(U, V, d1, cvec, dvec,A1,B1,C1,D1,p,a,b,tmp1,tmp2,tmp3,tmp4); 9483 int maxdeg=U.size()-giacmax(A1.size(),B1.size()); 9484 vector<int> Wp; 9485 matrix22inttimesvect(A1,B1,C1,D1,U,V,maxdeg,maxdeg,a,b,p,tmp1,tmp2,tmp3,tmp4,Wp); 9486 U.swap(a); V.swap(b); 9487 tmp1.clear(); tmp2.clear(); 
tmp3.clear(); tmp4.clear(); 9488 9489 long d2 = deg(V) - du + d_red; 9490 9491 if (IsZero(V) || d2 <= 0) { 9492 return; 9493 } 9494 9495 cvec.push_back( LeadCoeff(V)); 9496 dvec.push_back( dvec.back()-deg(U)+deg(V)); 9497 DivRem(U,V, p,tmp2, tmp1); 9498 U.swap(V); V.swap(tmp1); 9499 9500 ResHalfGCD(U, V, d2, cvec, dvec,A1,B1,C1,D1,p,a,b,tmp1,tmp2,tmp3,tmp4); 9501 maxdeg=U.size()-giacmax(A1.size(),B1.size()); 9502 matrix22inttimesvect(A1,B1,C1,D1,U,V,maxdeg,maxdeg,a,b,p,tmp1,tmp2,tmp3,tmp4,Wp); 9503 U.swap(a); V.swap(b); 9504 } 9505 9506 inline void PlainResultant(int & res,const vector<int> & u,const vector<int> &v,int p){ 9507 res=resultant_iter(u,v,p); 9508 } 9509 9510 inline void power(int & res,int a,int m,int p){ 9511 res=powmod(a,m,p); 9512 } 9513 9514 void resultant_int_like_ntl(int & res, const vector<int> & u, const vector<int> & v,int p){ 9515 if (deg(u) <= HGCD || deg(v) <= HGCD) { 9516 PlainResultant(res, u, v,p); 9517 return; 9518 } 9519 9520 vector<int> u1(u), v1(v),tmp1,tmp2;; 9521 9522 int t; res=1; 9523 9524 if (deg(u1) == deg(v1)) { 9525 DivRem(u1,v1,p,tmp1,tmp2); 9526 u1.swap(v1); 9527 v1.swap(tmp2); 9528 9529 if (IsZero(v1)) { 9530 res=0; 9531 return; 9532 } 9533 9534 power(t, LeadCoeff(u1), deg(u1) - deg(v1),p); 9535 res=(longlong(res)*t) %p; 9536 if (deg(u1) & 1) 9537 res=-res; 9538 } 9539 else if (deg(u1) < deg(v1)) { 9540 u1.swap(v1); 9541 if (deg(u1) & deg(v1) & 1) 9542 res=-res; 9543 } 9544 9545 // deg(u1) > deg(v1) && v1 != 0 9546 9547 vector<int> cvec,dvec; 9548 9549 cvec.reserve(deg(v1)+2); 9550 dvec.reserve(deg(v1)+2); 9551 9552 cvec.push_back( LeadCoeff(u1)); 9553 dvec.push_back( deg(u1)); 9554 9555 vector<int> A1,B1,C1,D1,a,b,tmp3,tmp4; // all temporary 9556 9557 while (deg(u1) > HGCD && !IsZero(v1)) { 9558 ResHalfGCD(u1, v1, cvec, dvec,p,A1,B1,C1,D1,a,b,tmp1,tmp2,tmp3,tmp4); 9559 9560 if (!IsZero(v1)) { 9561 cvec.push_back( LeadCoeff(v1)); 9562 dvec.push_back( deg(v1)); 9563 DivRem(u1,v1,p,tmp1,tmp2); 9564 u1.swap(v1); 9565 
v1.swap(tmp2); 9566 } 9567 } 9568 9569 if (IsZero(v1) && deg(u1) > 0) { 9570 res=0; 9571 return; 9572 } 9573 9574 long i, l; 9575 l = dvec.size(); 9576 9577 if (deg(u1) == 0) { 9578 // we went all the way... 9579 9580 for (i = 0; i <= l-3; i++) { 9581 power(t, cvec[i+1], dvec[i]-dvec[i+2],p); 9582 res=(longlong(res)*t) % p; 9583 if (dvec[i] & dvec[i+1] & 1) 9584 res=-res; 9585 } 9586 9587 power(t, cvec[l-1], dvec[l-2],p); 9588 res=(longlong(res)*t) % p; 9589 } 9590 else { 9591 for (i = 0; i <= l-3; i++) { 9592 power(t, cvec[i+1], dvec[i]-dvec[i+2],p); 9593 res=(longlong(res)*t) % p; 9594 if (dvec[i] & dvec[i+1] & 1) 9595 res=-res; 9596 } 9597 9598 power(t, cvec[l-1], dvec[l-2]-deg(v1),p); 9599 res=(longlong(res)*t) % p; 9600 if (dvec[l-2] & dvec[l-1] & 1) 9601 res=-res; 9602 9603 PlainResultant(t, u1, v1,p); 9604 res=(longlong(res)*t) % p; 9605 } 9606 } 9607 9608 // resultant of P and Q modulo m, modifies P and Q, 9609 int resultant_int(vector<int> & P,vector<int> & Q,vector<int> & tmp1,vector<int> & tmp2,int m,int w){ 9610 if (P.size()<Q.size()){ 9611 int res=(P.size() % 2==1 || Q.size() % 2==1)?1:-1; // (-1)^deg(P)*deg(Q) 9612 return res*resultant_int(Q,P,tmp1,tmp2,m,w); 9613 } 9614 if (P.size()==Q.size()){ 9615 int coeff=Q[0]; 9616 int invcoeff=invmod(coeff,m); 9617 mulsmall(Q,invcoeff,m); 9618 DivRem(P,Q,m,tmp1,tmp2); 9619 longlong res=(P.size() % 2==1)?1:-1; 9620 res = res*powmod(Q[0],P.size()-tmp2.size(),m); 9621 return smod(res*resultant_int(Q,tmp2,P,tmp1,m,w),m); 9622 } 9623 // now P.size()>Q.size() 9624 int HGCD2=3*HGCD; 9625 if (Q.size()>=HGCD2){ 9626 if (debug_infolevel>2) 9627 CERR << "resultantint hgcd mod " << m << '\n'; 9628 #if 0 //ndef USE_GMP_REPLACEMENTS // activate NTL-like ResHalfGCD code 9629 int r; 9630 resultant_int_like_ntl(r,P,Q,m); 9631 r=smod(r,m); 9632 return r; 9633 #endif 9634 // old code 9635 vector<int> coeffv,degv,A,B,C,D,a,b,b0,b1,b2,b3,b4,b5,b6,b7,Wp; 9636 coeffv.reserve(Q.size()+1); 9637 degv.reserve(Q.size()+1); 9638 
degv.push_back(P.size()-1); 9639 while (Q.size()>=HGCD2){ 9640 #if 0 9641 // b4..b7 data is used below 9642 hgcdint(P,Q,m,Wp,A,B,C,D,coeffv,degv,b2,b3,b4,b5,b6,b7); 9643 #else 9644 int deg1=P.size(),deg2=(3*deg1)/4; 9645 double coeff=nextpow2(deg1/2)*2.0/deg1; 9646 double coeff2=nextpow2(deg2)/double(deg2); 9647 coeff=0.5*std::min(coeff,coeff2); 9648 if (Wp.empty() && m!=p1 && m!=p2 && m!=p3){ 9649 int l=sizeinbase2(int(3*2*coeff/4*deg1-1)); 9650 if (w){ 9651 longlong ww=w; // powmod(w,2**(l-1),m) 9652 for (int j=1;j<l;++j) 9653 ww=(ww*ww)%m; 9654 if (ww==1) 9655 w=0; 9656 else { 9657 for (int j=0;ww!=m-1 && j<27;++j){ 9658 ww=(ww*ww)%m; 9659 w=(w*longlong(w))%m; 9660 } 9661 if (ww!=m-1) 9662 w=0; 9663 } 9664 } 9665 if (w==0) 9666 w=find_w(Wp,l,m); 9667 fft2wp(Wp,(1<<l),w,m); 9668 } 9669 if (debug_infolevel>1) 9670 CERR << CLOCK()*1e-6 << " deg " << P.size() << " coeff " << coeff << "\n"; 9671 int seuil=1+int(std::ceil((1-coeff)*P.size())); 9672 if (HGCD/4>=Q.size()-seuil){ 9673 coeffv.push_back(Q.front()); 9674 degv.push_back(degv.back()+Q.size()-P.size()); 9675 DivRem(P,Q,m,a,b); 9676 P.swap(Q); 9677 Q.swap(b); 9678 continue; 9679 } 9680 // 1st recursive call 9681 b0.resize(P.size()-seuil); 9682 copy(P.begin(),P.end()-seuil,b0.begin()); // quo(P,x^s), 9683 b1.resize(Q.size()-seuil); 9684 copy(Q.begin(),Q.end()-seuil,b1.begin()); // quo(Q,x^s), 9685 hgcdint(b0,b1,m,Wp,A,B,C,D,coeffv,degv,b2,b3,b4,b5,b6,b7); // degree=deg(P)*coeff 9686 #endif 9687 int maxadeg=P.size()-giacmax(A.size(),B.size()); 9688 matrix22inttimesvect(A,B,C,D,P,Q,maxadeg,maxadeg,a,b,m,b4,b5,b6,b7,Wp); 9689 if (b.size()<HGCD){ 9690 a.swap(P); b.swap(Q); 9691 break; 9692 } 9693 if (1 && a.size()-b.size()==1){ 9694 a.swap(P); b.swap(Q); 9695 continue; 9696 } 9697 coeffv.push_back(b.front()); 9698 degv.push_back(degv.back()+b.size()-a.size()); 9699 DivRem(a,b,m,P,Q); 9700 b.swap(P); 9701 } 9702 degv.push_back(Q.size()-1); 9703 int res=resultant_int(P,Q,tmp1,tmp2,m,w); 9704 
adjust_resultant(res,coeffv,degv,m); 9705 return smod(res,m); 9706 } 9707 return resultant_iter(P,Q,m); 9708 } 9709 int sizeinbase2(const gen & g){ 9710 if (g.type==_INT_) 9711 return sizeinbase2(absint(g.val)); 9712 if (g.type==_ZINT) 9713 return mpz_sizeinbase(*g._ZINTptr,2); 9714 if (g.type!=_VECT) 9715 return -1; 9716 return sizeinbase2(*g._VECTptr); 9717 } 9718 int sizeinbase2(const vecteur & v){ 9719 int m=0; 9720 const_iterateur it=v.begin(),itend=v.end(); 9721 for (;it!=itend;++it){ 9722 int c=sizeinbase2(*it); 9723 if (c>m) 9724 m=c; 9725 } 9726 return m+(sizeinbase2(int(v.size()))+1)/2; 9727 } 9728 int wpcount=0; 9729 gen mod_resultant(const modpoly & P,const modpoly & Q,double eps){ 9730 gen R; 9731 if (P.size()>=NTL_RESULTANT && Q.size()>=NTL_RESULTANT && ntlresultant(P,Q,0,R)) 9732 return R; 9733 // gen h2=4*pow(l2norm2(P),Q.size()-1)*pow(l2norm2(Q),P.size()-1); 9734 int h=sizeinbase2(P)*(int(Q.size())-1)+sizeinbase2(Q)*(int(P.size())-1)+1; 9735 gen D=1; // p-adic acceleration 9736 if (0 && P.size()>GIAC_PADIC && Q.size()>GIAC_PADIC){ 9737 matrice S=sylvester(P,Q); 9738 vecteur v=vranm(int(S.size()),0,context0); 9739 vecteur u=linsolve(S,v,context0); 9740 lcmdeno(u,D,context0); 9741 h -= sizeinbase2(D); 9742 } 9743 gen P0=P.front(),Q0=Q.front(); 9744 #if 0 && defined INT128 // && defined GIAC_LLPRECOND 9745 if (1){ 9746 vector<longlong> p,q,tmp1,tmp2; 9747 // reconstruct resultant/D 9748 int maxdeg=giacmax(P.size(),Q.size())-1; 9749 // precond compatible => p5 9750 longlong m=1LL<<61; 9751 m=prevprimell(m-1,maxdeg); 9752 gen pim=m,res; 9753 vecteur2vector_ll(P,m,p); 9754 vecteur2vector_ll(Q,m,q); 9755 res=resultantll(p,q,tmp1,tmp2,m); 9756 #if 0 9757 ntlresultant(P,Q,m,R,false); 9758 if ((R-res)%m!=0) 9759 CERR << "bug\n"; 9760 #endif 9761 mpz_t tmpz; 9762 mpz_init(tmpz); 9763 int proba=0; 9764 int probamax=RAND_MAX; 9765 if (eps>0) 9766 probamax=int(-std::log(eps)/30/std::log(2.0)); 9767 while (h>sizeinbase2(pim) && proba<probamax){ 9768 
m=prevprimell(m-1,maxdeg); 9769 vecteur2vector_ll(P,m,p); 9770 vecteur2vector_ll(Q,m,q); 9771 longlong r; 9772 r=resultantll(p,q,tmp1,tmp2,m); 9773 #if 0 9774 ntlresultant(P,Q,m,R,false); 9775 if ((R-r)%m!=0) 9776 CERR << "bug\n"; 9777 #endif 9778 #ifndef USE_GMP_REPLACEMENTS 9779 if (pim.type==_ZINT && res.type==_ZINT){ 9780 longlong amodm=mpz_fdiv_ui(*res._ZINTptr,m); 9781 if (amodm!=r){ 9782 gen u,v,d; longlong U; 9783 egcd(pim,m,u,v,d); 9784 if (u.type==_ZINT) 9785 U=mpz_fdiv_ui(*u._ZINTptr,m); 9786 else 9787 U=u.val; 9788 if (d==-1){ U=-U; v=-v; d=1; } 9789 mpz_mul_si(tmpz,*pim._ZINTptr,(U*(r-int128_t(amodm)))%m); 9790 mpz_add(*res._ZINTptr,*res._ZINTptr,tmpz); 9791 proba=0; 9792 } 9793 else ++proba; 9794 } 9795 else 9796 #endif 9797 res=ichinrem(res,gen(r),pim,gen(m)); 9798 pim=gen(m)*pim; 9799 } 9800 mpz_clear(tmpz); 9801 return smod(res,pim)*D; 9802 } 9803 #endif // INT128 9804 if (debug_infolevel>1) 9805 CERR << CLOCK()*1e-6 << " Wpcount begin " << wpcount << "\n"; 9806 vector<int> p,q,tmp1,tmp2; 9807 // reconstruct resultant/D 9808 int maxdeg=giacmax(P.size(),Q.size())-1; 9809 int w=0; 9810 int m=maxdeg<HGCD?primes31[0]:prevfourier(2147483647,maxdeg,w);//p1,w=1227303670; 9811 gen pim=m,res; 9812 vecteur2vector_int(P,m,p); 9813 vecteur2vector_int(Q,m,q); 9814 res=resultant_int(p,q,tmp1,tmp2,m,w); 9815 #if 0 9816 ntlresultant(P,Q,m,R,false); 9817 if ((R-res)%m!=0) 9818 CERR << "bug\n"; 9819 #endif 9820 if (D!=1) 9821 res=int((res.val*longlong(invmod(smod(D,m).val,m)))%m); 9822 mpz_t tmpz; 9823 mpz_init2(tmpz,h); 9824 int proba=0; 9825 int probamax=RAND_MAX; 9826 if (eps>0) 9827 probamax=1+int(-std::log(eps)/30/std::log(2.0)); 9828 int maxp=std::sqrt(p1p2/4./maxdeg);int niter=1; 9829 if (debug_infolevel>1) 9830 CERR << CLOCK()*1e-6 << " resultant modular algo max #primes " << h/30 << endl; 9831 for (;h>sizeinbase2(pim) && proba<probamax;++niter){ 9832 if (debug_infolevel>1) 9833 CERR << CLOCK()*1e-6 << " prevfourier start\n"; 9834 if (maxdeg<HGCD){ 9835 w=0; 
9836 if (niter<nprimes31) 9837 m=primes31[niter]; 9838 else 9839 m=prevprime(m-1).val; 9840 } 9841 else 9842 m=prevfourier(m-1,maxdeg,w);//prevprimep1p2p3(m-1,maxp,maxdeg); 9843 if (debug_infolevel>1) 9844 CERR << CLOCK()*1e-6 << " prevfourier end\n"; 9845 // CERR << CLOCK()*1e-6 << " " << m << "\n"; 9846 if (m && (is_multiple(P0,m)||is_multiple(Q0,m))) 9847 continue; 9848 #ifdef INT128 9849 if (m<(1<<30) 9850 && m!=p3 9851 ){ 9852 CERR << CLOCK()*1e-6 << " modular resultant, switching to long primes\n"; 9853 longlong m=(1LL<<61); //m=2305843009116209153+16385; 9854 vector<longlong> p,q,tmp1,tmp2; 9855 mpz_t tmpz; 9856 mpz_init(tmpz); 9857 for (;h>sizeinbase2(pim) && proba<probamax;++niter){ 9858 m=prevprimell(m-1,maxdeg); 9859 vecteur2vector_ll(P,m,p); 9860 vecteur2vector_ll(Q,m,q); 9861 longlong r; 9862 r=resultantll(p,q,tmp1,tmp2,m); 9863 #if 0 9864 ntlresultant(P,Q,m,R,false); 9865 if ((R-r)%m!=0){ 9866 CERR << "bug\n"; 9867 CERR << m << endl; 9868 break; 9869 } 9870 #endif 9871 #ifndef USE_GMP_REPLACEMENTS 9872 if (pim.type==_ZINT && res.type==_ZINT){ 9873 longlong amodm=mpz_fdiv_ui(*res._ZINTptr,m); 9874 if (amodm!=r){ 9875 gen u,v,d; longlong U; 9876 egcd(pim,m,u,v,d); 9877 if (u.type==_ZINT) 9878 U=mpz_fdiv_ui(*u._ZINTptr,m); 9879 else 9880 U=u.val; 9881 if (d==-1){ U=-U; v=-v; d=1; } 9882 mpz_mul_si(tmpz,*pim._ZINTptr,(U*(r-int128_t(amodm)))%m); 9883 mpz_add(*res._ZINTptr,*res._ZINTptr,tmpz); 9884 proba=0; 9885 } 9886 else 9887 ++proba; 9888 mpz_mul_si(*pim._ZINTptr,*pim._ZINTptr,m); 9889 continue; 9890 } 9891 else 9892 #endif 9893 res=ichinrem(res,gen(r),pim,gen(m)); 9894 pim=gen(m)*pim; 9895 } 9896 mpz_clear(tmpz); 9897 break; 9898 } 9899 #endif 9900 vecteur2vector_int(P,m,p); 9901 vecteur2vector_int(Q,m,q); 9902 int r; 9903 if (debug_infolevel>1) 9904 CERR << CLOCK()*1e-6 << " resultant begin niter " << niter << " proba " << proba << " p= " << m << "\n"; 9905 r=resultant_int(p,q,tmp1,tmp2,m,w); 9906 if (debug_infolevel>1) 9907 CERR << CLOCK()*1e-6 << " 
resultant end " << niter << endl; 9908 #if 0 9909 ntlresultant(P,Q,m,R,false); 9910 if ((R-r)%m!=0) 9911 CERR << "bug\n"; 9912 #endif 9913 if (D!=1) 9914 r=(r*longlong(invmod(smod(D,m).val,m)))%m; 9915 #if 1 // ndef USE_GMP_REPLACEMENTS 9916 if (pim.type==_ZINT && res.type==_ZINT){ 9917 if (debug_infolevel>1) 9918 CERR << CLOCK()*1e-6 << " ichinrem start\n"; 9919 int amodm=modulo(*res._ZINTptr,m); 9920 if ((amodm-r)%m!=0){ 9921 int u,v,d; 9922 d=iegcd(modulo(*pim._ZINTptr,m),m,u,v); 9923 if (d==-1){ u=-u; v=-v; } 9924 mpz_mul_si(tmpz,*pim._ZINTptr,(u*(r-longlong(amodm)))%m); 9925 mpz_add(*res._ZINTptr,*res._ZINTptr,tmpz); 9926 if (debug_infolevel>1) 9927 CERR << CLOCK()*1e-6 << " ichinrem end\n"; 9928 proba=0; 9929 } 9930 else ++proba; 9931 } 9932 else 9933 #endif 9934 res=ichinrem(res,r,pim,m); 9935 pim=m*pim; 9936 #ifndef USE_GMP_REPLACEMENTS 9937 if (res.type==_ZINT && niter==1) 9938 mpz_realloc2(*res._ZINTptr,h); 9939 #endif 9940 } 9941 if (debug_infolevel>1) 9942 CERR << CLOCK()*1e-6 << " Wpcount end " << wpcount << " nprimes " << niter << "\n"; 9943 mpz_clear(tmpz); 9944 return smod(res,pim)*D; 9945 } 9946 9947 // resultant of P and Q, modifies P and Q, 9948 // suitable if coeffs are invertible without fraction 9949 gen gf_ext_resultant(const vecteur & P0,const vecteur & Q0){ 9950 vecteur P(P0),Q(Q0),tmp1,tmp2; 9951 gen res=1; 9952 while (Q.size()>1){ 9953 gen coeff=Q[0]; 9954 gen invcoeff=inv(coeff,context0); 9955 mulmodpoly(Q,invcoeff,Q); 9956 DivRem(P,Q,0,tmp1,tmp2); 9957 res = res*pow(coeff,int(P.size())-1); 9958 if (P.size()%2==0 && Q.size()%2==0) 9959 res = -res; 9960 P.swap(Q); 9961 Q.swap(tmp2); 9962 } 9963 if (Q.empty()) 9964 return 0; 9965 res = (res*pow(Q[0],int(P.size())-1)); 9966 return res; 9967 } 9968 9969 void subresultant(const modpoly & P,const modpoly & Q,gen & res){ 9970 if ( 9971 ( 9972 //0 && 9973 P.size()>MODRESULTANT && Q.size()>MODRESULTANT && is_integer_vecteur(P) && is_integer_vecteur(Q)) 9974 ){ 9975 res=mod_resultant(P,Q,0.0); 
9976 // according to my tests ducos is faster (except for very small coefficients) 9977 return ; 9978 } 9979 int d=int(P.size())-1,e=int(Q.size())-1; 9980 if (d<e){ 9981 subresultant(Q,P,res); 9982 // adjust sign 9983 if ((d*e)%2) res=-res; 9984 return; 9985 } 9986 if (e<=0){ 9987 res=pow((e<0?0:Q[0]),d,context0); 9988 return; 9989 } 9990 for (int i=0;i<P.size();++i){ 9991 gen g=P[i]; 9992 if (g.type==_USER){ 9993 res=gf_ext_resultant(P,Q); 9994 return; 9995 } 9996 if (g.type==_EXT){ 9997 gen h=*g._EXTptr; 9998 if (h.type==_VECT){ 9999 for (int j=0;j<h._VECTptr->size();++j){ 10000 gen k=(*h._VECTptr)[j]; 10001 if (k.type==_USER){ 10002 res=gf_ext_resultant(P,Q); 10003 return; 10004 } 10005 } 10006 } 10007 } 10008 } 10009 gen sd(pow(Q[0],d-e,context0)),tmp; 10010 vecteur A(Q),a,B,C,quo; 10011 PseudoDivRem(P,-Q,quo,B,tmp); 10012 for (unsigned step=0;;++step){ 10013 d=int(A.size())-1,e=int(B.size())-1; 10014 if (B.empty()){ 10015 res=0; 10016 return ; 10017 } 10018 int delta=d-e; 10019 if (delta>1){ 10020 gen sd(A[0]); 10021 if (step==0) 10022 sd=pow(sd,P.size()-Q.size(),context0); 10023 ducos_e(A,sd,B,C); 10024 } 10025 else 10026 C=B; 10027 if (e==0){ 10028 // adjust sign: already done by doing pseudodivrem(-Q,...) 
10029 //if ((P.lexsorted_degree()*Q.lexsorted_degree())%2) C=-C; 10030 res=C[0]; 10031 return; 10032 } 10033 ducos_e1(A,B,C,sd,B); 10034 A.swap(C); // A=C; 10035 sd=A[0]; 10036 } 10037 } 10038 10039 // P(x) -> P(-x) 10040 void Pminusx(vecteur & P){ 10041 unsigned Ps=unsigned(P.size()); 10042 for (unsigned i=0;i<Ps;++i){ 10043 if ( (Ps-i-1) %2) 10044 P[i]=-P[i]; 10045 } 10046 } 10047 10048 // split P=Pp-Pn in two parts, Pp positive coeffs and Pn negative coeffs 10049 void splitP(const vecteur &P,vecteur &Pp,vecteur &Pn){ 10050 unsigned Ps=unsigned(P.size()); 10051 Pp.resize(Ps); 10052 Pn.resize(Ps); 10053 for (unsigned i=0;i<Ps;++i){ 10054 if (is_positive(P[i],context0)) 10055 Pp[i]=P[i]; 10056 else 10057 Pn[i]=-P[i]; 10058 } 10059 } 10060 10061 #ifdef HAVE_LIBMPFI 10062 gen horner_basic(const modpoly & p,const gen & x){ 10063 modpoly::const_iterator it=p.begin(),itend=p.end(); 10064 gen res(*it); 10065 ++it; 10066 for (;it!=itend;++it) 10067 res=res*x+(*it); 10068 return res; 10069 } 10070 10071 gen horner_interval(const modpoly & p,const gen & x){ 10072 gen l=_left(x,context0),r=_right(x,context0); 10073 if (l.type!=_REAL || r.type!=_REAL) 10074 return gensizeerr(context0); 10075 bool lpos=is_positive(l,context0),rpos=is_positive(r,context0); 10076 if (lpos && rpos){ 10077 l=real_interval(*l._REALptr); 10078 r=real_interval(*r._REALptr); 10079 gen n1,n2,p1,p2; 10080 modpoly pp,pn; 10081 splitP(p,pp,pn); 10082 p1=horner_basic(pp,l); 10083 p2=horner_basic(pp,r); 10084 n1=horner_basic(pn,l); 10085 n2=horner_basic(pn,r); 10086 l=_left(p1,context0)-_right(n2,context0); 10087 r=_right(p2,context0)-_left(n1,context0); 10088 l=gen(makevecteur(l,r),_INTERVAL__VECT); 10089 l=eval(l,1,context0); 10090 return l; 10091 } 10092 if ((is_exactly_zero(l) || !lpos) && (is_exactly_zero(r) || !rpos)){ 10093 modpoly pm(p); Pminusx(pm); 10094 return horner_interval(pm,-x); 10095 } 10096 l=gen(makevecteur(l,0),_INTERVAL__VECT); 10097 l=eval(l,1,context0); 10098 l=horner_interval(p,l); 
10099 r=gen(makevecteur(0,r),_INTERVAL__VECT); 10100 r=eval(r,1,context0); 10101 r=horner_interval(p,r); 10102 gen m=min(_left(l,context0),_left(r,context0),context0); 10103 gen M=max(_right(l,context0),_right(r,context0),context0); 10104 l=gen(makevecteur(m,M),_INTERVAL__VECT); 10105 l=eval(l,1,context0); 10106 return l; 10107 } 10108 #endif 10109 10110 // p([l,r]) with l and r exact 10111 vecteur horner_interval(const modpoly & p,const gen & l,const gen & r){ 10112 bool lpos=is_positive(l,context0),rpos=is_positive(r,context0); 10113 if (lpos && rpos){ 10114 gen n1,n2,p1,p2; 10115 modpoly pp,pn; 10116 splitP(p,pp,pn); 10117 p1=horner(pp,l,0,false); 10118 p2=horner(pp,r,0,false); 10119 n1=horner(pn,l,0,false); 10120 n2=horner(pn,r,0,false); 10121 return makevecteur(p1-n2,p2-n1); 10122 } 10123 if ((is_exactly_zero(l) || !lpos) && (is_exactly_zero(r) || !rpos)){ 10124 modpoly pm(p); Pminusx(pm); 10125 return horner_interval(pm,-r,-l); 10126 } 10127 vecteur L=horner_interval(p,l,0); 10128 vecteur R=horner_interval(p,0,r); 10129 gen m=min(L[0],R[0],context0); 10130 gen M=max(L[1],R[1],context0); 10131 return makevecteur(m,M); 10132 } 10133 10134 /* set res to p^m 10135 If p(x) = sum_{i=0}^n p_i x^k 10136 Then p(x)^m = sum_{k=0}^{m*n} a(m,k) x^k 10137 a(m,0) = p_0^m, 10138 a(m,k) = 1/(k p_0) sum_{i=1}^min(n,k) p_i *((m+1)*i-k) *a(m,k-i), 10139 does not work in non-0 characteristic 10140 */ 10141 bool miller_pow(const modpoly & p_,unsigned m,modpoly & res){ 10142 if (p_.empty()){ 10143 res.clear(); 10144 return true; 10145 } 10146 // quichk check for 0 char 10147 const_iterateur it=p_.begin(),itend=p_.end(); 10148 for (;it!=itend;++it){ 10149 gen g=*it; 10150 int t=g.type; 10151 while (t==_EXT || t==_POLY){ 10152 if (t==_EXT){ 10153 if (g._EXTptr->type==_VECT && !g._EXTptr->_VECTptr->empty()){ 10154 g=g._EXTptr->_VECTptr->front(); 10155 t=g.type; 10156 } 10157 else return false; 10158 } 10159 if (t==_POLY){ 10160 if (g._POLYptr->coord.empty()) 10161 return false; 10162 
g=g._POLYptr->coord.front().value; 10163 t=g.type; 10164 } 10165 } 10166 if (t==_VECT || t==_MOD || t==_USER) 10167 return false; 10168 } 10169 modpoly p(p_); 10170 int shift=0; 10171 for (;!p.empty() && is_zero(p.back());++shift) 10172 p.pop_back(); 10173 reverse(p.begin(),p.end()); 10174 unsigned n=int(p.size())-1; 10175 unsigned mn=n*m; 10176 res.resize(mn+1); 10177 gen p0=p[0],invp0; 10178 if (p0.type==_VECT) 10179 return false; 10180 if (p0.type==_EXT || p0.type==_USER) 10181 invp0=inv(p0,context0); 10182 res[0]=pow(p0,int(m),context0); 10183 for (unsigned k=1;k<=mn;++k){ 10184 unsigned end=k<n?k:n; 10185 gen tmp; 10186 for (unsigned i=1;i<=end;++i){ 10187 tmp += int((m+1)*i-k)*(p[i]*res[k-i]); 10188 } 10189 if (is_zero(invp0)) 10190 res[k]=tmp/(int(k)*p0); 10191 else 10192 res[k]=tmp*(invp0/int(k)); 10193 } 10194 reverse(res.begin(),res.end()); 10195 if (shift) 10196 res=mergevecteur(res,vecteur(m*shift,0)); 10197 return true; 10198 } 10199 10200 gen horner(const modpoly & p,const gen & x,environment * env,bool simp){ 10201 int s=int(p.size()); 10202 if (s==0) 10203 return 0; 10204 if (s==1) 10205 return p.front(); 10206 if (is_inf(x)){ 10207 if (s%2) 10208 return plus_inf*p.front(); 10209 return x*p.front(); 10210 } 10211 if (s==2){ 10212 if (env && env->moduloon) 10213 return smod(p.front()*x+p.back(),env->modulo); 10214 else 10215 return p.front()*x+p.back(); 10216 } 10217 if ( (!env || !env->moduloon || !is_zero(env->coeff)) && x.type==_FRAC) 10218 return horner(p,*x._FRACptr,simp); 10219 #if defined HAVE_LIBMPFI && !defined NO_RTTI 10220 if (x.type==_REAL){ 10221 if (dynamic_cast<real_interval *>(x._REALptr)) 10222 return horner_interval(p,x); 10223 } 10224 #endif 10225 modpoly::const_iterator it=p.begin(),itend=p.end(); 10226 if (x.type==_CPLX && x.subtype==3){ 10227 complex<double> res(0),X(x._CPLXptr->_DOUBLE_val,(x._CPLXptr+1)->_DOUBLE_val); 10228 bool ok=true; 10229 for (;ok && it!=itend;++it){ 10230 res *=X; 10231 switch (it->type){ 10232 case 
_INT_: 10233 res += it->val; 10234 break; 10235 case _DOUBLE_: 10236 res += it->_DOUBLE_val; 10237 break; 10238 case _CPLX: 10239 if (it->subtype==3){ 10240 res += complex<double>(it->_CPLXptr->_DOUBLE_val,(it->_CPLXptr+1)->_DOUBLE_val); 10241 break; 10242 } 10243 default: 10244 ok=false; 10245 } 10246 } 10247 if (ok) return res; 10248 } 10249 it=p.begin(); 10250 gen res(*it); 10251 ++it; 10252 if (env && env->moduloon){ 10253 for (;it!=itend;++it) 10254 res=smod(res*x+(*it),env->modulo); 10255 } 10256 else { 10257 for (;it!=itend;++it) 10258 res=res*x+(*it); 10259 } 10260 return res; 10261 } 10262 10263 gen horner(const modpoly & p,const gen & x){ 10264 return horner(p,x,0); 10265 } 10266 10267 gen horner(const gen & g,const gen & x){ 10268 if (g.type!=_VECT) 10269 return g; 10270 return horner(*g._VECTptr,x); 10271 } 10272 complex<double> horner_newton(const vecteur & p,const std::complex<double> &x,GIAC_CONTEXT){ 10273 complex<double> num,den; 10274 const_iterateur it=p.begin(),itend=p.end(); 10275 double n=itend-it-1; gen tmp; 10276 for (;it!=itend;--n,++it){ 10277 num *= x; 10278 if (n) den *= x; 10279 switch (it->type){ 10280 case _INT_: 10281 num += it->val; 10282 den += n*it->val; 10283 break; 10284 case _DOUBLE_: 10285 num += it->_DOUBLE_val; 10286 den += n*it->_DOUBLE_val; 10287 break; 10288 case _CPLX: 10289 tmp=it->subtype==3?*it:evalf_double(*it,1,contextptr); 10290 if (tmp.type==_CPLX && tmp.subtype==3){ 10291 num += complex<double>(tmp._CPLXptr->_DOUBLE_val,(tmp._CPLXptr+1)->_DOUBLE_val); 10292 den += n*complex<double>(tmp._CPLXptr->_DOUBLE_val,(tmp._CPLXptr+1)->_DOUBLE_val); 10293 break; 10294 } 10295 default: 10296 return (num=0)/(den=0); 10297 } 10298 } // end for 10299 return x-num/den; 10300 } 10301 gen _horner(const gen & args,GIAC_CONTEXT){ 10302 if ( args.type==_STRNG && args.subtype==-1) return args; 10303 if (args.type!=_VECT) 10304 return symbolic(at_horner,args); 10305 vecteur & v=*args._VECTptr; 10306 int s=int(v.size()); 10307 if (s<2) 
10308 return gensizeerr(contextptr); 10309 const gen &p=v.front(); 10310 const gen & q=v[1]; 10311 if (p.type==_VECT){ 10312 if (q.type==_VECT && (p._VECTptr->size()==q._VECTptr->size() || p._VECTptr->size()==q._VECTptr->size()+1) && s==3){ 10313 // Horner-like evaluation for divided difference 10314 // p=divided differences, q=list of abscissas, r=eval point 10315 const gen & x=v[2]; 10316 const vecteur & P=*p._VECTptr; 10317 s=int(P.size())-1; 10318 const vecteur & Q=*q._VECTptr; 10319 gen r=P[s]; 10320 for (int i=s-1;i>=0;--i){ 10321 r=r*(x-Q[i])+P[i]; 10322 } 10323 return r; 10324 } 10325 if (s==3){ 10326 const gen & v2=v[2]; 10327 if (v2.type==_FUNC && *v2._FUNCptr==at_newton){ 10328 // Newton iteration for a polynomial 10329 complex<double> x; 10330 if (q.type==_DOUBLE_) 10331 x=q._DOUBLE_val; 10332 else { 10333 if (q.type==_CPLX && q.subtype==3) 10334 x=complex<double>(q._CPLXptr->_DOUBLE_val,(q._CPLXptr+1)->_DOUBLE_val); 10335 else { 10336 gen tmp=evalf_double(q,1,contextptr); 10337 if (tmp.type!=_CPLX || tmp.subtype!=3) 10338 return gensizeerr(contextptr); 10339 x=complex<double>(tmp._CPLXptr->_DOUBLE_val,(tmp._CPLXptr+1)->_DOUBLE_val); 10340 } 10341 } 10342 return horner_newton(*p._VECTptr,x,contextptr); 10343 } 10344 } // end newton iteration 10345 return horner(*p._VECTptr,q); 10346 } 10347 gen x; 10348 if (s==2) 10349 x=vx_var; 10350 else 10351 x=v.back(); 10352 if (!is_zero(derive(q,x,contextptr))) 10353 return gensizeerr(contextptr); 10354 vecteur lv(1,x); 10355 lvar(p,lv); 10356 lvar(q,lv); 10357 gen aa=e2r(p,lv,contextptr),aan,aad; 10358 fxnd(aa,aan,aad); 10359 if ( ( (aad.type==_POLY)&&(aad._POLYptr->lexsorted_degree()) ) 10360 ) 10361 return gensizeerr(contextptr); 10362 if (aan.type!=_POLY) 10363 return p; 10364 lv=vecteur(lv.begin()+1,lv.end()); 10365 gen ba=e2r(q,lv,contextptr); 10366 vecteur a(polynome2poly1(*aan._POLYptr,1)); 10367 return r2e(horner(a,ba),lv,contextptr)/r2e(aad,lv,contextptr); 10368 } 10369 static const char _horner_s 
[]="horner"; 10370 static define_unary_function_eval (__horner,&_horner,_horner_s); 10371 define_unary_function_ptr5( at_horner ,alias_at_horner,&__horner,0,true); 10372 10373 gen symb_horner(const modpoly & p,const gen & x,int d){ 10374 // better suited if x is symbolic 10375 if (p.empty()) 10376 return 0; 10377 modpoly::const_iterator it=p.begin(),itend=p.end(); 10378 gen res; 10379 int i=int(itend-it)-1; 10380 if (!i) 10381 return *it; 10382 for (;i>=0;++it,--i){ 10383 if (i==d+1) 10384 res=res+(*it)*x; 10385 else { 10386 if (i==d) 10387 res=res+(*it); 10388 else 10389 res=res+(*it)*symbolic(at_pow,gen(makevecteur(x,i-d),_SEQ__VECT)); 10390 } 10391 } 10392 return res; 10393 } 10394 10395 gen symb_horner(const modpoly & p,const gen & x){ 10396 if (x.type==_VECT && x._VECTptr->empty()) 10397 return gen(p,_POLY1__VECT); 10398 return symb_horner(p,x,0); 10399 } 10400 10401 // p=(X-x)q+p(x) 10402 gen horner(const modpoly & p,const gen & x,environment * env,modpoly & q){ 10403 modpoly::const_iterator it=p.begin(),itend=p.end(); 10404 if (p.empty()){ 10405 q.clear(); 10406 return 0; 10407 } 10408 q.resize(itend-it-1); 10409 gen res(*it); 10410 ++it; 10411 if (it==itend) 10412 return res; 10413 q[0]=res; 10414 if (env && env->moduloon){ 10415 for (int pos=1;;++pos){ 10416 res=smod(res*x+(*it),env->modulo); 10417 ++it; 10418 if (it==itend) 10419 break; 10420 q[pos]=res; 10421 } 10422 } 10423 else { 10424 if (x==1){ 10425 for (int pos=1;;++pos){ 10426 res += *it ; 10427 ++it; 10428 if (it==itend) 10429 break; 10430 q[pos]=res; 10431 } 10432 } 10433 else { 10434 for (int pos=1;;++pos){ 10435 res=res*x+(*it); 10436 ++it; 10437 if (it==itend) 10438 break; 10439 q[pos]=res; 10440 } 10441 } 10442 } 10443 return res; 10444 } 10445 10446 static modpoly taylordiff(const modpoly & p,const gen & x){ 10447 int d=int(p.size()); 10448 modpoly res(p),P(p); 10449 for (int i=1;i<=d;++i){ 10450 res[d-i]=horner(P,x); 10451 P=derivative(P)/gen(i); 10452 } 10453 return res; 10454 } 10455 
10456 void modpoly2mpzpoly(const modpoly & p,mpz_t * & res){ 10457 const_iterateur it=p.begin(),itend=p.end(); 10458 res=new mpz_t[itend-it]; 10459 for (int i=0;it!=itend;++i,++it){ 10460 if (it->type==_INT_) 10461 mpz_init_set_si(res[i],it->val); 10462 else 10463 mpz_init_set(res[i],*it->_ZINTptr); 10464 } 10465 } 10466 10467 void taylorshift1(mpz_t * tab,int size){ 10468 for (int i=1;i<size;++i){ 10469 // tab[j]=tab[j-1]+tab[j] for j from 1 to size-i 10470 for (int j=1;j<=size-i;++j){ 10471 mpz_add(tab[j],tab[j],tab[j-1]); 10472 } 10473 } 10474 } 10475 10476 void mpzpoly2modpoly(mpz_t * p,modpoly & res){ 10477 iterateur it=res.begin(),itend=res.end(); 10478 for (int i=0;it!=itend;++i,++it){ 10479 *it=*(p+i); 10480 mpz_clear(p[i]); 10481 } 10482 delete [] p; 10483 } 10484 10485 bool isintpoly(const modpoly & p){ 10486 const_iterateur it=p.begin(),itend=p.end(); 10487 for (;it!=itend;++it){ 10488 if (!is_integer(*it)) 10489 return false; 10490 } 10491 return true; 10492 } 10493 10494 // shift polynomial 10495 modpoly taylor(const modpoly & p,const gen & x,environment * env){ 10496 if (p.empty()) 10497 return p; 10498 if ( (!env || !env->moduloon || !is_zero(env->coeff)) && x.type==_FRAC) // use derivatives of p 10499 return taylordiff(p,x); 10500 modpoly res,a,b; 10501 a=p; 10502 if (x==1 && a.size()>5 && isintpoly(a)){ 10503 mpz_t * tab; 10504 modpoly2mpzpoly(a,tab); 10505 taylorshift1(tab,int(a.size())); 10506 mpzpoly2modpoly(tab,a); 10507 return a; 10508 } 10509 int d=int(p.size()); 10510 for (int i=0;i<d;++i){ 10511 res.push_back(horner(a,x,env,b)); 10512 a.swap(b); // a=b; 10513 } 10514 reverse(res.begin(),res.end()); 10515 return res; 10516 } 10517 10518 gen lgcd(const dense_POLY1 & p){ 10519 if (p.empty()) 10520 return 1; 10521 dense_POLY1::const_iterator it=p.begin(),itend=p.end(); 10522 gen n(*it),n1(1); 10523 for (;it!=itend;++it){ 10524 n=gcd(n,*it,context0); 10525 if (n==n1) 10526 return 1; 10527 } 10528 return n; 10529 } 10530 10531 // gcd of coeff of 
p and g 10532 gen lgcd(const dense_POLY1 & p,const gen & g){ 10533 if (p.empty()) 10534 return g; 10535 dense_POLY1::const_iterator it=p.begin(),itend=p.end(); 10536 gen n(g); 10537 for (;it!=itend;++it){ 10538 n=gcd(n,*it,context0); 10539 if (is_one(n)) 10540 return 1; 10541 } 10542 return n; 10543 } 10544 10545 gen ppz(dense_POLY1 & p){ 10546 gen n(lgcd(p)); 10547 p=p/n; 10548 return n; 10549 } 10550 10551 // does not seem threadable, no idea why... 10552 gen norm(const dense_POLY1 & p,GIAC_CONTEXT){ 10553 gen res; 10554 dense_POLY1::const_iterator it=p.begin(), itend=p.end(); 10555 for (;it!=itend;++it){ 10556 gen tmp(abs(*it,contextptr)); 10557 if (is_strictly_greater(tmp,res,contextptr)) // (res<tmp) 10558 res=tmp; 10559 } 10560 return res; 10561 } 10562 10563 gen intnorm(const dense_POLY1 & p,GIAC_CONTEXT){ 10564 gen res,mres; 10565 dense_POLY1::const_iterator it=p.begin(), itend=p.end(); 10566 for (;it!=itend;++it){ 10567 if (it->type==_INT_){ 10568 if (res.val*longlong(res.val)<it->val*longlong(it->val)){ 10569 res.val=absint(it->val); 10570 mres.val=-res.val; 10571 } 10572 continue; 10573 } 10574 if (it->type!=_ZINT) 10575 return norm(p,contextptr); 10576 mres=res=*it; 10577 if (is_positive(res,contextptr)) 10578 mres=-res; 10579 else 10580 res=-mres; 10581 break; 10582 } 10583 for (;it!=itend;++it){ 10584 if (it->type==_INT_) 10585 continue; 10586 if (it->type!=_ZINT) 10587 return norm(p,contextptr); 10588 if ( 10589 (res.type==_ZINT && mpz_cmp(*it->_ZINTptr,*res._ZINTptr)>0) 10590 || (res.type==_INT_ && mpz_cmp_si(*it->_ZINTptr,res.val)>0) 10591 ){ 10592 res=*it; 10593 mres=-res; 10594 continue; 10595 } 10596 if ( 10597 (mres.type==_ZINT && mpz_cmp(*mres._ZINTptr,*it->_ZINTptr)>0) 10598 || (mres.type==_INT_ && mpz_cmp_si(*it->_ZINTptr,mres.val)<0) 10599 ){ 10600 mres=*it; 10601 res=-mres; 10602 } 10603 } 10604 //if (res!=norm(p,contextptr)) CERR << "intnorm err" << '\n'; 10605 return res; 10606 } 10607 10608 // assuming pmod and qmod are prime together, 
find r such that 10609 // r = p mod pmod and r = q mod qmod 10610 // hence r = p + A*pmod = q + B*qmod 10611 // or A*pmod -B*qmod = q - p 10612 // assuming u*pmod+v*pmod=d we get 10613 // A=u*(q-p)/d 10614 dense_POLY1 ichinrem(const dense_POLY1 &p,const dense_POLY1 & q,const gen & pmod,const gen & qmod){ 10615 gen u,v,d,tmp,pqmod(pmod*qmod); 10616 egcd(pmod,qmod,u,v,d); 10617 // COUT << u << "*" << pmod << "+" << v << "*" << qmod << "=" << d << " " << u*pmod+v*qmod << '\n'; 10618 dense_POLY1::const_iterator a = p.begin(); 10619 dense_POLY1::const_iterator a_end = p.end(); 10620 dense_POLY1::const_iterator b = q.begin(); 10621 dense_POLY1::const_iterator b_end = q.end(); 10622 int n=int(a_end-a), m=int(b_end-b); 10623 dense_POLY1 res; 10624 res.reserve(giacmax(n,m)); 10625 for (;m>n;++b,--m) 10626 res.push_back(smod(iquo(u*(*b),d),pqmod)); 10627 for (;n>m;++a,--n) 10628 res.push_back(smod(*a-iquo(u*(*a),d),pqmod)); 10629 for (;a!=a_end;++a,++b){ 10630 res.push_back(smod(*a+iquo(u*(*b-*a),d) *pmod,pqmod)) ; 10631 // COUT << a->value << " " << b->value << "->" << tmp << " " << pqmod << '\n'; 10632 } 10633 return res; 10634 } 10635 10636 // p and q assumed to have the same size, gcd(pmod,qmod)=1 10637 bool ichinrem_inplace(dense_POLY1 &p,const dense_POLY1 & q,const gen & pmod,int qmodval){ 10638 if (debug_infolevel>2) 10639 CERR << CLOCK()*1e-6 << " ichinrem begin "<< p.size() << '\n'; 10640 gen u,v,d,tmp,pqmod(qmodval*pmod),pqmod2=iquo(pqmod,2),minuspqmod2=-pqmod2; 10641 egcd(pmod,qmodval,u,v,d); 10642 if (u.type==_ZINT) 10643 u=modulo(*u._ZINTptr,qmodval); 10644 int U=u.val; 10645 if (d==-1){ u=-u; v=-v; d=1; } 10646 if (d!=1) 10647 return false; 10648 if (pmod.type!=_ZINT) 10649 return false; 10650 dense_POLY1::iterator a = p.begin(),a_end = p.end(); 10651 dense_POLY1::const_iterator b = q.begin(),b_end = q.end(); 10652 int n=int(a_end-a), m=int(b_end-b); 10653 if (n!=m) 10654 return false; 10655 mpz_t tmpz; 10656 mpz_init(tmpz); 10657 for (;a!=a_end;++a,++b){ 10658 
// smod(*a+((u*(*b-*a))%qmod)*pmod,pqmod) 10659 #ifndef USE_GMP_REPLACEMENTS 10660 if (a->type==_ZINT){ 10661 #if 1 10662 int amodq=modulo(*a->_ZINTptr,qmodval); 10663 if (amodq==b->val) 10664 continue; 10665 mpz_mul_si(tmpz,*pmod._ZINTptr,(U*(b->val-longlong(amodq)))%qmodval); 10666 mpz_add(tmpz,tmpz,*a->_ZINTptr); 10667 #else 10668 mpz_set_si(tmpz,b->val); 10669 mpz_sub(tmpz,tmpz,*a->_ZINTptr); 10670 mpz_mul_si(tmpz,*pmod._ZINTptr,(longlong(U)*modulo(tmpz,qmodval))%qmodval); 10671 mpz_add(tmpz,tmpz,*a->_ZINTptr); 10672 #endif 10673 } 10674 else { 10675 mpz_mul_si(tmpz,*pmod._ZINTptr,(U*(longlong(b->val)-a->val))%qmodval); 10676 if (a->val>=0) 10677 mpz_add_ui(tmpz,tmpz,a->val); 10678 else 10679 mpz_sub_ui(tmpz,tmpz,-a->val); 10680 } 10681 if (mpz_cmp(tmpz,*pqmod2._ZINTptr)>=0) 10682 mpz_sub(tmpz,tmpz,*pqmod._ZINTptr); 10683 else { 10684 if (mpz_cmp(tmpz,*minuspqmod2._ZINTptr)<=0) 10685 mpz_add(tmpz,tmpz,*pqmod._ZINTptr); 10686 } 10687 if (a->type==_ZINT) mpz_set(*a->_ZINTptr,tmpz); else *a=tmpz; 10688 #else 10689 *a=*a+u*(*b-*a) *pmod ; // improve to modulo(U*(*b-*a), qmodval) and type checking for overwrite 10690 *a = smod(*a,pqmod); 10691 #endif 10692 } 10693 mpz_clear(tmpz); 10694 if (debug_infolevel>2) 10695 CERR << CLOCK()*1e-6 << " ichinrem end "<< p.size() << '\n'; 10696 return true; 10697 } 10698 10699 void ichinremp1p2(const vector<int> & resp1,const vector<int> & resp2,size_t rs,vecteur & pq,int nbits){ 10700 pq.clear(); 10701 const int p1modinv=-9;//invmod(p1,p2); 10702 for (size_t i=0;i<rs;++i){ 10703 //int A=pq[i].val,B=curres[i].val; 10704 int A=resp1[i],B=resp2[i]; 10705 // A mod p1, B mod p2 -> res mod p1*p2 10706 longlong res=A+((longlong(p1modinv)*(B-A))%p2)*p1; 10707 if (res>p1p2sur2) res-=p1p2; 10708 else if (res<-p1p2sur2) res+=p1p2; 10709 pq.push_back(gen(res,nbits)); // pq[i]=res; 10710 } 10711 } 10712 10713 bool ichinrem(const vector<int> & p,const vector<int> & q,int pmod,int qmod,int zsize,bool dosmod,vecteur & res){ 10714 
vector<int>::const_iterator a = p.begin(),a_end = p.end(); 10715 vector<int>::const_iterator b = q.begin(),b_end = q.end(); 10716 int u,v; 10717 if ( (a_end-a!=b_end-b) || iegcd(pmod,qmod,u,v)!=1) 10718 return false; 10719 res.reserve(a_end-a); 10720 res.clear(); 10721 longlong pqmod=longlong(pmod)*qmod; 10722 for (;a!=a_end;++a,++b){ 10723 // smod(*a+((u*(*b-*a))%qmod)*pmod,pqmod) 10724 longlong r=*a+ ( (u*(longlong(*b)-*a)) %qmod) *pmod ; 10725 r -= (r>>63)*pqmod; 10726 if (dosmod && r>pqmod/2) 10727 r-=pqmod; 10728 res.push_back(gen(r,zsize)); 10729 } 10730 return true; 10731 } 10732 10733 // p and q assumed to have the same size, gcd(pmod,qmod)=1 10734 // returns 0 on error, 1 if p has changed, 2 if p is unchanged 10735 int ichinrem_inplace(dense_POLY1 &p,const vector<int> & q,const gen & pmod,int qmodval,int reserve_mem){ 10736 if (debug_infolevel>2) 10737 CERR << CLOCK()*1e-6 << " ichinrem_inplace begin deg "<< p.size() << '\n'; 10738 gen u,v,d,tmp,pqmod(qmodval*pmod),pqmod2=iquo(pqmod,2),minuspqmod2=-pqmod2; 10739 egcd(pmod,qmodval,u,v,d); 10740 if (u.type==_ZINT) 10741 u=modulo(*u._ZINTptr,qmodval); 10742 if (d==-1){ u=-u; v=-v; d=1; } 10743 int U=u.val; 10744 if (d!=1) 10745 return 0; 10746 gen pmod_(pmod); 10747 pmod_.uncoerce(); 10748 dense_POLY1::iterator a = p.begin(),a_end = p.end(); 10749 vector<int>::const_iterator b = q.begin(),b_end = q.end(); 10750 int n=int(a_end-a), m=int(b_end-b); 10751 if (n!=m) 10752 return 0; 10753 bool changed=false; 10754 mpz_t tmpz; 10755 if (reserve_mem) 10756 mpz_init2(tmpz,reserve_mem); 10757 else 10758 mpz_init(tmpz); 10759 for (;a!=a_end;++a,++b){ 10760 // smod(*a+((u*(*b-*a))%qmod)*pmod,pqmod) 10761 #ifndef USE_GMP_REPLACEMENTS 10762 if (a->type==_ZINT){ 10763 int amodq=modulo(*a->_ZINTptr,qmodval); 10764 if (amodq==*b) 10765 continue; 10766 int ab=(U*(*b-longlong(amodq)))%qmodval; 10767 if (ab==0) 10768 continue; 10769 changed=true; 10770 mpz_mul_si(tmpz,*pmod_._ZINTptr,ab); 10771 mpz_add(tmpz,tmpz,*a->_ZINTptr); 
10772 } 10773 else { 10774 int ab=(U*(longlong(*b)-a->val))%qmodval; 10775 if (ab==0) 10776 continue; 10777 changed=true; 10778 mpz_mul_si(tmpz,*pmod_._ZINTptr,ab); 10779 if (a->val>=0) 10780 mpz_add_ui(tmpz,tmpz,a->val); 10781 else 10782 mpz_sub_ui(tmpz,tmpz,-a->val); 10783 } 10784 if (mpz_cmp(tmpz,*pqmod2._ZINTptr)>0) 10785 mpz_sub(tmpz,tmpz,*pqmod._ZINTptr); 10786 else { 10787 if (mpz_cmp(tmpz,*minuspqmod2._ZINTptr)<=0) 10788 mpz_add(tmpz,tmpz,*pqmod._ZINTptr); 10789 } 10790 // && a->ref_count()==1 ? 10791 if (a->type==_ZINT) 10792 mpz_set(*a->_ZINTptr,tmpz); 10793 else 10794 *a=tmpz; 10795 #else 10796 *a=*a+u*(*b-*a) *pmod ; // improve to modulo(U*(*b-*a), qmodval) and type checking for overwrite 10797 *a = smod(*a,pqmod); 10798 #endif 10799 } 10800 mpz_clear(tmpz); 10801 if (debug_infolevel>2) 10802 CERR << CLOCK()*1e-6 << " ichinrem_inplace end deg "<< p.size() << '\n'; 10803 return changed?1:2; 10804 } 10805 10806 // assuming pmod and qmod are prime together, find r such that 10807 // r = p mod pmod and r = q mod qmod 10808 // hence r = p + A*pmod = q + B*qmod 10809 // or A*pmod -B*qmod = q - p 10810 // assuming u*pmod+v*qmod=d we get 10811 // A=u*(q-p)/d 10812 modpoly chinrem(const modpoly & p,const modpoly & q, const modpoly & pmod, const modpoly & qmod,environment * env){ 10813 modpoly u,v,d,r; 10814 egcd(pmod,qmod,env,u,v,d); 10815 r=operator_plus(p,operator_times(operator_times(u,operator_div(operator_minus(q,p,env),d,env),env),pmod,env),env); 10816 if (r.size()>=pmod.size()+qmod.size()-1) 10817 r=operator_mod(r,operator_times(pmod,qmod,env),env); 10818 return r; 10819 } 10820 10821 void divided_differences(const vecteur & x,vecteur & res,environment * env,bool divexact){ 10822 int s=int(x.size()); 10823 for (int k=1;k<s;++k){ 10824 if (env && env->moduloon){ 10825 for (int j=s-1;j>=k;--j){ 10826 res[j]=smod((res[j]-res[j-1])*invmod(x[j]-x[j-k],env->modulo),env->modulo); 10827 } 10828 } 10829 else { 10830 for (int j=s-1;j>=k;--j){ 10831 gen & g=res[j]; 
10832 operator_minus_eq(g,res[j-1],context0); 10833 gen dx(x[j]-x[j-k]); 10834 #ifndef USE_GMP_REPLACEMENTS 10835 if (divexact && g.type==_ZINT && g.ref_count()==1 && dx.type==_INT_){ 10836 mpz_t * z=g._ZINTptr; 10837 if (dx.val>0) 10838 mpz_divexact_ui(*z,*z,dx.val); 10839 else { 10840 mpz_divexact_ui(*z,*z,-dx.val); 10841 mpz_neg(*z,*z); 10842 } 10843 } 10844 else 10845 #endif 10846 g=g/dx; 10847 } 10848 } 10849 } 10850 } 10851 10852 void divided_differences(const vecteur & x,const vecteur & y,vecteur & res,environment * env){ 10853 res=y; 10854 divided_differences(x,res,env,false); 10855 } 10856 10857 void interpolate(const vecteur & x,const vecteur & y,modpoly & res,environment * env){ 10858 vecteur alpha; 10859 divided_differences(x,y,alpha,env); 10860 unsigned s=unsigned(x.size()); 10861 res.clear(); 10862 res.reserve(s); 10863 int j=s-1; 10864 res.push_back(alpha[j]); 10865 for (j--;j>=0;j--){ 10866 res.push_back(alpha[j]); 10867 iterateur it=res.end()-2,itbeg=res.begin()-1; 10868 const gen & fact = x[j]; 10869 for (;it!=itbeg;it-=2){ 10870 gen & tmp = *it; 10871 ++it; 10872 *it -= tmp*fact; 10873 if (env && env->moduloon) 10874 *it=smod(*it,env->modulo); 10875 } 10876 } 10877 } 10878 10879 void interpolate_inplace(const vecteur & x,modpoly & res,environment * env){ 10880 divided_differences(x,res,env,true); 10881 unsigned s=unsigned(x.size()); 10882 int j=s-1; 10883 reverse(res.begin(),res.end()); 10884 for (j--;j>=0;j--){ 10885 iterateur it=res.begin()+(s-2-j),itbeg=res.begin()-1; 10886 const gen & fact = x[j]; 10887 for (;it!=itbeg;it-=2){ 10888 gen & tmp = *it; 10889 ++it; 10890 type_operator_minus_times(tmp,fact,*it); // *it -= tmp*fact; 10891 if (env && env->moduloon) 10892 *it=smod(*it,env->modulo); 10893 } 10894 } 10895 } 10896 10897 // Multiplication of multivariate polynomials using Lagrange interpolation 10898 void mulpoly_interpolate(const polynome & p,const polynome & q,polynome & res,environment * env){ 10899 int s=p.dim; 10900 gen modulo; 
10901 if (env &&env->moduloon) 10902 modulo=env->modulo; 10903 if (s<2){ 10904 mulpoly(p,q,res,modulo); 10905 return; 10906 } 10907 bool estreel=poly_is_real(p) && poly_is_real(q); 10908 polynome pxn,qxn; 10909 convert_xn(p,pxn); 10910 convert_xn(q,qxn); 10911 int pd=p.degree(s-1); 10912 int qd=q.degree(s-1); 10913 int sd=pd+qd; 10914 vecteur x(sd+1); 10915 vecteur y(sd+1); 10916 modpoly v; 10917 index_t * degptr=0; 10918 for (int i=0;i<=sd;++i){ 10919 x[i]=i; 10920 y[i]=new ref_polynome(s); 10921 mulpoly_interpolate(pevaln(pxn,i,modulo,degptr,estreel),pevaln(qxn,i,modulo,degptr,estreel),*y[i]._POLYptr,env); 10922 } 10923 interpolate(x,y,v,env); 10924 poly12polynome(v,s,res,s); 10925 } 10926 10927 int vect_polynome2poly1(vecteur & A){ 10928 int dim=0; 10929 for (size_t i=0;i<A.size();++i){ 10930 if (A[i].type==_POLY){ 10931 dim=A[i]._POLYptr->dim; 10932 A[i]=gen(polynome2poly1(*A[i]._POLYptr,1),_POLY1__VECT); 10933 } 10934 } 10935 return dim; 10936 } 10937 10938 void vect_poly12polynome(vecteur & v,int dim){ 10939 iterateur it=v.begin(),itend=v.end(); 10940 for (;it!=itend;++it){ 10941 if (it->type==_VECT) 10942 *it=poly12polynome(*it->_VECTptr,1,dim); 10943 } 10944 } 10945 10946 void mat_poly12polynome(matrice & A,int dim){ 10947 iterateur it=A.begin(),itend=A.end(); 10948 for (;it!=itend;++it){ 10949 if (it->type==_VECT) 10950 vect_poly12polynome(*it->_VECTptr,dim); 10951 } 10952 } 10953 10954 void vect_horner(const vecteur & v,const gen & g,vecteur & res){ 10955 res=v; 10956 iterateur it=res.begin(),itend=res.end(); 10957 for (;it!=itend;++it) 10958 if (it->type==_VECT) 10959 *it=horner(*it->_VECTptr,g); 10960 } 10961 10962 // compute dotvecteur of a and b by interpolation if it would be faster 10963 // 1-d interpolation cost : D*M+D^2 10964 // where D=max(size(a[i])+size(b[i])-1), M=min(size(a),size(b)) 10965 // normal cost: sum_i(size(a[i])*size(b[i])) 10966 // if a and b are of length n and degree n, interp cost is O(n^2) 10967 // while normal cost is O(n^3) 
10968 // Beware: this is not interesting in characteristic 0 because 10969 // we replace n-deg polynomials with integers of size n*ln(n) 10970 bool dotvecteur_interp(const vecteur & a,const vecteur &b,gen & res){ 10971 if (a.empty() || b.empty()){ 10972 res=0; return true; 10973 } 10974 if (a.front().type==_POLY || b.front().type==_POLY){ 10975 vecteur A(a), B(b); int dim; 10976 if (!(dim=vect_polynome2poly1(A)) || dim!=vect_polynome2poly1(B)) 10977 return false; 10978 if (dotvecteur_interp(A,B,res)){ 10979 if (res.type==_VECT) res=poly12polynome(*res._VECTptr,1,dim); 10980 return true; 10981 } 10982 return false; 10983 } 10984 if (a.front().type==_VECT || b.front().type==_VECT){ 10985 int D=0,M=giacmin(int(a.size()),int(b.size())); 10986 double interpcost=0.0,normalcost=0.0; 10987 for (int i=0;i<M;++i){ 10988 int as=1,bs=1; 10989 if (a[i].type==_VECT) as=int(a[i]._VECTptr->size()); 10990 if (b[i].type==_VECT) bs=int(b[i]._VECTptr->size()); 10991 if (D<as+bs-1) D=as+bs-1; 10992 normalcost += as*bs; 10993 } 10994 if (normalcost<D*(M+D)) 10995 return false; 10996 // now do the real work! 
10997 int shift=-D/2; 10998 vecteur X(D),Y(D),A(M),B(M); 10999 for (int j=0;j<D;++j){ 11000 X[j]=j-shift; 11001 for (int i=0;i<M;++i){ 11002 A[i]=horner(a[i],j-shift); 11003 B[i]=horner(b[i],j-shift); 11004 } 11005 Y[j]=dotvecteur(A,B); 11006 } 11007 vecteur R; 11008 interpolate(X,Y,R,0); 11009 res=R; 11010 return true; 11011 } 11012 return false; 11013 } 11014 11015 // R is a degree D-1 polynomial of MxN matrices, 11016 // rebuild a matrix of polynomials 11017 void polymat2matpoly(const vecteur & R,vecteur & res){ 11018 if (R.empty()) return; 11019 int M,N,D=int(R.size()); 11020 mdims(*R[0]._VECTptr,M,N); 11021 // init res 11022 res.resize(M); 11023 for (int i=0;i<M;++i){ 11024 res[i]=vecteur(N); 11025 vecteur & resi=*res[i]._VECTptr; 11026 for (int j=0;j<N;++j) 11027 resi[j]=vecteur(D); 11028 } 11029 // modify in place 11030 for (int d=0;d<D;++d){ 11031 vecteur & md=*R[d]._VECTptr; 11032 for (int i=0;i<M;++i){ 11033 vecteur & resi=*res[i]._VECTptr; 11034 vecteur &mdi=*md[i]._VECTptr; 11035 for (int j=0;j<N;++j){ 11036 vecteur & resij=*resi[j]._VECTptr; 11037 resij[d]=mdi[j]; 11038 } 11039 } 11040 } 11041 for (int i=0;i<M;++i){ 11042 vecteur & resi=*res[i]._VECTptr; 11043 for (int j=0;j<N;++j){ 11044 trim(*resi[j]._VECTptr); 11045 } 11046 } 11047 } 11048 11049 // warning b is already transposed 11050 bool mmult_interp(const matrice & a,const matrice &b,matrice & res){ 11051 if (a.front()[0].type==_POLY || b.front()[0].type==_POLY){ 11052 matrice A(a), B(b); 11053 int S=giacmin(int(A.size()),int(B.size())),dim=0; 11054 for (int i=0;i<S;++i){ 11055 if (A[i].type!=_VECT || B[i].type!=_VECT) return false; 11056 A[i]=*A[i]._VECTptr; 11057 B[i]=*B[i]._VECTptr; 11058 if (!(dim=vect_polynome2poly1(*A[i]._VECTptr)) || dim!=vect_polynome2poly1(*B[i]._VECTptr)) 11059 return false; 11060 } 11061 if (mmult_interp(A,B,res)){ 11062 mat_poly12polynome(res,dim); 11063 return true; 11064 } 11065 return false; 11066 } 11067 if (a.front()[0].type==_VECT || b.front()[0].type==_VECT){ 
11068 // find required degree 11069 int D=0,M=giacmin(int(a.size()),int(b.size())),N=0; 11070 for (int i=0;i<M;++i){ 11071 gen ai=a[i],bi=b[i]; 11072 if (ai.type!=_VECT || bi.type!=_VECT) 11073 return false; 11074 vecteur av=*ai._VECTptr,bv=*bi._VECTptr; 11075 N=giacmin(int(av.size()),int(bv.size())); 11076 for (int j=0;j<N;++j){ 11077 int as=1,bs=1; 11078 if (av[j].type==_VECT) as=int(av[j]._VECTptr->size()); 11079 if (bv[j].type==_VECT) bs=int(bv[j]._VECTptr->size()); 11080 if (D<as+bs-1) D=as+bs-1; 11081 } 11082 } 11083 // do the real work! 11084 int shift=D/2; 11085 vecteur X(D),Y(D),A(M),B(M); 11086 for (int j=0;j<D;++j){ 11087 X[j]=j-shift; 11088 for (int i=0;i<M;++i){ 11089 vecteur tmp; 11090 vect_horner(*a[i]._VECTptr,j-shift,tmp); 11091 A[i]=tmp; 11092 vect_horner(*b[i]._VECTptr,j-shift,tmp); 11093 B[i]=tmp; 11094 } 11095 vecteur tmp; 11096 mmult_atranb(A,B,tmp); 11097 Y[j]=tmp; 11098 } 11099 vecteur R; 11100 interpolate(X,Y,R,0); 11101 polymat2matpoly(R,res); 11102 return true; 11103 } 11104 return false; 11105 } 11106 11107 bool do_pcar_interp(const matrice & a,vecteur & p,bool compute_pmin,GIAC_CONTEXT){ 11108 if (a.front()[0].type==_POLY){ 11109 matrice A(a); 11110 int S=int(A.size()),dim=0; 11111 for (int i=0;i<S;++i){ 11112 if (A[i].type!=_VECT) return false; 11113 A[i]=*A[i]._VECTptr; 11114 if (!(dim=vect_polynome2poly1(*A[i]._VECTptr))) 11115 return false; 11116 } 11117 if (!do_pcar_interp(A,p,compute_pmin,contextptr)) 11118 return false; 11119 vect_poly12polynome(p,dim); 11120 return true; 11121 } 11122 if (a.front()[0].type==_VECT){ 11123 // find required number of interpolations 11124 int D=0,M=int(a.size()),N=0; 11125 for (int i=0;i<M;++i){ 11126 gen ai=a[i]; 11127 if (ai.type!=_VECT) 11128 return false; 11129 vecteur av=*ai._VECTptr; 11130 N=int(av.size()); 11131 for (int j=0;j<N;++j){ 11132 int as=1; 11133 if (av[j].type==_VECT) as=int(av[j]._VECTptr->size()); 11134 if (D<as-1) D=as-1; 11135 } 11136 } 11137 int Dorig=D; 11138 D = M*D+1; 11139 
// do the real work! 11140 int shift=-D/2; 11141 vecteur X(D),Y(D),A(M); 11142 int resdegp1=M+1; 11143 for (int j=0;j<D;++j,++shift){ 11144 for (int i=0;i<M;++i){ 11145 vecteur tmp; 11146 vect_horner(*a[i]._VECTptr,shift,tmp); 11147 A[i]=tmp; 11148 } 11149 gen tmp; 11150 if (compute_pmin) 11151 tmp=_pmin(A,contextptr); 11152 else 11153 tmp=_pcar(A,contextptr); 11154 if (tmp.type!=_VECT) 11155 return false; 11156 int tmpd=int(tmp._VECTptr->size()); 11157 if (!j) resdegp1=tmpd; 11158 if (tmpd==resdegp1){ 11159 X[j]=shift; 11160 Y[j]=tmp; 11161 if (j==resdegp1*Dorig){ 11162 D=j+1; 11163 break; 11164 } 11165 continue; 11166 } 11167 if (tmpd<resdegp1) // bad reduction, pmin degree is too small 11168 continue; 11169 // tmpd>resdegp1, previous pmin were bad reduction, restart 11170 j=0; 11171 X[j]=shift; 11172 Y[j]=tmp; 11173 } 11174 vecteur R; 11175 X.resize(D); Y.resize(D); // early termination 11176 // pmin(a)==0 because it's a matrix with polynomial coeffs 11177 // in the parameter of degree < D and it is 0 for D values 11178 // of the parameter 11179 interpolate(X,Y,R,0); 11180 // R is a polynomial of pmins, we must rebuild a pmin of polynomials 11181 // init res 11182 vecteur & res=p; 11183 res.resize(resdegp1); 11184 for (int i=0;i<resdegp1;++i){ 11185 res[i]=gen(vecteur(D),_POLY1__VECT); 11186 } 11187 // modify in place 11188 for (int d=0;d<D;++d){ 11189 if (R[d].type!=_VECT) 11190 continue; 11191 vecteur & md=*R[d]._VECTptr; 11192 int shift=resdegp1-int(md.size()); 11193 for (int i=shift;i<resdegp1;++i){ 11194 vecteur & resi=*res[i]._VECTptr; 11195 resi[d]=md[i-shift]; 11196 } 11197 } 11198 for (int i=0;i<res.size();++i){ 11199 vecteur & resi=*res[i]._VECTptr; 11200 trim(resi); 11201 } 11202 return true; 11203 } 11204 return false; 11205 } 11206 11207 bool poly_pcar_interp(const matrice & a,vecteur & p,bool compute_pmin,GIAC_CONTEXT){ 11208 if (a.empty()) return false; 11209 if (a[0][0].type==_POLY || a[0][0].type==_VECT){ 11210 if 
(!do_pcar_interp(a,p,compute_pmin,contextptr)) 11211 return false; 11212 return true; 11213 } 11214 vecteur lv=alg_lvar(a); 11215 if (lv.empty()) 11216 return false; 11217 matrice A=*(e2r(a,lv,contextptr)._VECTptr); 11218 for (int i=0;i<A.size();++i){ 11219 gen Ai=A[i]; 11220 if (Ai.type!=_VECT) return false; 11221 const_iterateur it=Ai._VECTptr->begin(),itend=Ai._VECTptr->end(); 11222 for (;it!=itend;++it){ 11223 if (it->type==_FRAC && it->_FRACptr->den.type==_POLY) 11224 return false; 11225 } 11226 } 11227 // extract common denominator 11228 vecteur Aflat; gen d; 11229 aplatir(A,Aflat); 11230 const_iterateur jt=Aflat.begin(); 11231 lcmdeno(Aflat,d,contextptr); 11232 for (int i=0;i<A.size();++i){ 11233 gen Ai=A[i]; 11234 if (Ai.type!=_VECT) return false; 11235 iterateur it=Ai._VECTptr->begin(),itend=Ai._VECTptr->end(); 11236 for (;it!=itend;++it,++jt){ 11237 *it=*jt; 11238 } 11239 } 11240 if (!do_pcar_interp(A,p,compute_pmin,contextptr)) 11241 return false; 11242 // eigenvalues of A are lambda/d, 11243 // we must scale p by d, leading coeff does not change, then /d, etc. 
11244 gen powd=1; 11245 for (int i=0;i<p.size();++i){ 11246 p[i]=r2e(p[i]/powd,lv,contextptr); 11247 powd=powd*d; 11248 } 11249 return true; 11250 } 11251 11252 // n <- n mod N where N=2^expoN+1 11253 // n=q*(N-1)+r => n=q*N+(r-q) 11254 void smod2N(longlong & n,unsigned long expoN,bool do_smod){ 11255 if (n<0){ 11256 n=-n; 11257 smod2N(n,expoN,do_smod); 11258 n=-n; 11259 return; 11260 } 11261 longlong q = n >> expoN; 11262 if (q){ 11263 n -= q << expoN; 11264 n -= q; 11265 } 11266 if (n>0){ 11267 q = n >> expoN; 11268 if (q){ 11269 n -= q << expoN; 11270 n -= q; 11271 } 11272 } 11273 if (!do_smod) 11274 return; 11275 if (n<0){ 11276 q = (-n) >> (expoN-1); 11277 n += q+(q << expoN); 11278 } 11279 else { 11280 q = n >> (expoN-1); 11281 n -= q + (q << expoN); 11282 } 11283 } 11284 11285 // replace g in-place by g mod N where N=2^expoN+1 11286 // if do_smod==true, returns g in [-N/2-1,N/2+1] 11287 void smod2N(gen & g,unsigned long expoN,mpz_t tmpqz,bool do_smod=false){ 11288 if (g.type!=_ZINT){ 11289 if (expoN<31){ 11290 longlong n=g.val; 11291 smod2N(n,expoN,do_smod); 11292 g.val = n; 11293 } 11294 return; 11295 } 11296 mpz_t & z=*g._ZINTptr; 11297 mpz_tdiv_q_2exp(tmpqz,z,expoN); 11298 mpz_tdiv_r_2exp(z,z,expoN); 11299 mpz_sub(z,z,tmpqz); 11300 mpz_tdiv_q_2exp(tmpqz,z,expoN); 11301 mpz_tdiv_r_2exp(z,z,expoN); 11302 mpz_sub(z,z,tmpqz); 11303 if (!do_smod) 11304 return; 11305 mpz_tdiv_q_2exp(tmpqz,z,expoN-1); 11306 mpz_sub(z,z,tmpqz); 11307 mpz_mul_2exp(tmpqz,tmpqz,expoN); 11308 mpz_sub(z,z,tmpqz); 11309 } 11310 11311 void shift2N(gen & tmp,unsigned long shift){ 11312 if (tmp.type==_INT_ ){ 11313 if (shift<31){ 11314 if (tmp.val<0) 11315 tmp = -(longlong(-tmp.val)<<shift); 11316 else 11317 tmp = longlong(tmp.val) << shift; 11318 } 11319 else { 11320 tmp.uncoerce(); 11321 mpz_mul_2exp(*tmp._ZINTptr,*tmp._ZINTptr,shift); 11322 } 11323 } 11324 else { 11325 if (tmp.ref_count()!=1) 11326 tmp=*tmp._ZINTptr; // make a copy 11327 mpz_mul_2exp(*tmp._ZINTptr,*tmp._ZINTptr,shift); 
11328 } 11329 } 11330 11331 // z1 <- z1*2^(expoN-shift) mod 2^expoN+1 11332 void shiftsmod2N(mpz_t & z1,int expoN,int shift,mpz_t & tmpqz,bool do_smod=false){ 11333 mpz_tdiv_q_2exp(tmpqz,z1,expoN-shift); 11334 mpz_tdiv_r_2exp(z1,z1,expoN-shift); 11335 mpz_mul_2exp(z1,z1,shift); 11336 mpz_sub(z1,z1,tmpqz); 11337 mpz_tdiv_q_2exp(tmpqz,z1,expoN); 11338 if (mpz_cmp_si(tmpqz,0)){ 11339 mpz_tdiv_r_2exp(z1,z1,expoN); 11340 mpz_sub(z1,z1,tmpqz); 11341 } 11342 if (!do_smod) 11343 return; 11344 mpz_tdiv_q_2exp(tmpqz,z1,expoN-1); 11345 if (mpz_cmp_si(tmpqz,0)){ 11346 mpz_sub(z1,z1,tmpqz); 11347 mpz_mul_2exp(tmpqz,tmpqz,expoN); 11348 mpz_sub(z1,z1,tmpqz); 11349 } 11350 } 11351 11352 // Fast Fourier Transform, f the poly sum_{j<n} f_j x^j, 11353 // first call omega=2^r, omega is a 2^(l+1)-root of unity 11354 // computation is done modulo N=2^{r*2^(l)}+1 11355 // recursive calls will replace r by r*2^k with k<=l 11356 // return [f(1),f(omega),...,f(omega^[n-1]) 11357 // WARNING f is given in ascending power 11358 // this version assumes that all integers are free _ZINT 11359 void fft2rl(gen * f,long n,int r,int l,gen * t,bool direct,gen & tmp1, gen & tmp2,mpz_t & tmpqz){ 11360 if (n==1) return; 11361 unsigned long expoN = r<<l; 11362 if (n==2){ 11363 mpz_add(*tmp1._ZINTptr,*f[0]._ZINTptr,*f[1]._ZINTptr); 11364 smod2N(*tmp1._ZINTptr,expoN,tmpqz); 11365 mpz_sub(*tmp2._ZINTptr,*f[0]._ZINTptr,*f[1]._ZINTptr); 11366 smod2N(*tmp2._ZINTptr,expoN,tmpqz); 11367 mpz_set(*f[0]._ZINTptr,*tmp1._ZINTptr); 11368 mpz_set(*f[1]._ZINTptr,*tmp2._ZINTptr); 11369 return; 11370 } 11371 // gen F0,F1,F2,F3; 11372 if (n==4){ 11373 mpz_t & z1=*tmp1._ZINTptr; 11374 mpz_t & z2=*tmp2._ZINTptr; 11375 mpz_add(z1,*f[0]._ZINTptr,*f[2]._ZINTptr); // z1=f0+f2 11376 mpz_add(z2,*f[1]._ZINTptr,*f[3]._ZINTptr); // z2=f1+f3 11377 mpz_add(*t[0]._ZINTptr,z1,z2); // t0=f0+f1+f2+f3 11378 mpz_sub(*t[2]._ZINTptr,z1,z2); // t2=f0-f1+f2-f3 11379 mpz_sub(z1,*f[1]._ZINTptr,*f[3]._ZINTptr); // z1=f1-f3 11380 
shiftsmod2N(z1,expoN,expoN/2,z2); // z1=(f1-f3)*w 11381 mpz_sub(z2,*f[0]._ZINTptr,*f[2]._ZINTptr); // z2=f0-f2 11382 if (direct){ 11383 mpz_add(*f[1]._ZINTptr,z2,z1); // f1=f0-f2+(f1-f3)*w 11384 mpz_sub(*f[3]._ZINTptr,z2,z1); // f3=f0-f2-(f1-f3)*w 11385 } 11386 else { 11387 mpz_add(*f[3]._ZINTptr,z2,z1); // f3=f0-f2+(f1-f3)*w 11388 mpz_sub(*f[1]._ZINTptr,z2,z1); // f1=f0-f2-(f1-f3)*w 11389 } 11390 // F0=*t[0]._ZINTptr; F1=*t[1]._ZINTptr; F2=*t[2]._ZINTptr; F3=*t[3]._ZINTptr; 11391 mpz_set(*f[0]._ZINTptr,*t[0]._ZINTptr); 11392 mpz_set(*f[2]._ZINTptr,*t[2]._ZINTptr); 11393 return; // not full reduced mod N 11394 } 11395 unsigned long m=1<<(l+1); 11396 long step=r*(direct?m/n:-long(m/n)); // step is a power of 2 11397 gen * r0=t,*r1=t+n/2; 11398 gen * it=f,*itn=it+n/2,*itend=itn; 11399 unsigned long shift=direct?0:expoN; 11400 // first step with 0 shift 11401 mpz_add(*r0->_ZINTptr,*it->_ZINTptr,*itn->_ZINTptr); 11402 smod2N(*r0->_ZINTptr,expoN,tmpqz); 11403 mpz_sub(*r1->_ZINTptr,*it->_ZINTptr,*itn->_ZINTptr); 11404 smod2N(*r1->_ZINTptr,expoN,tmpqz); 11405 for (++itn,shift+=step,++it,++r0,++r1;it!=itend;++itn,shift+=step,++it,++r0,++r1){ 11406 mpz_t & z0=*tmp1._ZINTptr; 11407 mpz_t & z1=*tmp2._ZINTptr; 11408 mpz_add(z0,*it->_ZINTptr,*itn->_ZINTptr); 11409 if (mpz_sizeinbase(z0,2)>=expoN) 11410 smod2N(z0,expoN,tmpqz); 11411 mpz_set(*r0->_ZINTptr,z0); 11412 if (direct) 11413 mpz_sub(z1,*it->_ZINTptr,*itn->_ZINTptr); 11414 else 11415 mpz_sub(z1,*itn->_ZINTptr,*it->_ZINTptr); 11416 #if 1 11417 shiftsmod2N(z1,expoN,shift,tmpqz); 11418 #else 11419 mpz_mul_2exp(z1,z1,shift); 11420 smod2N(z1,expoN,tmpqz); 11421 #endif 11422 mpz_set(*r1->_ZINTptr,z1); 11423 } 11424 // Recursive calls 11425 gen * r0f=f, * r1f=f+n/2; 11426 fft2rl(t,n/2,r,l,r0f,direct,tmp1,tmp2,tmpqz); 11427 fft2rl(t+n/2,n/2,r,l,r1f,direct,tmp1,tmp2,tmpqz); 11428 // Return a mix of r0/r1 11429 it=t; itend=it+n/2; itn=t+n/2; 11430 #ifdef USE_GMP_REPLACEMENTS 11431 for (;it!=itend;){ 11432 
mpz_set(tmpqz,*it->_ZINTptr); 11433 mpz_set(*it->_ZINTptr,*f->_ZINTptr); 11434 mpz_set(*f._ZINTptr,tmpqz); 11435 ++it; ++f; 11436 mpz_set(tmpqz,*itn->_ZINTptr); 11437 mpz_set(*itn->_ZINTptr,*f->_ZINTptr); 11438 mpz_set(*f._ZINTptr,tmpqz); 11439 ++itn; ++f; 11440 } 11441 #else 11442 for (;it!=itend;){ 11443 mpz_swap(*f->_ZINTptr,*it->_ZINTptr); 11444 ++it; ++f; 11445 mpz_swap(*f->_ZINTptr,*itn->_ZINTptr); 11446 ++itn; ++f; 11447 } 11448 #endif 11449 // if (n==4 && (f[-4]!=F0 || f[-3]!=F1 || f[-2]!=F2 || f[-1]!=F3)) COUT << "err" << '\n'; 11450 } 11451 11452 // Fast Fourier Transform, f the poly sum_{j<n} f_j x^j, 11453 // first call omega=2^r, omega is a 2^(l+1)-root of unity 11454 // computation is done modulo N=2^{r*2^(l)}+1 11455 // recursive calls will replace r by r*2^k with k<=l 11456 // return [f(1),f(omega),...,f(omega^[n-1]) 11457 // WARNING f is given in ascending power 11458 void fft2rl(const modpoly & f,int r,int l,modpoly & res,bool direct,mpz_t & tmpqz){ 11459 unsigned long n=f.size(); 11460 unsigned long expoN = r<<l; 11461 if (1 11462 //&& expoN>30 11463 ){ 11464 modpoly F(f);res.clear(); res.resize(n); // free copy of F 11465 for (size_t i=0;i<n;++i){ 11466 if (F[i].type==_INT_) 11467 F[i].uncoerce(expoN+1); 11468 else 11469 F[i]=*F[i]._ZINTptr; 11470 res[i].uncoerce(expoN+1); 11471 } 11472 gen tmp1,tmp2; tmp1.uncoerce(); tmp2.uncoerce(); 11473 fft2rl(&F.front(),n,r,l,&res.front(),direct,tmp1,tmp2,tmpqz); 11474 F.swap(res); 11475 return; 11476 } 11477 if (n==1) return; 11478 if (n==2){ 11479 gen tmp=f[0]+f[1]; 11480 smod2N(tmp,expoN,tmpqz); 11481 res[0]=tmp; 11482 tmp=f[0]-f[1]; 11483 smod2N(tmp,expoN,tmpqz); 11484 res[1]=tmp; 11485 return; 11486 } 11487 unsigned long m=1<<(l+1); 11488 long step=r*(direct?m/n:-long(m/n)); // step is a power of 2 11489 modpoly r0,r1; r0.reserve(n/2); r1.reserve(n/2); 11490 const_iterateur it=f.begin(),itn=it+n/2,itend=itn; 11491 unsigned long shift=direct?0:expoN; 11492 gen tmp; 11493 // first step with 0 shift 11494 
tmp=*it+(*itn); 11495 smod2N(tmp,expoN,tmpqz); 11496 r0.push_back(tmp); 11497 tmp=(*it)-(*itn); 11498 //if (!direct) tmp=-tmp; 11499 smod2N(tmp,expoN,tmpqz); 11500 r1.push_back(tmp); 11501 for (++itn,shift+=step,++it;it!=itend;++itn,shift+=step,++it){ 11502 tmp=(*it)+(*itn); 11503 smod2N(tmp,expoN,tmpqz); 11504 r0.push_back(tmp); 11505 if (direct) 11506 tmp = (*it)-(*itn); 11507 else 11508 tmp = (*itn)-(*it); 11509 shift2N(tmp,shift); 11510 smod2N(tmp,expoN,tmpqz); 11511 r1.push_back(tmp); 11512 } 11513 // Recursive calls 11514 modpoly r0f(n/2),r1f(n/2); 11515 fft2rl(r0,r,l,r0f,direct,tmpqz); 11516 fft2rl(r1,r,l,r1f,direct,tmpqz); 11517 // Return a mix of r0/r1 11518 res.clear(); 11519 res.reserve(n); 11520 it=r0f.begin(); itend=it+n/2; itn=r1f.begin(); 11521 for (;it!=itend;){ 11522 res.push_back(*it); 11523 ++it; 11524 res.push_back(*itn); 11525 ++itn; 11526 } 11527 } 11528 11529 // alpha[i] *= beta[i] mod 2^(expoN)+1 11530 void fft2rltimes(modpoly & alpha,const modpoly & beta,unsigned long expoN,mpz_t & tmp,mpz_t & tmpqz){ 11531 int n=alpha.size(); 11532 for (unsigned long i=0;i<n;++i){ 11533 if (alpha[i].type==_ZINT && beta[i].type==_ZINT){ 11534 mpz_mul(tmp,*alpha[i]._ZINTptr,*beta[i]._ZINTptr); 11535 smod2N(tmp,expoN,tmpqz); 11536 mpz_set(*alpha[i]._ZINTptr,tmp); 11537 } 11538 else { 11539 type_operator_times(alpha[i],beta[i],alpha[i]); // alpha[i]=alpha[i]*beta[i]; 11540 smod2N(alpha[i],expoN,tmpqz); 11541 } 11542 } 11543 } 11544 11545 // alpha[i] *= beta[i] mod 2^(expoN)+1 11546 void fft2rltimes(const modpoly & alpha,const modpoly & beta,modpoly & res,unsigned long expoN,mpz_t & tmp,mpz_t & tmpqz){ 11547 int n=alpha.size(); 11548 for (unsigned long i=0;i<n;++i){ 11549 if (alpha[i].type==_ZINT && beta[i].type==_ZINT){ 11550 mpz_mul(tmp,*alpha[i]._ZINTptr,*beta[i]._ZINTptr); 11551 smod2N(tmp,expoN,tmpqz); 11552 mpz_set(*res[i]._ZINTptr,tmp); 11553 } 11554 else 11555 COUT << "fft2rltimes type error" << '\n'; 11556 } 11557 } 11558 11559 // pq *= -2^shift mod 
N=2^(expoN+1) where -2^shift is the inverse of n mod N 11560 void fft2rldiv(modpoly & pq,unsigned long expoN,unsigned long shift,mpz_t & tmp,mpz_t & tmpqz){ 11561 int n=pq.size(); 11562 for (unsigned long i=0;i<n;++i){ 11563 // pq[i]=-pq[i]; 11564 if (pq[i].type==_INT_) 11565 mpz_set_si(tmp,-pq[i].val); 11566 else 11567 mpz_neg(tmp,*pq[i]._ZINTptr); 11568 #if 1 11569 shiftsmod2N(tmp,expoN,shift,tmpqz,true); 11570 #else 11571 mpz_mul_2exp(tmp,tmp,shift); 11572 smod2N(tmp,expoN,tmpqz,true); 11573 #endif 11574 if (mpz_sizeinbase(tmp,2)<31) 11575 pq[i]=mpz_get_si(tmp); 11576 else { 11577 if (pq[i].type==_ZINT) 11578 mpz_set(*pq[i]._ZINTptr,tmp); 11579 else 11580 pq[i]=tmp; 11581 } 11582 } 11583 reverse(pq.begin(),pq.end()); 11584 trim_inplace(pq); 11585 } 11586 11587 void fftprod2rl(const modpoly & p0,const modpoly &q0,int r,int l,modpoly & pq){ 11588 if (debug_infolevel>3) 11589 CERR << CLOCK()*1e-6 << " fftmult 2^r as a 2^l-root r=" << r << " l=" << l << '\n' ; 11590 mpz_t tmp,tmpqz; mpz_init(tmp); mpz_init(tmpqz); 11591 unsigned long expoN=r << l; // r*2^l 11592 unsigned long n=1<<(l+1); 11593 modpoly p(p0),q(q0); 11594 reverse(p.begin(),p.end()); 11595 reverse(q.begin(),q.end()); 11596 unsigned long ps=long(p.size()),qs=long(q.size()); 11597 for (unsigned long i=ps;i<n;++i) 11598 p.push_back(0); 11599 for (unsigned long i=qs;i<n;++i) 11600 q.push_back(0); 11601 modpoly alpha(n),beta(n); 11602 fft2rl(p,r,l,alpha,true,tmpqz); 11603 fft2rl(q,r,l,beta,true,tmpqz); 11604 fft2rltimes(alpha,beta,expoN,tmp,tmpqz); 11605 fft2rl(alpha,r,l,pq,false,tmpqz); 11606 // divide by n mod N and coerce 11607 // 2^{r*2^l}}=-1 mod N therefore n=2^{l+1} inverse is -2^{r*2^l-(l+1)} 11608 unsigned long shift=expoN-l-1; 11609 fft2rldiv(pq,expoN,shift,tmp,tmpqz); 11610 mpz_clear(tmpqz); mpz_clear(tmp); 11611 if (debug_infolevel>3) 11612 CERR << CLOCK()*1e-6 << " fftmult end 2^r as a 2^l-root r=" << r << " l=" << l << '\n' ; 11613 } 11614 11615 11616 // Fast Fourier Transform, f the poly 
sum_{j<n} f_j x^j, 11617 // and w=[1,omega,...,omega^[m-1]] with m a multiple of n (m=step*n) 11618 // return [f(1),f(omega),...,f(omega^[n-1]) [it's indeed n, not m] 11619 // WARNING f is given in ascending power 11620 void fft(const modpoly & f,const modpoly & w ,modpoly & res,environment * env){ 11621 if (env && env->moduloon && env->modulo.type==_INT_ && is_integer_vecteur(f,true) && is_integer_vecteur(w,true)){ 11622 vector<int> F=vecteur_2_vector_int(f); 11623 vector<int> W=vecteur_2_vector_int(w); 11624 vector<int> RES(F.size()); 11625 int m=env->modulo.val; 11626 #ifndef FXCG 11627 if (debug_infolevel>2) 11628 CERR << CLOCK()*1e-6 << " begin fft int " << W.size() << " memory " << memory_usage()*1e-6 << "M" << '\n'; 11629 #endif 11630 fft(F,W,RES,m); 11631 #ifndef FXCG 11632 if (debug_infolevel>2) 11633 CERR << CLOCK()*1e-6 << " end fft int " << W.size() << " memory " << memory_usage()*1e-6 << "M" << '\n'; 11634 #endif 11635 unsigned n=unsigned(RES.size()); 11636 res.clear(); 11637 res.reserve(n); 11638 for (unsigned i=0;i<n;++i){ 11639 if (RES[i]<0) 11640 res.push_back(RES[i]+m); 11641 else 11642 res.push_back(RES[i]); 11643 } 11644 return; 11645 } 11646 unsigned long n=long(f.size()); // unsigned long does not parse with gcc 11647 if (n==1){ 11648 res = f; 11649 return ; 11650 } 11651 unsigned long m=long(w.size()); 11652 unsigned long step=m/n; 11653 unsigned k=0; 11654 if (n%2){ 11655 for (k=3;k*k<=n;k++){ 11656 if (!(n%k)) 11657 break; 11658 } 11659 } 11660 else 11661 k=2; 11662 if (k*k>n){ 11663 // prime size, slow discrete Fourier transform 11664 res.clear(); 11665 res.reserve(n); 11666 gen tmp; 11667 unsigned pos; 11668 for (unsigned i=0;i<n;++i){ 11669 tmp = 0; 11670 pos = 0; 11671 for (unsigned j=0;j<n;++j){ 11672 tmp = tmp + f[j]*w[pos]; 11673 pos = (pos+i*step)%m; 11674 if (env && env->moduloon) 11675 tmp=smod(tmp,env->modulo); 11676 } 11677 res.push_back(tmp); 11678 } 11679 return; 11680 } 11681 if (k!=2){ 11682 // assumes n is divisible by k, 
nk=n/k 11683 // P(X)=P_k(X)*[X^nk]^(k-1)+...+P_1(X) degree(P_k)<nk 11684 // P(w^(kj+l))= Q_l ( (w^k)^j ) 11685 // with Q_l=P_1^(w^l)+w^(nk)*P_2^(w^l)+... 11686 unsigned long n2=n/k; 11687 vector<modpoly> Q(k),Qfft(k); 11688 for (unsigned j=0;j<k;++j) 11689 Q[j]=vecteur(n2,0); 11690 gen tmp; 11691 for (unsigned j=0;j<k;j++){ 11692 // find Q[j] 11693 for (unsigned i=0;i<n2;i++){ 11694 tmp=0; 11695 for (unsigned J=0;J<k;J++){ 11696 tmp += f[J*n2+i]*w[(J*j*n2*step)%m]; 11697 } 11698 tmp=tmp*w[j*step*i]; 11699 if (env && env->moduloon) 11700 tmp=smod(tmp,env->modulo); 11701 Q[j][i]=tmp; 11702 } 11703 fft(Q[j],w,Qfft[j],env); 11704 } 11705 // build fft 11706 res.clear(); 11707 res.reserve(n); 11708 for (unsigned i=0;i<n2;++i){ 11709 for (unsigned j=0;j<k;++j) 11710 res.push_back(Qfft[j][i]); 11711 } 11712 return; 11713 } 11714 // Compute r0=sum_[j<n/2] (f_j+f_(j+n/2))*x^j 11715 // and r1=sum_[j<n/2] (f_j-f_(j+n/2))*omega^[step*j]*x^j 11716 unsigned long n2=n/2; 11717 modpoly r0,r1; 11718 r0.reserve(n2); r1.reserve(n2); 11719 const_iterateur it=f.begin(),itn=it+n2,itend=itn,itk=w.begin(); 11720 gen tmp; 11721 for (;it!=itend;++itn,itk+=step,++it){ 11722 tmp=(*it)+(*itn); 11723 if (env && env->moduloon) 11724 tmp=smod(tmp,env->modulo); 11725 r0.push_back(tmp); 11726 tmp=((*it)-(*itn))*(*itk); 11727 if (env && env->moduloon) 11728 tmp=smod(tmp,env->modulo); 11729 r1.push_back(tmp); 11730 } 11731 // Recursive call 11732 modpoly r0f(n2),r1f(n2); 11733 fft(r0,w,r0f,env); 11734 fft(r1,w,r1f,env); 11735 // Return a mix of r0/r1 11736 res.clear(); 11737 res.reserve(n); 11738 it=r0f.begin(); itend=it+n2; itn=r1f.begin(); 11739 for (;it!=itend;){ 11740 res.push_back(*it); 11741 ++it; 11742 res.push_back(*itn); 11743 ++itn; 11744 } 11745 } 11746 11747 static void fft2( complex<double> *A, int n, complex<double> *W, complex<double> *T ) { 11748 if ( n==1 ) return; 11749 // if p is fixed, the code is about 2* faster 11750 if (n==4){ 11751 complex<double> w1=W[1]; 11752 complex<double> 
f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=(f1-f3)*w1; 11753 A[0]=(f0+f1+f2+f3); 11754 A[1]=(f0-f2+f01); 11755 A[2]=(f0-f1+f2-f3); 11756 A[3]=(f0-f2-f01); 11757 return; 11758 } 11759 if (n==2){ 11760 complex<double> f0=A[0],f1=A[1]; 11761 A[0]=(f0+f1); 11762 A[1]=(f0-f1); 11763 return; 11764 } 11765 int i,n2; 11766 n2 = n/2; 11767 // Step 1 : arithmetic 11768 complex<double> * Tn2=T+n2,*An2=A+n2; 11769 for( i=0; i<n2; ++i ) { 11770 complex<double> Ai,An2i; 11771 Ai=A[i]; 11772 An2i=An2[i]; 11773 T[i] = Ai+An2i; // addmod(Ai,An2i,p); 11774 Tn2[i] = (Ai-An2i)*W[i]; // submod(Ai,An2i,p); mulmod(t,W[i],p); 11775 i++; 11776 Ai=A[i]; 11777 An2i=An2[i]; 11778 T[i] = Ai+An2i; // addmod(Ai,An2i,p); 11779 Tn2[i] = (Ai-An2i)*W[i]; // submod(Ai,An2i,p); mulmod(t,W[i],p); 11780 } 11781 // Step 2 : recursive calls 11782 fft2( T, n2, W+n2, A ); 11783 fft2( Tn2, n2, W+n2, A+n2 ); 11784 // Step 3 : permute 11785 for( i=0; i<n2; ++i ) { 11786 A[ 2*i] = T[i]; 11787 A[2*i+1] = Tn2[i]; 11788 ++i; 11789 A[ 2*i] = T[i]; 11790 A[2*i+1] = Tn2[i]; 11791 } 11792 return; 11793 } 11794 11795 void fft2( complex<double> * A, int n, double theta){ 11796 #ifndef FXCG 11797 if (debug_infolevel>2) 11798 CERR << CLOCK()*1e-6 << " begin fft2 C " << n << " memory " << memory_usage()*1e-6 << "M" << '\n'; 11799 #endif 11800 vector< complex<double> > W,T(n); 11801 W.reserve(n); 11802 double thetak(theta); 11803 for (int N=n/2;N;N/=2,thetak*=2){ 11804 complex<double> ww(1); 11805 complex<double> wk(std::cos(thetak),std::sin(thetak)); 11806 for (int i=0;i<N;ww=ww*wk,++i){ 11807 if (i%64==0) 11808 ww=complex<double>(std::cos(i*thetak),std::sin(i*thetak)); 11809 W.push_back(ww); 11810 } 11811 } 11812 fft2(A,n,&W.front(),&T.front()); 11813 #ifndef FXCG 11814 if (debug_infolevel>2) 11815 CERR << CLOCK()*1e-6 << " end fft C " << n << " memory " << memory_usage()*1e-6 << "M" << '\n'; 11816 #endif 11817 } 11818 11819 void fft(std::complex<double> * f,int n,const std::complex<double> * w,int m,complex< double> * t){ 11820 
if (n==1) 11821 return ; 11822 int step=m/n; 11823 int k=0; 11824 if (n%2){ 11825 for (k=3;k*k<=n;k++){ 11826 if (!(n%k)) 11827 break; 11828 } 11829 } 11830 else 11831 k=2; 11832 if (k*k>n){ 11833 // prime size, slow discrete Fourier transform 11834 complex<double> *fj,*fend_=f+n-3,*fend=f+n; 11835 complex<double> * res=t; 11836 for (int i=0;i<n;++i){ 11837 complex<double> tmp (0,0); 11838 int pos=0,istep=i*step; 11839 for (fj=f;fj<fend_;fj+=3){ 11840 tmp += fj[0]*w[pos]; 11841 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 11842 tmp += fj[1]*w[pos]; 11843 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 11844 tmp += fj[2]*w[pos]; 11845 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 11846 } 11847 for (;fj<fend;++fj){ 11848 tmp += (*fj)*w[pos]; 11849 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 11850 } 11851 *res=tmp; 11852 ++res; 11853 } 11854 for (fj=f,res=t;fj<fend;++fj,++res){ 11855 *fj=*res; 11856 } 11857 return; 11858 } 11859 if (k!=2){ 11860 // assumes n is divisible by k, nk=n/k 11861 // P(X)=P_k(X)*[X^nk]^(k-1)+...+P_1(X) degree(P_k)<nk 11862 // P(w^(kj+l))= Q_l ( (w^k)^j ) 11863 // with Q_l=P_1^(w^l)+w^(nk)*P_2^(w^l)+... 
11864 unsigned long n2=n/k; 11865 for (int j=0;j<k;j++){ 11866 // find Q[j] 11867 complex<double> * Qj=t+n2*j; 11868 for (unsigned i=0;i<n2;i++){ 11869 complex<double> tmp(0,0); 11870 int pos=0,jn2step=j*n2*step; 11871 const complex<double> * fi=&f[i], *fiend=fi+k*n2; 11872 for (;fi<fiend;fi+=n2){ 11873 tmp += (*fi)*w[pos]; 11874 pos += jn2step-m; pos += (unsigned(pos)>>31)*m; 11875 } 11876 Qj[i]=tmp*w[j*step*i]; 11877 } 11878 } 11879 for (int j=0;j<k;++j){ 11880 fft(t+n2*j,n2,w,m,f+n2*j); 11881 } 11882 // build fft 11883 for (unsigned i=0;i<n2;++i){ 11884 for (int j=0;j<k;++j,++f) 11885 *f=t[n2*j+i]; 11886 } 11887 return; 11888 } 11889 // Compute r0=sum_[j<n/2] (f_j+f_(j+n/2))*x^j 11890 // and r1=sum_[j<n/2] (f_j-f_(j+n/2))*omega^[step*j]*x^j 11891 unsigned long n2=n/2; 11892 complex<double> * r0=t, *r1=t+n2; 11893 complex<double> * it=f,*itn=f+n2,*itend=itn; 11894 const complex<double> *itk=w; 11895 for (;it!=itend;++itn,itk+=step,++it,++r0,++r1){ 11896 *r0=*it+*itn; 11897 *r1=(*it-*itn)*(*itk); 11898 } 11899 // Recursive call 11900 complex<double> * r0f=f,*r1f=f+n2; 11901 fft(t,n2,w,m,r0f); 11902 fft(t+n2,n2,w,m,r1f); 11903 // Return a mix of r0/r1 11904 it=t; itend=t+n2; itn=t+n2; 11905 for (;it!=itend;){ 11906 *f=*it; 11907 ++it; ++f; 11908 *f=*itn; 11909 ++itn; ++f; 11910 } 11911 } 11912 11913 // inplace fft with positive representant 11914 static inline int addmod(int a, int b, int p) { 11915 int t=(a-p)+b; 11916 #ifdef EMCC 11917 if (t<0) return t+p; else return t; 11918 #else 11919 t += (t>>31)&p; 11920 return t; 11921 #endif 11922 } 11923 static inline int submod(int a, int b, int p) { 11924 int t=a-b; 11925 #ifdef EMCC 11926 if (t<0) return t+p; else return t; 11927 #else 11928 t += (t>>31)&p; 11929 return t; 11930 #endif 11931 } 11932 11933 static inline int mulmod(int a, int b, int p) { 11934 return (longlong(a)*b) % p; 11935 } 11936 11937 inline int mulmodp1(int a,int b){ 11938 return amodp((longlong(a)*b),p1,invp1); // FIXME? 
11939 return (longlong(a)*b) % p1; 11940 } 11941 11942 inline int precond_mulmodp1(unsigned A,unsigned W,unsigned Winvp){ 11943 longlong t = ulonglong(A)*W-((ulonglong(A)*Winvp)>>32)*p1; 11944 return t- (t>>63)*p1; 11945 int tt= t- (t>>63)*p1; 11946 unsigned s=(ulonglong(A)*W)%p1; 11947 if (tt!=s) 11948 CERR << '\n'; 11949 return s; 11950 } 11951 11952 inline int mulmodp2(int a,int b){ 11953 return amodp((longlong(a)*b),p2,invp2); // FIXME? 11954 return (longlong(a)*b) % p2; 11955 } 11956 11957 inline int precond_mulmodp2(unsigned A,unsigned W,unsigned Winvp){ 11958 longlong t = ulonglong(A)*W-((ulonglong(A)*Winvp)>>32)*p2; 11959 return t- (t>>63)*p2; 11960 int tt= t- (t>>63)*p2; 11961 unsigned s=(ulonglong(A)*W)%p2; 11962 if (tt!=s) 11963 CERR << '\n'; 11964 return s; 11965 } 11966 11967 inline int mulmodp3(int a,int b){ 11968 return amodp((longlong(a)*b),p3,invp3); 11969 return (longlong(a)*b) % p3; 11970 } 11971 11972 inline int precond_mulmodp3(unsigned A,unsigned W,unsigned Winvp){ 11973 longlong t = ulonglong(A)*W-((ulonglong(A)*Winvp)>>32)*p3; 11974 return t- (t>>63)*p3; 11975 int tt= t- (t>>63)*p3; 11976 unsigned s=(ulonglong(A)*W)%p3; 11977 if (tt!=s) 11978 CERR << '\n'; 11979 return s; 11980 } 11981 11982 inline int mulmodp4(int a,int b){ 11983 return amodp((longlong(a)*b),p4,invp4); 11984 return (longlong(a)*b) % p4; 11985 } 11986 11987 // this should probably not be defined because gcc does division by csts 11988 // with multiplication itself 11989 11990 #ifdef GIAC_PRECOND // preconditionned 11991 inline void fft_loop_p1(int & A,int & An2,int W,int Winv){ 11992 int s = A; 11993 int t1; 11994 // t1=longlong(*An2)*(*W)-((longlong(*An2)*(*(W+n2)))>>31)*p1; t1 -= (t1>>31)*p1; 11995 t1=precond_mulmodp1(An2,W,Winv); 11996 A = addmod(s,t1,p1); 11997 An2 = submod(s,t1,p1); 11998 } 11999 #else // not preconditionned 12000 inline void fft_loop_p1(int & A,int & An2,int W){ 12001 int s=A; 12002 int t = mulmodp1(W,An2); 12003 // if (t1!=t) CERR << t1 << " " << t << 
'\n'; 12004 A = addmod(s,t,p1); 12005 An2 = submod(s,t,p1); 12006 } 12007 #endif 12008 12009 inline void fft_loop_p1_(int * Acur,int *An2cur,int * Wcur,int n2){ 12010 int Ai,An2i; 12011 Ai=*Acur; 12012 An2i=*An2cur; 12013 *Acur = addmod(Ai,An2i,p1); 12014 #ifdef GIAC_PRECOND 12015 *An2cur=precond_mulmodp1(submod(Ai,An2i,p1),*Wcur,*(Wcur+n2)); 12016 #else 12017 *An2cur=amodp((longlong(Ai)+p1-An2i)* *Wcur,p1,invp1); 12018 #endif 12019 } 12020 12021 inline void fft_loop_p2(int * A,int *An2,int *W,int n2){ 12022 int s = *A; 12023 #ifdef GIAC_PRECOND // preconditionned 12024 int t=precond_mulmodp2(*An2,*W,*(W+n2)); 12025 #else // not preconditionned 12026 int t = mulmodp2(*W,*An2); 12027 #endif 12028 *A = addmod(s,t,p2); 12029 *An2 = submod(s,t,p2); 12030 } 12031 12032 inline void fft_loop_p2_(int * Acur,int *An2cur,int * Wcur,int n2){ 12033 int Ai,An2i; 12034 Ai=*Acur; 12035 An2i=*An2cur; 12036 *Acur = addmod(Ai,An2i,p2); 12037 #ifdef GIAC_PRECOND 12038 *An2cur=precond_mulmodp2(submod(Ai,An2i,p2),*Wcur,*(Wcur+n2)); 12039 #else 12040 *An2cur=amodp((longlong(Ai)+p2-An2i)* *Wcur,p2,invp2); 12041 #endif 12042 } 12043 12044 inline void fft_loop_p3(int * A,int *An2,int *W,int n2){ 12045 int s = *A; 12046 #ifdef GIAC_PRECOND // preconditionned 12047 int t=precond_mulmodp3(*An2,*W,*(W+n2)); 12048 #else // not preconditionned 12049 int t = mulmodp3(*W,*An2); 12050 #endif 12051 *A = addmod(s,t,p3); 12052 *An2 = submod(s,t,p3); 12053 } 12054 12055 inline void fft_loop_p3_(int * Acur,int *An2cur,int * Wcur,int n2){ 12056 int Ai,An2i; 12057 Ai=*Acur; 12058 An2i=*An2cur; 12059 *Acur = addmod(Ai,An2i,p3); 12060 #ifdef GIAC_PRECOND 12061 *An2cur=precond_mulmodp3(submod(Ai,An2i,p3),*Wcur,*(Wcur+n2)); 12062 #else 12063 *An2cur=amodp((longlong(Ai)+p3-An2i)* *Wcur,p3,invp3); 12064 #endif 12065 } 12066 12067 // Interesting primes (from A parallel implementation for polynomial multiplication modulo a prime, Law & Monagan, pasco 2015) 12068 // p:=2^25; for k from 64 downto 1 do if 
isprime(k*p+1) then print(k*p+1); fi od 12069 // p1 := 2013265921 ; r:=1227303670; root of unity order 2^27 (15*2^27+1) 12070 // p2 := 1811939329 ; r:=814458146; order 2^26 12071 // p3 := 469762049 ; r:=2187; order 2^26 12072 // p4 := 2113929217 ; ( 63×2^25 +1) 12073 // p5 := 1711276033 ; ( 51×2^25 +1 ) 12074 // For polynomial multiplication applications mod a prime p <2^32 12075 // with degree product<2^26 12076 // make multiplication in Z[x] before reducing modulo p 12077 // multiplication in Z[x] is computed by chinrem 12078 // from multiplication in Z/p1, Z/p2, Z/p3 using fft 12079 // of size 2^k>degree(product), root of unity from a power of r 12080 // For multiplication in Z[x], do it mod sufficiently many primes<2^32 12081 // input A with positive int, output fft in A 12082 // W must contain 12083 // [1,w,...,w^(n/2-1),1,w^2,w^4,...,w^(n/2-2),1,w^4,...,w^(n/2-4)...,1,w^(n/4),1] 12084 static void fft2p1( int *A, int n, int *W, int *T,int step=1) { 12085 int i,n2,t; 12086 if ( n==1 ) return; 12087 // if p is fixed, the code is about 2* faster 12088 if (n==4){ 12089 int w1=W[step]; 12090 #if 1 12091 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p1),w1,p1),f02p=addmod(f0,f2,p1),f02m=submod(f0,f2,p1),f13=addmod(f1,f3,p1); 12092 A[0]=addmod(f02p,f13,p1); 12093 A[1]=addmod(f02m,f01,p1); 12094 A[2]=submod(f02p,f13,p1); 12095 A[3]=submod(f02m,f01,p1); 12096 #else 12097 longlong f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=(f1-f3)*w1; 12098 A[0]=(f0+f1+f2+f3)%p1; 12099 A[1]=(f0-f2+f01)%p1; 12100 A[2]=(f0-f1+f2-f3)%p1; 12101 A[3]=(f0-f2-f01)%p1; 12102 #endif 12103 return; 12104 } 12105 if (n==2){ 12106 int f0=A[0],f1=A[1]; 12107 A[0]=addmod(f0,f1,p1); 12108 A[1]=submod(f0,f1,p1); 12109 return; 12110 } 12111 n2 = n/2; 12112 // Step 1 : arithmetic 12113 int * Tn2=T+n2,*An2=A+n2; 12114 for( i=0; i<n2; ++i ) { 12115 int Ai,An2i; 12116 Ai=A[i]; 12117 An2i=An2[i]; 12118 T[i] = addmod(Ai,An2i,p1); 12119 t = submod(Ai,An2i,p1); 12120 Tn2[i] = mulmodp1(t,W[i*step]); 12121 
i++; 12122 Ai=A[i]; 12123 An2i=An2[i]; 12124 T[i] = addmod(Ai,An2i,p1); 12125 t = submod(Ai,An2i,p1); 12126 Tn2[i] = mulmodp1(t,W[i*step]); 12127 } 12128 // Step 2 : recursive calls 12129 fft2p1(T, n2, W, A,2*step); 12130 fft2p1(Tn2, n2, W, A+n2,2*step); 12131 // Step 3 : permute 12132 for( i=0; i<n2; ++i ) { 12133 A[2*i] = T[i]; 12134 A[2*i+1] = Tn2[i]; 12135 ++i; 12136 A[2*i] = T[i]; 12137 A[2*i+1] = Tn2[i]; 12138 } 12139 return; 12140 } 12141 12142 static void fft2p1nopermbefore( int *A, int n, int *W,int step=1) { 12143 if (n==0) 12144 CERR << "bug" << endl; 12145 if ( n==1 ) return; 12146 // if p is fixed, the code is about 2* faster 12147 if (n==4){ 12148 int w1=W[step]; 12149 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p1),w1,p1),f02p=addmod(f0,f2,p1),f02m=submod(f0,f2,p1),f13=addmod(f1,f3,p1); 12150 A[0]=addmod(f02p,f13,p1); 12151 A[1]=addmod(f02m,f01,p1); 12152 A[2]=submod(f02p,f13,p1); 12153 A[3]=submod(f02m,f01,p1); 12154 return; 12155 } 12156 if (n==2){ 12157 int f0=A[0],f1=A[1]; 12158 A[0]=addmod(f0,f1,p1); 12159 A[1]=submod(f0,f1,p1); 12160 return; 12161 } 12162 fft2p1nopermbefore( A, n/2, W,2*step); // fft2p1nopermbefore(A,n2,W+n2); 12163 fft2p1nopermbefore( A+n/2, n/2, W,2*step); // fft2p1nopermbefore(An2,n2,W+n2); 12164 int * An2=A+n/2; 12165 int * Aend=A+n/2; 12166 #if 0 12167 Aend=A+n; 12168 for (; An2<Aend;){ 12169 *An2=mulmodp1(*An2,*W); ++An2; W+=step; 12170 *An2=mulmodp1(*An2,*W); ++An2; W+=step; 12171 *An2=mulmodp1(*An2,*W); ++An2; W+=step; 12172 *An2=mulmodp1(*An2,*W); ++An2; W+=step; 12173 } 12174 An2=A+n/2; Aend=An2; 12175 for (;A<Aend;){ 12176 int s,t; 12177 s=A[0]; t=An2[0]; A[0]=addmod(s,t,p1); An2[0]=submod(s,t,p1); 12178 s=A[1]; t=An2[1]; A[1]=addmod(s,t,p1); An2[1]=submod(s,t,p1); 12179 s=A[2]; t=An2[2]; A[2]=addmod(s,t,p1); An2[2]=submod(s,t,p1); 12180 s=A[3]; t=An2[3]; A[3]=addmod(s,t,p1); An2[3]=submod(s,t,p1); 12181 A+=4; An2+=4; 12182 } 12183 return; 12184 #endif 12185 int n2s = n/2*step; // n2%4==0 12186 #ifdef 
GIAC_PRECOND 12187 #if 1 12188 for(; A<Aend; ) { 12189 fft_loop_p1(*A,*An2,*W,*(W+n2s)); 12190 ++A; ++An2; W +=step ; 12191 fft_loop_p1(*A,*An2,*W,*(W+n2s)); 12192 ++A; ++An2; W +=step ; 12193 fft_loop_p1(*A,*An2,*W,*(W+n2s)); 12194 ++A; ++An2; W += step; 12195 fft_loop_p1(*A,*An2,*W,*(W+n2s)); 12196 ++A; ++An2; W +=step; 12197 } 12198 #else 12199 for(int i=0; i<n/2; i +=4 ) { 12200 fft_loop_p1(A[i],An2[i],W[i*step],W[(i+n2s)*step]); 12201 fft_loop_p1(A[i+1],An2[i+1],W[(i+1)*step],W[(i+1+n2s)*step]); 12202 fft_loop_p1(A[i+2],An2[i+2],W[(i+2)*step],W[(i+2+n2s)*step]); 12203 fft_loop_p1(A[i+3],An2[i+3],W[(i+3)*step],W[(i+3+n2s)*step]); 12204 } 12205 #endif 12206 #else // GIAC_PRECOND 12207 for(; A<Aend; ) { 12208 #if 1 12209 fft_loop_p1(*A,*An2,*W); 12210 ++A; ++An2; W +=step ; 12211 fft_loop_p1(*A,*An2,*W); 12212 ++A; ++An2; W +=step ; 12213 fft_loop_p1(*A,*An2,*W); 12214 ++A; ++An2; W += step; 12215 fft_loop_p1(*A,*An2,*W); 12216 ++A; ++An2; W +=step; 12217 #else 12218 fft_loop_p1(A[0],An2[0],W[0]); 12219 fft_loop_p1(A[1],An2[1],W[step]); 12220 fft_loop_p1(A[2],An2[2],W[2*step]); 12221 fft_loop_p1(A[3],An2[3],W[3*step]); 12222 A+=4; An2+=4; W +=4*step ; 12223 #endif 12224 } 12225 #endif // GIAC_PRECOND 12226 } 12227 12228 static void fft2p1nopermafter( int *A, int n, int *W,int step=1) { 12229 if ( n==1 ) return; 12230 // if p is fixed, the code is about 2* faster 12231 if (n==4){ 12232 int w1=W[step]; 12233 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p1),w1,p1),f02p=addmod(f0,f2,p1),f02m=submod(f0,f2,p1),f13=addmod(f1,f3,p1); 12234 A[0]=addmod(f02p,f13,p1); 12235 A[1]=addmod(f02m,f01,p1); 12236 A[2]=submod(f02p,f13,p1); 12237 A[3]=submod(f02m,f01,p1); 12238 return; 12239 } 12240 if (n==2){ 12241 int f0=A[0],f1=A[1]; 12242 A[0]=addmod(f0,f1,p1); 12243 A[1]=submod(f0,f1,p1); 12244 return; 12245 } 12246 // Step 1 : arithmetic 12247 int *An2=A+n/2; 12248 #if 1 12249 int * Acur=A,*An2cur=An2,*Wcur=W; 12250 int n2=n/2*step; 12251 for (;Acur!=An2;){ 12252 
int Ai,An2i; 12253 fft_loop_p1_(Acur,An2cur,Wcur,n2); 12254 ++Acur;++An2cur; Wcur +=step; 12255 fft_loop_p1_(Acur,An2cur,Wcur,n2); 12256 ++Acur;++An2cur; Wcur += step; 12257 fft_loop_p1_(Acur,An2cur,Wcur,n2); 12258 ++Acur;++An2cur; Wcur += step; 12259 fft_loop_p1_(Acur,An2cur,Wcur,n2); 12260 ++Acur;++An2cur; Wcur += step; 12261 } 12262 #else 12263 for( i=0; i<n/2; ++i ) { 12264 int Ai,An2i; 12265 Ai=A[i]; 12266 An2i=An2[i]; 12267 A[i] = addmod(Ai,An2i,p1); 12268 An2[i]=((longlong(Ai)+p1-An2i)*W[i*step]) % p1; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 12269 i++; 12270 Ai=A[i]; 12271 An2i=An2[i]; 12272 A[i] = addmod(Ai,An2i,p1); 12273 An2[i]=((longlong(Ai)+p1-An2i)*W[i*step]) % p1; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 12274 } 12275 #endif 12276 // Step 2 : recursive calls 12277 fft2p1nopermafter(A, n/2, W,2*step); 12278 fft2p1nopermafter(An2, n/2, W,2*step); 12279 } 12280 12281 inline void fft_loop_p_precond(int & A,int & An2,int W,int Winv,int p){ 12282 int s = A; 12283 int t1; 12284 // longlong t = ulonglong(A)*W-((ulonglong(A)*Winvp)>>32)*p; return t+ ((t>>31)&p); 12285 t1=precond_mulmodp(An2,W,Winv,p); 12286 A = addmod(s,t1,p); 12287 An2 = submod(s,t1,p); 12288 } 12289 12290 inline void fft_loop_p(int & A,int & An2,int W,int p,double invp){ 12291 int s=A; 12292 int t = mulmodp(An2,W,p,invp); 12293 //t += (t>>31)&p; // FIXME? 
12294 // if (t1!=t) CERR << t1 << " " << t << '\n'; 12295 A = addmod(s,t,p); 12296 An2 = submod(s,t,p); 12297 } 12298 12299 #if !defined NUMWORKS // !defined VISUALC && !defined USE_GMP_REPLACEMENTS && defined GIAC_PRECOND // de-recurse 12300 static void fft2pnopermbefore( int *A, int n, int *W,int p,double invp,int step) { 12301 if (n==0) 12302 CERR << "bug\n"; 12303 if (n<=1 ) return; 12304 if (n==2){ 12305 int f0=A[0],f1=A[1]; 12306 A[0]=addmod(f0,f1,p); 12307 A[1]=submod(f0,f1,p); 12308 return; 12309 } 12310 int n2s=n/2*step; 12311 // start by groups of 4 12312 step=n2s/2; 12313 int w1=W[step],w1surp=W[3*step]; 12314 int *Aeff=A; 12315 for (int pos=0;pos<n;pos+=4,Aeff+=4){ 12316 int f0=Aeff[0],f1=Aeff[1],f2=Aeff[2],f3=Aeff[3], 12317 f01=precond_mulmodp(f1-f3+p,w1,w1surp,p), 12318 f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 12319 Aeff[0]=addmod(f02p,f13,p); 12320 Aeff[1]=addmod(f02m,f01,p); 12321 Aeff[2]=submod(f02p,f13,p); 12322 Aeff[3]=submod(f02m,f01,p); 12323 } 12324 // now by 8, then by 16, etc. 
12325 int Wstack_[MAX_INTSTACK]; 12326 int *Wstack=0; 12327 if (n>MAX_INTSTACK) 12328 Wstack=(int *)malloc(n*sizeof(int)); 12329 else 12330 Wstack=Wstack_; 12331 //size_t T=n*sizeof(int); 12332 //int * Wstack=(int*)stack_or_heap_alloc(T);//int Wstack[taille]; 12333 for (int taille=8;taille<=n;taille*=2){ 12334 step /= 2; 12335 Aeff=A; 12336 if (taille==n && step==1){ 12337 int *An2=Aeff+n/2,*Aend=An2,*Weff=W+n2s; 12338 for(; Aeff<Aend; ) { 12339 fft_loop_p_precond(Aeff[0],An2[0],W[0],Weff[0],p); 12340 fft_loop_p_precond(Aeff[1],An2[1],W[1],Weff[1],p); 12341 fft_loop_p_precond(Aeff[2],An2[2],W[2],Weff[2],p); 12342 fft_loop_p_precond(Aeff[3],An2[3],W[3],Weff[3],p); 12343 Aeff+=4; An2+=4; W+=4; Weff+=4; 12344 } 12345 break; 12346 } 12347 int * end=Wstack+taille,*source=W,*source2=W+n2s; 12348 for (int * target=Wstack;target<end;target+=8){ 12349 target[0]=*source; source+=step; 12350 target[1]=*source; source+=step; 12351 target[2]=*source; source+=step; 12352 target[3]=*source; source+=step; 12353 target[4]=*source2; source2+=step; 12354 target[5]=*source2; source2+=step; 12355 target[6]=*source2; source2+=step; 12356 target[7]=*source2; source2+=step; 12357 } 12358 for (int pos=0;pos<n;pos+=taille){ 12359 int *An2=Aeff+taille/2,*Aend=An2,*Weff=Wstack; 12360 int s=*Aeff,t1=*An2; 12361 *Aeff=addmod(s,t1,p); 12362 *An2=submod(s,t1,p); 12363 fft_loop_p_precond(Aeff[1],An2[1],Weff[1],Weff[5],p); 12364 fft_loop_p_precond(Aeff[2],An2[2],Weff[2],Weff[6],p); 12365 fft_loop_p_precond(Aeff[3],An2[3],Weff[3],Weff[7],p); 12366 Aeff+=4; An2+=4; Weff+=8; 12367 for (;Aeff<Aend;){ 12368 fft_loop_p_precond(Aeff[0],An2[0],Weff[0],Weff[4],p); 12369 fft_loop_p_precond(Aeff[1],An2[1],Weff[1],Weff[5],p); 12370 fft_loop_p_precond(Aeff[2],An2[2],Weff[2],Weff[6],p); 12371 fft_loop_p_precond(Aeff[3],An2[3],Weff[3],Weff[7],p); 12372 Aeff+=4; An2+=4; Weff+=8; 12373 } 12374 Aeff+=taille/2; 12375 } 12376 } 12377 if (n>MAX_INTSTACK) 12378 free(Wstack); 12379 } 12380 12381 #else // de-recurse 12382 
static void fft2pnopermbefore( int *A, int n, int *W,int p,double invp,int step) { 12383 if (n==0) 12384 CERR << "bug" << endl; 12385 if ( n==1 ) return; 12386 // if p is fixed, the code is about 2* faster 12387 if (n==4){ 12388 #ifdef GIAC_CACHEW 12389 int w1=W[1]; 12390 #else 12391 int w1=W[step]; 12392 #endif 12393 //CERR << n << " " << w1 << endl; 12394 int f0=A[0],f1=A[1],f2=A[2],f3=A[3], 12395 #ifdef GIAC_PRECOND 12396 f01=precond_mulmodp(f1-f3+p,w1,W[3*step],p), 12397 #else 12398 f01=mulmodp(submod(f1,f3,p),w1,p,invp), 12399 #endif 12400 f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 12401 A[0]=addmod(f02p,f13,p); 12402 A[1]=addmod(f02m,f01,p); 12403 A[2]=submod(f02p,f13,p); 12404 A[3]=submod(f02m,f01,p); 12405 return; 12406 } 12407 if (n==2){ 12408 int f0=A[0],f1=A[1]; 12409 A[0]=addmod(f0,f1,p); 12410 A[1]=submod(f0,f1,p); 12411 return; 12412 } 12413 int * An2=A+n/2; 12414 int * Aend=A+n/2; 12415 #ifdef GIAC_CACHEW 12416 fft2pnopermbefore( A, n/2, W+n/2,p,invp,2*step); // fft2pnopermbefore(A,n2,W+n2); 12417 fft2pnopermbefore( A+n/2, n/2, W+n/2,p,invp,2*step); // fft2pnopermbefore(An2,n2,W+n2); 12418 for(; A<Aend; ) { 12419 fft_loop_p(*A,*An2,*W,p,invp); 12420 fft_loop_p(A[1],An2[1],W[1],p,invp); 12421 fft_loop_p(A[2],An2[2],W[2],p,invp); 12422 fft_loop_p(A[3],An2[3],W[3],p,invp); 12423 A+=4; An2+=4; W+=4; 12424 } 12425 #else // GIAC_CACHEW 12426 fft2pnopermbefore( A, n/2, W,p,invp,2*step); // fft2pnopermbefore(A,n2,W+n2); 12427 fft2pnopermbefore( A+n/2, n/2, W,p,invp,2*step); // fft2pnopermbefore(An2,n2,W+n2); 12428 int n2s = n/2*step; // n2%4==0 12429 #ifdef GIAC_PRECOND 12430 for(; A<Aend; ) { 12431 fft_loop_p_precond(*A,*An2,*W,*(W+n2s),p); 12432 ++A; ++An2; W +=step ; 12433 fft_loop_p_precond(*A,*An2,*W,*(W+n2s),p); 12434 ++A; ++An2; W +=step ; 12435 fft_loop_p_precond(*A,*An2,*W,*(W+n2s),p); 12436 ++A; ++An2; W += step; 12437 fft_loop_p_precond(*A,*An2,*W,*(W+n2s),p); 12438 ++A; ++An2; W +=step; 12439 } 12440 #else // GIAC_PRECOND 
12441 for(; A<Aend; ) { 12442 #if 1 12443 fft_loop_p(*A,*An2,*W,p,invp); 12444 W += step; 12445 fft_loop_p(A[1],An2[1],*W,p,invp); 12446 W += step; 12447 fft_loop_p(A[2],An2[2],*W,p,invp); 12448 W += step; 12449 fft_loop_p(A[3],An2[3],*W,p,invp); 12450 W += step; 12451 A+=4; An2+=4; 12452 #else 12453 fft_loop_p(*A,*An2,*W,p,invp); 12454 ++A; ++An2; W +=step ; 12455 fft_loop_p(*A,*An2,*W,p,invp); 12456 ++A; ++An2; W +=step ; 12457 fft_loop_p(*A,*An2,*W,p,invp); 12458 ++A; ++An2; W += step; 12459 fft_loop_p(*A,*An2,*W,p,invp); 12460 ++A; ++An2; W +=step; 12461 #endif 12462 } 12463 #endif // GIAC_PRECOND 12464 #endif // GIAC_CACHEW 12465 } 12466 #endif // derecurse 12467 12468 #ifdef GIAC_PRECOND 12469 inline void fft_loop_p_(int * Acur,int *An2cur,int * Wcur,int n2,int p,double invp){ 12470 int Ai,An2i; 12471 Ai=*Acur; 12472 An2i=*An2cur; 12473 *Acur = addmod(Ai,An2i,p); 12474 *An2cur=precond_mulmodp(Ai-An2i+p,*Wcur,*(Wcur+n2),p); 12475 // *An2cur=precond_mulmodp(submod(Ai,An2i,p),*Wcur,*(Wcur+n2),p); 12476 } 12477 inline void fft_loop_p_precond_(int * Acur,int *An2cur,int Wcur,int Winvp,int p){ 12478 int Ai,An2i; 12479 Ai=*Acur; 12480 An2i=*An2cur; 12481 *Acur = addmod(Ai,An2i,p); 12482 *An2cur=precond_mulmodp(Ai-An2i+p,Wcur,Winvp,p); 12483 } 12484 #else 12485 inline void fft_loop_p_(int * Acur,int *An2cur,int * Wcur,int p,double invp){ 12486 int Ai,An2i; 12487 Ai=*Acur; 12488 An2i=*An2cur; 12489 *Acur = addmod(Ai,An2i,p); 12490 *An2cur=amodp((longlong(Ai)+p-An2i)* *Wcur,p,invp); 12491 // Ai==amodp((longlong(Ai)+p-An2i)* *Wcur,p,invp); 12492 //Ai+=(Ai>>31)&p; 12493 //*An2cur=Ai; 12494 } 12495 #endif 12496 12497 12498 #if !defined NUMWORKS // !defined VISUALC && !defined USE_GMP_REPLACEMENTS && defined GIAC_PRECOND // de-recurse 12499 static void fft2pnopermafter( int *A, int n, int *W,int p,double invp,int step) { 12500 if (n==0) 12501 CERR << "bug\n"; 12502 if (n<=1 ) return; 12503 if (n==2){ 12504 int f0=A[0],f1=A[1]; 12505 A[0]=addmod(f0,f1,p); 12506 
A[1]=submod(f0,f1,p); 12507 return; 12508 } 12509 int n2s=n/2*step; 12510 // group by decreasing size 12511 int Wstack_[MAX_INTSTACK]; 12512 int * Wstack=0; 12513 if (n>MAX_INTSTACK) 12514 Wstack=(int *)malloc(n*sizeof(int)); 12515 else 12516 Wstack=Wstack_; 12517 int * end=Wstack+n,*source=W,*source2=W+n2s; 12518 for (int * target=Wstack;target<end;target+=8){ 12519 target[0]=*source; source+=step; 12520 target[1]=*source; source+=step; 12521 target[2]=*source; source+=step; 12522 target[3]=*source; source+=step; 12523 target[4]=*source2; source2+=step; 12524 target[5]=*source2; source2+=step; 12525 target[6]=*source2; source2+=step; 12526 target[7]=*source2; source2+=step; 12527 } 12528 for (int taille=n;taille>=8;taille/=2){ 12529 int * Aeff=A; 12530 for (int pos=0;pos<n;pos+=taille){ 12531 int *An2=Aeff+taille/2,*Aend=An2,*Weff=Wstack; 12532 int s=*Aeff,t1=*An2; 12533 *Aeff=addmod(s,t1,p); 12534 *An2=submod(s,t1,p); 12535 fft_loop_p_precond_(&Aeff[1],&An2[1],Weff[1],Weff[5],p); 12536 fft_loop_p_precond_(&Aeff[2],&An2[2],Weff[2],Weff[6],p); 12537 fft_loop_p_precond_(&Aeff[3],&An2[3],Weff[3],Weff[7],p); 12538 Aeff+=4; An2+=4; Weff+=8; 12539 for (;Aeff<Aend;){ 12540 #if 0 // def HAVE_VCL1_VECTORCLASS_H 12541 Vec4ui A4,An4,B4; 12542 Vec4uq C4; 12543 A4.load(Aeff); 12544 An4.load(An2); 12545 B4 = (A4-p)+An4; 12546 B4 += ( (Vec4i) B4>>31) & p; // make positive 12547 B4.store(Aeff); 12548 C4 = extend( A4+p-An4); 12549 A4.load(Weff); // W4 12550 B4.load(Weff+4); // W4inv 12551 C4 = C4*extend(A4)-((C4*extend(B4))>>32)*p; 12552 // C4 += ( (Vec4q) C4>>63) & p; 12553 // An4=compress(C4); 12554 An4=compress(*(Vec4q*) &C4); 12555 An4 += ( (Vec4i) An4>>31)&p; 12556 An4.store(An2); 12557 Aeff+=4; An2+=4; Weff+=8; continue; 12558 #endif // VECTORCLASS_H 12559 fft_loop_p_precond_(&Aeff[0],&An2[0],Weff[0],Weff[4],p); 12560 fft_loop_p_precond_(&Aeff[1],&An2[1],Weff[1],Weff[5],p); 12561 fft_loop_p_precond_(&Aeff[2],&An2[2],Weff[2],Weff[6],p); 12562 
fft_loop_p_precond_(&Aeff[3],&An2[3],Weff[3],Weff[7],p); 12563 Aeff+=4; An2+=4; Weff+=8; 12564 } 12565 Aeff+=taille/2; 12566 } 12567 if (taille==8) 12568 break; 12569 int * end=Wstack+taille,*source=Wstack; 12570 for (int * target=Wstack;source<end;source+=16,target+=8){ 12571 target[0]=source[0]; 12572 target[1]=source[2]; 12573 target[4]=source[4]; 12574 target[5]=source[6]; 12575 target[2]=source[8]; 12576 target[3]=source[10]; 12577 target[6]=source[12]; 12578 target[7]=source[14]; 12579 } 12580 } 12581 if (n>MAX_INTSTACK) 12582 free(Wstack); 12583 // finish by groups of 4 12584 step=n2s/2; 12585 int w1=W[step],w1surp=W[3*step]; 12586 int *Aeff=A; 12587 for (int pos=0;pos<n;pos+=4,Aeff+=4){ 12588 int f0=Aeff[0],f1=Aeff[1],f2=Aeff[2],f3=Aeff[3], 12589 f01=precond_mulmodp(f1-f3+p,w1,w1surp,p), 12590 f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 12591 Aeff[0]=addmod(f02p,f13,p); 12592 Aeff[1]=addmod(f02m,f01,p); 12593 Aeff[2]=submod(f02p,f13,p); 12594 Aeff[3]=submod(f02m,f01,p); 12595 } 12596 } 12597 12598 #else // de-recurse 12599 static void fft2pnopermafter( int *A, int n, int *W,int p,double invp,int step) { 12600 if ( n==1 ) return; 12601 // if p is fixed, the code is about 2* faster 12602 if (n==4){ 12603 #ifdef GIAC_CACHEW 12604 int w1=W[1]; 12605 #else 12606 int w1=W[step]; 12607 #endif 12608 int f0=A[0],f1=A[1],f2=A[2],f3=A[3], 12609 #ifdef GIAC_PRECOND 12610 f01=precond_mulmodp(f1-f3+p,w1,W[3*step],p), 12611 #else 12612 f01=mulmodp(submod(f1,f3,p),w1,p,invp), 12613 #endif 12614 f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 12615 A[0]=addmod(f02p,f13,p); 12616 A[1]=addmod(f02m,f01,p); 12617 A[2]=submod(f02p,f13,p); 12618 A[3]=submod(f02m,f01,p); 12619 return; 12620 } 12621 if (n==2){ 12622 int f0=A[0],f1=A[1]; 12623 A[0]=addmod(f0,f1,p); 12624 A[1]=submod(f0,f1,p); 12625 return; 12626 } 12627 // Step 1 : arithmetic 12628 int *An2=A+n/2; 12629 int * Acur=A,*An2cur=An2,*Wcur=W; 12630 #ifdef GIAC_CACHEW 12631 for 
(;Acur!=An2;){ 12632 fft_loop_p_(Acur,An2cur,Wcur,p,invp); 12633 fft_loop_p_(&Acur[1],&An2cur[1],Wcur+1,p,invp); 12634 fft_loop_p_(&Acur[2],&An2cur[2],Wcur+2,p,invp); 12635 fft_loop_p_(&Acur[3],&An2cur[3],Wcur+3,p,invp); 12636 Acur+=4; An2cur+=4; Wcur += 4; 12637 } 12638 fft2pnopermafter(A, n/2, W+n/2,p,invp,2*step); 12639 fft2pnopermafter(An2, n/2, W+n/2,p,invp,2*step); 12640 return; 12641 #endif 12642 #ifdef GIAC_PRECOND 12643 int n2=n/2*step; 12644 for (;Acur!=An2;){ 12645 fft_loop_p_(Acur,An2cur,Wcur,n2,p,invp); 12646 ++Acur;++An2cur; Wcur +=step; 12647 fft_loop_p_(Acur,An2cur,Wcur,n2,p,invp); 12648 ++Acur;++An2cur; Wcur += step; 12649 fft_loop_p_(Acur,An2cur,Wcur,n2,p,invp); 12650 ++Acur;++An2cur; Wcur += step; 12651 fft_loop_p_(Acur,An2cur,Wcur,n2,p,invp); 12652 ++Acur;++An2cur; Wcur += step; 12653 // continue; 12654 #if 0 // def HAVE_VCL1_VECTORCLASS_H // debug 12655 A4.load(Acur-4); 12656 An4.load(An2cur-4); 12657 if ( horizontal_count(An4==compress(C4))!=4 || horizontal_count(A4==compress(B4))!=4) 12658 CERR << "err\n"; 12659 #endif 12660 } 12661 #else // GIAC_PRECOND 12662 for (;Acur!=An2;){ 12663 fft_loop_p_(Acur,An2cur,Wcur,p,invp); 12664 Wcur +=step; 12665 fft_loop_p_(&Acur[1],&An2cur[1],Wcur,p,invp); 12666 Wcur += step; 12667 fft_loop_p_(&Acur[2],&An2cur[2],Wcur,p,invp); 12668 Wcur += step; 12669 fft_loop_p_(&Acur[3],&An2cur[3],Wcur,p,invp); 12670 Acur+=4; An2cur+=4; Wcur += step; 12671 } 12672 #endif // GIAC_PRECOND 12673 // Step 2 : recursive calls 12674 fft2pnopermafter(A, n/2, W,p,invp,2*step); 12675 fft2pnopermafter(An2, n/2, W,p,invp,2*step); 12676 } 12677 #endif // de-recurse 12678 12679 12680 #if 0 12681 static void fft4wp1(vector<int> & W,int n,int w){ 12682 W.reserve(n); 12683 const int p = 2013265921 ; 12684 w=w % p; 12685 if (w<0) w += p; 12686 longlong wk=w; 12687 for (int N=n/2;N;N/=4,wk=(wk*wk)%p,wk=(wk*wk)%p){ 12688 int ww=1; 12689 for (int i=0;i<N;ww=(ww*wk)%p,++i){ 12690 W.push_back(ww); 12691 } 12692 } 12693 } 12694 12695 static 
void fft4wp2(vector<int> & W,int n,int w){ 12696 W.reserve(n); 12697 const int p = 1811939329 ; 12698 w=w % p; 12699 if (w<0) w += p; 12700 longlong wk=w; 12701 for (int N=n/2;N;N/=4,wk=(wk*wk)%p,wk=(wk*wk)%p){ 12702 int ww=1; 12703 for (int i=0;i<N;ww=(ww*wk)%p,++i){ 12704 W.push_back(ww); 12705 } 12706 } 12707 } 12708 12709 static void fft4p1nopermafter( int *A, int n, int *W) { 12710 if ( n==1 ) return; 12711 // if p is fixed, the code is about 2* faster 12712 const int p = 2013265921 ; 12713 if (n==4){ 12714 int w1=W[1]; 12715 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p),w1,p),f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 12716 A[0]=addmod(f02p,f13,p); 12717 A[1]=addmod(f02m,f01,p); 12718 A[2]=submod(f02p,f13,p); 12719 A[3]=submod(f02m,f01,p); 12720 return; 12721 } 12722 if (n==2){ 12723 int f0=A[0],f1=A[1]; 12724 A[0]=addmod(f0,f1,p); 12725 A[1]=submod(f0,f1,p); 12726 return; 12727 } 12728 int i,n2,n3,n4; 12729 n4=n/4; n2=n/2; n3=n2+n4; 12730 // Step 1 : arithmetic 12731 int *An4=A+n4, *An2=A+n2, *A3n4=A+n3,*Wn4=W+n4; 12732 for( i=0; i<n4; ++i ) { 12733 int Ai,An2i,An4i,A3n4i; 12734 Ai=A[i]; 12735 An4i=An4[i]; 12736 An2i=An2[i]; 12737 A3n4i=A3n4[i]; 12738 int w=W[2*i]; 12739 int s1 = addmod(Ai,An2i,p); 12740 int s2 = addmod(An4i,A3n4i,p); 12741 A[i]=addmod(s1,s2,p); 12742 An4[i]=((longlong(s1)+p-s2)*w)%p;// mulmod(submod(s1,s2,p),w,p); 12743 s1 = ((longlong(Ai)+p-An2i)*W[i])%p;// mulmod(submod(Ai,An2i,p),W[i],p); 12744 s2 = ((longlong(An4i)+p-A3n4i)*Wn4[i])%p;// mulmod(submod(An4i,A3n4i,p),W[i+n4],p); 12745 An2[i]=addmod(s1,s2,p); 12746 A3n4[i]=((longlong(s1)+p-s2)*w)%p; // mulmod(submod(t1,t2,p),w,p); 12747 ++i; 12748 Ai=A[i]; 12749 An4i=An4[i]; 12750 An2i=An2[i]; 12751 A3n4i=A3n4[i]; 12752 w=W[2*i]; 12753 s1 = addmod(Ai,An2i,p); 12754 s2 = addmod(An4i,A3n4i,p); 12755 A[i]=addmod(s1,s2,p); 12756 An4[i]=((longlong(s1)+p-s2)*w)%p;// mulmod(submod(s1,s2,p),w,p); 12757 s1 = ((longlong(Ai)+p-An2i)*W[i])%p;// 
mulmod(submod(Ai,An2i,p),W[i],p); 12758 s2 = ((longlong(An4i)+p-A3n4i)*Wn4[i])%p;// mulmod(submod(An4i,A3n4i,p),W[i+n4],p); 12759 An2[i]=addmod(s1,s2,p); 12760 A3n4[i]=((longlong(s1)+p-s2)*w)%p; // mulmod(submod(t1,t2,p),w,p); 12761 } 12762 // Step 2 : recursive calls 12763 fft4p1nopermafter(A, n4, W+n2); 12764 fft4p1nopermafter(A+n4, n4, W+n2); 12765 fft4p1nopermafter(A+n2, n4, W+n2); 12766 fft4p1nopermafter(A+n3, n4, W+n2); 12767 if (n==8){ 12768 swapint(A[1],A[2]); 12769 swapint(A[5],A[6]); 12770 } 12771 } 12772 static void fft4p2nopermafter( int *A, int n, int *W) { 12773 if ( n==1 ) return; 12774 // if p is fixed, the code is about 2* faster 12775 const int p = 1811939329 ; 12776 if (n==4){ 12777 int w1=W[1]; 12778 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p),w1,p),f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 12779 A[0]=addmod(f02p,f13,p); 12780 A[1]=addmod(f02m,f01,p); 12781 A[2]=submod(f02p,f13,p); 12782 A[3]=submod(f02m,f01,p); 12783 return; 12784 } 12785 if (n==2){ 12786 int f0=A[0],f1=A[1]; 12787 A[0]=addmod(f0,f1,p); 12788 A[1]=submod(f0,f1,p); 12789 return; 12790 } 12791 int i,n2,n3,n4; 12792 n4=n/4; n2=n/2; n3=n2+n4; 12793 // Step 1 : arithmetic 12794 int *An4=A+n4, *An2=A+n2, *A3n4=A+n3,*Wn4=W+n4; 12795 for( i=0; i<n4; ++i ) { 12796 int Ai,An2i,An4i,A3n4i; 12797 Ai=A[i]; 12798 An4i=An4[i]; 12799 An2i=An2[i]; 12800 A3n4i=A3n4[i]; 12801 int w=W[2*i]; 12802 int s1 = addmod(Ai,An2i,p); 12803 int s2 = addmod(An4i,A3n4i,p); 12804 A[i]=addmod(s1,s2,p); 12805 An4[i]=((longlong(s1)+p-s2)*w)%p;// mulmod(submod(s1,s2,p),w,p); 12806 s1 = ((longlong(Ai)+p-An2i)*W[i])%p;// mulmod(submod(Ai,An2i,p),W[i],p); 12807 s2 = ((longlong(An4i)+p-A3n4i)*Wn4[i])%p;// mulmod(submod(An4i,A3n4i,p),W[i+n4],p); 12808 An2[i]=addmod(s1,s2,p); 12809 A3n4[i]=((longlong(s1)+p-s2)*w)%p; // mulmod(submod(t1,t2,p),w,p); 12810 ++i; 12811 Ai=A[i]; 12812 An4i=An4[i]; 12813 An2i=An2[i]; 12814 A3n4i=A3n4[i]; 12815 w=W[2*i]; 12816 s1 = addmod(Ai,An2i,p); 12817 
s2 = addmod(An4i,A3n4i,p); 12818 A[i]=addmod(s1,s2,p); 12819 An4[i]=((longlong(s1)+p-s2)*w)%p;// mulmod(submod(s1,s2,p),w,p); 12820 s1 = ((longlong(Ai)+p-An2i)*W[i])%p;// mulmod(submod(Ai,An2i,p),W[i],p); 12821 s2 = ((longlong(An4i)+p-A3n4i)*Wn4[i])%p;// mulmod(submod(An4i,A3n4i,p),W[i+n4],p); 12822 An2[i]=addmod(s1,s2,p); 12823 A3n4[i]=((longlong(s1)+p-s2)*w)%p; // mulmod(submod(t1,t2,p),w,p); 12824 } 12825 // Step 2 : recursive calls 12826 fft4p2nopermafter(A, n4, W+n2); 12827 fft4p2nopermafter(A+n4, n4, W+n2); 12828 fft4p2nopermafter(A+n2, n4, W+n2); 12829 fft4p2nopermafter(A+n3, n4, W+n2); 12830 if (n==8){ 12831 swapint(A[1],A[2]); 12832 swapint(A[5],A[6]); 12833 } 12834 } 12835 #endif 12836 12837 static void fft2p2nopermbefore( int *A, int n, int *W,int step=1) { 12838 if ( n==1 ) return; 12839 // if p is fixed, the code is about 2* faster 12840 if (n==4){ 12841 int w1=W[step]; 12842 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p2),w1,p2),f02p=addmod(f0,f2,p2),f02m=submod(f0,f2,p2),f13=addmod(f1,f3,p2); 12843 A[0]=addmod(f02p,f13,p2); 12844 A[1]=addmod(f02m,f01,p2); 12845 A[2]=submod(f02p,f13,p2); 12846 A[3]=submod(f02m,f01,p2); 12847 return; 12848 } 12849 if (n==2){ 12850 int f0=A[0],f1=A[1]; 12851 A[0]=addmod(f0,f1,p2); 12852 A[1]=submod(f0,f1,p2); 12853 return; 12854 } 12855 fft2p2nopermbefore( A, n/2, W,2*step); // fft2p2nopermbefore(A,n2,W+n2); 12856 int * An2=A+n/2; 12857 fft2p2nopermbefore( An2, n/2, W,2*step); // fft2p2nopermbefore(An2,n2,W+n2); 12858 #if 1 12859 int n2 = n/2*step; // n2%4==0 12860 int * Aend=An2; 12861 for(; A<Aend; ) { 12862 fft_loop_p2(A,An2,W,n2); 12863 ++A; ++An2; W +=step ; 12864 fft_loop_p2(A,An2,W,n2); 12865 ++A; ++An2; W +=step ; 12866 fft_loop_p2(A,An2,W,n2); 12867 ++A; ++An2; W += step; 12868 fft_loop_p2(A,An2,W,n2); 12869 ++A; ++An2; W +=step; 12870 } 12871 #else 12872 for(int i=0; i<n/2; i +=4 ) { 12873 int s = A[i]; 12874 int t = mulmodp2(W[i*step],An2[i]); 12875 A[i] = addmod(s,t,p2); 12876 An2[i] = 
submod(s,t,p2); 12877 s = A[i+1]; 12878 t = mulmodp2(W[(i+1)*step],An2[i+1]); 12879 A[i+1] = addmod(s,t,p2); 12880 An2[i+1] = submod(s,t,p2); 12881 s = A[i+2]; 12882 t = mulmodp2(W[(i+2)*step],An2[i+2]); 12883 A[i+2] = addmod(s,t,p2); 12884 An2[i+2] = submod(s,t,p2); 12885 s = A[i+3]; 12886 t = mulmodp2(W[(i+3)*step],An2[i+3]); 12887 A[i+3] = addmod(s,t,p2); 12888 An2[i+3] = submod(s,t,p2); 12889 } 12890 #endif 12891 } 12892 12893 static void fft2p2nopermafter( int *A, int n, int *W,int step=1) { 12894 if ( n==1 ) return; 12895 // if p is fixed, the code is about 2* faster 12896 if (n==4){ 12897 int w1=W[step]; 12898 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p2),w1,p2),f02p=addmod(f0,f2,p2),f02m=submod(f0,f2,p2),f13=addmod(f1,f3,p2); 12899 A[0]=addmod(f02p,f13,p2); 12900 A[1]=addmod(f02m,f01,p2); 12901 A[2]=submod(f02p,f13,p2); 12902 A[3]=submod(f02m,f01,p2); 12903 return; 12904 } 12905 if (n==2){ 12906 int f0=A[0],f1=A[1]; 12907 A[0]=addmod(f0,f1,p2); 12908 A[1]=submod(f0,f1,p2); 12909 return; 12910 } 12911 // Step 1 : arithmetic 12912 int *An2=A+n/2; 12913 #if 1 12914 int * Acur=A,*An2cur=An2,*Wcur=W; 12915 int n2=n/2*step; 12916 for (;Acur!=An2;){ 12917 int Ai,An2i; 12918 fft_loop_p2_(Acur,An2cur,Wcur,n2); 12919 ++Acur;++An2cur; Wcur +=step; 12920 fft_loop_p2_(Acur,An2cur,Wcur,n2); 12921 ++Acur;++An2cur; Wcur += step; 12922 fft_loop_p2_(Acur,An2cur,Wcur,n2); 12923 ++Acur;++An2cur; Wcur += step; 12924 fft_loop_p2_(Acur,An2cur,Wcur,n2); 12925 ++Acur;++An2cur; Wcur += step; 12926 } 12927 #else 12928 for( i=0; i<n/2; ++i ) { 12929 int Ai,An2i; 12930 Ai=A[i]; 12931 An2i=An2[i]; 12932 A[i] = addmod(Ai,An2i,p2); 12933 An2[i]=((longlong(Ai)+p2-An2i)*W[i*step]) % p2; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 12934 i++; 12935 Ai=A[i]; 12936 An2i=An2[i]; 12937 A[i] = addmod(Ai,An2i,p2); 12938 An2[i]=((longlong(Ai)+p2-An2i)*W[i*step]) % p2; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 12939 } 12940 #endif 12941 // Step 2 : recursive 
calls 12942 fft2p2nopermafter(A, n/2, W,2*step); 12943 fft2p2nopermafter(An2, n/2, W,2*step); 12944 } 12945 12946 static void fft2p2( int *A, int n, int *W, int *T,int step=1) { 12947 int i,n2,t; 12948 if ( n==1 ) return; 12949 // if p is fixed, the code is about 2* faster 12950 if (n==4){ 12951 int w1=W[step]; 12952 #if 1 12953 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p2),w1,p2),f02p=addmod(f0,f2,p2),f02m=submod(f0,f2,p2),f13=addmod(f1,f3,p2); 12954 A[0]=addmod(f02p,f13,p2); 12955 A[1]=addmod(f02m,f01,p2); 12956 A[2]=submod(f02p,f13,p2); 12957 A[3]=submod(f02m,f01,p2); 12958 #else 12959 longlong f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=(f1-f3)*w1; 12960 A[0]=(f0+f1+f2+f3)%p2; 12961 A[1]=(f0-f2+f01)%p2; 12962 A[2]=(f0-f1+f2-f3)%p2; 12963 A[3]=(f0-f2-f01)%p2; 12964 #endif 12965 return; 12966 } 12967 if (n==2){ 12968 int f0=A[0],f1=A[1]; 12969 A[0]=addmod(f0,f1,p2); 12970 A[1]=submod(f0,f1,p2); 12971 return; 12972 } 12973 n2 = n/2; 12974 // Step 1 : arithmetic 12975 int * Tn2=T+n2,*An2=A+n2; 12976 for( i=0; i<n2; ++i ) { 12977 int Ai,An2i; 12978 Ai=A[i]; 12979 An2i=An2[i]; 12980 T[i] = addmod(Ai,An2i,p2); 12981 t = submod(Ai,An2i,p2); 12982 Tn2[i] = mulmodp2(t,W[i*step]); 12983 i++; 12984 Ai=A[i]; 12985 An2i=An2[i]; 12986 T[i] = addmod(Ai,An2i,p2); 12987 t = submod(Ai,An2i,p2); 12988 Tn2[i] = mulmodp2(t,W[i*step]); 12989 } 12990 // Step 2 : recursive calls 12991 fft2p2(T, n2, W, A,2*step); 12992 fft2p2(Tn2, n2, W, A+n2,2*step); 12993 // Step 3 : permute 12994 for( i=0; i<n2; ++i ) { 12995 A[2*i] = T[i]; 12996 A[2*i+1] = Tn2[i]; 12997 ++i; 12998 A[2*i] = T[i]; 12999 A[2*i+1] = Tn2[i]; 13000 } 13001 return; 13002 } 13003 13004 static void fft2p3nopermbefore( int *A, int n, int *W,int step=1) { 13005 if ( n==1 ) return; 13006 // if p is fixed, the code is about 2* faster 13007 if (n==4){ 13008 int w1=W[step]; 13009 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p3),w1,p3),f02p=addmod(f0,f2,p3),f02m=submod(f0,f2,p3),f13=addmod(f1,f3,p3); 13010 
A[0]=addmod(f02p,f13,p3); 13011 A[1]=addmod(f02m,f01,p3); 13012 A[2]=submod(f02p,f13,p3); 13013 A[3]=submod(f02m,f01,p3); 13014 return; 13015 } 13016 if (n==2){ 13017 int f0=A[0],f1=A[1]; 13018 A[0]=addmod(f0,f1,p3); 13019 A[1]=submod(f0,f1,p3); 13020 return; 13021 } 13022 fft2p3nopermbefore( A, n/2, W,2*step); // fft2p3nopermbefore(A,n2,W+n2); 13023 int * An2=A+n/2; 13024 fft2p3nopermbefore( An2, n/2, W,2*step); // fft2p3nopermbefore(An2,n2,W+n2); 13025 #if 1 13026 int n2 = n/2*step; // n2%4==0 13027 int * Aend=An2; 13028 for(; A<Aend; ) { 13029 fft_loop_p3(A,An2,W,n2); 13030 ++A; ++An2; W +=step ; 13031 fft_loop_p3(A,An2,W,n2); 13032 ++A; ++An2; W +=step ; 13033 fft_loop_p3(A,An2,W,n2); 13034 ++A; ++An2; W += step; 13035 fft_loop_p3(A,An2,W,n2); 13036 ++A; ++An2; W +=step; 13037 } 13038 #else 13039 for(int i=0; i<n/2; i +=4 ) { 13040 int s = A[i]; 13041 int t = mulmodp3(W[i*step],An2[i]); 13042 A[i] = addmod(s,t,p3); 13043 An2[i] = submod(s,t,p3); 13044 s = A[i+1]; 13045 t = mulmodp3(W[(i+1)*step],An2[i+1]); 13046 A[i+1] = addmod(s,t,p3); 13047 An2[i+1] = submod(s,t,p3); 13048 s = A[i+2]; 13049 t = mulmodp3(W[(i+2)*step],An2[i+2]); 13050 A[i+2] = addmod(s,t,p3); 13051 An2[i+2] = submod(s,t,p3); 13052 s = A[i+3]; 13053 t = mulmodp3(W[(i+3)*step],An2[i+3]); 13054 A[i+3] = addmod(s,t,p3); 13055 An2[i+3] = submod(s,t,p3); 13056 } 13057 #endif 13058 } 13059 13060 static void fft2p3nopermafter( int *A, int n, int *W,int step=1) { 13061 if ( n==1 ) return; 13062 // if p is fixed, the code is about 2* faster 13063 if (n==4){ 13064 int w1=W[step]; 13065 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p3),w1,p3),f02p=addmod(f0,f2,p3),f02m=submod(f0,f2,p3),f13=addmod(f1,f3,p3); 13066 A[0]=addmod(f02p,f13,p3); 13067 A[1]=addmod(f02m,f01,p3); 13068 A[2]=submod(f02p,f13,p3); 13069 A[3]=submod(f02m,f01,p3); 13070 return; 13071 } 13072 if (n==2){ 13073 int f0=A[0],f1=A[1]; 13074 A[0]=addmod(f0,f1,p3); 13075 A[1]=submod(f0,f1,p3); 13076 return; 13077 } 13078 // Step 1 
: arithmetic 13079 int *An2=A+n/2; 13080 #if 1 13081 int * Acur=A,*An2cur=An2,*Wcur=W; 13082 int n2=n/2*step; 13083 for (;Acur!=An2;){ 13084 int Ai,An2i; 13085 fft_loop_p3_(Acur,An2cur,Wcur,n2); 13086 ++Acur;++An2cur; Wcur +=step; 13087 fft_loop_p3_(Acur,An2cur,Wcur,n2); 13088 ++Acur;++An2cur; Wcur += step; 13089 fft_loop_p3_(Acur,An2cur,Wcur,n2); 13090 ++Acur;++An2cur; Wcur += step; 13091 fft_loop_p3_(Acur,An2cur,Wcur,n2); 13092 ++Acur;++An2cur; Wcur += step; 13093 } 13094 #else 13095 for( i=0; i<n/2; ++i ) { 13096 int Ai,An2i; 13097 Ai=A[i]; 13098 An2i=An2[i]; 13099 A[i] = addmod(Ai,An2i,p3); 13100 An2[i]=((longlong(Ai)+p3-An2i)*W[i*step]) % p3; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 13101 i++; 13102 Ai=A[i]; 13103 An2i=An2[i]; 13104 A[i] = addmod(Ai,An2i,p3); 13105 An2[i]=((longlong(Ai)+p3-An2i)*W[i*step]) % p3; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 13106 } 13107 #endif 13108 // Step 2 : recursive calls 13109 fft2p3nopermafter(A, n/2, W,2*step); 13110 fft2p3nopermafter(An2, n/2, W,2*step); 13111 } 13112 13113 static void fft2p3( int *A, int n, int *W, int *T,int step=1) { 13114 int i,n2,t; 13115 if ( n==1 ) return; 13116 // if p is fixed, the code is about 2* faster 13117 if (n==4){ 13118 int w1=W[step]; 13119 #if 1 13120 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p3),w1,p3),f02p=addmod(f0,f2,p3),f02m=submod(f0,f2,p3),f13=addmod(f1,f3,p3); 13121 A[0]=addmod(f02p,f13,p3); 13122 A[1]=addmod(f02m,f01,p3); 13123 A[2]=submod(f02p,f13,p3); 13124 A[3]=submod(f02m,f01,p3); 13125 #else 13126 longlong f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=(f1-f3)*w1; 13127 A[0]=(f0+f1+f2+f3)%p3; 13128 A[1]=(f0-f2+f01)%p3; 13129 A[2]=(f0-f1+f2-f3)%p3; 13130 A[3]=(f0-f2-f01)%p3; 13131 #endif 13132 return; 13133 } 13134 if (n==2){ 13135 int f0=A[0],f1=A[1]; 13136 A[0]=addmod(f0,f1,p3); 13137 A[1]=submod(f0,f1,p3); 13138 return; 13139 } 13140 n2 = n/2; 13141 // Step 1 : arithmetic 13142 int * Tn2=T+n2,*An2=A+n2; 13143 for( i=0; i<n2; ++i ) { 13144 
int Ai,An2i; 13145 Ai=A[i]; 13146 An2i=An2[i]; 13147 T[i] = addmod(Ai,An2i,p3); 13148 t = submod(Ai,An2i,p3); 13149 Tn2[i] = mulmodp3(t,W[i*step]); 13150 i++; 13151 Ai=A[i]; 13152 An2i=An2[i]; 13153 T[i] = addmod(Ai,An2i,p3); 13154 t = submod(Ai,An2i,p3); 13155 Tn2[i] = mulmodp3(t,W[i*step]); 13156 } 13157 // Step 2 : recursive calls 13158 fft2p3(T, n2, W, A,2*step); 13159 fft2p3(Tn2, n2, W, A+n2,2*step); 13160 // Step 3 : permute 13161 for( i=0; i<n2; ++i ) { 13162 A[2*i] = T[i]; 13163 A[2*i+1] = Tn2[i]; 13164 ++i; 13165 A[2*i] = T[i]; 13166 A[2*i+1] = Tn2[i]; 13167 } 13168 return; 13169 } 13170 13171 13172 static void fft2p4nopermbefore( int *A, int n, int *W) { 13173 int n2; 13174 if ( n==1 ) return; 13175 // if p is fixed, the code is about 2* faster 13176 const int p = 2113929217; 13177 if (n==4){ 13178 int w1=W[1]; 13179 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p),w1,p),f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 13180 A[0]=addmod(f02p,f13,p); 13181 A[1]=addmod(f02m,f01,p); 13182 A[2]=submod(f02p,f13,p); 13183 A[3]=submod(f02m,f01,p); 13184 return; 13185 } 13186 if (n==2){ 13187 int f0=A[0],f1=A[1]; 13188 A[0]=addmod(f0,f1,p); 13189 A[1]=submod(f0,f1,p); 13190 return; 13191 } 13192 n2 = n/2; 13193 fft2p4nopermbefore( A, n2, W+n2); 13194 int * An2=A+n2; 13195 fft2p4nopermbefore( An2, n2, W+n2); 13196 #if 1 13197 int * Aend=An2; 13198 for(; A<Aend; ) { 13199 int s = *A; 13200 int t = mulmod(*W,*An2,p); 13201 *A = addmod(s,t,p); 13202 *An2 = submod(s,t,p); 13203 ++A; ++An2; ++W; 13204 s = *A; 13205 t = mulmod(*W,*An2,p); 13206 *A = addmod(s,t,p); 13207 *An2 = submod(s,t,p); 13208 ++A; ++An2; ++W; 13209 s = *A; 13210 t = mulmod(*W,*An2,p); 13211 *A = addmod(s,t,p); 13212 *An2 = submod(s,t,p); 13213 ++A; ++An2; ++W; 13214 s = *A; 13215 t = mulmod(*W,*An2,p); 13216 *A = addmod(s,t,p); 13217 *An2 = submod(s,t,p); 13218 ++A; ++An2; ++W; 13219 } 13220 #else 13221 for( i=0; i<n2; i++ ) { 13222 int s = A[i]; 13223 int t = 
mulmod(W[i],An2[i],p); 13224 A[i] = addmod(s,t,p); 13225 An2[i] = submod(s,t,p); 13226 ++i; 13227 s = A[i]; 13228 t = mulmod(W[i],An2[i],p); 13229 A[i] = addmod(s,t,p); 13230 An2[i] = submod(s,t,p); 13231 ++i; 13232 s = A[i]; 13233 t = mulmod(W[i],An2[i],p); 13234 A[i] = addmod(s,t,p); 13235 An2[i] = submod(s,t,p); 13236 ++i; 13237 s = A[i]; 13238 t = mulmod(W[i],An2[i],p); 13239 A[i] = addmod(s,t,p); 13240 An2[i] = submod(s,t,p); 13241 } 13242 #endif 13243 } 13244 13245 static void fft2p4nopermafter( int *A, int n, int *W) { 13246 int n2; 13247 if ( n==1 ) return; 13248 // if p is fixed, the code is about 2* faster 13249 const int p = 2113929217 ; 13250 if (n==4){ 13251 int w1=W[1]; 13252 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p),w1,p),f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 13253 A[0]=addmod(f02p,f13,p); 13254 A[1]=addmod(f02m,f01,p); 13255 A[2]=submod(f02p,f13,p); 13256 A[3]=submod(f02m,f01,p); 13257 return; 13258 } 13259 if (n==2){ 13260 int f0=A[0],f1=A[1]; 13261 A[0]=addmod(f0,f1,p); 13262 A[1]=submod(f0,f1,p); 13263 return; 13264 } 13265 n2 = n/2; 13266 // Step 1 : arithmetic 13267 int *An2=A+n2; 13268 #if 1 13269 int * Acur=A,*An2cur=An2,*Wcur=W; 13270 for (;Acur!=An2;){ 13271 int Ai,An2i; 13272 Ai=*Acur; 13273 An2i=*An2cur; 13274 *Acur = addmod(Ai,An2i,p); 13275 *An2cur=((longlong(Ai)+p-An2i)* *Wcur) % p; 13276 ++Acur;++An2cur;++Wcur; 13277 Ai=*Acur; 13278 An2i=*An2cur; 13279 *Acur = addmod(Ai,An2i,p); 13280 *An2cur=((longlong(Ai)+p-An2i)* *Wcur) % p; 13281 ++Acur;++An2cur;++Wcur; 13282 } 13283 #else 13284 for( i=0; i<n2; ++i ) { 13285 int Ai,An2i; 13286 Ai=A[i]; 13287 An2i=An2[i]; 13288 A[i] = addmod(Ai,An2i,p); 13289 An2[i]=((longlong(Ai)+p-An2i)*W[i]) % p; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 13290 i++; 13291 Ai=A[i]; 13292 An2i=An2[i]; 13293 A[i] = addmod(Ai,An2i,p); 13294 An2[i]=((longlong(Ai)+p-An2i)*W[i]) % p; // t = submod(Ai,An2i,p); An2[i] = mulmod(t,W[i],p); 13295 } 13296 #endif 13297 
// Step 2 : recursive calls 13298 fft2p4nopermafter(A, n2, W+n2); 13299 fft2p4nopermafter(An2, n2, W+n2); 13300 } 13301 13302 static void fft2( int *A, int n, int *W, int p, int *T ,bool permute=true) { 13303 int i,n2,t; 13304 if ( n==1 ) return; 13305 if (p==2013265921){ 13306 fft2p1(A,n,W,T); 13307 return; 13308 } 13309 if (p==1811939329){ 13310 fft2p2(A,n,W,T); 13311 return; 13312 } 13313 if (n==4){ 13314 int w1=W[1]; 13315 #if 1 13316 int f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=mulmod(submod(f1,f3,p),w1,p),f02p=addmod(f0,f2,p),f02m=submod(f0,f2,p),f13=addmod(f1,f3,p); 13317 A[0]=addmod(f02p,f13,p); 13318 A[1]=addmod(f02m,f01,p); 13319 A[2]=submod(f02p,f13,p); 13320 A[3]=submod(f02m,f01,p); 13321 #else 13322 longlong f0=A[0],f1=A[1],f2=A[2],f3=A[3],f01=(f1-f3)*w1; 13323 A[0]=(f0+f1+f2+f3)%p; 13324 A[1]=(f0-f2+f01)%p; 13325 A[2]=(f0-f1+f2-f3)%p; 13326 A[3]=(f0-f2-f01)%p; 13327 #endif 13328 return; 13329 } 13330 if (n==2){ 13331 int f0=A[0],f1=A[1]; 13332 A[0]=addmod(f0,f1,p); 13333 A[1]=submod(f0,f1,p); 13334 return; 13335 } 13336 n2 = n/2; 13337 // Step 1 : arithmetic 13338 int * Tn2=T+n2,*An2=A+n2; 13339 for( i=0; i<n2; i++ ) { 13340 int Ai,An2i; 13341 Ai=A[i]; 13342 An2i=An2[i]; 13343 T[i] = addmod(Ai,An2i,p); 13344 t = submod(Ai,An2i,p); 13345 Tn2[i] = mulmod(t,W[i],p); 13346 } 13347 // Step 2 : recursive calls 13348 fft2(T, n2, W+n2, p, A,permute); 13349 fft2(Tn2, n2, W+n2, p, A+n2,permute); 13350 if (!permute){ 13351 for( i=0; i<n2; i++ ) { 13352 A[2*i] = T[2*i]; 13353 A[2*i+1] = T[2*i+1]; 13354 } 13355 return; 13356 } 13357 // Step 3 : permute 13358 for( i=0; i<n2; i++ ) { 13359 A[2*i] = T[i]; 13360 A[2*i+1] = Tn2[i]; 13361 } 13362 return; 13363 } 13364 13365 13366 void fft2wp4(vector<int> & W,int n,int w){ 13367 W.reserve(n); 13368 const int p = p4; 13369 w=w % p; 13370 if (w<0) w += p; 13371 longlong wk=w; 13372 for (int N=n/2;N;N/=2,wk=(wk*wk)%p){ 13373 int ww=1; 13374 for (int i=0;i<N;ww=(ww*wk)%p,++i){ 13375 W.push_back(ww); 13376 } 13377 } 13378 } 13379 
13380 void fft2w(vector<int> & W,int n,int w,int p){ 13381 W.reserve(n); 13382 w=w % p; 13383 if (w<0) w += p; 13384 longlong wk=w; 13385 for (int N=n/2;N;N/=2,wk=(wk*wk)%p){ 13386 int ww=1; 13387 for (int i=0;i<N;ww=(ww*wk)%p,++i){ 13388 W.push_back(ww); 13389 } 13390 } 13391 } 13392 13393 void fft2(int * A, int n, int w, int p,bool permute){ 13394 #ifndef FXCG 13395 if (debug_infolevel>2) 13396 CERR << CLOCK()*1e-6 << " begin fft2 int " << n << " memory " << memory_usage()*1e-6 << "M" << '\n'; 13397 #endif 13398 vector<int> W,T(n); 13399 fft2w(W,n,w,p); 13400 int * Aend=A+n; 13401 for (int * a=A;a<Aend;++a) 13402 if (*a<0) *a += p; 13403 fft2(A,n,&W.front(),p,&T.front(),permute); 13404 for (int * a=A;a<Aend;++a) 13405 if (*a<0) *a += p; 13406 #ifndef FXCG 13407 if (debug_infolevel>2) 13408 CERR << CLOCK()*1e-6 << " end fft int " << n << " memory " << memory_usage()*1e-6 << "M" << '\n'; 13409 #endif 13410 } 13411 13412 void makepositive(int * p,int n,int modulo){ 13413 int * pend=p+n; 13414 for (;p!=pend;++p){ 13415 int P=*p; 13416 if (P>=0) continue; 13417 P += modulo; 13418 P += (unsigned(P)>>31)*modulo; 13419 *p=P; 13420 } 13421 } 13422 13423 // copy source to target in reverse order 13424 void reverse_copy(const vector<int> & source,vector<int> & target){ 13425 const int * sb=&source.front(), * s=sb+source.size(); 13426 int * t=&target.front(), * tend=t+target.size(); 13427 for (;s>sb && t<tend;){ 13428 --s; 13429 *t=*s; 13430 ++t; 13431 } 13432 for (;t<tend;++t) 13433 *t=0; 13434 } 13435 13436 void makemodulop(int * a,int as,int modulo){ 13437 int *aend=a+as; 13438 if (modulo==p3){ 13439 for (;a!=aend;++a) 13440 *a %= p3; 13441 return; 13442 } 13443 if (modulo==p2){ 13444 for (;a!=aend;++a) 13445 *a %= p2; 13446 return; 13447 } 13448 if (modulo==p1){ 13449 for (;a!=aend;++a) 13450 *a %= p1; 13451 return; 13452 } 13453 for (;a!=aend;++a){ 13454 *a %= modulo; 13455 // if (*a<0) *a += modulo; // *a -= (unsigned(modulo-*a)>>31)*modulo; 13456 } 13457 } 13458 13459 
// res=a*b mod p 13460 bool fft2mult(int ablinfnorm,const vector<int> & a,const vector<int> & b,vector<int> & res,int modulo,vector<int> & W,vector<int> & fftmult_p,vector<int> & fftmult_q,bool reverseatend,bool dividebyn,bool makeplus){ 13461 int as=int(a.size()),bs=int(b.size()),rs=as+bs-1; 13462 int logrs=sizeinbase2(rs); 13463 if (logrs>(modulo==p1?27:25)) return false; 13464 int n=(1u<<logrs); 13465 W.reserve(n); 13466 res.resize(n); 13467 #if 1 13468 //fftmult_p.clear(); 13469 fftmult_p.resize(n); 13470 //fftmult_q.clear(); 13471 fftmult_q.resize(n); 13472 reverse_copy(a,fftmult_p); 13473 reverse_copy(b,fftmult_q); 13474 #else 13475 fftmult_p=a;fftmult_q=b; 13476 reverse(fftmult_p.begin(),fftmult_p.end()); 13477 fftmult_p.resize(n); 13478 reverse(fftmult_q.begin(),fftmult_q.end()); 13479 fftmult_q.resize(n); 13480 #endif 13481 if (ablinfnorm>modulo){ 13482 makemodulop(&fftmult_p.front(),as,modulo); 13483 makemodulop(&fftmult_q.front(),bs,modulo); 13484 } 13485 // r:=1227303670; w:=powmod(r,2^(27-logrs),p1); 13486 // fft(p,w,p1);fft(q,w,p1); res=p.*q; ifft(res,w,p1); 13487 int r=1227303670; 13488 if (modulo==p1){ 13489 if (debug_infolevel>3) 13490 CERR << CLOCK()*1e-6 << " make+ p1 begin" << '\n'; 13491 if (makeplus){ 13492 makepositive(&fftmult_p.front(),as,p1); 13493 makepositive(&fftmult_q.front(),bs,p1); 13494 } 13495 if (debug_infolevel>3) 13496 CERR << CLOCK()*1e-6 << " make+ p1 end" << '\n'; 13497 int w=powmod(r,(1u<<(27-logrs)),p1); 13498 #if 0 13499 W.clear(); 13500 fft4wp1(W,n,w); 13501 fft4p1nopermafter(&fftmult_p.front(),n,&W.front()); 13502 fft4p1nopermafter(&fftmult_q.front(),n,&W.front()); 13503 for (int i=0;i<n;++i){ 13504 fftmult_p[i]=mulmod(fftmult_p[i],fftmult_q[i],p1); 13505 } 13506 w=invmod(w,p1); if (w<0) w+=p1; 13507 W.clear(); 13508 fft2wp1(W,n,w); 13509 fft2p1nopermbefore(&fftmult_p.front(),n,&W.front()); 13510 #else 13511 if (W.empty() || W[0]==0){ 13512 W.clear(); 13513 fft2wp1(W,n,w); 13514 } 13515 
fft2p1nopermafter(&fftmult_p.front(),n,&W.front()); 13516 fft2p1nopermafter(&fftmult_q.front(),n,&W.front()); 13517 for (int i=0;i<n;++i){ 13518 fftmult_p[i]=mulmodp1(fftmult_p[i],fftmult_q[i]); 13519 } 13520 // vector<int> WW(W); fft_reverse(WW,p1); 13521 fft_reverse(W,p1); 13522 //w=invmod(w,p1); if (w<0) w+=p1; W.clear(); fft2wp1(W,n,w); 13523 fft2p1nopermbefore(&fftmult_p.front(),n,&W.front()); 13524 fft_reverse(W,p1); 13525 #endif 13526 fftmult_p.resize(rs); 13527 if (dividebyn){ 13528 int ninv=invmod(n,p1); if (ninv<0) ninv+=p1; 13529 for (int i=0;i<rs;++i){ 13530 fftmult_p[i]=mulmod(ninv,fftmult_p[i],p1); 13531 if (fftmult_p[i]>p1/2) 13532 fftmult_p[i]-=p1; 13533 } 13534 } 13535 if (reverseatend) 13536 reverse(fftmult_p.begin(),fftmult_p.end()); 13537 res.swap(fftmult_p); 13538 return true; 13539 } 13540 if (modulo==p2){// p2 := 1811939329 ; r:=814458146; order 2^26 13541 r=814458146; 13542 int w=powmod(r,(1u<<(26-logrs)),p2); 13543 if (makeplus){ 13544 if (debug_infolevel>3) 13545 CERR << CLOCK()*1e-6 << " make+ p2 begin" << '\n'; 13546 makepositive(&fftmult_p.front(),as,p2); 13547 makepositive(&fftmult_q.front(),bs,p2); 13548 if (debug_infolevel>3) 13549 CERR << CLOCK()*1e-6 << " make+ p2 end" << '\n'; 13550 } 13551 #if 0 13552 W.clear(); 13553 fft4wp2(W,n,w); 13554 fft4p2nopermafter(&fftmult_p.front(),n,&W.front()); 13555 fft4p2nopermafter(&fftmult_q.front(),n,&W.front()); 13556 for (int i=0;i<n;++i){ 13557 fftmult_p[i]=mulmodp2(fftmult_p[i],fftmult_q[i]); 13558 } 13559 w=invmod(w,p2); if (w<0) w+=p2; 13560 W.clear(); 13561 fft2wp2(W,n,w); 13562 fft2p2nopermbefore(&fftmult_p.front(),n,&W.front()); 13563 #else 13564 if (W.empty() || W[0]==0){ 13565 W.clear(); 13566 fft2wp2(W,n,w); 13567 } 13568 fft2p2nopermafter(&fftmult_p.front(),n,&W.front()); 13569 fft2p2nopermafter(&fftmult_q.front(),n,&W.front()); 13570 for (int i=0;i<n;++i){ 13571 fftmult_p[i]=mulmodp2(fftmult_p[i],fftmult_q[i]); 13572 } 13573 fft_reverse(W,p2); 13574 // w=invmod(w,p2); if (w<0) 
w+=p2; W.clear(); fft2wp2(W,n,w); 13575 fft2p2nopermbefore(&fftmult_p.front(),n,&W.front()); 13576 fft_reverse(W,p2); 13577 #endif 13578 fftmult_p.resize(rs); 13579 if (dividebyn){ 13580 int ninv=invmod(n,p2); if (ninv<0) ninv+=p2; 13581 for (int i=0;i<rs;++i){ 13582 fftmult_p[i]=mulmod(ninv,fftmult_p[i],p2); 13583 if (fftmult_p[i]>p2/2) 13584 fftmult_p[i]-=p2; 13585 } 13586 } 13587 if (reverseatend) 13588 reverse(fftmult_p.begin(),fftmult_p.end()); 13589 res.swap(fftmult_p); 13590 return true; 13591 } 13592 if (modulo==p3){// order 2^26 13593 r=2187; 13594 int w=powmod(r,(1u<<(26-logrs)),p3); 13595 if (makeplus){ 13596 makepositive(&fftmult_p.front(),as,p3); 13597 makepositive(&fftmult_q.front(),bs,p3); 13598 } 13599 if (W.empty() || W[0]==0){ 13600 W.clear(); 13601 fft2wp3(W,n,w); 13602 } 13603 fft2p3nopermafter(&fftmult_p.front(),n,&W.front()); 13604 fft2p3nopermafter(&fftmult_q.front(),n,&W.front()); 13605 for (int i=0;i<n;++i){ 13606 fftmult_p[i]=mulmodp3(fftmult_p[i],fftmult_q[i]); 13607 } 13608 fft_reverse(W,p3); 13609 // w=invmod(w,p3); if (w<0) w+=p3; W.clear(); fft2wp3(W,n,w); 13610 fft2p3nopermbefore(&fftmult_p.front(),n,&W.front()); 13611 fft_reverse(W,p3); 13612 fftmult_p.resize(rs); 13613 if (dividebyn){ 13614 int ninv=invmod(n,p3); if (ninv<0) ninv+=p3; 13615 for (int i=0;i<rs;++i){ 13616 fftmult_p[i]=mulmod(ninv,fftmult_p[i],p3); 13617 if (fftmult_p[i]>p3/2) 13618 fftmult_p[i]-=p3; 13619 } 13620 } 13621 if (reverseatend) 13622 reverse(fftmult_p.begin(),fftmult_p.end()); 13623 res.swap(fftmult_p); 13624 return true; 13625 } 13626 if (modulo==p4){// order 2^25 13627 r=1971140334; 13628 int w=powmod(r,(1u<<(25-logrs)),p4); 13629 if (makeplus){ 13630 makepositive(&fftmult_p.front(),as,p4); 13631 makepositive(&fftmult_q.front(),bs,p4); 13632 } 13633 if (W.empty() || W[0]==0){ 13634 W.clear(); 13635 fft2wp4(W,n,w); 13636 } 13637 fft2p4nopermafter(&fftmult_p.front(),n,&W.front()); 13638 fft2p4nopermafter(&fftmult_q.front(),n,&W.front()); 13639 for (int 
i=0;i<n;++i){ 13640 fftmult_p[i]=mulmodp4(fftmult_p[i],fftmult_q[i]); 13641 } 13642 fft_reverse(W,p4); 13643 // w=invmod(w,p4); if (w<0) w+=p4; W.clear(); fft2wp4(W,n,w); 13644 fft2p4nopermbefore(&fftmult_p.front(),n,&W.front()); 13645 fft_reverse(W,p4); 13646 fftmult_p.resize(rs); 13647 if (dividebyn){ 13648 int ninv=invmod(n,p4); if (ninv<0) ninv+=p4; 13649 for (int i=0;i<rs;++i){ 13650 fftmult_p[i]=mulmod(ninv,fftmult_p[i],p4); 13651 if (fftmult_p[i]>p4/2) 13652 fftmult_p[i]-=p4; 13653 } 13654 } 13655 if (reverseatend) 13656 reverse(fftmult_p.begin(),fftmult_p.end()); 13657 res.swap(fftmult_p); 13658 return true; 13659 } 13660 return false; 13661 } 13662 13663 void fft(int * f,int n,const int * w,int m,int * t,int p){ 13664 if (n==1) 13665 return ; 13666 int step=m/n; 13667 int k=0; 13668 if (n%2){ 13669 for (k=3;k*k<=n;k++){ 13670 if (!(n%k)) 13671 break; 13672 } 13673 } 13674 else 13675 k=2; 13676 if (k*k>n){ 13677 // prime size, slow discrete Fourier transform 13678 int *fj,*fend_=f+n-3,*fend=f+n; 13679 int * res=t; 13680 for (int i=0;i<n;++i){ 13681 int tmp (0); 13682 int pos=0,istep=i*step; 13683 for (fj=f;fj<fend_;fj+=3){ 13684 tmp = (tmp + longlong(fj[0])*w[pos])%p; 13685 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 13686 tmp = (tmp + longlong(fj[1])*w[pos])%p; 13687 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 13688 tmp = (tmp + longlong(fj[2])*w[pos])%p; 13689 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 13690 } 13691 for (;fj<fend;++fj){ 13692 tmp = (tmp + longlong(fj[0])*w[pos])%p; 13693 pos += istep-m; pos += (unsigned(pos)>>31)*m;// pos = (pos+istep)%m; 13694 } 13695 *res=tmp; 13696 ++res; 13697 } 13698 for (fj=f,res=t;fj<fend;++fj,++res){ 13699 *fj=*res; 13700 } 13701 return; 13702 } 13703 if (k!=2){ 13704 // assumes n is divisible by k, nk=n/k 13705 // P(X)=P_k(X)*[X^nk]^(k-1)+...+P_1(X) degree(P_k)<nk 13706 // P(w^(kj+l))= Q_l ( (w^k)^j ) 13707 // with 
Q_l=P_1^(w^l)+w^(nk)*P_2^(w^l)+... 13708 unsigned long n2=n/k; 13709 for (int j=0;j<k;j++){ 13710 // find Q[j] 13711 int * Qj=t+n2*j; 13712 for (unsigned i=0;i<n2;i++){ 13713 longlong tmp(0); 13714 int pos=0,jn2step=j*n2*step; 13715 const int * fi=&f[i], *fiend=fi+k*n2; 13716 for (;fi<fiend;fi+=n2){ 13717 tmp = (tmp+longlong(*fi)*w[pos]) % p; 13718 pos += jn2step-m; pos += (unsigned(pos)>>31)*m; 13719 } 13720 Qj[i]=(tmp*w[j*step*i])%p; 13721 } 13722 } 13723 for (int j=0;j<k;++j){ 13724 fft(t+n2*j,n2,w,m,f+n2*j,p); 13725 } 13726 // build fft 13727 for (unsigned i=0;i<n2;++i){ 13728 for (int j=0;j<k;++j,++f) 13729 *f=t[n2*j+i]; 13730 } 13731 return; 13732 } 13733 // Compute r0=sum_[j<n/2] (f_j+f_(j+n/2))*x^j 13734 // and r1=sum_[j<n/2] (f_j-f_(j+n/2))*omega^[step*j]*x^j 13735 unsigned long n2=n/2; 13736 int * r0=t, *r1=t+n2; 13737 int * it=f,*itn=f+n2,*itend=itn; 13738 const int *itk=w; 13739 for (;it!=itend;++itn,itk+=step,++it,++r0,++r1){ 13740 longlong a(*it),b(*itn); 13741 *r0=(a+b)%p; 13742 *r1=((a-b)*(*itk))%p; 13743 } 13744 // Recursive call 13745 int * r0f=f,*r1f=f+n2; 13746 fft(t,n2,w,m,r0f,p); 13747 fft(t+n2,n2,w,m,r1f,p); 13748 // Return a mix of r0/r1 13749 it=t; itend=t+n2; itn=t+n2; 13750 for (;it!=itend;){ 13751 *f=*it; 13752 ++it; ++f; 13753 *f=*itn; 13754 ++itn; ++f; 13755 } 13756 } 13757 13758 void fft(const vector<int> & f,const vector<int> & w ,vector<int> & res,int modulo){ 13759 #if 1 13760 res=f; 13761 vector<int> tmp(w.size()); 13762 fft(&res.front(),int(res.size()),&w.front(),int(w.size()),&tmp.front(),modulo); 13763 return; 13764 #endif 13765 // longlong M=longlong(modulo)*modulo; 13766 unsigned long n=long(f.size()); // unsigned long does not parse with gcc 13767 if (n==4){ 13768 int w1=w[w.size()/4]; 13769 longlong f0=f[0],f1=f[1],f2=f[2],f3=f[3],f01=(f1-f3)*w1; 13770 res.resize(4); 13771 res[0]=(f0+f1+f2+f3)%modulo; 13772 res[1]=(f0-f2+f01)%modulo; 13773 res[2]=(f0-f1+f2-f3)%modulo; 13774 res[3]=(f0-f2-f01)%modulo; 13775 return; 13776 } 
13777 if (n==1){ 13778 res = f; 13779 return ; 13780 } 13781 unsigned long m=long(w.size()); 13782 unsigned long step=m/n; 13783 unsigned k=0; 13784 if (n%2){ 13785 for (k=3;k*k<=n;k++){ 13786 if (!(n%k)) 13787 break; 13788 } 13789 } 13790 else 13791 k=2; 13792 if (k*k>n){ 13793 // prime size, slow discrete Fourier transform 13794 res.clear(); 13795 res.reserve(n); 13796 longlong tmp; 13797 unsigned pos; 13798 for (unsigned i=0;i<n;++i){ 13799 tmp = 0; 13800 pos = 0; 13801 for (unsigned j=0;j<n;++j){ 13802 tmp = (tmp + longlong(f[j])*w[pos])%modulo; 13803 pos = (pos+i*step)%m; 13804 } 13805 res.push_back(int(tmp)); 13806 } 13807 return; 13808 } 13809 if (k!=2){ 13810 // assumes n is divisible by k, nk=n/k 13811 // P(X)=P_k(X)*[X^nk]^(k-1)+...+P_1(X) degree(P_k)<nk 13812 // P(w^(kj+l))= Q_l ( (w^k)^j ) 13813 // with Q_l=P_1^(w^l)+w^(nk)*P_2^(w^l)+... 13814 unsigned long n2=n/k; 13815 vector< vector<int> > Q(k),Qfft(k); 13816 for (unsigned j=0;j<k;++j) 13817 Q[j]=vector<int>(n2,0); 13818 longlong tmp; 13819 for (unsigned j=0;j<k;j++){ 13820 // find Q[j] 13821 for (unsigned i=0;i<n2;i++){ 13822 tmp=0; 13823 for (unsigned J=0;J<k;J++){ 13824 tmp = (tmp+longlong(f[J*n2+i])*w[(J*j*n2*step)%m])%modulo; 13825 } 13826 tmp=(tmp*w[j*step*i])%modulo; 13827 Q[j][i]=int(tmp); 13828 } 13829 fft(Q[j],w,Qfft[j],modulo); 13830 } 13831 // build fft 13832 res.clear(); 13833 res.reserve(n); 13834 for (unsigned i=0;i<n2;++i){ 13835 for (unsigned j=0;j<k;++j) 13836 res.push_back(Qfft[j][i]); 13837 } 13838 return; 13839 } 13840 // Compute r0=sum_[j<n/2] (f_j+f_(j+n/2))*x^j 13841 // and r1=sum_[j<n/2] (f_j-f_(j+n/2))*omega^[step*j]*x^j 13842 unsigned long n2=n/2; 13843 vector<int> r0,r1; 13844 r0.reserve(n2); r1.reserve(n2); 13845 vector<int>::const_iterator it=f.begin(),itn=it+n2,itend=itn,itk=w.begin(); 13846 for (;it!=itend;++itn,itk+=step,++it){ 13847 longlong a(*it),b(*itn); 13848 r0.push_back((a+b)%modulo); 13849 r1.push_back(((a-b)*(*itk))%modulo); 13850 } 13851 // Recursive call 
13852 vector<int> r0f(n2); 13853 fft(r0,w,r0f,modulo); // r0 is not used anymore, alias for r1f 13854 fft(r1,w,r0,modulo); 13855 // Return a mix of r0/r1 13856 res.clear(); 13857 res.reserve(n); 13858 it=r0f.begin(); itend=it+n2; itn=r0.begin(); 13859 for (;it!=itend;){ 13860 res.push_back(*it); 13861 ++it; 13862 res.push_back(*itn); 13863 ++itn; 13864 } 13865 } 13866 13867 13868 // Convolution of p and q, omega a n-th root of unity, n=2^k 13869 // WARNING p0 and q0 are given in ascending power 13870 void fftconv(const modpoly & p,const modpoly & q,unsigned long k,unsigned long n,const gen & omega,modpoly & pq,environment * env){ 13871 vecteur w; 13872 w.reserve(n); 13873 w.push_back(1); 13874 gen omegan(omega),tmp; 13875 for (unsigned long i=1;i<n;++i){ 13876 w.push_back(omegan); 13877 omegan=omegan*omega; 13878 if (env && env->moduloon) 13879 omegan=smod(omegan,env->modulo); 13880 } 13881 modpoly alpha(n),beta(n),gamma(n); 13882 fft(p,w,alpha,env); 13883 fft(q,w,beta,env); 13884 for (unsigned long i=0;i<n;++i){ 13885 tmp=alpha[i]*beta[i]; 13886 if (env && env->moduloon) 13887 gamma[i]=smod(tmp,env->modulo); 13888 else 13889 gamma[i]=tmp; 13890 } 13891 vecteur winv(1,1); 13892 winv.reserve(n); 13893 for (unsigned long i=1;i<n;++i) 13894 winv.push_back(w[n-i]); 13895 fft(gamma,winv,pq,env); 13896 pq=pq/gen(int(n)); 13897 /* 13898 modpoly check(n); 13899 fft(alpha,winv,check,env); 13900 check=check/gen(int(n)); 13901 */ 13902 } 13903 13904 // Convolution of p and q, omega a n-th root of unity, n=2^k 13905 // p and q are given in descending power order 13906 void fftconv(const modpoly & p0,const modpoly & q0,unsigned long k,const gen & omega,modpoly & pq,environment * env){ 13907 unsigned long n= 1u <<k; 13908 // Adjust sizes 13909 modpoly p(p0),q(q0); 13910 reverse(p.begin(),p.end()); 13911 reverse(q.begin(),q.end()); 13912 unsigned long ps=long(p.size()),qs=long(q.size()); 13913 for (unsigned long i=ps;i<n;++i) 13914 p.push_back(0); 13915 for (unsigned long 
i=qs;i<n;++i) 13916 q.push_back(0); 13917 fftconv(p,q,k,n,omega,pq,env); 13918 reverse(pq.begin(),pq.end()); 13919 pq=trim(pq,env); 13920 } 13921 13922 void vectorlonglong2vecteur(const vector<longlong> & v,vecteur & w){ 13923 size_t s=v.size(); 13924 w.resize(s); 13925 for (size_t i=0;i<s;++i) 13926 w[i]=v[i]; 13927 } 13928 13929 void vecteur2vectorint(const vecteur & v,int p,vector<int> & res){ 13930 vecteur::const_iterator it=v.begin(),itend=v.end(); 13931 res.clear(); 13932 res.reserve(itend-it); 13933 int tmp; 13934 if (p==0){ 13935 for (;it!=itend;++it){ 13936 tmp=it->val; 13937 tmp += (unsigned(tmp)>>31)*p; // make it positive now! 13938 res.push_back(tmp); 13939 } 13940 } 13941 else { 13942 for (;it!=itend;++it){ 13943 if (it->type==_ZINT) 13944 tmp=modulo(*it->_ZINTptr,p); 13945 else 13946 tmp=it->val % p; 13947 tmp += (unsigned(tmp)>>31)*p; // make it positive now! 13948 res.push_back(tmp); 13949 } 13950 } 13951 } 13952 13953 struct thread_fftmult_t { 13954 const vecteur * p,*q; 13955 gen P,Q; 13956 vecteur * res; 13957 int prime; 13958 vector<int> * a,*b,*resp1,*resp2,*resp3,*Wp1,*Wp2,*Wp3,*Wp4,*tmp_p,*tmp_q; 13959 }; 13960 13961 13962 void * do_thread_fftmult(void * ptr_){ 13963 thread_fftmult_t * ptr=(thread_fftmult_t *) ptr_; 13964 modpoly curres; 13965 if (fftmultp1234(*ptr->p,*ptr->q,ptr->P,ptr->Q,curres,ptr->prime,*ptr->a,*ptr->b,*ptr->resp1,*ptr->resp2,*ptr->resp3,*ptr->Wp1,*ptr->Wp2,*ptr->Wp3,*ptr->Wp4,*ptr->tmp_p,*ptr->tmp_q,false)) 13966 return ptr; 13967 return 0; 13968 } 13969 13970 // valid values for nbits=24 or 16, zsize>=2 13971 #ifndef USE_GMP_REPLACEMENTS 13972 static void zsplit(const vecteur & p, int zsize,int nbits,vector<int> & pz){ 13973 size_t s=p.size(); 13974 int * target=&pz[0]; 13975 int nbytes=nbits/8; 13976 int mask=0xffffff; 13977 if (nbits==16) 13978 mask=0xffff; 13979 vector<unsigned> tmp(zsize+2); 13980 for (size_t i=0;i<s;++i,target+=zsize){ 13981 gen z=p[i]; 13982 if (z.type==_INT_){ 13983 int Z=z.val; 13984 if (Z>0){ 
13985 *target = Z & mask; 13986 target[1] = Z >> nbits; 13987 } 13988 else { 13989 Z=-Z; 13990 *target = -(Z & mask); 13991 target[1] = -(Z >> nbits); 13992 } 13993 } 13994 else { 13995 size_t countp=0; 13996 for (int j=0;j<zsize+2;++j) 13997 tmp[j]=0; 13998 mpz_export(&tmp[0],&countp,-1,4,0,0,*z._ZINTptr); 13999 if (nbits==16){ 14000 for (int i=0;i<countp;++i){ 14001 target[2*i]=tmp[i] & 0xffff; 14002 target[2*i+1]=tmp[i] >> 16; 14003 } 14004 } 14005 else { 14006 int * targetsave=target; 14007 for (int i=0;i<countp;i+=3){ 14008 *target=tmp[i] & 0xffffff; 14009 ++target; 14010 *target=((tmp[i+1]&0xffff) << 8) | (tmp[i]>>24); 14011 ++target; 14012 *target=((tmp[i+2]&0xff)<< 16) | (tmp[i+1]>>16); 14013 ++target; 14014 *target=tmp[i+2] >> 8; 14015 ++target; 14016 } 14017 target = targetsave; 14018 } 14019 if (mpz_sgn(*z._ZINTptr)<0){ 14020 for (int i=0;i<zsize;++i) 14021 target[i]=-target[i]; 14022 } 14023 } 14024 } 14025 } 14026 #endif 14027 14028 #ifndef USE_GMP_REPLACEMENTS 14029 // pz is not const because we modify it in place for sign/carries handling 14030 static void zbuild(vector<longlong> & pz,int zsize,int nbits,vecteur & p){ 14031 size_t s=pz.size()/zsize; 14032 int base=1<<nbits; 14033 longlong base2=longlong(base)*base; 14034 int nbytes=nbits/8; 14035 longlong mask=0xffffffLL; 14036 int nbits2=2*nbits; 14037 if (nbits==16){ 14038 mask=0xffff; 14039 } 14040 vector<int> tmp(zsize+5); 14041 vector<unsigned> tmp2(zsize+2); 14042 mpz_t z; 14043 mpz_init(z); 14044 longlong * source=&pz[0]; 14045 for (size_t i=0;i<s;++i){ 14046 // handle sign/carry from source[0..zsize-1] to tmp[0..zsize+2] 14047 longlong * end=source+zsize; 14048 longlong * begin=source; 14049 // find sign 14050 for (--end;end>=begin;--end){ 14051 if (*end) 14052 break; 14053 } 14054 if (end<begin){ 14055 source += zsize; 14056 continue; // coeff in p is 0 14057 } 14058 // check previous for carry 14059 longlong U=*end; 14060 if (end>begin && U/(1<<nbits)==0){ 14061 *end=0; 14062 --end; 14063 
*end += U*(1<<nbits); 14064 U=*end; 14065 } 14066 int sign=ulonglong(U)>>63; // 1 for neg, 0 for positive 14067 ++end; 14068 if (sign){ 14069 for (;begin<end;++begin){ 14070 *begin=-*begin; 14071 } 14072 } 14073 // now make all coeff positive 14074 longlong finalcarry=0; int finalpow2=0; 14075 begin=source; 14076 for (;;){ 14077 if (*begin>=0){ 14078 ++begin; 14079 if (begin==end) 14080 break; 14081 continue; 14082 } 14083 longlong s=1+(ulonglong(-*begin)>>nbits); 14084 *begin += s*base; 14085 ++begin; 14086 if (begin==end){ 14087 if (end==source+zsize){ 14088 CERR << "unexpected carry" << '\n'; 14089 break; 14090 } 14091 finalcarry=sign?s:-s; 14092 finalpow2=end-source; 14093 break; 14094 } 14095 *begin -= s; 14096 } 14097 // make all coeff smaller than base 14098 for (int j=0;j<zsize+5;++j) 14099 tmp[j]=0; 14100 int * ptr=&tmp[0]; 14101 begin=source; 14102 for (;;){ 14103 *ptr=(*begin) & mask; 14104 ++ptr; 14105 if (begin+1==end) 14106 break; 14107 begin[1] += (ulonglong(*begin) >> nbits); 14108 ++begin; 14109 } 14110 *ptr = (ulonglong(*begin) >> nbits) & mask; ++ptr; 14111 *ptr = (ulonglong(*begin) >> (2*nbits)) & mask; ++ptr; 14112 if (nbits==16) 14113 *ptr = (ulonglong(*begin) >> 48) & mask; 14114 source += zsize; 14115 // base 2^16/2^24 to 2^32 14116 for (int j=0;j<zsize+2;++j) 14117 tmp2[j]=0; 14118 if (nbits==16){ 14119 int s =(zsize+2)/2; 14120 for (int i=0;i<s;++i){ 14121 tmp2[i]=tmp[2*i] | (unsigned(tmp[2*i+1])<<16); 14122 } 14123 } 14124 else { 14125 int j=0; 14126 for (int i=0;i<zsize+2;i+=4){ 14127 tmp2[j]=tmp[i] | ((unsigned(tmp[i+1])&0xff)<<24); 14128 ++j; 14129 tmp2[j]=(tmp[i+1]>>8) | ((unsigned(tmp[i+2])&0xffff)<<16); 14130 ++j; 14131 tmp2[j]=(tmp[i+2]>>16) | (unsigned(tmp[i+3])<<8); 14132 ++j; 14133 } 14134 } 14135 mpz_import(z,zsize,-1,4,0,0,&tmp2[0]); 14136 if (sign) 14137 mpz_neg(z,z); 14138 if (mpz_sizeinbase(z,2)<31) 14139 p[i]=mpz_get_si(z); 14140 else 14141 p[i]=z; 14142 if (finalcarry){ 14143 p[i] = 
p[i]+gen(finalcarry)*pow(plus_two,finalpow2*nbits,context0); 14144 } 14145 } 14146 mpz_clear(z); 14147 } 14148 #endif 14149 14150 // ichinrem reconstruct in resp1 from resp1/resp2 14151 void ichinremp1p2(const std::vector<int> & resp1,const std::vector<int> & resp2,int n,std::vector<int> & Res,int modulo){ 14152 size_t rs=resp1.size(); 14153 if (&resp1!=&Res) 14154 Res.resize(rs); 14155 if (debug_infolevel>2) 14156 CERR << CLOCK()*1e-6 << " begin ichinremp1p2 mod " << modulo << '\n'; 14157 int p1modinv=-9;//invmod(p1,p2); 14158 int modulo2=modulo/2; 14159 int n1=invmod(n,p1); if (n1<0) n1+=p1; 14160 int n2=invmod(n,p2); if (n2<0) n2+=p2; 14161 for (int i=0;i<rs;++i){ 14162 int A=resp1[i],B=resp2[i]; 14163 A=mulmod(n1,A,p1); 14164 B=mulmod(n2,B,p2); 14165 // a mod p1, b mod p2 -> res mod p1*p2 14166 longlong res=A+((longlong(p1modinv)*(B-longlong(A)))%p2)*p1; 14167 //res += (ulonglong(res)>>63)*p1p2; res -= (ulonglong(p1p2/2-res)>>63)*modulo; 14168 if (res>p1p2sur2) res-=p1p2; else if (res<=-p1p2sur2) res+=p1p2; 14169 //while (res>p1p2sur2) res-=p1p2; while (res<-p1p2sur2) res+=p1p2; 14170 A=res % modulo; 14171 A += (unsigned(A)>>31)*modulo; // A now positive 14172 A -= (unsigned(modulo2-A)>>31)*modulo; 14173 // if (A>modulo2) A-=modulo; 14174 Res[i]=A; 14175 } 14176 if (debug_infolevel>2) 14177 CERR << CLOCK()*1e-6 << " end ichinremp1p2 mod " << modulo << '\n'; 14178 } 14179 14180 // ichinrem reconstruct in resp1 from resp1/resp2/resp3 14181 void ichinremp1p2p3(const std::vector<int> & resp1,const std::vector<int> & resp2,const std::vector<int> & resp3,int n,std::vector<int> & res,int modulo){ 14182 if (debug_infolevel>2) 14183 CERR << CLOCK()*1e-6 << " begin ichinremp1p2p3 " << modulo << '\n'; 14184 size_t rs=resp1.size(); 14185 if (&resp1!=&res) 14186 res.resize(rs); 14187 int n1=invmod(n,p1); if (n1<0) n1+=p1; 14188 int n2=invmod(n,p2); if (n2<0) n2+=p2; 14189 int n3=invmod(n,p3); if (n3<0) n3+=p3; 14190 int z1=invmod(p1,p2); if (z1<0) z1+=p2; 14191 int 
z2=invmod((longlong(p1)*p2) % p3,p3); if (z2<0) z2+=p3; 14192 int z3=(longlong(p1)*p2)%modulo; 14193 int modulo2=modulo/2; 14194 for (int i=0;i<rs;++i){ 14195 int u1=resp1[i],u2=resp2[i],u3=resp3[i]; 14196 //u1 += (unsigned(u1)>>31)*p1; 14197 //u2 += (unsigned(u2)>>31)*p2; 14198 //u3 += (unsigned(u3)>>31)*p3; 14199 u1=mulmod(n1,u1,p1); 14200 u2=mulmod(n2,u2,p2); 14201 //u3=mulmod(n3,u3,p3); 14202 int v1=u1; 14203 // 4 v2=(u2−v1)×z1 mod p2 14204 int v2=((longlong(u2)+p2-v1)*z1)%p2; 14205 // 5 t=(n3×u3−v1−v2×p1) mod p3 14206 int t=(longlong(u3)*n3-v1-longlong(v2)*p1)%p3; 14207 //t += (unsigned(t)>>31)*p3; // if (t<0) t+=p3; 14208 // 6 v3 =t×z2 mod p3 14209 int v3=smod(longlong(t)*z2,p3); 14210 // 7 u=(v1+v2×p1+v3×z3) mod q 14211 longlong u=(v1+longlong(v2)*p1+longlong(v3)*z3); 14212 u %= modulo; 14213 if (u>modulo2) u-=modulo; else if (u<-modulo2) u+=modulo; 14214 res[i]=u; 14215 } 14216 if (debug_infolevel>2) 14217 CERR << CLOCK()*1e-6 << " end ichinremp1p2p3 " << modulo << '\n'; 14218 } 14219 14220 void to_multi_fft(const vecteur & A,const gen & modulo,std::vector<int> & Wp1,std::vector<int> & Wp2,std::vector<int> & Wp3,unsigned long n,multi_fft_rep & f,bool reverse,bool makeplus){ 14221 f.modulo=modulo; 14222 vector<int> a,P; 14223 to_fft(A,0,Wp1,Wp2,Wp3,a,n,f.p1p2p3,reverse,makeplus); 14224 gen pip=p3*gen(longlong(p1)*p2); 14225 gen lim=4*gen(long(n))*gen(modulo)*gen(modulo); 14226 // linfnorm(A*B+C*D)<=(degree(A*B)+degree(C*D))*modulo^2<pip/2 14227 // 4*n because A*B+C*D can not be "reduced" more than half the degree of 14228 // A*B and C*D 14229 double start=n>8000?p1-1:std::sqrt(double(p1p2)/n)/2.0; 14230 for (int p=start;is_greater(lim,pip,context0);p--){ 14231 p=prevprime(p).val; 14232 if (p==p2 || p==p3) 14233 p=prevprime(p-1).val; 14234 P.push_back(p); 14235 pip=p*pip; 14236 } 14237 //CERR << P.size() << endl; 14238 f.v.resize(P.size()); 14239 for (int i=0;i<P.size();++i){ 14240 to_fft(A,P[i],Wp1,Wp2,Wp3,a,n,f.v[i],reverse,makeplus); 14241 #if 0 // 1 for 
debug 14242 vector<int> tmp,tmp2,tmp3,tmp4; vecteur AA(smod(A,f.v[i].modulo)); 14243 from_fft(f.v[i],Wp1,Wp2,Wp3,tmp,tmp2,tmp3,tmp4,true); 14244 addmod(tmp2,tmp3,2); 14245 #endif 14246 } 14247 } 14248 14249 // p -> f it's FFT representation, p is reversed before FFT is called 14250 // a is a temporary vector = p mod modulo after call 14251 // Wp1, Wp2, Wp3 is a vector of powers of the n-th root of unity mod p1,p2,p3 14252 // if size is not n or Wp[0]==0, Wp is computed 14253 // do not share Wp1/p2/p3 between different threads 14254 void to_fft(const vecteur & p,int modulo,std::vector<int> & Wp1,std::vector<int> & Wp2,std::vector<int> & Wp3,std::vector<int> & a,int n,fft_rep & f,bool reverse,bool makeplus){ 14255 if (modulo==0){ 14256 vecteur2vectorint(p,p1,a); 14257 to_fft(a,p1,Wp1,Wp2,Wp3,n,f,reverse,makeplus,false); 14258 vecteur2vectorint(p,p2,a); 14259 to_fft(a,p2,Wp1,Wp2,Wp3,n,f,reverse,makeplus,false); 14260 vecteur2vectorint(p,p3,a); 14261 to_fft(a,p3,Wp1,Wp2,Wp3,n,f,reverse,makeplus,false); 14262 f.modulo=0; 14263 } 14264 else { 14265 vecteur2vectorint(p,modulo,a); 14266 to_fft(a,modulo,Wp1,Wp2,Wp3,n,f,reverse,makeplus,true); 14267 } 14268 } 14269 14270 bool dop3(int modulo,int n){ 14271 //return true; 14272 return modulo==p3 || (modulo!=p1 && modulo!=p2 && modulo*double(modulo)>p1p2/(1.999999*n)); 14273 } 14274 14275 // Faster code would require truncated FFT, see David Harvey 14276 // A cache-friendly truncated FFT 14277 void to_fft(const std::vector<int> & a,int modulo,int w,std::vector<int> & Wp,int n,std::vector<int> & f,int reverse,bool makeplus,bool makemod){ 14278 #if defined GIAC_PRECOND || defined GIAC_CACHEW 14279 int nw=n; 14280 #else 14281 int nw=n/2; 14282 #endif 14283 double invp=find_invp(modulo); 14284 int s=giacmin(a.size(),n); 14285 int logrs=sizeinbase2(n-1); 14286 if (reverse==1){ 14287 #if 0 14288 if (&f!=&a) 14289 f=a; 14290 reverse_assign(f,n,modulo); 14291 #else 14292 if (&f==&a){ 14293 if (f.size()>n) 14294 
reverse_assign(f,n,modulo); 14295 else { 14296 vector<int>::iterator it=f.begin(),itend=f.end(); 14297 for (;it!=itend;++it) 14298 *it += (*it>>31)&modulo; 14299 std::reverse(f.begin(),f.end()); 14300 f.resize(n); 14301 } 14302 } 14303 else 14304 reverse_assign(a,f,n,modulo); 14305 #endif 14306 } 14307 else { 14308 if (reverse==0){ 14309 if (&f!=&a) 14310 f=a; 14311 f.resize(n); 14312 } 14313 else { 14314 if (0 && &f!=&a && a.size()<=n){ 14315 f.clear(); f.reserve(n); 14316 f.resize(n-a.size()); 14317 vector<int>::const_iterator it=a.begin(),itend=a.end(); 14318 for (;it!=itend;++it) 14319 f.push_back(*it + ((*it>>31)&modulo)); 14320 } 14321 else { 14322 if (&f!=&a) 14323 f=a; 14324 vector<int>::iterator it=f.begin(),itend=f.end(); 14325 for (;it!=itend;++it) 14326 *it += (*it>>31)&modulo; 14327 if (f.size()>n){ // reduce mod x^n-1 14328 vector<int>::reverse_iterator it=f.rbegin(),itend=it+n,jt=itend,jtend=f.rend(); 14329 for (;jt<jtend;++jt){ 14330 int i=*it; 14331 i += *jt-modulo; 14332 i += (i>>31) & modulo; 14333 *it =i; 14334 ++it; 14335 if (it==itend) 14336 it=f.rbegin(); 14337 } 14338 f.erase(f.begin(),f.end()-n); 14339 } 14340 else 14341 f.insert(f.begin(),n-f.size(),0); 14342 } 14343 } 14344 } 14345 if (makemod) 14346 makemodulop(&f.front(),s,modulo); 14347 if (makeplus) 14348 makepositive(&f.front(),s,modulo); 14349 if (Wp.size()<nw || Wp[0]==0){ 14350 ++wpcount; 14351 Wp.clear(); 14352 fft2wp(Wp,n,w,modulo); 14353 } 14354 #ifdef GIAC_CACHEW 14355 int ws=Wp.size()/2,shift=0; 14356 for (;ws>=n;ws/=2) 14357 shift+=ws; 14358 //vector<int> dbgv(Wp.begin()+shift,Wp.end()); CERR << dbgv << "\n"; 14359 fft2pnopermafter(&f.front(),n,&Wp.front()+shift,modulo,invp,1); 14360 #else 14361 fft2pnopermafter(&f.front(),n,&Wp.front(),modulo,invp,Wp.size()/nw); 14362 #endif 14363 } 14364 14365 void to_fft(const std::vector<int> & a,int modulo,std::vector<int> & Wp1,std::vector<int> & Wp2,std::vector<int> & Wp3,int n,fft_rep & f,bool reverse,bool makeplus,bool makemod){ 
14366 #if defined GIAC_PRECOND || defined GIAC_CACHEW 14367 int nw=n; 14368 #else 14369 int nw=n/2; 14370 #endif 14371 int s=giacmin(a.size(),n); 14372 f.modulo=modulo; 14373 int logrs=sizeinbase2(n-1); 14374 if (modulo!=p2 && modulo!=p3){ 14375 if (reverse){ 14376 f.modp1.resize(n); 14377 reverse_assign(a,f.modp1,n,p1); 14378 } 14379 else { 14380 f.modp1=a; 14381 f.modp1.resize(n); 14382 } 14383 if (makemod) 14384 makemodulop(&f.modp1.front(),s,p1); 14385 if (makeplus) makepositive(&f.modp1.front(),s,p1); 14386 const int r1=1227303670; 14387 if (Wp1.size()!=nw || Wp1[0]==0){ 14388 int w=powmod(r1,(1u<<(27-logrs)),p1); 14389 Wp1.clear(); 14390 fft2wp1(Wp1,n,w); 14391 } 14392 fft2p1nopermafter(&f.modp1.front(),n,&Wp1.front()); 14393 } 14394 if (modulo!=p1 && modulo!=p3){ 14395 if (reverse){ 14396 f.modp2.resize(n); 14397 reverse_assign(a,f.modp2,n,p2); 14398 } 14399 else { 14400 f.modp2=a; 14401 f.modp2.resize(n); 14402 } 14403 if (makemod) 14404 makemodulop(&f.modp2.front(),s,p2); 14405 if (makeplus) makepositive(&f.modp2.front(),s,p2); 14406 const int r2=814458146; 14407 if (Wp2.size()!=nw || Wp2[0]==0){ 14408 int w=powmod(r2,(1u<<(26-logrs)),p2); 14409 Wp2.clear(); 14410 fft2wp2(Wp2,n,w); 14411 } 14412 fft2p2nopermafter(&f.modp2.front(),n,&Wp2.front()); 14413 } 14414 if (dop3(modulo,n)){ 14415 if (reverse){ 14416 f.modp3.resize(n); 14417 reverse_assign(a,f.modp3,n,p3); 14418 } 14419 else { 14420 f.modp3=a; 14421 f.modp3.resize(n); 14422 } 14423 if (makemod) 14424 makemodulop(&f.modp3.front(),s,p3); 14425 if (makeplus) makepositive(&f.modp3.front(),s,p3); 14426 const int r3=2187; 14427 if (Wp3.size()!=nw || Wp3[0]==0){ 14428 int w=powmod(r3,(1u<<(26-logrs)),p3); 14429 Wp3.clear(); 14430 fft2wp3(Wp3,n,w); 14431 } 14432 fft2p3nopermafter(&f.modp3.front(),n,&Wp3.front()); 14433 } 14434 } 14435 14436 void multmodp1(const vector<int> & a,const vector<int> & b,vector<int> & c){ 14437 c.resize(a.size()); 14438 const int * 
aptr=&a.front(),*aend=aptr+a.size(),*bptr=&b.front(); 14439 int *cptr=&c.front(); 14440 for (;aptr!=aend;++aptr,++bptr,++cptr){ 14441 *cptr=(longlong(*aptr)*(*bptr))% p1; 14442 } 14443 } 14444 14445 void multmodp2(const vector<int> & a,const vector<int> & b,vector<int> & c){ 14446 c.resize(a.size()); 14447 const int * aptr=&a.front(),*aend=aptr+a.size(),*bptr=&b.front(); 14448 int *cptr=&c.front(); 14449 for (;aptr!=aend;++aptr,++bptr,++cptr){ 14450 *cptr=(longlong(*aptr)*(*bptr))% p2; 14451 } 14452 } 14453 14454 void multmodp3(const vector<int> & a,const vector<int> & b,vector<int> & c){ 14455 c.resize(a.size()); 14456 const int * aptr=&a.front(),*aend=aptr+a.size(),*bptr=&b.front(); 14457 int *cptr=&c.front(); 14458 for (;aptr!=aend;++aptr,++bptr,++cptr){ 14459 *cptr=(longlong(*aptr)*(*bptr))% p3; 14460 } 14461 } 14462 14463 bool dotmult(const fft_rep & fa,const fft_rep & fb,fft_rep & f){ 14464 if (fa.modulo!=fb.modulo) 14465 return false; 14466 f.modulo=fa.modulo; 14467 multmodp1(fa.modp1,fb.modp1,f.modp1); 14468 multmodp2(fa.modp2,fb.modp2,f.modp2); 14469 multmodp3(fa.modp3,fb.modp3,f.modp3); 14470 return true; 14471 } 14472 14473 // FFT representation f -> res 14474 // Wp1,p2,p3 should be computed with to_fft 14475 // do not share Wp1/p2/p3 between different threads 14476 // division by n=size of f.modp1/p2/p3 is done 14477 // result should normally be reversed at end 14478 // tmp1/p2/p3 are temporary vectors 14479 void from_fft(const fft_rep & f,std::vector<int> & Wp1,std::vector<int> & Wp2,std::vector<int> & Wp3,std::vector<int> & res,std::vector<int> & tmp1,std::vector<int> & tmp2,std::vector<int> & tmp3,bool reverseatend,bool revw){ 14480 int p=f.modulo; 14481 int n=p==p2?f.modp2.size():(p==p3?f.modp3.size():f.modp1.size()); 14482 if (p!=p2 && p!=p3){ 14483 tmp1=f.modp1; 14484 if (revw) fft_reverse(Wp1,p1); 14485 fft2p1nopermbefore(&tmp1.front(),n,&Wp1.front()); 14486 if (revw) fft_reverse(Wp1,p1); 14487 if (p==p1){ 14488 tmp1.swap(res); 14489 
precond_mulmod(res,invmod(n,p1),p1); 14490 if (reverseatend) 14491 reverse(res.begin(),res.end()); 14492 return; 14493 } 14494 } 14495 if (p!=p1 && p!=p3){ 14496 tmp2=f.modp2; 14497 if (revw) fft_reverse(Wp2,p2); 14498 fft2p2nopermbefore(&tmp2.front(),n,&Wp2.front()); 14499 if (revw) fft_reverse(Wp2,p2); 14500 if (p==p2){ 14501 tmp2.swap(res); 14502 precond_mulmod(res,invmod(n,p2),p2); 14503 if (reverseatend) 14504 reverse(res.begin(),res.end()); 14505 return; 14506 } 14507 } 14508 if (dop3(f.modulo,n)){ 14509 tmp3=f.modp3; 14510 if (revw) fft_reverse(Wp3,p3); 14511 fft2p3nopermbefore(&tmp3.front(),n,&Wp3.front()); 14512 if (revw) fft_reverse(Wp3,p3); 14513 if (p==p3){ 14514 tmp3.swap(res); 14515 precond_mulmod(res,invmod(n,p3),p3); 14516 if (reverseatend) 14517 reverse(res.begin(),res.end()); 14518 return; 14519 } 14520 ichinremp1p2p3(tmp1,tmp2,tmp3,n,res,f.modulo); 14521 } 14522 else 14523 ichinremp1p2(tmp1,tmp2,n,res,f.modulo); 14524 if (reverseatend) 14525 reverse(res.begin(),res.end()); 14526 } 14527 14528 void from_fft(const std::vector<int> & f,int p,std::vector<int> & Wp,std::vector<int> & res,bool reverseatend,bool revw){ 14529 if (&res!=&f) res=f; 14530 int n=res.size(); 14531 #if defined GIAC_PRECOND || defined GIAC_CACHEW 14532 int nw=n; 14533 #else 14534 int nw=n/2; 14535 #endif 14536 double invp=find_invp(p); 14537 if (revw) fft_reverse(Wp,p); 14538 #ifdef GIAC_CACHEW 14539 int ws=Wp.size()/2,shift=0; 14540 for (;ws>=n;ws/=2) 14541 shift+=ws; 14542 //vector<int> dbgv(Wp.begin()+shift,Wp.end()); CERR << dbgv << "\n"; 14543 fft2pnopermbefore(&res.front(),n,&Wp.front()+shift,p,invp,1); 14544 #else 14545 fft2pnopermbefore(&res.front(),n,&Wp.front(),p,invp,Wp.size()/nw); 14546 #endif 14547 if (revw) fft_reverse(Wp,p); 14548 int m=invmod(n,p); 14549 precond_mulmod(res,m,p); 14550 if (reverseatend) 14551 reverse(res.begin(),res.end()); 14552 } 14553 14554 void from_multi_fft(const multi_fft_rep & f,std::vector<int> & Wp1,std::vector<int> & 
Wp2,std::vector<int> & Wp3,vecteur & res,bool reverseatend){ 14555 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 14556 const gen & modulo=f.modulo; 14557 gen pip; 14558 vector<int> tmp,tmp1,tmp2,tmp3; 14559 tmp=f.p1p2p3.modp1; 14560 int n=tmp.size(); 14561 fft2p1nopermbefore(&tmp.front(),n,&Wp1.front()); 14562 int ninv=invmod(n,p1); 14563 precond_mulmod(tmp,ninv,p1); 14564 tmp1=f.p1p2p3.modp2; 14565 fft2p2nopermbefore(&tmp1.front(),n,&Wp2.front()); 14566 ninv=invmod(n,p2); 14567 precond_mulmod(tmp,ninv,p2); 14568 int zsize=64+sizeinbase2(4*n*modulo*modulo); 14569 // vector_int2vecteur(tmp,res); vecteur cur; vector_int2vecteur(tmp1,cur); cur=ichinrem(res,cur,p1,p2); 14570 ichinremp1p2(tmp,tmp1,tmp.size(),res,zsize); 14571 pip=longlong(p1)*p2; 14572 tmp=f.p1p2p3.modp3; 14573 fft2p3nopermbefore(&tmp.front(),n,&Wp3.front()); 14574 ninv=invmod(n,p3); 14575 precond_mulmod(tmp,ninv,p3); 14576 //vector_int2vecteur(tmp,cur); cur=ichinrem(res,cur,pip,p3); 14577 ichinrem_inplace(res,tmp,pip,p3); 14578 pip=p3*pip; 14579 for (int i=0;i<f.v.size();++i){ 14580 int p=f.v[i].modulo; 14581 from_fft(f.v[i],Wp1,Wp2,Wp3,tmp,tmp1,tmp2,tmp3,false,false); 14582 // vector_int2vecteur(tmp,cur); cur=ichinrem(res,cur,pip,p); 14583 ichinrem_inplace(res,tmp,pip,p); 14584 pip=p*pip; 14585 } 14586 if (reverseatend) 14587 reverse(res.begin(),res.end()); 14588 fft_reverse(Wp1,p1); fft_reverse(Wp2,p2); fft_reverse(Wp3,p3); 14589 //smod(res,pip,res); 14590 } 14591 14592 // Product of polynomial with integer coeffs using FFT 14593 bool fftmultp1234(const modpoly & p,const modpoly & q,const gen &P,const gen &Q,modpoly & pq,int modulo, vector<int> & a,vector<int>&b,vector<int> &resp1,vector<int>&resp2,vector<int> & resp3, vector<int> & Wp1,vector<int> & Wp2,vector<int> & Wp3,vector<int> & Wp4,vector<int> &tmp_p,vector<int> &tmp_q,bool compute_pq){ 14594 int ps=int(p.size()),qs=int(q.size()),mindeg=giacmin(ps-1,qs-1); 14595 int rs=ps+qs-1; 14596 int logrs=sizeinbase2(rs); 14597 if 
(logrs>25) return false; 14598 int n=(1u<<logrs); 14599 gen PQ=P*Q; 14600 if (compute_pq){ pq.clear(); pq.reserve(rs); } 14601 #if 0 // def HAVE_LIBGMP 14602 if (modulo){ 14603 vector<int> a,b; 14604 int shift=int(std::ceil(std::log(modulo*double(modulo)*(mindeg+1))/std::log(2.0))); 14605 if (shift<=64) shift=64; 14606 else shift=128; 14607 if (shift==64){ 14608 if (debug_infolevel>2) 14609 CERR << CLOCK()*1e-6 << " begin Kronecker gmp conversion " << rs << '\n'; 14610 vecteur2vectorint(p,modulo,a); 14611 //makepositive(&a.front(),ps,modulo); 14612 vecteur2vectorint(q,modulo,b); 14613 //makepositive(&b.front(),qs,modulo); 14614 mpz_t tmp1,tmp2; 14615 mpz_init2(tmp1,shift*rs); 14616 mpz_init2(tmp2,shift*rs); 14617 vector<longlong> A(ps),B(qs),C(rs); 14618 for (int i=0;i<ps;++i) 14619 A[i]=a[i]; 14620 for (int i=0;i<qs;++i) 14621 B[i]=b[i]; 14622 mpz_import(tmp1,ps,1,sizeof(longlong),0,0,&A.front()); 14623 mpz_import(tmp2,qs,1,sizeof(longlong),0,0,&B.front()); 14624 //CERR << gen(tmp1) << '\n' << gen(tmp2) << '\n'; 14625 if (debug_infolevel>2) 14626 CERR << CLOCK()*1e-6 << " begin Kronecker gmp mult " << rs << '\n'; 14627 mpz_mul(tmp1,tmp1,tmp2); 14628 if (debug_infolevel>2) 14629 CERR << CLOCK()*1e-6 << " end Kronecker gmp mult " << rs << '\n'; 14630 size_t countp; 14631 mpz_export(&C.front(),&countp,1,sizeof(longlong),0,0,tmp1); 14632 for (int i=0;i<rs;++i){ 14633 int tmp(C[i] % modulo); 14634 if (tmp>modulo/2) tmp-=modulo; 14635 pq.push_back(tmp); 14636 } 14637 mpz_clear(tmp1); mpz_clear(tmp2); 14638 if (debug_infolevel>2) 14639 CERR << CLOCK()*1e-6 << " end Kronecker conversion " << rs << '\n'; 14640 return true; 14641 } 14642 } 14643 #endif 14644 // Check for large coefficients (degree not too large) 14645 // (around 1000 for coeff of size 2^degree(p)) 14646 // Following ntl src/ZZX1.c SSMul 14647 unsigned long l=gen(ps+qs-1).bindigits()-1; // m=2^l <= deg(p*q)+1 < 2^{l+1} 14648 // long m2 = 1u << (l + 1); /* m2 = 2m = 2^{l+1} */ 14649 gen 
pPQ=gen(giacmin(ps,qs))*P*Q+1; 14650 PQ=evalf_double(P*Q,1,context0); 14651 unsigned long bound=pPQ.bindigits()+1; // 2^bound=smod bound on coeff of p*q 14652 unsigned long r=(bound >> l)+1; 14653 #ifdef INT128 14654 // probably useful for very large degree not supported by p1 and p2 14655 // otherwise the condition is almost the same as for p1*p2 14656 // p1*p2=3647915701995307009, that's 0.4*p5 14657 if (0 && modulo!=p1 && modulo!=p2 && modulo!=p3 && modulo!=p4){ 14658 vector<longlong> W; 14659 vector<int> a,b; 14660 vector<longlong> ab; 14661 if (modulo && modulo*longlong(modulo)<p5/(2*mindeg)){ 14662 vecteur2vectorint(p,modulo,a); 14663 vecteur2vectorint(q,modulo,b); 14664 fft2p5(a,b,ab,W,modulo); // smod modulo at end 14665 vectorlonglong2vecteur(ab,pq); 14666 return true; 14667 } 14668 if (modulo==0 && P.type==_INT_ && Q.type==_INT_ && is_greater(p5,2*pPQ,context0)){ 14669 vecteur2vectorint(p,modulo,a); 14670 vecteur2vectorint(q,modulo,b); 14671 fft2p5(a,b,ab,W,0); // smod modulo at end 14672 vectorlonglong2vecteur(ab,pq); 14673 return true; 14674 } 14675 } 14676 #endif 14677 if (bound>(1<<(l-1))){ 14678 fftprod2rl(p,q,r,l,pq); 14679 if (modulo) 14680 pq=smod(pq,modulo); 14681 return true; 14682 } 14683 #if 1 // def FFTp1p2p3p4 14684 if (PQ.type==_DOUBLE_ && (modulo || !my_isinf(PQ._DOUBLE_val))){ 14685 double PQd=PQ._DOUBLE_val; 14686 if (modulo){ 14687 double pq2=modulo*double(modulo); 14688 // if (pq2<PQd) // change made january 2019 14689 PQd=pq2; // since we convert p to a mod modulo and make p positive 14690 } 14691 double test=PQd*(mindeg+1); 14692 if (test<p2*double(p1)/2 || modulo==p1 || modulo==p2 || modulo==p3 || modulo==p4){ 14693 int reduce=modulo?modulo:p1; 14694 vecteur2vectorint(p,reduce,a); 14695 vecteur2vectorint(q,reduce,b); 14696 if (debug_infolevel>2) 14697 CERR << CLOCK()*1e-6 << ( (modulo==p2 || modulo==p3 || modulo==p4)?" 
begin fft2 p234 ":" begin fft2 p1 ") << rs << '\n'; 14698 if (modulo==p3 || modulo==p4) { 14699 if (modulo==p3) 14700 fft2mult(reduce,a,b,resp1,p3,Wp3,tmp_p,tmp_q,false,true,false); 14701 else 14702 fft2mult(reduce,a,b,resp1,p4,Wp4,tmp_p,tmp_q,false,true,false); 14703 } 14704 else { 14705 if (modulo==p2) 14706 fft2mult(reduce,a,b,resp1,p2,Wp2,tmp_p,tmp_q,false,true,false); 14707 else 14708 fft2mult(reduce,a,b,resp1,p1,Wp1,tmp_p,tmp_q,false,true,false); 14709 } 14710 if (debug_infolevel>2) 14711 CERR << CLOCK()*1e-6 << ( (modulo==p2 || modulo==p3 || modulo==p4)?" end fft2 p234 ":" end fft2 p1 ") << rs << '\n'; 14712 if (test>=p1/2 && modulo!=p1 && modulo!=p2 && modulo!=p3 && modulo!=p4) { 14713 if (debug_infolevel>2) 14714 CERR << CLOCK()*1e-6 << " begin fft2 p2 " << rs << '\n'; 14715 if (!modulo){ 14716 vecteur2vectorint(p,p2,a); 14717 vecteur2vectorint(q,p2,b); 14718 } 14719 reduce=modulo?modulo:p2; 14720 fft2mult(reduce,a,b,resp2,p2,Wp2,tmp_p,tmp_q,false,true,false); 14721 if (debug_infolevel>2) 14722 CERR << CLOCK()*1e-6 << " end fft2 p2 " << rs << '\n'; 14723 int p1modinv=-9;//invmod(p1,p2); 14724 int modulo2=modulo/2; 14725 if (modulo){ 14726 for (int i=0;i<rs;++i){ 14727 int A=resp1[i],B=resp2[i]; 14728 // a mod p1, b mod p2 -> res mod p1*p2 14729 longlong res=A+((longlong(p1modinv)*(B-longlong(A)))%p2)*p1; 14730 //res += (ulonglong(res)>>63)*p1p2; res -= (ulonglong(p1p2/2-res)>>63)*modulo; 14731 if (res>p1p2sur2) res-=p1p2; else if (res<=-p1p2sur2) res+=p1p2; 14732 //while (res>p1p2sur2) res-=p1p2; while (res<-p1p2sur2) res+=p1p2; 14733 A=res % modulo; 14734 A += (unsigned(A)>>31)*modulo; // A now positive 14735 A -= (unsigned(modulo2-A)>>31)*modulo; 14736 // if (A>modulo2) A-=modulo; 14737 resp1[i]=A; 14738 } 14739 } 14740 else { 14741 for (int i=0;i<rs;++i){ 14742 int A=resp1[i],B=resp2[i]; 14743 // a mod p1, b mod p2 -> res mod p1*p2 14744 longlong res=A+((longlong(p1modinv)*(B-longlong(A)))%p2)*p1; 14745 //res += (ulonglong(res)>>63)*p1p2; res -= 
(ulonglong(p1p2/2-res)>>63)*modulo; 14746 if (res>p1p2sur2) res-=p1p2; else if (res<-p1p2sur2) res+=p1p2; 14747 pq.push_back(res); 14748 } 14749 } 14750 if (debug_infolevel>2) 14751 CERR << CLOCK()*1e-6 << " end fft2 chinrem " << rs << '\n'; 14752 if (!modulo){ 14753 reverse(pq.begin(),pq.end()); 14754 return true; 14755 } 14756 } 14757 reverse(resp1.begin(),resp1.end()); 14758 if (!modulo || compute_pq) 14759 vector_int2vecteur(resp1,pq); 14760 return true; 14761 } 14762 if (//0 && // uncomment to test code below 14763 modulo && logrs<=25 && test<p1*double(p2)*p4/2){ 14764 vecteur2vectorint(p,modulo,a); 14765 vecteur2vectorint(q,modulo,b); 14766 if (debug_infolevel>2) 14767 CERR << CLOCK()*1e-6 << " begin fftp1 " << rs << '\n'; 14768 fft2mult(modulo,a,b,resp1,p1,Wp1,tmp_p,tmp_q,false,false,false); 14769 if (debug_infolevel>2) 14770 CERR << CLOCK()*1e-6 << " begin fftp2 " << rs << '\n'; 14771 fft2mult(modulo,a,b,resp2,p2,Wp2,tmp_p,tmp_q,false,false,false); 14772 if (debug_infolevel>2) 14773 CERR << CLOCK()*1e-6 << " begin fftp4 " << rs << '\n'; 14774 fft2mult(modulo,a,b,resp3,p4,Wp4,tmp_p,tmp_q,false,false,false); 14775 if (debug_infolevel>2) 14776 CERR << CLOCK()*1e-6 << " begin ichinrem " << modulo << '\n'; 14777 int n1=invmod(n,p1); if (n1<0) n1+=p1; 14778 int n2=invmod(n,p2); if (n2<0) n2+=p2; 14779 int n3=invmod(n,p4); if (n3<0) n3+=p4; 14780 int z1=invmod(p1,p2); if (z1<0) z1+=p2; 14781 int z2=invmod((longlong(p1)*p2) % p4,p4); if (z2<0) z2+=p4; 14782 int z3=(longlong(p1)*p2)%modulo; 14783 int modulo2=modulo/2; 14784 for (int i=0;i<rs;++i){ 14785 int u1=resp1[i],u2=resp2[i],u3=resp3[i]; 14786 //u1 += (unsigned(u1)>>31)*p1; 14787 //u2 += (unsigned(u2)>>31)*p2; 14788 //u3 += (unsigned(u3)>>31)*p4; 14789 u1=mulmodp1(n1,u1); 14790 u2=mulmodp2(n2,u2); 14791 //u3=mulmod(n3,u3,p4); 14792 int v1=u1; 14793 // 4 v2=(u2−v1)×z1 mod p2 14794 int v2=((longlong(u2)+p2-v1)*z1)%p2; 14795 // 5 t=(n3×u3−v1−v2×p1) mod p4 14796 int t=(longlong(u3)*n3-v1-longlong(v2)*p1)%p4; 14797 
t += (unsigned(t)>>31)*p4; // if (t<0) t+=p4; 14798 // 6 v3 =t×z2 mod p4 14799 int v3=smod(longlong(t)*z2,p4); 14800 // 7 u=(v1+v2×p1+v3×z3) mod q 14801 int u=(v1+longlong(v2)*p1+longlong(v3)*z3) % modulo; 14802 if (u>modulo2) u-=modulo; else if (u<-modulo2) u+=modulo; 14803 resp1[i]=u; 14804 } 14805 if (debug_infolevel>2) 14806 CERR << CLOCK()*1e-6 << " end ichinrem " << modulo << '\n'; 14807 reverse(resp1.begin(),resp1.end()); 14808 if (compute_pq) 14809 vector_int2vecteur(resp1,pq); 14810 return true; 14811 } 14812 if (modulo && test<p1*double(p2)*p3/2){ 14813 #if 0 // activate for checking to_fft and from_fft (+uncomment in previous if) 14814 fft_rep fa,fb,f; vector<int> tmp1,tmp2,tmp3; 14815 to_fft(p,modulo,Wp1,Wp2,Wp3,a,n,fa,true,false); 14816 to_fft(q,modulo,Wp1,Wp2,Wp3,b,n,fb,true,false); 14817 dotmult(fa,fb,f); 14818 from_fft(f,Wp1,Wp2,Wp3,resp1,tmp1,tmp2,tmp3,true,true); 14819 if (compute_pq) 14820 vector_int2vecteur(resp1,pq); 14821 trim_inplace(pq); 14822 return true; 14823 #endif 14824 vecteur2vectorint(p,modulo,a); 14825 vecteur2vectorint(q,modulo,b); 14826 if (debug_infolevel>2) 14827 CERR << CLOCK()*1e-6 << " begin fftp1 " << rs << '\n'; 14828 fft2mult(modulo,a,b,resp1,p1,Wp1,tmp_p,tmp_q,false,false,false); 14829 if (debug_infolevel>2) 14830 CERR << CLOCK()*1e-6 << " begin fftp2 " << rs << '\n'; 14831 fft2mult(modulo,a,b,resp2,p2,Wp2,tmp_p,tmp_q,false,false,false); 14832 if (debug_infolevel>2) 14833 CERR << CLOCK()*1e-6 << " begin fftp3 " << rs << '\n'; 14834 fft2mult(modulo,a,b,resp3,p3,Wp3,tmp_p,tmp_q,false,false,false); 14835 ichinremp1p2p3(resp1,resp2,resp3,n,resp1,modulo); 14836 reverse(resp1.begin(),resp1.end()); 14837 if (compute_pq) 14838 vector_int2vecteur(resp1,pq); 14839 return true; 14840 } 14841 } // PQ.type==_DOUBLE_ 14842 if (// 0 && 14843 modulo==0){ 14844 gen Bound=2*(mindeg+1)*P*Q; 14845 int nbits=256; 14846 int nthreads=threads_allowed?threads:1; 14847 #ifndef USE_GMP_REPLACEMENTS 14848 if (Bound.type==_ZINT) 14849 
nbits=(mpz_sizeinbase(*Bound._ZINTptr,2)/64+1)*64; 14850 int nbytes=3; 14851 // if we use 3 bytes coeff wrt the additional variable 14852 // the min degree of arguments is zmindeg=(mindeg+1)*(1+nbits/24) 14853 // the max coefficient in the product is 2^24*2^24*zmindeg 14854 // it must be smaller than p1*p2/2 14855 if ((mindeg+1)*(1+nbits/24)>=(p1p2sur2>>48)) 14856 nbytes=2; 14857 //int pzbound = 1 << (8*nbytes); 14858 int zsize=1+nbits/(8*nbytes); 14859 // time required by int->poly fft about 2*zsize*fft(rs) where zsize=nbits/24 or nbits/16 14860 // time required by ichinrem fft: 4+3*(nbits/32-4)*fft(rs)+C/2*(nbits/32)^2 14861 // where C*(nbits/32) is about fft(rs) for rs=2^19 and nbits around 200 14862 // -> FFTMUL_INT_MAXBITS around 1000 14863 if ( //1 || 14864 (//0 && 14865 nbits>nthreads*FFTMUL_INT_MAXBITS)){ 14866 // add one more variable to convert long integer coefficients into that variable 14867 longlong RS=longlong(rs)*zsize; 14868 if (RS!=int(RS)) 14869 return false; 14870 logrs=sizeinbase2(RS); 14871 if (logrs>25) 14872 return false; 14873 int RS2=1<<logrs; 14874 vector<int> pz(p.size()*zsize),qz(q.size()*zsize); 14875 // split p and q in pz and qz using mpz_export with basis B=2^24 (3 bytes) 14876 // requires B=2^16 if min(degree) too large 14877 // 8 bits unused (zero-ed), zsize int per coefficient 14878 // mpz_export(&target,&countp,0,nbytes,0,8*(4-nbytes),integer); 14879 // sign is ignored by mpz_export 14880 if (debug_infolevel>2) 14881 CERR << CLOCK()*1e-6 << " begin fft2 bigint conversion " << zsize << '\n'; 14882 zsplit(p,zsize,nbytes*8,pz); 14883 zsplit(q,zsize,nbytes*8,qz); 14884 if (debug_infolevel>2) 14885 CERR << CLOCK()*1e-6 << " begin fft2 int " << rs << '\n'; 14886 // fftmult call below should be threaded... 14887 // CERR << pz << '\n' << qz << '\n'; 14888 // pz and qz must be positive! 
14889 fft2mult(p1,pz,qz,resp1,p1,Wp1,tmp_p,tmp_q,false,true,true); 14890 fft2mult(p2,pz,qz,resp2,p2,Wp2,tmp_p,tmp_q,false,true,true); 14891 if (debug_infolevel>2) 14892 CERR << CLOCK()*1e-6 << " end fft2 int, begin ichinrem " << rs << '\n'; 14893 reverse(resp1.begin(),resp1.end()); 14894 reverse(resp2.begin(),resp2.end()); 14895 // resp1 and resp2 have size (p.size()+q.size())*rs-1 14896 // but coefficients above RS are 0 14897 vector<longlong> pqz(RS); 14898 int p1modinv=-9;//invmod(p1,p2); 14899 for (int i=0;i<RS;++i){ 14900 int A=resp1[i],B=resp2[i]; 14901 // A mod p1, B mod p2 -> res mod p1*p2 14902 longlong res=A+((longlong(p1modinv)*(B-A))%p2)*p1; 14903 if (res>p1p2sur2) res-=p1p2; 14904 else if (res<-p1p2sur2) res+=p1p2; 14905 pqz[i]=res; 14906 } 14907 //CERR << "pz:" << pz << '\n' <<"qz:" << qz << '\n' << "resp1:"<<resp1 << '\n' << "resp2"<<resp2 << '\n' ; 14908 //CERR << "pqz" << pqz << '\n'; 14909 pq.resize(rs); 14910 if (debug_infolevel>2) 14911 CERR << CLOCK()*1e-6 << " begin int back conversion " << zsize << '\n'; 14912 zbuild(pqz,zsize,nbytes*8,pq); 14913 if (debug_infolevel>2) 14914 CERR << CLOCK()*1e-6 << " end fft2 " << rs << '\n'; 14915 // fill pq from pqz using mpz_import 14916 // carry handling 14917 // sum(x_k*B^k): iquorem(x_k,b^2) add quo to x_{k+2}, 14918 // then iquorem(rem,b) add quo to x_{k+1} 14919 // after carry handling coefficients must be of type int and ||<2^24 14920 // put them into a vector<int>(zsize) then 14921 // mpz_import(mpz_target,count,0,nbytes,0,8*(4-nbytes),&array); 14922 // where mpz_target is pq[] 14923 return true; 14924 } 14925 #endif 14926 if (1){ 14927 // chinese remaindering 14928 if (debug_infolevel>2) 14929 CERR << CLOCK()*1e-6 << " begin fft2 int, p1 " << rs << '\n'; 14930 // first prime used is p1 14931 fftmultp1234(p,q,P,Q,pq,p1,a,b,resp1,resp2,resp3,Wp1,Wp2,Wp3,Wp4,tmp_p,tmp_q,false); 14932 if (debug_infolevel>2) 14933 CERR << CLOCK()*1e-6 << " end fft2 int p1 " << rs << '\n'; 14934 gen bound=p1; 14935 if 
(debug_infolevel>2) 14936 CERR << CLOCK()*1e-6 << " begin fft2 int p2 " << rs << '\n'; 14937 fftmultp1234(p,q,P,Q,pq,p2,a,b,resp2,resp1,resp3,Wp1,Wp2,Wp3,Wp4,tmp_p,tmp_q,false); 14938 if (debug_infolevel>2) 14939 CERR << CLOCK()*1e-6 << " end fft2 int p2 " << rs << '\n'; 14940 bound=p2*bound; 14941 #if 1 14942 ichinremp1p2(resp1,resp2,rs,pq,nbits); 14943 #else 14944 ichinrem_inplace(pq,curres,p1,p2); // pq=ichinrem(pq,curres,p1,p2); 14945 #endif 14946 modpoly curres; // not used 14947 gen bound_=bound; 14948 // valid primes m must verify m*m<1.8e18/mindeg 14949 int prime=p3; // prevprime((1<<30)).val;//prime=prevprime(p1-1).val;; 14950 vector<int> primes; 14951 for (int nprimes=0;is_greater(Bound,bound,context0);++nprimes){ 14952 primes.push_back(prime); 14953 bound=prime*bound; 14954 // using a prime above p3 might overflow 14955 // unless an additional reduction modulo p1/p2/p3 is done 14956 // after reduction modulo modulo in the recursive call 14957 // because e.g. submod might return a negative number 14958 if (logrs<=25 && prime==p3 && nprimes==0) 14959 prime=p4;//int(std::sqrt(1.8e18/mindeg)); 14960 else { 14961 if (prime==p4) 14962 prime=p2; 14963 prime=prevprime(prime-1).val; 14964 if (prime==p1 || prime==p2 || prime==p3) 14965 prime=prevprime(prime-1).val; 14966 } 14967 } 14968 bound=bound_; 14969 int ps=int(primes.size()); 14970 #ifdef HAVE_LIBPTHREAD 14971 if (nthreads>1){ 14972 vector<pthread_t> tab(nthreads); 14973 vector<thread_fftmult_t> multparam(nthreads); 14974 vector<bool> busy(nthreads,false); 14975 vector< vector<int> > av(nthreads,vector<int>(n)),bv(nthreads,vector<int>(n)),resp1v(nthreads,vector<int>(n)),resp2v(nthreads,vector<int>(n)),resp3v(nthreads,vector<int>(n)),Wp1v(nthreads,vector<int>(n)),Wp2v(nthreads,vector<int>(n)),Wp3v(nthreads,vector<int>(n)),Wp4v(nthreads,vector<int>(n)),tmp_pv(nthreads,vector<int>(n)),tmp_qv(nthreads,vector<int>(n)); 14976 for (int j=0;j<nthreads;++j){ 14977 thread_fftmult_t 
tmp={&p,&q,P,Q,&curres,0,&av[j],&bv[j],&resp1v[j],&resp2v[j],&resp3v[j],&Wp1v[j],&Wp2v[j],&Wp3v[j],&Wp4v[j],&tmp_pv[j],&tmp_qv[j]}; 14978 multparam[j]=tmp; 14979 } 14980 int i=0; 14981 for (;i<ps;){ 14982 if (debug_infolevel>2) 14983 CERR << CLOCK()*1e-6 << " Prime " << i << " of " << ps << '\n'; 14984 for (int j=0;j<nthreads;++j,++i){ 14985 if (i>=ps){ 14986 multparam[j].prime=0; 14987 busy[j]=false; 14988 continue; 14989 } 14990 multparam[j].prime=primes[i]; 14991 bool res=true; 14992 busy[j]=true; 14993 if (j<nthreads-1) res=pthread_create(&tab[j],(pthread_attr_t *) NULL,do_thread_fftmult,(void *) &multparam[j]); 14994 if (res){ 14995 do_thread_fftmult((void *)&multparam[j]); 14996 busy[j]=false; 14997 } 14998 } 14999 for (int j=0;j<nthreads;++j){ 15000 void * ptr=(void *)&nthreads; // non-zero initialisation 15001 if (j<nthreads-1 && busy[j]) 15002 pthread_join(tab[j],&ptr); 15003 } 15004 for (int j=0;j<nthreads;++j){ 15005 prime=multparam[j].prime; 15006 if (prime){ 15007 ichinrem_inplace(pq,resp1v[j],bound,prime); // pq=ichinrem(pq,curres,bound,prime); 15008 bound=prime*bound; 15009 } 15010 } 15011 } 15012 return true; 15013 } // end nthreads 15014 #endif // PTHREAD 15015 for (int i=0;i<ps;++i){ 15016 prime=primes[i]; 15017 curres.clear(); 15018 if (debug_infolevel>2) 15019 CERR << CLOCK()*1e-6 << " BEGIN FFT2 MOD " << prime << '\n'; 15020 fftmultp1234(p,q,P,Q,curres,prime,a,b,resp1,resp2,resp3,Wp1,Wp2,Wp3,Wp4,tmp_p,tmp_q,false); 15021 if (debug_infolevel>2) 15022 CERR << CLOCK()*1e-6 << " END FFT2 MOD " << prime << '\n'; 15023 ichinrem_inplace(pq,resp1,bound,prime); // pq=ichinrem(pq,curres,bound,prime); 15024 bound=prime*bound; 15025 } 15026 return true; 15027 } // end chinese remaindering method 15028 } // end if (modulo==0) 15029 #endif // FFTp1p2p3p4 15030 #if 1 15031 fftprod2rl(p,q,r,l,pq); 15032 #else 15033 unsigned long mr=r<<l; // 2^mr is also a smod bound on coeff op p*q 15034 // Now work modulo the integer N=2^{m*r}+1 15035 // let omega=2^r, omega 
is a 2m-root of unity 15036 // since (2^r)^m=-1 mod N 15037 // Generic code should not be used since optimizations apply here: 15038 // omega^k=2^(rk) for k<m, and =-2^(r*(k-m)) for k>=m 15039 // mod operation: if a<N^2, a=N*q+r=(2^(m*r)+1)*q+r=2^(m*r)*q+q+r 15040 // first do euclidean div by 2^(m*r) -> Q=q,R=q+r -> r=R-Q 15041 environment env; 15042 env.modulo=pow(plus_two,mr)+1; 15043 env.pn=env.modulo; 15044 env.moduloon=true; 15045 fftconv(p,q,l+1,pow(plus_two,r),pq,&env); 15046 #endif 15047 return true; 15048 } 15049 15050 bool fftmult(const modpoly & p,const modpoly & q,modpoly & pq,int modulo,int maxdeg){ 15051 vector<int> a,b,resp1,resp2,resp3,Wp1,Wp2,Wp3,Wp4,tmp_p,tmp_q; 15052 if (debug_infolevel>2) CERR << CLOCK()*1e-6 << " intnorm begin" << '\n'; 15053 gen P=intnorm(p,context0), Q=intnorm(q,context0); // coeff assumed to be integers -> no context 15054 if (debug_infolevel>2) CERR << CLOCK()*1e-6 << " intnorm end" << '\n'; 15055 return fftmultp1234(p,q,P,Q,pq,modulo,a,b,resp1,resp2,resp3,Wp1,Wp2,Wp3,Wp4,tmp_p,tmp_q,true); 15056 } 15057 15058 modpoly fftmult(const modpoly & p,const modpoly & q){ 15059 modpoly pq; 15060 fftmult(p,q,pq,0); 15061 return pq; 15062 } 15063 15064 gen fastnorm(const dense_POLY1 & pp,GIAC_CONTEXT){ 15065 gen tmp(0),r,I; 15066 for (unsigned i=0;i<pp.size();++i){ 15067 reim(pp[i],r,I,contextptr); 15068 tmp += abs(r,contextptr) + abs(I,contextptr); 15069 } 15070 return tmp; 15071 } 15072 #if 1 15073 bool giac_gcd_modular_algo1(polynome &p,polynome &q,polynome &d){ 15074 environment env,envtmp; 15075 dense_POLY1 pp(modularize(p,0,&env)),qq(modularize(q,0,&env)); 15076 if (is_undef(pp) || is_undef(qq)) 15077 return false; 15078 // COUT << "modular gcd 1 " << pp << " " << qq << '\n'; 15079 gen gcdfirstcoeff(gcd(pp.front(),qq.front(),context0)); 15080 int gcddeg= giacmin(int(pp.size()),int(qq.size()))-1; 15081 gen bound(pow(gen(2),gcddeg+1)* abs(gcdfirstcoeff,context0)); 15082 if (is_zero(im(pp,context0)) && is_zero(im(qq,context0))) 
15083 bound=bound * min(norm(pp,context0), norm(qq,context0),context0); 15084 else 15085 bound = bound * min(fastnorm(pp,context0),fastnorm(qq,context0),context0); 15086 env.moduloon = true; 15087 // env.modulo=nextprime(max(gcdfirstcoeff+1,gen(30011),context0)); 15088 env.modulo=p1+1; 15089 env.pn=env.modulo; 15090 if (poly_is_real(p) && poly_is_real(q)) 15091 env.complexe=false; 15092 else 15093 env.complexe=true; 15094 // find most efficient max prime: prime^2<p1p2/(4*maxdeg) 15095 int maxdeg=giacmax(pp.size(),qq.size()); 15096 int maxp=std::sqrt(p1p2/4./maxdeg); 15097 gen productmodulo(1); 15098 dense_POLY1 currentgcd(p.dim),p_simp(p.dim),q_simp(p.dim),rem(p.dim); 15099 // 30011 leaves 267 primes below the 2^15 bound 15100 for (;;){ 15101 env.modulo=prevprimep1p2p3(env.modulo.val,maxp,maxdeg); 15102 while (is_zero(pp.front() % env.modulo) || is_zero(qq.front() % env.modulo)){ 15103 env.modulo=prevprimep1p2p3(env.modulo.val,maxp,maxdeg); 15104 if (env.complexe){ 15105 while (smod(env.modulo,4)!=1) 15106 env.modulo=prevprimep1p2p3(env.modulo.val,maxp,maxdeg); 15107 } 15108 } 15109 modpoly gcdmod; 15110 gcdmodpoly(pp,qq,&env,gcdmod); 15111 if (is_undef(gcdmod)) 15112 return false; 15113 // COUT << "Modulo:" << modulo << " " << gcdmod << '\n'; 15114 gen adjustcoeff=gcdfirstcoeff*invmod(gcdmod.front(),env.modulo); 15115 mulmodpoly(gcdmod,adjustcoeff,&env,gcdmod); 15116 int m=int(gcdmod.size())-1; 15117 if (!m){ 15118 d=polynome(gen(1),1); 15119 return true; 15120 } 15121 if (m>gcddeg) // this prime is bad, just ignore 15122 continue; 15123 // combine step 15124 if (m<gcddeg){ // previous prime was bad 15125 gcddeg=m; 15126 currentgcd=gcdmod; 15127 productmodulo=env.modulo; 15128 } 15129 else { 15130 // m==gcddeg, start combine 15131 if (productmodulo==gen(1)){ // no need to combine primes 15132 currentgcd=gcdmod; 15133 productmodulo=env.modulo; 15134 } 15135 else { 15136 // COUT << "Old gcd:" << productmodulo << " " << currentgcd << '\n' ; 15137 
currentgcd=ichinrem(gcdmod,currentgcd,env.modulo,productmodulo); 15138 // COUT << "Combined to " << currentgcd << '\n'; 15139 productmodulo=productmodulo*env.modulo; 15140 } 15141 } 15142 // check candidate gcd 15143 modpoly dmod(modularize(currentgcd,productmodulo,&envtmp)); 15144 if (is_undef(dmod)) 15145 return false; 15146 ppz(dmod); 15147 if ( DenseDivRem(pp,dmod,p_simp,rem,true) && rem.empty() ){ 15148 if (DenseDivRem(qq,dmod,q_simp,rem,true) 15149 && (rem.empty())){ 15150 p=unmodularize(p_simp); 15151 q=unmodularize(q_simp); 15152 d=unmodularize(dmod); 15153 return true; 15154 } 15155 } 15156 } 15157 return false; 15158 } 15159 15160 #else // OLDGCD1 15161 15162 bool giac_gcd_modular_algo1(polynome &p,polynome &q,polynome &d){ 15163 environment env,envtmp; 15164 dense_POLY1 pp(modularize(p,0,&env)),qq(modularize(q,0,&env)); 15165 if (is_undef(pp) || is_undef(qq)) 15166 return false; 15167 // COUT << "modular gcd 1 " << pp << " " << qq << '\n'; 15168 gen gcdfirstcoeff(gcd(pp.front(),qq.front(),context0)); 15169 int gcddeg= giacmin(int(pp.size()),int(qq.size()))-1; 15170 gen bound(pow(gen(2),gcddeg+1)* abs(gcdfirstcoeff,context0)); 15171 if (is_zero(im(pp,context0)) && is_zero(im(qq,context0))) 15172 bound=bound * min(norm(pp,context0), norm(qq,context0),context0); 15173 else 15174 bound = bound * min(fastnorm(pp,context0),fastnorm(qq,context0),context0); 15175 env.moduloon = true; 15176 // env.modulo=nextprime(max(gcdfirstcoeff+1,gen(30011),context0)); 15177 env.modulo=30009; 15178 env.pn=env.modulo; 15179 if (poly_is_real(p) && poly_is_real(q)) 15180 env.complexe=false; 15181 else 15182 env.complexe=true; 15183 gen productmodulo(1); 15184 dense_POLY1 currentgcd(p.dim),p_simp(p.dim),q_simp(p.dim),rem(p.dim); 15185 // 30011 leaves 267 primes below the 2^15 bound 15186 for (;;){ 15187 env.modulo=nextprime(env.modulo+2); 15188 while (is_zero(pp.front() % env.modulo) || is_zero(qq.front() % env.modulo)){ 15189 env.modulo=nextprime(env.modulo+2); 15190 if 
(env.complexe){ 15191 while (smod(env.modulo,4)==1) 15192 env.modulo=nextprime(env.modulo+2); 15193 } 15194 } 15195 modpoly gcdmod; 15196 gcdmodpoly(pp,qq,&env,gcdmod); 15197 if (is_undef(gcdmod)) 15198 return false; 15199 // COUT << "Modulo:" << modulo << " " << gcdmod << '\n'; 15200 gen adjustcoeff=gcdfirstcoeff*invmod(gcdmod.front(),env.modulo); 15201 mulmodpoly(gcdmod,adjustcoeff,&env,gcdmod); 15202 int m=int(gcdmod.size())-1; 15203 if (!m){ 15204 d=polynome(gen(1),1); 15205 return true; 15206 } 15207 if (m>gcddeg) // this prime is bad, just ignore 15208 continue; 15209 // combine step 15210 if (m<gcddeg){ // previous prime was bad 15211 gcddeg=m; 15212 currentgcd=gcdmod; 15213 productmodulo=env.modulo; 15214 } 15215 else { 15216 // m==gcddeg, start combine 15217 if (productmodulo==gen(1)){ // no need to combine primes 15218 currentgcd=gcdmod; 15219 productmodulo=env.modulo; 15220 } 15221 else { 15222 // COUT << "Old gcd:" << productmodulo << " " << currentgcd << '\n' ; 15223 currentgcd=ichinrem(gcdmod,currentgcd,env.modulo,productmodulo); 15224 // COUT << "Combined to " << currentgcd << '\n'; 15225 productmodulo=productmodulo*env.modulo; 15226 } 15227 } 15228 // check candidate gcd 15229 modpoly dmod(modularize(currentgcd,productmodulo,&envtmp)); 15230 if (is_undef(dmod)) 15231 return false; 15232 ppz(dmod); 15233 if ( (DenseDivRem(pp,dmod,p_simp,rem,true)) && (rem.empty()) && (DenseDivRem(qq,dmod,q_simp,rem,true)) && (rem.empty()) ){ 15234 p=unmodularize(p_simp); 15235 q=unmodularize(q_simp); 15236 d=unmodularize(dmod); 15237 return true; 15238 } 15239 } 15240 return false; 15241 } 15242 #endif // OLDGCD1 15243 15244 #ifdef HAVE_LIBNTL 15245 #ifdef HAVE_LIBPTHREAD 15246 pthread_mutex_t ntl_mutex = PTHREAD_MUTEX_INITIALIZER; 15247 #endif 15248 15249 #if 0 15250 void ininttype2ZZ(const inttype & temp,const inttype & step,NTL::ZZ & z,const NTL::ZZ & zzstep){ 15251 if (temp==0){ 15252 long j=0; 15253 z=j; 15254 return; 15255 } 15256 inttype q; 15257 inttype 
rem(irem(temp,step,q)); 15258 #ifndef NO_STDEXCEPT 15259 if (rem.type!=_INT_) setsizeerr(gettext("modpoly.cc/ininttype2ZZ")); 15260 #endif 15261 long longtemp=rem.val; 15262 ininttype2ZZ(q,step,z,zzstep); 15263 NTL::ZZ zztemp; 15264 zztemp=longtemp; 15265 z=z*zzstep+zztemp; 15266 } 15267 #else 15268 bool ininttype2ZZ(const inttype & temp,const inttype & step,NTL::ZZ & z,const NTL::ZZ & zzstep){ 15269 if (temp.type==_INT_){ 15270 z=temp.val; 15271 return true; 15272 } 15273 if (temp.type!=_ZINT) 15274 return false; 15275 if (mpz_cmp_si(*temp._ZINTptr,0)<0){ 15276 bool b=ininttype2ZZ(-temp,step,z,zzstep); 15277 z=-z; 15278 return b; 15279 } 15280 vector<long> ecriture; 15281 inttype g(*temp._ZINTptr),r(*temp._ZINTptr); 15282 if (step.type==_INT_){ 15283 mpz_t & z=*g._ZINTptr; 15284 for (;mpz_cmp_si(z,0)!=0;){ 15285 ecriture.push_back(mpz_tdiv_qr_ui(z,*r._ZINTptr,z,step.val)); 15286 } 15287 } 15288 else { 15289 for (;g!=0;){ 15290 inttype q; 15291 inttype rem(irem(g,step,q)); 15292 #ifndef NO_STDEXCEPT 15293 if (rem.type!=_INT_) setsizeerr(gettext("modpoly.cc/ininttype2ZZ")); 15294 #endif 15295 long r=rem.val; 15296 ecriture.push_back(r); 15297 g=q; 15298 } 15299 } 15300 z=0; 15301 NTL::ZZ zztemp; 15302 for (int i=ecriture.size()-1;i>=0;--i){ 15303 z *= zzstep; 15304 zztemp=ecriture[i]; 15305 z += zztemp; 15306 } 15307 return true; 15308 // CERR << temp << " " << z <<'\n'; 15309 } 15310 #endif 15311 15312 NTL::ZZ inttype2ZZ(const inttype & i){ 15313 int s=1<<30; 15314 inttype step(s); // 2^16 15315 inttype temp(i),q; 15316 NTL::ZZ zzstep; 15317 zzstep=s; 15318 NTL::ZZ z; 15319 ininttype2ZZ(temp,step,z,zzstep); 15320 // COUT << "cl_I2ZZ" << i << " -> " << z << '\n'; 15321 return NTL::ZZ(z); 15322 } 15323 15324 void inZZ2inttype(const NTL::ZZ & zztemp,int shift,inttype & temp){ 15325 NTL::ZZ zzq(zztemp); 15326 vector<long> v; 15327 while (zzq!=0){ 15328 // v.push_back(NTL::DivRem(zzq,zzq,1<<shift)); 15329 v.push_back(NTL::trunc_long(zzq,shift)); 15330 
NTL::RightShift(zzq,zzq,shift); 15331 } 15332 reverse(v.begin(),v.end()); 15333 temp=0; 15334 temp.uncoerce(NTL::NumBits(zzq)); 15335 #if 1 15336 for (size_t i=0;i<v.size();++i){ 15337 mpz_t & z=*temp._ZINTptr; 15338 mpz_mul_2exp(z,z,shift); 15339 mpz_add_ui(z,z,v[i]); 15340 } 15341 #else 15342 for (size_t i=0;i<v.size();++i){ 15343 longlong llongtemp=v[i]; 15344 temp=temp*step+inttype(llongtemp); 15345 } 15346 #endif 15347 } 15348 15349 15350 inttype ZZ2inttype(const NTL::ZZ & z){ 15351 if (z<0) 15352 return -ZZ2inttype(-z); 15353 inttype temp(0); 15354 NTL::ZZ zztemp(z); 15355 inZZ2inttype(zztemp,62,temp); 15356 // COUT << "zz2cl_I " << z << " -> " << temp << '\n'; 15357 return temp; 15358 } 15359 15360 NTL::ZZX tab2ZZX(const inttype * tab,int degree){ 15361 NTL::ZZX f; 15362 f.rep.SetMaxLength(degree+1); 15363 f.rep.SetLength(degree+1); 15364 for (int i=0;i<=degree;i++) 15365 SetCoeff(f,i,inttype2ZZ(tab[i])); 15366 return NTL::ZZX(f); 15367 } 15368 15369 void ZZX2tab(const NTL::ZZX & f,int & degree,inttype * & tab){ 15370 // COUT << f << '\n'; 15371 degree=deg(f); 15372 tab = new inttype[degree+1] ; 15373 for (int i=degree;i>=0;i--){ 15374 inttype c=ZZ2inttype(coeff(f,i)); 15375 tab[i]=c; 15376 } 15377 } 15378 15379 NTL::GF2X modpoly2GF2X(const modpoly & p){ 15380 NTL::GF2X f; 15381 int degree=p.size()-1; 15382 for (int i=0;i<=degree;i++) 15383 SetCoeff(f,i,p[degree-i].val); 15384 if (debug_infolevel>1) 15385 CERR << f << '\n'; 15386 return f; 15387 } 15388 15389 modpoly GF2X2modpoly(const NTL::GF2X & f){ 15390 // COUT << f << '\n'; 15391 int degree=deg(f); 15392 modpoly tab (degree+1) ; 15393 for (int i=degree;i>=0;i--){ 15394 tab[i]=int(unsigned(rep(coeff(f,i)))); 15395 } 15396 reverse(tab.begin(),tab.end()); 15397 return tab; 15398 } 15399 15400 // Don't forget to set the modulus with ZZ_p::init(p) before calling this 15401 NTL::ZZ_pX modpoly2ZZ_pX(const modpoly & p){ 15402 NTL::ZZ_pX f; 15403 int degree=p.size()-1; 15404 for (int i=0;i<=degree;i++){ 15405 
NTL::ZZ_p tmp; 15406 conv(tmp,inttype2ZZ(p[degree-i])); 15407 SetCoeff(f,i,tmp); 15408 } 15409 if (debug_infolevel>10) CERR << f << '\n'; 15410 return f; 15411 } 15412 15413 modpoly ZZ_pX2modpoly(const NTL::ZZ_pX & f){ 15414 // COUT << f << '\n'; 15415 int degree=deg(f); 15416 modpoly tab (degree+1) ; 15417 for (int i=degree;i>=0;i--){ 15418 tab[i]=ZZ2inttype(rep(coeff(f,i))); 15419 } 15420 reverse(tab.begin(),tab.end()); 15421 return tab; 15422 } 15423 15424 NTL::ZZX modpoly2ZZX(const modpoly & p){ 15425 NTL::ZZX f; 15426 int degree=p.size()-1; 15427 for (int i=0;i<=degree;i++){ 15428 NTL::ZZ tmp=inttype2ZZ(p[degree-i]); 15429 SetCoeff(f,i,tmp); 15430 } 15431 if (debug_infolevel>10) CERR << f << '\n'; 15432 return f; 15433 } 15434 15435 modpoly ZZX2modpoly(const NTL::ZZX & f){ 15436 // COUT << f << '\n'; 15437 int degree=deg(f); 15438 modpoly tab (degree+1) ; 15439 for (int i=degree;i>=0;i--){ 15440 tab[i]=ZZ2inttype(coeff(f,i)); 15441 } 15442 reverse(tab.begin(),tab.end()); 15443 return tab; 15444 } 15445 15446 bool ntlresultant(const modpoly &p,const modpoly &q,const gen & modulo,gen & res,bool ntl_on_check){ 15447 if ( ntl_on_check && ntl_on(context0)==0) 15448 return false; 15449 #ifdef HAVE_LIBPTHREAD 15450 int locked=pthread_mutex_trylock(&ntl_mutex); 15451 #endif // HAVE_LIBPTHREAD 15452 if (locked) 15453 return false; 15454 bool ok=true; 15455 try { 15456 if (is_zero(modulo)){ 15457 NTL::ZZX P(modpoly2ZZX(p)); 15458 NTL::ZZX Q(modpoly2ZZX(q)); 15459 if (debug_infolevel) 15460 CERR << CLOCK()*1e-6 << " ntlresultant begin\n"; 15461 NTL::ZZ R(resultant(P,Q)); 15462 if (debug_infolevel) 15463 CERR << CLOCK()*1e-6 << " ntlresultant end\n"; 15464 res=ZZ2inttype(R); 15465 } 15466 else { 15467 NTL::ZZ_p::init(inttype2ZZ(modulo)); 15468 NTL::ZZ_pX P(modpoly2ZZ_pX(p)); 15469 NTL::ZZ_pX Q(modpoly2ZZ_pX(q)); 15470 if (debug_infolevel) 15471 CERR << CLOCK()*1e-6 << " ntlresultant mod begin\n"; 15472 NTL::ZZ_p R(resultant(P,Q)); 15473 if (debug_infolevel) 15474 CERR << 
CLOCK()*1e-6 << " ntlresultant mod end\n"; 15475 res=ZZ2inttype(NTL::rep(R)); 15476 } 15477 } catch(std::runtime_error & e){ 15478 ok=false; 15479 } 15480 #ifdef HAVE_LIBPTHREAD 15481 pthread_mutex_unlock(&ntl_mutex); 15482 #endif 15483 return ok; 15484 } 15485 15486 bool ntlxgcd(const modpoly &a,const modpoly &b,const gen & modulo,modpoly & u,modpoly &v,modpoly & d,bool ntl_on_check){ 15487 if (ntl_on_check && ntl_on(context0)==0) 15488 return false; 15489 #ifdef HAVE_LIBPTHREAD 15490 int locked=pthread_mutex_trylock(&ntl_mutex); 15491 #endif // HAVE_LIBPTHREAD 15492 if (locked) 15493 return false; 15494 bool ok=true; 15495 try { 15496 if (is_zero(modulo)){ 15497 NTL::ZZX A(modpoly2ZZX(a)); 15498 NTL::ZZX B(modpoly2ZZX(b)); 15499 NTL::ZZX U,V; NTL::ZZ R; 15500 if (debug_infolevel) 15501 CERR << CLOCK()*1e-6 << " ntlxgcd begin\n"; 15502 XGCD(R,U,V,A,B); 15503 if (debug_infolevel) 15504 CERR << CLOCK()*1e-6 << " ntlxgcd end\n"; 15505 u=ZZX2modpoly(U); 15506 v=ZZX2modpoly(V); 15507 d=makevecteur(ZZ2inttype(R)); 15508 if (debug_infolevel) 15509 CERR << CLOCK()*1e-6 << " ntlxgcd end convert\n"; 15510 ok=R!=0; 15511 } 15512 else { 15513 NTL::ZZ_p::init(inttype2ZZ(modulo)); 15514 NTL::ZZ_pX A(modpoly2ZZ_pX(a)); 15515 NTL::ZZ_pX B(modpoly2ZZ_pX(b)); 15516 NTL::ZZ_pX U,V,D; 15517 if (debug_infolevel) 15518 CERR << CLOCK()*1e-6 << " ntlxgcd begin\n"; 15519 XGCD(D,U,V,A,B); 15520 if (debug_infolevel) 15521 CERR << CLOCK()*1e-6 << " ntlxgcd end\n"; 15522 u=ZZ_pX2modpoly(U); 15523 v=ZZ_pX2modpoly(V); 15524 d=ZZ_pX2modpoly(D); 15525 if (debug_infolevel) 15526 CERR << CLOCK()*1e-6 << " ntlxgcd end convert\n"; 15527 } 15528 } catch(std::runtime_error & e){ 15529 ok=false; 15530 } 15531 #ifdef HAVE_LIBPTHREAD 15532 pthread_mutex_unlock(&ntl_mutex); 15533 #endif 15534 return ok; 15535 } 15536 15537 bool ntlgcd(const modpoly &a,const modpoly &b,const gen & modulo,modpoly & d,bool ntl_on_check){ 15538 if (ntl_on_check && ntl_on(context0)==0) 15539 return false; 15540 #ifdef 
HAVE_LIBPTHREAD 15541 int locked=pthread_mutex_trylock(&ntl_mutex); 15542 #endif // HAVE_LIBPTHREAD 15543 bool ok=true; 15544 if (locked) 15545 return false; 15546 try { 15547 if (is_zero(modulo)){ 15548 NTL::ZZX A(modpoly2ZZX(a)); 15549 NTL::ZZX B(modpoly2ZZX(b)); 15550 if (debug_infolevel) 15551 CERR << CLOCK()*1e-6 << " ntlgcd begin\n"; 15552 NTL::ZZX D(GCD(A,B)); 15553 if (debug_infolevel) 15554 CERR << CLOCK()*1e-6 << " ntlgcd end\n"; 15555 d=ZZX2modpoly(D); 15556 } 15557 else { 15558 NTL::ZZ_p::init(inttype2ZZ(modulo)); 15559 NTL::ZZ_pX A(modpoly2ZZ_pX(a)); 15560 NTL::ZZ_pX B(modpoly2ZZ_pX(b)); 15561 if (debug_infolevel) 15562 CERR << CLOCK()*1e-6 << " ntlgcd mod begin\n"; 15563 NTL::ZZ_pX D(GCD(A,B)); 15564 if (debug_infolevel) 15565 CERR << CLOCK()*1e-6 << " ntlgcd end\n"; 15566 d=ZZ_pX2modpoly(D); 15567 } 15568 } catch(std::runtime_error & e){ 15569 ok=false; 15570 } 15571 #ifdef HAVE_LIBPTHREAD 15572 pthread_mutex_unlock(&ntl_mutex); 15573 #endif 15574 return ok; 15575 } 15576 15577 // modular resultant using NTL 15578 bool polynome2tab(const polynome & p,int deg,inttype * tab){ 15579 inttype n0(0); 15580 if (p.dim!=1) return false; // setsizeerr(gettext("modpoly.cc/polynome2tab")); 15581 if (p.coord.empty()) 15582 return true; 15583 if ( deg!=p.lexsorted_degree()) return false; // setsizeerr(gettext("modpoly.cc/polynome2tab")); 15584 int curpow=deg; 15585 vector< monomial<gen> >::const_iterator it=p.coord.begin(); 15586 vector< monomial<gen> >::const_iterator itend=p.coord.end(); 15587 for (;it!=itend;++it){ 15588 int newpow=it->index.front(); 15589 for (;curpow>newpow;--curpow) 15590 tab[curpow]=n0; 15591 tab[curpow]=it->value; 15592 --curpow; 15593 } 15594 for (;curpow>-1;--curpow) 15595 tab[curpow]=n0; 15596 return true; 15597 } 15598 15599 polynome tab2polynome(const inttype * tab,int deg){ 15600 vector< monomial<gen> > v; 15601 index_t i; 15602 i.push_back(deg); 15603 const inttype * tabend=tab+deg+1; 15604 gen n0(0); 15605 for 
(;tab!=tabend;--i[0]){ 15606 --tabend; 15607 if (gen(*tabend)!=n0) 15608 v.push_back(monomial<gen>(gen(*tabend),i)); 15609 } 15610 return polynome(1,v); 15611 } 15612 15613 int ntlgcd(inttype *p, int pdeg,inttype * q,int qdeg, inttype * & res, int & resdeg,int debug=0){ 15614 NTL::ZZX f(tab2ZZX(p,pdeg)); 15615 NTL::ZZX g(tab2ZZX(q,qdeg)); 15616 NTL::ZZX d(GCD(f,g)); 15617 ZZX2tab(d,resdeg,res); 15618 return resdeg; 15619 } 15620 15621 bool gcd_modular_algo1(polynome &p,polynome &q,polynome &d,bool compute_cof){ 15622 if (ntl_on(context0)==0 || !poly_is_real(p) || !poly_is_real(q)) 15623 return giac_gcd_modular_algo1(p,q,d); 15624 int np=p.lexsorted_degree(); 15625 int nq=q.lexsorted_degree(); 15626 if (np<NTL_MODGCD || nq<NTL_MODGCD) 15627 return giac_gcd_modular_algo1(p,q,d); 15628 #ifdef HAVE_LIBPTHREAD 15629 int locked=pthread_mutex_trylock(&ntl_mutex); 15630 #endif // HAVE_LIBPTHREAD 15631 if (locked) 15632 return giac_gcd_modular_algo1(p,q,d); 15633 bool res=true; 15634 try { 15635 inttype * tabp = new inttype[np+1]; // dense rep of the polynomial 15636 if (!polynome2tab(p,np,tabp)){ 15637 delete [] tabp; 15638 return false; 15639 } 15640 inttype * tabq = new inttype[nq+1]; // dense rep of the polynomial 15641 if (!polynome2tab(q,nq,tabq)){ 15642 delete [] tabp; 15643 delete [] tabq; 15644 return false; 15645 } 15646 int nd; 15647 inttype * res; 15648 ntlgcd(tabp,np,tabq,nq,res,nd); 15649 d=tab2polynome(res,nd); 15650 // COUT << "PGCD=" << d << '\n'; 15651 delete [] res; 15652 delete [] tabp; 15653 delete [] tabq; 15654 if (compute_cof){ 15655 p = p/d; 15656 q = q/d; 15657 } 15658 } catch(std::runtime_error & e){ 15659 res=false; 15660 } 15661 #ifdef HAVE_LIBPTHREAD 15662 pthread_mutex_unlock(&ntl_mutex); 15663 #endif 15664 return res; 15665 } 15666 15667 #else // HAVE_LIBNTL 15668 bool ntlresultant(const modpoly &p,const modpoly &q,const gen & modulo,gen & res,bool ntl_on_check){ 15669 return false; 15670 } 15671 15672 bool ntlgcd(const modpoly &a,const 
modpoly &b,const gen & modulo,modpoly & d,bool ntl_on_check){ 15673 return false; 15674 } 15675 15676 15677 bool ntlxgcd(const modpoly &a,const modpoly &b,const gen & modulo,modpoly & reu,modpoly &v,modpoly & d,bool ntl_on_check){ 15678 return false; 15679 } 15680 15681 bool gcd_modular_algo1(polynome &p,polynome &q,polynome &d,bool compute_cof){ 15682 return giac_gcd_modular_algo1(p,q,d); 15683 } 15684 #endif // HAVE_LIBNTL 15685 15686 15687 #ifndef NO_NAMESPACE_GIAC 15688 } // namespace giac 15689 #endif // ndef NO_NAMESPACE_GIAC 15690