1 /* -*- mode:C++ ; compile-command: "g++ -I.. -g -c threaded.cc" -*- */
2 /*  Multivariate GCD for large data not covered by the heuristic GCD algo
3  *  Copyright (C) 2000,2014 B. Parisse, Institut Fourier, 38402 St Martin d'Heres
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef _GIAC_THREADED_H_
20 #define _GIAC_THREADED_H_
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 #include "first.h"
25 #ifdef HAVE_UNISTD_H
26 #include <unistd.h>
27 #endif
28 #if defined WIN32 && !defined CLOCK && !defined BESTA_OS
29 #define CLOCK clock
30 #endif
31 
32 /*
33 #ifndef WIN32
34 #if defined(__APPLE__) || defined(__FreeBSD__) || defined(VISUALC) | defined(__NetBSD__)
35 #else // was #ifndef __APPLE__
36 #include <sys/sysinfo.h>
37 #endif
38 #endif // WIN32
39 */
40 #include <iostream>
41 #include <algorithm>
42 #include <stdexcept>
43 #include "vector.h"
44 #ifdef USTL
45 #include <uheap.h>
46 #include <umultimap.h>
47 #endif
48 #include <map>
49 #include "monomial.h"
50 #if defined HAVE_PTHREAD_H && defined HAVE_LIBPTHREAD
51 #include <pthread.h>
52 #endif
53 
54 #ifdef USE_GMP_REPLACEMENTS
55 #undef HAVE_GMPXX_H
56 #undef HAVE_LIBMPFR
57 #endif
58 
59 #ifdef HAVE_GMPXX_H
60 #include <gmpxx.h>
61 #endif
62 
63 #if defined HAVE_SYS_TIME_H && !defined VISUALC13
64 #include <time.h>
65 //#else
66 //#ifndef VISUALC13
67 //#define clock_t int
68 //#endif
69 //#define CLOCK() 0
70 #endif
71 
72 #ifndef NO_NAMESPACE_GIAC
73 namespace giac {
74 #endif // ndef NO_NAMESPACE_GIAC
75   void wait_1ms(int ms=1);
76   template<class U>
sizeinbase2(U n)77   inline unsigned sizeinbase2(U n){
78     unsigned i=0;
79     for (;n;++i){
80       n >>= 1;
81     }
82     return i;
83   }
nextpow2(ulonglong n)84   inline ulonglong nextpow2(ulonglong n){
85     unsigned i=sizeinbase2(n);
86     return 1ULL<<i;
87   }
88   std::vector<int> operator % (const std::vector<int> & a,int modulo);
89   std::vector<int> operator / (const std::vector<int> & v,const std::vector<int> & b);
90   std::vector<int> operator % (const std::vector<int> & v,const std::vector<int> & b);
91   bool operator > (const std::vector<int> & v,double q);
92 
93 
94   class gen;
95   class context;
96   gen _heap_mult(const gen & g0,const context *);
97   gen _modgcd_cachesize(const gen & g0,const context *);
98   gen _debug_infolevel(const gen & g0,const context *);
99   gen _background(const gen & g,const context *);
100 
101   typedef unsigned long long hashgcd_U;
102   //typedef unsigned int hashgcd_U; // replace with ulonglong for large index capacity on 32 bit CPU
103 
104 #ifdef HAVE_GMPXX_H
105 
is_zero(const mpz_class & a)106   inline bool is_zero(const mpz_class & a){
107     return a==0;
108   }
109 
110   mpz_class invmod(const mpz_class & a,int reduce);
111 
112   mpz_class smod(const mpz_class & a,int reduce);
113 
114 
type_operator_times(const mpz_class & a,const mpz_class & b,mpz_class & c)115   inline void type_operator_times(const mpz_class & a,const mpz_class & b,mpz_class & c){
116     // cerr << gen(c) << " = " << gen(a) << "*" << gen(b) << '\n';
117     mpz_mul(c.get_mpz_t(),a.get_mpz_t(),b.get_mpz_t());
118     // cerr << gen(c) << '\n';
119   }
120 
type_operator_reduce(const mpz_class & a,const mpz_class & b,mpz_class & c,int reduce)121   inline void type_operator_reduce(const mpz_class & a,const mpz_class & b,mpz_class & c,int reduce){
122     mpz_mul(c.get_mpz_t(),a.get_mpz_t(),b.get_mpz_t());
123     if (reduce){
124       c %= reduce;
125     }
126   }
127 
type_operator_plus_times(const mpz_class & a,const mpz_class & b,mpz_class & c)128   inline void type_operator_plus_times(const mpz_class & a,const mpz_class & b,mpz_class & c){
129     // cerr << gen(c) << " += " << gen(a) << "*" << gen(b) << '\n';
130     // c+=a*b
131     mpz_addmul(c.get_mpz_t(),a.get_mpz_t(),b.get_mpz_t());
132     // cerr << gen(c) << '\n';
133   }
134 
type_operator_plus_times_reduce(const mpz_class & a,const mpz_class & b,mpz_class & c,int reduce)135   inline void type_operator_plus_times_reduce(const mpz_class & a,const mpz_class & b,mpz_class & c,int reduce){
136     // c+=a*b % reduce;
137     mpz_addmul(c.get_mpz_t(),a.get_mpz_t(),b.get_mpz_t());
138     if (reduce){
139       c %= reduce;
140     }
141   }
142 
143 #endif // HAVE__GMPXX_H
144 
145   int invmod(longlong a,int reduce);
146 
147 #ifdef INT128
148 
is_zero(const int128_t & a)149   inline bool is_zero(const int128_t & a){
150     return a==0;
151   }
152 
153   int invmod(int128_t a,int reduce);
154 
155   int128_t smod(int128_t & a,int reduce);
156 
type_operator_times(const int128_t & a,const int128_t & b,int128_t & c)157   inline void type_operator_times(const int128_t & a,const int128_t & b,int128_t & c){
158     c = a*b;
159   }
160 
type_operator_reduce(const int128_t & a,const int128_t & b,int128_t & c,int reduce)161   inline void type_operator_reduce(const int128_t & a,const int128_t & b,int128_t & c,int reduce){
162     c =a*b;
163     if (reduce)
164       c %= reduce;
165   }
166 
type_operator_plus_times(const int128_t & a,const int128_t & b,int128_t & c)167   inline void type_operator_plus_times(const int128_t & a,const int128_t & b,int128_t & c){
168     c+=a*b;
169   }
170 
type_operator_plus_times_reduce(const int128_t & a,const int128_t & b,int128_t & c,int reduce)171   inline void type_operator_plus_times_reduce(const int128_t & a,const int128_t & b,int128_t & c,int reduce){
172     c +=a*b;
173     if (reduce)
174       c %= reduce;
175   }
176 
177 #endif
178 
179 
180   bool is_zero(const std::vector<int> & v);
181   int hornermod(const std::vector<int> & v,int alpha,int modulo,bool unsig=false);
182   // v <- v+w % m
183   void addmod(std::vector<int> & v,const std::vector<int> & w,int m);
184   // v <- v+w % m
185   void addmod(std::vector< std::vector<int> > & v,const std::vector< std::vector<int> > & w,int m);
186   // v <- v-w % m
187   void submod(std::vector<int> & v,const std::vector<int> & w,int m);
188   // v <- w-v % m
189   void submodneg(std::vector<int> & v,const std::vector<int> & w,int m);
190   // v <- v*k % m
191   void mulmod(std::vector<int> & v,int k,int m);
192   // v <- v*k % m
193   void mulmod(std::vector< std::vector<int> > & v,int k,int m);
mulmod(int k,std::vector<int> & v,int m)194   inline void mulmod(int k,std::vector<int> & v,int m){    mulmod(v,k,m);  }
195   void mulmod(const std::vector<int> & v,int m,std::vector<int> & w,int modulo);
196   // Note that smallmult may fail if the degree of a and b * modulo^2
197   // overflows in a longlong, so keep modulo not too large
198   void smallmult(const std::vector<int>::const_iterator & ita0,const std::vector<int>::const_iterator & ita_end,const std::vector<int>::const_iterator & itb0,const std::vector<int>::const_iterator & itb_end,std::vector<int> & new_coord,int modulo);
199   void mulsmall(const std::vector<int>::const_iterator & ita0,const std::vector<int>::const_iterator & ita_end,const std::vector<int>::const_iterator & itb0,const std::vector<int>::const_iterator & itb_end,int modulo,std::vector<int> & new_coord);
200   // res=a*b mod pmin,modulo
201   void mulext(const std::vector<int> & a,const std::vector<int> & b,const std::vector<int> & pmin,int modulo,std::vector<int> & res);
202   // a *=b mod pmin, modulo
203   void mulext(std::vector<int> & a,const std::vector<int> & b,const std::vector<int> & pmin,int modulo);
204   bool DivRem(const std::vector< std::vector<int> > & a,const std::vector< std::vector<int> > & b,const std::vector<int> * pminptr,int modulo,std::vector< std::vector<int> > & q,std::vector< std::vector<int> > & r);
205   bool invmodext(const std::vector<int> & a_,const std::vector<int> & pmin,int modulo,std::vector<int> & u0);
206 
207   // FIXME: put a #define in index.h if index_t=vector<int> and skip defs here
208   // or make them as template
209   std::vector<int> operator + (const std::vector<int> & a, const std::vector<int> & b);
210   std::vector<int> operator - (const std::vector<int> & a, const std::vector<int> & b);
211   std::vector<int> operator - (const std::vector<int> & a);
type_operator_times(const std::vector<int> & a,const std::vector<int> & b,std::vector<int> & c)212   inline void type_operator_times(const std::vector<int> & a,const std::vector<int> & b,std::vector<int> & c){
213     c.clear();
214 #ifndef NO_STDEXCEPT
215     setsizeerr("type_operator_times ext");
216 #endif
217   }
218 
type_operator_plus_times(const std::vector<int> & a,const std::vector<int> & b,std::vector<int> & c)219   inline void type_operator_plus_times(const std::vector<int> & a,const std::vector<int> & b,std::vector<int> & c){
220     c.clear();
221 #ifndef NO_STDEXCEPT
222     setsizeerr("type_operator_reduce ext");
223 #endif
224   }
225 
226 
227   extern double heap_mult,modgcd_cachesize;
228   // -1, -2, -3, force heap chains/multimap/map chains/heap multiplication
229   // 0 always hashmap
230   // >=1 use heap when product of number of monomials is >= value
231 
232   extern bool threads_allowed;
233   extern int threads;
234 
235   extern int debug_infolevel;
236   int invmod(int n,int modulo);
237   int smod(int a,int b); // where b is assumed to be positive
238   // v <- v*k % m
239   void mulmod(std::vector<int> & v,int k,int m);
240 
longlong2mpz(longlong i,mpz_t * mpzptr)241   inline void longlong2mpz(longlong i,mpz_t *mpzptr){
242     int ii((long)i);
243     if (i==ii)
244       mpz_set_si(*mpzptr,ii);
245     else {
246       bool signe=(i<0);
247       if (signe)
248 	i=-i;
249       unsigned int i1=i>>32;
250       unsigned int i2=(unsigned int)i;
251       mpz_set_ui(*mpzptr,i1); // was mpz_init_set_ui probably a BUG
252       mpz_mul_2exp(*mpzptr,*mpzptr,32);
253       mpz_add_ui(*mpzptr,*mpzptr,i2);
254       if (signe)
255 	mpz_neg(*mpzptr,*mpzptr);
256     }
257   }
258 
259   // tmp is an allocated mpz_t
mpz2longlong(mpz_t * ptr,mpz_t * tmp,longlong & ans)260   inline void mpz2longlong(mpz_t * ptr,mpz_t * tmp,longlong & ans){
261     int i=mpz_sgn(*ptr);
262     if (i<0)
263       mpz_neg(*ptr,*ptr);
264     mpz_tdiv_q_2exp(*tmp,*ptr,31);
265     ans=mpz_get_ui(*tmp);
266     ans <<= 31;
267     mpz_tdiv_r_2exp(*tmp,*ptr,31);
268     ans += mpz_get_ui(*tmp);
269     if (i<0){
270       mpz_neg(*ptr,*ptr);
271       ans = -ans;
272     }
273   }
274 
275 #ifdef INT128
276   // tmp is an allocated mpz_t
mpz2int128(mpz_t * ptr,mpz_t * tmp,int128_t & ans)277   inline void mpz2int128(mpz_t * ptr,mpz_t * tmp,int128_t & ans){
278     int i=mpz_sgn(*ptr);
279     if (i<0)
280       mpz_neg(*ptr,*ptr);
281     mpz_tdiv_q_2exp(*tmp,*ptr,93);
282     ans=mpz_get_ui(*tmp);
283     ans <<= 93;
284     mpz_tdiv_q_2exp(*tmp,*ptr,62);
285     mpz_tdiv_r_2exp(*tmp,*tmp,31);
286     ans += (int128_t(mpz_get_ui(*tmp)) << 62);
287     mpz_tdiv_q_2exp(*tmp,*ptr,31);
288     mpz_tdiv_r_2exp(*tmp,*tmp,31);
289     ans += (ulonglong(mpz_get_ui(*tmp))<<31);
290     mpz_tdiv_r_2exp(*tmp,*ptr,31);
291     ans += mpz_get_ui(*tmp);
292     if (i<0){
293       mpz_neg(*ptr,*ptr);
294       ans = -ans;
295     }
296   }
297 
298 #endif
299 
300   class my_mpz {
301   public:
302     mpz_t ptr;
my_mpz()303     my_mpz(){ mpz_init(ptr); }
my_mpz(long l)304     my_mpz(long l){ mpz_init_set_si(ptr,l); }
my_mpz(const my_mpz & z)305     my_mpz(const my_mpz & z){ mpz_init_set(ptr,z.ptr); }
~my_mpz()306     ~my_mpz() { mpz_clear(ptr); }
307     my_mpz operator - () const { my_mpz tmp(*this); mpz_neg(tmp.ptr,tmp.ptr); return tmp;}
308     my_mpz & operator = (const my_mpz & a) {mpz_set(ptr,a.ptr); return *this;}
309   };
310 
311   my_mpz operator % (const my_mpz & a,const my_mpz & b);
312   my_mpz operator + (const my_mpz & a,const my_mpz & b);
313   my_mpz operator - (const my_mpz & a,const my_mpz & b);
314   my_mpz operator * (const my_mpz & a,const my_mpz & b);
315   my_mpz operator / (const my_mpz & a,const my_mpz & b);
316   my_mpz invmod(const my_mpz & a,int reduce);
317   my_mpz smod(const my_mpz & a,int reduce);
318   my_mpz operator %= (my_mpz & a,const my_mpz & b);
319   my_mpz operator += (my_mpz & a,const my_mpz & b);
320   my_mpz operator -= (my_mpz & a,const my_mpz & b);
321 
322 
is_zero(const my_mpz & a)323   inline bool is_zero(const my_mpz & a){
324     return !mpz_sgn(a.ptr);
325   }
326 
type_operator_times(const my_mpz & a,const my_mpz & b,my_mpz & c)327   inline void type_operator_times(const my_mpz & a,const my_mpz & b,my_mpz & c){
328     mpz_mul(c.ptr,a.ptr,b.ptr);
329     // c=a*b;
330   }
331 
type_operator_reduce(const my_mpz & a,const my_mpz & b,my_mpz & c,int reduce)332   inline void type_operator_reduce(const my_mpz & a,const my_mpz & b,my_mpz & c,int reduce){
333     mpz_mul(c.ptr,a.ptr,b.ptr);
334     if (reduce)
335       mpz_fdiv_r_ui(c.ptr,c.ptr,reduce);
336   }
337 
type_operator_plus_times(const my_mpz & a,const my_mpz & b,my_mpz & c)338   inline void type_operator_plus_times(const my_mpz & a,const my_mpz & b,my_mpz & c){
339     // c+=a*b
340     mpz_addmul(c.ptr,a.ptr,b.ptr);
341   }
342 
type_operator_plus_times_reduce(const my_mpz & a,const my_mpz & b,my_mpz & c,int reduce)343   inline void type_operator_plus_times_reduce(const my_mpz & a,const my_mpz & b,my_mpz & c,int reduce){
344     // c+=a*b % reduce;
345     mpz_addmul(c.ptr,a.ptr,b.ptr);
346     if (reduce)
347       mpz_fdiv_r_ui(c.ptr,c.ptr,reduce);
348   }
349 
350   template<class T,class U>
351   struct T_unsigned {
352     T g;
353     U u;
T_unsignedT_unsigned354     T_unsigned(const T & myg,const U & myu): g(myg),u(myu) {};
T_unsignedT_unsigned355     T_unsigned(): g(0),u(0) {};
356     bool operator == (const T_unsigned<T,U> & tu) const { return g==tu.g && u==tu.u; }
357     bool operator !=(const T_unsigned<T,U> & tu) const { return g==tu.g && u!=tu.u; }
358   };
359 
360   // warning, < is > so that monomial ordering is ok after back-conversion
361   template <class T,class U>
362   inline bool operator < (const T_unsigned<T,U> & gu1,const T_unsigned<T,U> & gu2){
363     return gu1.u > gu2.u;
364   }
365   typedef T_unsigned<int,unsigned> int_unsigned;
366 
367 #ifdef KHICAS
368   template<class T,class U>
369   stdostream & operator << (stdostream & os, const std::vector< T_unsigned<T,U> > & v){
370     typename std::vector< T_unsigned<T,U> >::const_iterator it=v.begin(),itend=v.end();
371     for (;it!=itend;++it){
372       os << "(" << it->g << "," << it->u << "),";
373     }
374     return os << '\n';
375   }
376 #endif
377 #ifdef NSPIRE
378   template<class T,class U,class I>
379   nio::ios_base<I> & operator << (nio::ios_base<I> & os, const std::vector< T_unsigned<T,U> > & v){
380     typename std::vector< T_unsigned<T,U> >::const_iterator it=v.begin(),itend=v.end();
381     for (;it!=itend;++it){
382       os << "(" << it->g << "," << it->u << "),";
383     }
384     return os << '\n';
385   }
386 #else
387   template<class T,class U>
388   std::ostream & operator << (std::ostream & os, const std::vector< T_unsigned<T,U> > & v){
389     typename std::vector< T_unsigned<T,U> >::const_iterator it=v.begin(),itend=v.end();
390     for (;it!=itend;++it){
391       os << "(" << it->g << "," << it->u << "),";
392     }
393     return os << '\n';
394   }
395 #endif
396 
397 #if defined VISUALC || defined __clang__
398   template<class T>
399   class vector_size64:public std::vector<T>{
400   };
401   template<class T>
402   class vector_size32:public std::vector<T>{
403   };
404 #else
405   // "smart" vector, T must be a type of size 64bits, sizeof(T)=8
406   // and vector must be of size 192 bits, sizeof=24
407   template<class T>
408   class vector_size64 {
409   public:
410     longlong taille;
411     T v0;
412     T v1;
vector_size64()413     vector_size64(){ taille=1;}
vector_size64(size_t t)414     vector_size64(size_t t){
415       if (t>2){
416 	taille=0; * (longlong *) &v0=0; * (longlong *) &v1=0;
417 	std::vector<T> tmp(t);
418 	std::vector<T> & v = * (std::vector<T> *) &taille;
419 	std::swap(tmp,v);
420       }
421       else { taille=2*t+1; * (longlong *) &v0=0; * (longlong *) &v1=0;}
422     }
vector_size64(size_t t,const T & elem)423     vector_size64(size_t t,const T & elem){
424       if (t>2){
425 	taille=0; * (longlong *) &v0=0; * (longlong *) &v1=0;
426 	std::vector<T> tmp(t,elem);
427 	std::vector<T> & v = * (std::vector<T> *) &taille;
428 	std::swap(tmp,v);
429       }
430       else { taille=2*t+1; v0=elem; v1=elem;}
431     }
~vector_size64()432     ~vector_size64(){
433       if (taille%2==0){
434 	std::vector<T> & v = * (std::vector<T> *) &taille;
435 	std::vector<T> tmp;
436 	std::swap(tmp,v);
437       }
438     }
push_back(T t)439     void push_back(T t){
440       if (taille %2){
441 	if (taille==5){
442 	  std::vector<T> tmp(4);
443 	  tmp.front()=v0;
444 	  tmp[1]=v1;
445 	  tmp[2]=t;
446 	  tmp.pop_back();
447 	  taille=0; * (longlong *) &v0=0; * (longlong *) &v1=0;
448 	  std::vector<T> & v = * (std::vector<T> *) &taille;
449 	  std::swap(tmp,v);
450 	  return;
451 	}
452 	taille += 2;
453 	if (taille==5)
454 	  v1=t;
455 	else
456 	  v0=t;
457       }
458       else {
459 	std::vector<T> & v = * (std::vector<T> *) &taille;
460 	v.push_back(t);
461       }
462     }
pop_back()463     void pop_back(){
464       if (taille%2)
465 	taille -=2;
466       else {
467 	std::vector<T> & v = * (std::vector<T> *) &taille;
468 	v.pop_back();
469       }
470     }
clear()471     void clear(){
472       if (taille%2)
473 	taille=1;
474       else {
475 	std::vector<T> & v = * (std::vector<T> *) &taille;
476 	v.clear();
477       }
478     }
479     T operator [] (size_t pos) const {
480       if (taille%2)
481 	return pos?v1:v0;
482       else {
483 	std::vector<T> & v = * (std::vector<T> *) &taille;
484 	return v[pos];
485       }
486     }
size()487     size_t size() const {
488       return end()-begin();
489     }
capacity()490     size_t capacity() const {
491       if (taille % 2)
492 	return 0;
493       std::vector<T> & v = * (std::vector<T> *) &taille;
494       return v.capacity();
495     }
begin()496     typename std::vector<T>::const_iterator begin() const {
497       if (taille %2)
498 	return typename std::vector<T>::const_iterator(&v0);
499       else {
500 	std::vector<T> & v = * (std::vector<T> *) &taille;
501 	return v.begin();
502       }
503     }
begin()504     typename std::vector<T>::iterator begin() {
505       if (taille %2)
506 	return typename std::vector<T>::iterator(&v0);
507       else {
508 	std::vector<T> & v = * (std::vector<T> *) &taille;
509 	return v.begin();
510       }
511     }
end()512     typename std::vector<T>::const_iterator end() const {
513       if (taille %2)
514 	return typename std::vector<T>::const_iterator(&v0+taille/2);
515       else {
516 	std::vector<T> & v = * (std::vector<T> *) &taille;
517 	return v.end();
518       }
519     }
end()520     typename std::vector<T>::iterator end() {
521       if (taille %2)
522 	return typename std::vector<T>::iterator(&v0+taille/2);
523       else {
524 	std::vector<T> & v = * (std::vector<T> *) &taille;
525 	return v.end();
526       }
527     }
528   };
529 
530   // vector of type T of size 32 bits (sizeof==4), not too large
531   template<class T>
532   struct mytab{
533     T * tab;
534     unsigned int _size;
535     unsigned int _capacity;
536   };
537 
538 #if 0
539   template<class T>
540   class vector_size32 {
541   public:
542     struct {
543       unsigned int taille;
544       T v0;
545       T v1;
546       T v2;
547       T v3;
548       T v4;
549       T v5;
550       T v6;
551     };
552     vector_size32(){ taille=1;}
553     vector_size32(size_t t){
554       if (t>7){
555 	mytab<T> * mytabptr = (mytab<T> *) this;
556 	mytabptr->tab = new T[t];
557 	mytabptr->_size = mytabptr->_capacity = t;
558 	int * ptr=(int *)mytabptr->tab;
559 	for (size_t i=0;i<t;++ptr,++i)
560 	  *ptr=0;
561       }
562       else {
563 	taille=2*t+1;
564 	* (int *) &v0=0; * (longlong *) &v1=0;
565 	* (longlong *) &v3=0;
566 	* (longlong *) &v5=0;
567       }
568     }
569     vector_size32(size_t t,const T & elem){
570       if (t>7){
571 	mytab<T> * mytabptr = (mytab<T> *) this;
572 	mytabptr->tab = new T[t];
573 	mytabptr->_size = mytabptr->_capacity = t;
574 	T * ptr=mytabptr->tab;
575 	for (size_t i=0;i<t;++ptr,++i)
576 	  *ptr=elem;
577       }
578       else {
579 	taille=2*t+1;
580 	v6=v5=v4=v3=v2=v1=v0=elem;
581       }
582     }
583     ~vector_size32(){
584       if (taille%2==0){
585 	mytab<T> * mytabptr = (mytab<T> *) this;
586 	delete [] mytabptr->tab;
587       }
588     }
589     void push_back(T t){
590       if (taille %2){
591 	if (taille==2*7+1){
592 	  T * ptr = new T[16];
593 	  *ptr=v0; ++ptr;
594 	  *ptr=v1; ++ptr;
595 	  *ptr=v2; ++ptr;
596 	  *ptr=v3; ++ptr;
597 	  *ptr=v4; ++ptr;
598 	  *ptr=v5; ++ptr;
599 	  *ptr=v6; ++ptr;
600 	  *ptr=t;
601 	  mytab<T> * mytabptr = (mytab<T> *) this;
602 	  mytabptr->tab = ptr-7;
603 	  mytabptr->_size = 8;
604 	  mytabptr->_capacity = 16;
605 	  return;
606 	}
607 	*(&v0+taille/2)=t;
608 	taille += 2;
609       }
610       else {
611 	mytab<T> * mytabptr = (mytab<T> *) this;
612 	if (mytabptr->_size>=mytabptr->_capacity){
613 	  mytabptr->_capacity *=2;
614 	  T * ptr = new T[mytabptr->_capacity];
615 	  memcpy(ptr,mytabptr->tab,mytabptr->_size*sizeof(T));
616 	  delete [] mytabptr->tab;
617 	  mytabptr->tab=ptr;
618 	}
619 	mytabptr->tab[mytabptr->_size]=t;
620 	++mytabptr->_size;
621       }
622     }
623     void pop_back(){
624       if (taille%2)
625 	taille -=2;
626       else {
627 	mytab<T> * mytabptr = (mytab<T> *) this;
628 	--mytabptr->_size;
629       }
630     }
631     void clear(){
632       if (taille%2)
633 	taille=1;
634       else {
635 	mytab<T> * mytabptr = (mytab<T> *) this;
636 	mytabptr->_size=0;
637       }
638     }
639     T operator [] (size_t pos) const {
640       if (taille%2)
641 	return *(&v0+pos);
642       else {
643 	mytab<T> * mytabptr = (mytab<T> *) this;
644 	return mytabptr->tab[pos];
645       }
646     }
647 #else
648   template<class T>
649   class vector_size32 {
650   public:
651     struct {
652       unsigned int taille;
653       T v0;
654       T v1;
655       T v2;
656     };
vector_size32()657     vector_size32(){ taille=1;}
vector_size32(size_t t)658     vector_size32(size_t t){
659       if (t>3){
660 	mytab<T> * mytabptr = (mytab<T> *) this;
661 	mytabptr->tab = new T[t];
662 	mytabptr->_size = mytabptr->_capacity = t;
663 	int * ptr=(int *)mytabptr->tab;
664 	for (size_t i=0;i<t;++ptr,++i)
665 	  *ptr=0;
666       }
667       else { taille=2*t+1; * (int *) &v0=0; * (longlong *) &v1=0;}
668     }
vector_size32(size_t t,const T & elem)669     vector_size32(size_t t,const T & elem){
670       if (t>3){
671 	mytab<T> * mytabptr = (mytab<T> *) this;
672 	mytabptr->tab = new T[t];
673 	mytabptr->_size = mytabptr->_capacity = t;
674 	T * ptr=mytabptr->tab;
675 	for (size_t i=0;i<t;++ptr,++i)
676 	  *ptr=elem;
677       }
678       else { taille=2*t+1; v2=v1=v0=elem;}
679     }
~vector_size32()680     ~vector_size32(){
681       if (taille%2==0){
682 	mytab<T> * mytabptr = (mytab<T> *) this;
683 	delete [] mytabptr->tab;
684       }
685     }
push_back(T t)686     void push_back(T t){
687       if (taille %2){
688 	if (taille==7){
689 	  T * ptr = new T[6];
690 	  *ptr=v0; ++ptr;
691 	  *ptr=v1; ++ptr;
692 	  *ptr=v2; ++ptr;
693 	  *ptr=t;
694 	  mytab<T> * mytabptr = (mytab<T> *) this;
695 	  mytabptr->tab = ptr-3;
696 	  mytabptr->_size = 4;
697 	  mytabptr->_capacity = 6;
698 	  return;
699 	}
700 	*(&v0+taille/2)=t;
701 	taille += 2;
702       }
703       else {
704 	mytab<T> * mytabptr = (mytab<T> *) this;
705 	if (mytabptr->_size>=mytabptr->_capacity){
706 	  mytabptr->_capacity *=2;
707 	  T * ptr = new T[mytabptr->_capacity];
708 	  memcpy(ptr,mytabptr->tab,mytabptr->_size*sizeof(T));
709 	  delete [] mytabptr->tab;
710 	  mytabptr->tab=ptr;
711 	}
712 	mytabptr->tab[mytabptr->_size]=t;
713 	++mytabptr->_size;
714       }
715     }
pop_back()716     void pop_back(){
717       if (taille%2)
718 	taille -=2;
719       else {
720 	mytab<T> * mytabptr = (mytab<T> *) this;
721 	--mytabptr->_size;
722       }
723     }
clear()724     void clear(){
725       if (taille%2)
726 	taille=1;
727       else {
728 	mytab<T> * mytabptr = (mytab<T> *) this;
729 	mytabptr->_size=0;
730       }
731     }
732     T operator [] (size_t pos) const {
733       if (taille%2)
734 	return *(&v0+pos); // pos?((pos==2)?v2:v1):v0;
735       else {
736 	mytab<T> * mytabptr = (mytab<T> *) this;
737 	return mytabptr->tab[pos];
738       }
739     }
740 #endif
begin()741     typename std::vector<T>::const_iterator begin() const {
742       if (taille %2)
743 	return typename std::vector<T>::const_iterator(&v0);
744       else {
745 	mytab<T> * mytabptr = (mytab<T> *) this;
746 	return typename std::vector<T>::const_iterator(mytabptr->tab);
747       }
748     }
begin()749     typename std::vector<T>::iterator begin() {
750       if (taille %2)
751 	return typename std::vector<T>::iterator(&v0);
752       else {
753 	mytab<T> * mytabptr = (mytab<T> *) this;
754 	return typename std::vector<T>::iterator(mytabptr->tab);
755       }
756     }
end()757     typename std::vector<T>::const_iterator end() const {
758       if (taille %2)
759 	return typename std::vector<T>::const_iterator(&v0+taille/2);
760       else {
761 	mytab<T> * mytabptr = (mytab<T> *) this;
762 	return typename std::vector<T>::const_iterator(mytabptr->_tab+mytabptr->_size);
763       }
764     }
size()765     size_t size() const {
766       if (taille % 2)
767 	return taille/2;
768       mytab<T> * mytabptr = (mytab<T> *) this;
769       return mytabptr->_size;
770     }
capacity()771     size_t capacity() const {
772       if (taille % 2)
773 	return 6;
774       mytab<T> * mytabptr = (mytab<T> *) this;
775       return mytabptr->_capacity;
776     }
end()777     typename std::vector<T>::iterator end() {
778       if (taille %2)
779 	return typename std::vector<T>::iterator(&v0+taille/2);
780       else {
781 	mytab<T> * mytabptr = (mytab<T> *) this;
782 	return typename std::vector<T>::iterator(mytabptr->tab+mytabptr->_size);
783       }
784     }
785   };
786 #endif
787 
788   class hash_function_unsigned_object {
789   public:
790     // inline size_t operator () (size_t x) const { return x; }
operator()791     inline size_t operator () (int x) const { return x; }
operator()792     inline size_t operator () (unsigned x) const { return x; }
operator()793     inline size_t operator () (long long unsigned x) const { return x%12345701; }
operator()794     inline size_t operator () (long long x) const { return x%12345701; }
hash_function_unsigned_object()795     hash_function_unsigned_object() {};
796   };
797 
798   bool mod_gcd(const std::vector< T_unsigned<int,hashgcd_U> > & p0,const std::vector< T_unsigned<int,hashgcd_U> > & q0,int modulo,std::vector< T_unsigned<int,hashgcd_U> > & pgcd, std::vector< T_unsigned<int,hashgcd_U> > & pcof, std::vector< T_unsigned<int,hashgcd_U> > & qcof,const std::vector<hashgcd_U> & vars, bool compute_cofactors,int nthreads);
799 
800   bool mod_gcd(const std::vector< T_unsigned<int,hashgcd_U> > & p_orig,const std::vector< T_unsigned<int,hashgcd_U> > & q_orig,int modulo,std::vector< T_unsigned<int,hashgcd_U> > & d, std::vector< T_unsigned<int,hashgcd_U> > & pcofactor, std::vector< T_unsigned<int,hashgcd_U> > & qcofactor,const std::vector<hashgcd_U> & vars, bool compute_pcofactor,bool compute_qcofactor,int nthreads);
801 
802   int modsqrtminus1(int modulo);
803   bool gcd(const std::vector< T_unsigned<gen,hashgcd_U> > & p_orig,const std::vector< T_unsigned<gen,hashgcd_U> > & q_orig,std::vector< T_unsigned<gen,hashgcd_U> > & d, std::vector< T_unsigned<gen,hashgcd_U> > & pcofactor, std::vector< T_unsigned<gen,hashgcd_U> > & qcofactor,const std::vector<hashgcd_U> & vars, bool compute_cofactors,int nthreads=1);
804 
805   bool gcd_ext(const std::vector< T_unsigned<gen,hashgcd_U> > & p_orig,const std::vector< T_unsigned<gen,hashgcd_U> > & q_orig,std::vector< T_unsigned<gen,hashgcd_U> > & d, std::vector< T_unsigned<gen,hashgcd_U> > & pcofactor, std::vector< T_unsigned<gen,hashgcd_U> > & qcofactor,const std::vector<hashgcd_U> & vars, bool compute_cofactors,int nthreads=1);
806 
807   template<class T,class U>
smalladd(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v)808   void smalladd(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v){
809     if (&v1==&v || &v2==&v){
810       std::vector< T_unsigned<T,U> > tmp;
811       smalladd(v1,v2,tmp);
812       std::swap< std::vector< T_unsigned<T,U> > >(v,tmp);
813       return;
814     }
815     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end(),it2=v2.begin(),it2end=v2.end();
816     T g;
817     v.clear();
818     v.reserve((it1end-it1)+(it2end-it2)); // worst case
819     for (;it1!=it1end && it2!=it2end;){
820       if (it1->u==it2->u){
821 	g=it1->g+it2->g;
822 	if (!is_zero(g))
823 	  v.push_back(T_unsigned<T,U>(g,it1->u));
824 	++it1;
825 	++it2;
826       }
827       else {
828 	if (it1->u>it2->u){
829 	  v.push_back(*it1);
830 	  ++it1;
831 	}
832 	else {
833 	  v.push_back(*it2);
834 	  ++it2;
835 	}
836       }
837     }
838     for (;it1!=it1end;++it1)
839       v.push_back(*it1);
840     for (;it2!=it2end;++it2)
841       v.push_back(*it2);
842   }
843 
844   template<class T,class U,class R>
smalladd(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const R & reduce)845   void smalladd(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const R & reduce){
846     if (&v1==&v || &v2==&v){
847       std::vector< T_unsigned<T,U> > tmp;
848       smalladd(v1,v2,tmp,reduce);
849       std::swap< std::vector< T_unsigned<T,U> > >(v,tmp);
850       return;
851     }
852     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end(),it2=v2.begin(),it2end=v2.end();
853     T g;
854     v.clear();
855     v.reserve((it1end-it1)+(it2end-it2)); // worst case
856     for (;it1!=it1end && it2!=it2end;){
857       if (it1->u==it2->u){
858 	g=it1->g+it2->g;
859 	g=g%reduce;
860 	if (!is_zero(g))
861 	  v.push_back(T_unsigned<T,U>(g,it1->u));
862 	++it1;
863 	++it2;
864       }
865       else {
866 	if (it1->u>it2->u){
867 	  v.push_back(*it1);
868 	  ++it1;
869 	}
870 	else {
871 	  v.push_back(*it2);
872 	  ++it2;
873 	}
874       }
875     }
876     for (;it1!=it1end;++it1)
877       v.push_back(*it1);
878     for (;it2!=it2end;++it2)
879       v.push_back(*it2);
880   }
881 
882   // v:=v1+x*v2
883   template<class T,class U,class R>
smalladdmult(const std::vector<T_unsigned<T,U>> & v1,const T & x,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const R & reduce)884   void smalladdmult(const std::vector< T_unsigned<T,U> > & v1,const T & x,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const R & reduce){
885     if (x==0){
886       if (&v!=&v1)
887 	v=v1;
888       return;
889     }
890     if (&v1==&v || &v2==&v){
891       std::vector< T_unsigned<T,U> > tmp;
892       smalladdmult(v1,x,v2,tmp,reduce);
893       std::swap< std::vector< T_unsigned<T,U> > >(v,tmp);
894       return;
895     }
896     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end(),it2=v2.begin(),it2end=v2.end();
897     T g;
898     v.clear();
899     v.reserve((it1end-it1)+(it2end-it2)); // worst case
900     for (;it1!=it1end && it2!=it2end;){
901       if (it1->u==it2->u){
902 	g=it1->g;
903 	type_operator_plus_times_reduce(x,it2->g,g,reduce);
904 	if (!is_zero(g))
905 	  v.push_back(T_unsigned<T,U>(g,it1->u));
906 	++it1;
907 	++it2;
908       }
909       else {
910 	if (it1->u>it2->u){
911 	  v.push_back(*it1);
912 	  ++it1;
913 	}
914 	else {
915 	  type_operator_reduce(x,it2->g,g,reduce);
916 	  v.push_back(T_unsigned<T,U>(g,it2->u));
917 	  ++it2;
918 	}
919       }
920     }
921     for (;it1!=it1end;++it1)
922       v.push_back(*it1);
923     for (;it2!=it2end;++it2){
924       type_operator_reduce(x,it2->g,g,reduce);
925       v.push_back(T_unsigned<T,U>(g,it2->u));
926     }
927   }
928 
929   template<class T,class U>
smallsub(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v)930   void smallsub(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v){
931     if (&v1==&v || &v2==&v){
932       std::vector< T_unsigned<T,U> > tmp;
933       smallsub(v1,v2,tmp);
934       std::swap< std::vector< T_unsigned<T,U> > >(v,tmp);
935       return;
936     }
937     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end(),it2=v2.begin(),it2end=v2.end();
938     T g;
939     v.clear();
940     v.reserve((it1end-it1)+(it2end-it2)); // worst case
941     for (;it1!=it1end && it2!=it2end;){
942       if (it1->u==it2->u){
943 	g=it1->g-it2->g;
944 	if (!is_zero(g))
945 	  v.push_back(T_unsigned<T,U>(g,it1->u));
946 	++it1;
947 	++it2;
948       }
949       else {
950 	if (it1->u>it2->u){
951 	  v.push_back(*it1);
952 	  ++it1;
953 	}
954 	else {
955 	  v.push_back(T_unsigned<T,U>(-it2->g,it2->u));
956 	  ++it2;
957 	}
958       }
959     }
960     for (;it1!=it1end;++it1)
961       v.push_back(*it1);
962     for (;it2!=it2end;++it2)
963       v.push_back(T_unsigned<T,U>(-it2->g,it2->u));
964   }
965 
966   template<class T,class U,class R>
smallsub(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const R & reduce)967   void smallsub(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const R & reduce){
968     if (&v1==&v || &v2==&v){
969       std::vector< T_unsigned<T,U> > tmp;
970       smallsub(v1,v2,tmp,reduce);
971       std::swap< std::vector< T_unsigned<T,U> > >(v,tmp);
972       return;
973     }
974     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end(),it2=v2.begin(),it2end=v2.end();
975     T g;
976     v.clear();
977     v.reserve((it1end-it1)+(it2end-it2)); // worst case
978     for (;it1!=it1end && it2!=it2end;){
979       if (it1->u==it2->u){
980 	g=it1->g-it2->g;
981 	g=g%reduce;
982 	if (!is_zero(g))
983 	  v.push_back(T_unsigned<T,U>(g,it1->u));
984 	++it1;
985 	++it2;
986       }
987       else {
988 	if (it1->u>it2->u){
989 	  v.push_back(*it1);
990 	  ++it1;
991 	}
992 	else {
993 	  v.push_back(T_unsigned<T,U>(-it2->g,it2->u));
994 	  ++it2;
995 	}
996       }
997     }
998     for (;it1!=it1end;++it1)
999       v.push_back(*it1);
1000     for (;it2!=it2end;++it2)
1001       v.push_back(T_unsigned<T,U>(-it2->g,it2->u));
1002   }
1003 
1004   template<class T,class U>
smallmult(const T & g,const std::vector<T_unsigned<T,U>> & v1,std::vector<T_unsigned<T,U>> & v)1005   void smallmult(const T & g,const std::vector< T_unsigned<T,U> > & v1,std::vector< T_unsigned<T,U> > & v){
1006     if (is_zero(g)){
1007       v.clear();
1008       return;
1009     }
1010     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end();
1011     if (&v1==&v){
1012       typename std::vector< T_unsigned<T,U> >::iterator it1=v.begin(),it1end=v.end();
1013       for (;it1!=it1end;++it1){
1014 	it1->g = g*it1->g;
1015       }
1016     }
1017     else {
1018       v.clear();
1019       v.reserve(it1end-it1); // worst case
1020       for (;it1!=it1end;++it1){
1021 	v.push_back(T_unsigned<T,U>(g*it1->g,it1->u));
1022       }
1023     }
1024   }
1025 
1026   template<class T,class U,class R>
smallmult(const T & g,const std::vector<T_unsigned<T,U>> & v1,std::vector<T_unsigned<T,U>> & v,const R & reduce)1027   void smallmult(const T & g,const std::vector< T_unsigned<T,U> > & v1,std::vector< T_unsigned<T,U> > & v,const R & reduce){
1028     if (is_zero(g)){
1029       v.clear();
1030       return;
1031     }
1032     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end();
1033     if (&v1==&v){
1034       typename std::vector< T_unsigned<T,U> >::iterator it1=v.begin(),it1end=v.end();
1035       for (;it1!=it1end;++it1){
1036 	type_operator_reduce(g,it1->g,it1->g,reduce);
1037 	// it1->g = (g*it1->g) % reduce;
1038       }
1039     }
1040     else {
1041       v.clear();
1042       v.reserve(it1end-it1); // worst case
1043       T res;
1044       for (;it1!=it1end;++it1){
1045 	type_operator_reduce(g,it1->g,res,reduce);
1046 	v.push_back(T_unsigned<T,U>(res,it1->u));
1047 	// v.push_back(T_unsigned<T,U>((g*it1->g)%reduce,it1->u));
1048       }
1049     }
1050   }
1051 
1052   template<class T,class U>
smalldiv(const std::vector<T_unsigned<T,U>> & v1,const T & g,std::vector<T_unsigned<T,U>> & v)1053   void smalldiv(const std::vector< T_unsigned<T,U> > & v1,const T & g,std::vector< T_unsigned<T,U> > & v){
1054     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end();
1055     if (&v1==&v){
1056       typename std::vector< T_unsigned<T,U> >::iterator it1=v.begin(),it1end=v.end();
1057       for (;it1!=it1end;++it1){
1058 	it1->g = it1->g/g;
1059       }
1060     }
1061     else {
1062       v.clear();
1063       v.reserve(it1end-it1); // worst case
1064       for (;it1!=it1end;++it1){
1065 	v.push_back(T_unsigned<T,U>(it1->g/g,it1->u));
1066       }
1067     }
1068   }
1069 
1070   template<class T,class U>
smallshift(const std::vector<T_unsigned<T,U>> & v1,U shift,std::vector<T_unsigned<T,U>> & v)1071   void smallshift(const std::vector< T_unsigned<T,U> > & v1,U shift,std::vector< T_unsigned<T,U> > & v){
1072     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1.begin(),it1end=v1.end();
1073     if (&v1==&v){
1074       typename std::vector< T_unsigned<T,U> >::iterator it1=v.begin(),it1end=v.end();
1075       for (;it1!=it1end;++it1){
1076 	it1->u += shift;
1077       }
1078     }
1079     else {
1080       v.clear();
1081       v.reserve(it1end-it1); // worst case
1082       for (;it1!=it1end;++it1){
1083 	v.push_back(T_unsigned<T,U>(it1->g,it1->u+shift));
1084       }
1085     }
1086   }
1087 
1088   // eval v1 at vars.back()=g
1089   // should be used with reduce!=0, T=int or longlong
1090   // (powmod should be defined for T,U,T)
1091   // * should not overflow in T, if T=int, reduce must be < 46340
1092   // this could be fixed using type_operator_... instead of *
1093   template<class T,class U,class R>
smallhorner(const std::vector<T_unsigned<T,U>> & v1,const T & g,const std::vector<U> & vars,std::vector<T_unsigned<T,U>> & v,const R & reduce)1094   void smallhorner(const std::vector< T_unsigned<T,U> > & v1,const T & g,const std::vector<U> & vars,std::vector< T_unsigned<T,U> > & v,const R& reduce){
1095     typename std::vector< T_unsigned<T,U> >::const_iterator it=v1.begin(),itend=v1.end();
1096     U deg=vars.back();
1097     v.clear();
1098     v.reserve((itend-it)/deg);
1099     for (;it!=itend;){
1100       // find smallest possible monomial degree for next element of v
1101       U minu=(it->u/deg)*deg;
1102       U prevdiffu=it->u-minu,diffu;
1103       T tmp;
1104       for (;it!=itend;++it){
1105 	if (it->u<minu){
1106 	  if (prevdiffu)
1107 	    tmp = tmp*powmod(g,prevdiffu,reduce);
1108 	  prevdiffu=0;
1109 	  tmp = tmp%reduce;
1110 	  v.push_back(T_unsigned<T,U>(tmp,minu));
1111 	  break;
1112 	}
1113 	diffu=it->u-minu;
1114 	if (prevdiffu!=diffu){
1115 	  if (prevdiffu==diffu+1)
1116 	    tmp = tmp*g;
1117 	  else
1118 	    tmp = tmp*powmod(g,prevdiffu-diffu,reduce);
1119 	}
1120 	tmp += it->g;
1121 	tmp = tmp%reduce;
1122 	prevdiffu=diffu;
1123 	if (!diffu){
1124 	  v.push_back(T_unsigned<T,U>(tmp,minu));
1125 	  break;
1126 	}
1127       }
1128       if (prevdiffu){
1129 	tmp=tmp*powmod(g,prevdiffu,reduce)%reduce;
1130 	v.push_back(T_unsigned<T,U>(tmp,minu));
1131       }
1132     }
1133   }
1134 
1135   // eval v1 at vars.back()=g
1136   // should be used with T=gen
1137   template<class T,class U>
smallhorner(const std::vector<T_unsigned<T,U>> & v1,const T & g,const std::vector<U> & vars,std::vector<T_unsigned<T,U>> & v)1138   void smallhorner(const std::vector< T_unsigned<T,U> > & v1,const T & g,const std::vector<U> & vars,std::vector< T_unsigned<T,U> > & v){
1139     typename std::vector< T_unsigned<T,U> >::const_iterator it=v1.begin(),itend=v1.end();
1140     U deg=vars.back();
1141     v.clear();
1142     v.reserve((itend-it)/deg);
1143     for (;it!=itend;){
1144       // find smallest possible monomial degree for next element of v
1145       U minu=(it->u/deg)*deg;
1146       U prevdiffu=it->u-minu,diffu;
1147       T tmp(0);
1148       for (;it!=itend;){
1149 	if (it->u<minu){
1150 	  if (prevdiffu)
1151 	    tmp = tmp*pow(g,(unsigned long) prevdiffu);
1152 	  prevdiffu=0;
1153 	  v.push_back(T_unsigned<T,U>(tmp,minu));
1154 	  break;
1155 	}
1156 	diffu=it->u-minu;
1157 	if (prevdiffu!=diffu){
1158 	  if (prevdiffu==diffu+1)
1159 	    tmp = tmp*g;
1160 	  else
1161 	    tmp = tmp*pow(g,(unsigned long)prevdiffu-diffu);
1162 	}
1163 	tmp += it->g;
1164 	++it;
1165 	prevdiffu=diffu;
1166 	if (!diffu){
1167 	  v.push_back(T_unsigned<T,U>(tmp,minu));
1168 	  break;
1169 	}
1170       }
1171       if (prevdiffu){
1172 	tmp=tmp*pow(g,(unsigned long)prevdiffu);
1173 	v.push_back(T_unsigned<T,U>(tmp,minu));
1174       }
1175     }
1176   }
1177 
1178   template<class T,class U,class R>
1179   void smallmult(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const R & reduce,size_t possible_size);
1180 
1181   template<class T,class U,class R>
smallmulpoly_interpolate(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const std::vector<U> & vars,const index_t & vdeg,const R & reduce)1182   void smallmulpoly_interpolate(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const std::vector<U> & vars,const index_t & vdeg,const R& reduce){
1183     int dim=int(vars.size());
1184     if (dim==1){
1185       smallmult(v1,v2,v,reduce,0);
1186       return;
1187     }
1188     std::vector<U> vars1=vars;
1189     vars1.pop_back();
1190     int s=vdeg[dim-1];
1191     v.clear();
1192     std::vector< T_unsigned<T,U> > tmp2,tmp3;
1193     std::vector< T_unsigned<T,U> > * tab = new std::vector< T_unsigned<T,U> >[s];
1194     for (int alpha=0;alpha<s;++alpha){
1195       smallhorner<T,U>(v1,alpha,vars,tmp2,reduce);
1196       smallhorner<T,U>(v2,alpha,vars,tmp3,reduce);
1197       smallmulpoly_interpolate<T,U>(tmp2,tmp3,tab[alpha],vars1,vdeg,reduce);
1198       CERR << alpha << ":" << tab[alpha] << '\n';
1199     }
1200     // divided differences
1201     for (int k=1;k<s;++k){
1202       CERR << k << '\n';
1203       for (int j=s-1;j>=k;--j){
1204 	smallsub(tab[j],tab[j-1],tmp2,reduce);
1205 	smallmult(invmod(k,reduce),tmp2,tab[j],reduce);
1206 	CERR << tab[j] ;
1207       }
1208     }
1209     // interpolation
1210     for (int alpha=s-1;alpha>=0;--alpha){
1211       smallmult<T,U,R>(-alpha,v,tmp2,reduce);
1212       smallshift(v,U(1),v); // multiply v*(x-alpha)
1213       smalladd<T,U,R>(v,tmp2,tmp3,reduce);
1214       smalladd<T,U,R>(tmp3,tab[alpha],v,reduce);
1215     }
1216     delete [] tab;
1217   }
1218 
1219   template<class T,class U>
smallmulpoly_interpolate(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const std::vector<U> & vars,const index_t & vdeg)1220   void smallmulpoly_interpolate(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const std::vector<U> & vars,const index_t & vdeg){
1221     int dim=int(vars.size());
1222     if (dim==1){
1223       smallmult(v1,v2,v,0,0);
1224       return;
1225     }
1226     std::vector<U> vars1=vars;
1227     vars1.pop_back();
1228     int s=vdeg[dim-1];
1229     v.clear();
1230     std::vector< T_unsigned<T,U> > tmp2,tmp3;
1231     std::vector< T_unsigned<T,U> > * tab = new std::vector< T_unsigned<T,U> >[s];
1232     for (int alpha=0;alpha<s;++alpha){
1233       smallhorner<T,U>(v1,alpha,vars,tmp2);
1234       smallhorner<T,U>(v2,alpha,vars,tmp3);
1235       smallmulpoly_interpolate<T,U>(tmp2,tmp3,tab[alpha],vars1,vdeg);
1236       // CERR << tab[alpha] << '\n';
1237     }
1238     // divided differences
1239     for (int k=1;k<s;++k){
1240       for (int j=s-1;j>=k;--j){
1241 	smallsub(tab[j],tab[j-1],tmp2);
1242 	smalldiv(tmp2,T(k),tab[j]);
1243       }
1244     }
1245     // interpolation
1246     for (int alpha=s-1;alpha>=0;--alpha){
1247       smallmult<T,U>(-alpha,v,tmp2);
1248       smallshift(v,U(1),v); // multiply v*(x-alpha)
1249       smalladd<T,U>(v,tmp2,tmp3);
1250       smalladd<T,U>(tmp3,tab[alpha],v);
1251     }
1252     delete [] tab;
1253   }
1254 
1255   template<class T,class U,class R>
smallmulpoly_interpolate(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const index_t & vdeg,const R & reduce)1256   void smallmulpoly_interpolate(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const index_t & vdeg,const R & reduce){
1257     int dim=int(vdeg.size());
1258     std::vector<U> vars(dim);
1259     vars.back()=vdeg.back();
1260     for (int i=dim-1;i>0;--i){
1261       vars[i-1]=vars[i]*vdeg[i-1];
1262     }
1263     smallmulpoly_interpolate(v1,v2,v,vars,vdeg,reduce);
1264   }
1265 
1266   template<class T,class U>
smallmulpoly_interpolate(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const index_t & vdeg)1267   void smallmulpoly_interpolate(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const index_t & vdeg){
1268     int dim=int(vdeg.size());
1269     std::vector<U> vars(dim);
1270     vars.back()=vdeg.back();
1271     for (int i=dim-1;i>0;--i){
1272       vars[i-1]=vars[i]*vdeg[i-1];
1273     }
1274     smallmulpoly_interpolate(v1,v2,v,vars,vdeg);
1275   }
1276 
1277   template<class U>
1278   struct u_pair_index {
1279     U u;
1280     unsigned i1,i2;
u_pair_indexu_pair_index1281     u_pair_index(unsigned _i1,unsigned _i2,U _u):u(_u),i1(_i1),i2(_i2) {}
u_pair_indexu_pair_index1282     u_pair_index():u(0),i1(0),i2(0) {}
1283   };
1284   template<class U>
1285   inline bool operator < (const u_pair_index<U> & p1,const u_pair_index<U> & p2){
1286     return p1.u<p2.u;
1287   }
1288 
1289   template<class U>
1290   inline bool operator <= (const u_pair_index<U> & p1,const u_pair_index<U> & p2){
1291     return p1.u<=p2.u;
1292   }
1293 
1294   template<class T>
in_out_heap(T * tab,size_t size,T value)1295   void in_out_heap(T * tab,size_t size,T value){
1296     unsigned childindex=2,holeindex=0;
1297     while (childindex<size){
1298       // find largest child until end of tab
1299       register T * ptr=tab+childindex;
1300       if (*ptr<*(ptr-1)){
1301 	--childindex;
1302 	--ptr;
1303       }
1304       *(tab+holeindex)=*ptr;
1305       holeindex=childindex;
1306       childindex = (holeindex+1) << 1;
1307     }
1308     if (childindex==size){
1309       --childindex;
1310       *(tab+holeindex)=*(tab+childindex);
1311       holeindex=childindex;
1312     }
1313     // now the hole is at the bottom, replace it with value and promote value
1314     *(tab+holeindex)=value;
1315     // childindex is now the parent
1316     while (holeindex){
1317       childindex=(holeindex-1) >> 1;
1318       if (*(tab+holeindex)<=*(tab+childindex))
1319 	break;
1320       std::swap<T>(*(tab+childindex),*(tab+holeindex));
1321       holeindex=childindex;
1322     }
1323   }
1324 
1325   template<class U>
inverse_order(const U & u1,const U & u2)1326   inline bool inverse_order(const U & u1,const U & u2){
1327     return u1<u2;
1328   }
1329 
1330   template<class U>
1331   struct U_unsigned {
1332     U u;
1333     unsigned v;
U_unsignedU_unsigned1334     U_unsigned(U _u,unsigned _v):u(_u),v(_v) {}
U_unsignedU_unsigned1335     U_unsigned():u(0) {}
1336   };
1337   template<class U>
1338   inline bool operator < (const U_unsigned<U> & p1,const U_unsigned<U> & p2){
1339     return p1.u<p2.u;
1340   }
1341 
1342   // Possible improvement for threaded execution:
1343   // make each j of the k threads compute terms of the product with
1344   // degree wrt 1st var = j % k
1345   // at the end merge the results
1346   // For this, we might mark positions in v1 and v2 where the degree
1347   // wrt to the 1st var changes
1348   template<class T,class U,class R>
smallmult(const std::vector<T_unsigned<T,U>> & v1,const std::vector<T_unsigned<T,U>> & v2,std::vector<T_unsigned<T,U>> & v,const R & reduce,size_t possible_size)1349   void smallmult(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,const R & reduce,size_t possible_size){
1350     if (v1.empty()){
1351       v.clear(); return;
1352     }
1353     if (v2.empty()){
1354       v.clear(); return;
1355     }
1356     // if (&v2==&v && v2.size()==1 && v2.front().u==0 && is_one(v2.front().g)) return;
1357     if (&v1==&v || &v2==&v){
1358       std::vector< T_unsigned<T,U> > tmp;
1359       smallmult(v1,v2,tmp,reduce,possible_size);
1360       std::swap< std::vector< T_unsigned<T,U> > >(v,tmp);
1361       return;
1362     }
1363     typename std::vector< T_unsigned<T,U> >::const_iterator it1beg=v1.begin(),it1=v1.begin(),it1end=v1.end(),it2beg=v2.begin(),it2,it2end=v2.end();
1364     T g1,g;
1365     U u1=it1beg->u,u2=it2beg->u,u;
1366     v.clear();
1367     unsigned v1s=unsigned(it1end-it1beg),v2s=unsigned(it2end-it2beg);
1368     double v1v2=double(v1s)*v2s;
1369     U u12=u1+u2; // size of the array for array multiplication
1370     // compare u12 and v1v2*ln(v1v2)
1371     if ( heap_mult>=0 && possible_size && u12<512e6/sizeof(T) && std::log(double(std::min(v1s,v2s)))/std::log(2.0)>1+2*u12/v1v2){
1372       if (debug_infolevel>20)
1373 	CERR << "array multiplication, v1 size " << v1s << " v2 size " << v2s << " u1+u2 " << u12 << '\n';
1374       // array multiplication
1375       T * prod = new T[unsigned(u12+1)];
1376       for (u=0;u<=u12;++u)
1377 	prod[u]=T(0);
1378       typename std::vector< T_unsigned<T,U> >::const_iterator slice_it2beg=it2beg,slice_it2end=it2end;
1379       const int slice_size=60;
1380       for (;slice_it2beg<slice_it2end;slice_it2beg=it2end){
1381 	it2beg=slice_it2beg;
1382 	it2end=it2beg+slice_size;
1383 	if (it2end>slice_it2end)
1384 	  it2end=slice_it2end;
1385 	for (it1=it1beg;it1!=it1end;++it1){
1386 	  g1=it1->g;
1387 	  u1=it1->u;
1388 	  if (is_zero(reduce)){
1389 	    for (it2=it2beg;it2!=it2end;++it2){
1390 	      type_operator_plus_times(g1,it2->g,prod[u1+it2->u]);
1391 	    }
1392 	  }
1393 	  else {
1394 	    for (it2=it2beg;it2!=it2end;++it2){
1395 	      type_operator_plus_times_reduce(g1,it2->g,prod[u1+it2->u],reduce);
1396 	    }
1397 	  }
1398 	}
1399       }
1400       int n=0;
1401       for (u=u12;;--u){
1402 	if (!is_zero(prod[u]))
1403 	  ++n;
1404 	if (!u)
1405 	  break;
1406       }
1407       v.reserve(n);
1408       for (u=u12;;--u){
1409 	if (!is_zero(prod[u]))
1410 	  v.push_back(T_unsigned<T,U>(prod[u],u));
1411 	if (!u)
1412 	  break;
1413       }
1414       delete [] prod ;
1415       return;
1416     }
1417     bool use_heap=(heap_mult>0 && v1v2>=heap_mult);
1418     if (debug_infolevel>20){
1419       CERR << "// " << CLOCK() << "using ";
1420       if (use_heap)
1421 	CERR << "heap";
1422       else
1423 	CERR << heap_mult;
1424       CERR<< " multiplication" << '\n';
1425     }
1426     if (heap_mult<0 || use_heap){
1427       if (v1s>v2s){
1428 	smallmult(v2,v1,v,reduce,possible_size);
1429 	return;
1430       }
1431       if (v1s<128)
1432 	use_heap=false; // use heap instead of heap of chains
1433       if (heap_mult==-1 || use_heap ){
1434 	// using heap of chains
1435 	// std::vector< vector_size64< std::pair<unsigned,unsigned> > > vindex(v1s);
1436 	std::vector< std::vector< std::pair<unsigned,unsigned> > > vindex(v1s);
1437 #if 0
1438 	for (int i=0;i<v1s;++i)
1439 	  vindex[i].reserve(32);
1440 #endif
1441 #ifdef HEAP_STATS
1442 	double count1=0,count2=0,total=double(v1s)*v2s;
1443 #endif
1444 	U_unsigned<U> * heap = new U_unsigned<U>[v1s] ; // pointers to v2 monomials
1445 	U_unsigned<U> * heap0, *heapbeg=heap,* heapend=heap+v1s;
1446 	for (it1=it1beg,heap0=heap;heap0!=heapend;++heap0,++it1){
1447 	  // vindex[it1-it1beg]=vector_size64< std::pair<unsigned,unsigned> >(1,std::pair<unsigned,unsigned>(it1-it1beg,0));
1448 	  vindex[it1-it1beg].push_back(std::pair<unsigned,unsigned>(unsigned(it1-it1beg),0));//vindex[it1-it1beg]=std::vector< std::pair<unsigned,unsigned> >(1,std::pair<unsigned,unsigned>(unsigned(it1-it1beg),0));
1449 	  *heap0=U_unsigned<U>(it1->u+u2,unsigned(it1-it1beg));
1450 	}
1451 	// vector_size64< std::pair<unsigned,unsigned> > nouveau;
1452 	std::vector< std::pair<unsigned,unsigned> > nouveau;
1453 	for (;heapbeg!=heapend;){
1454 	  U topu=heapbeg->u;
1455 	  if (!v.empty() && v.back().u==heapbeg->u){
1456 	    g=v.back().g;
1457 	    v.pop_back();
1458 	  }
1459 	  else
1460 	    g=T(0);
1461 	  std::vector< std::pair<unsigned,unsigned> >::iterator it,itend;
1462 	  nouveau.clear();
1463 	  while (heapend!=heapbeg && topu==heapbeg->u){
1464 	    // add all elements of the top chain
1465 	    it=vindex[heapbeg->v].begin();
1466 	    itend=vindex[heapbeg->v].end();
1467 	    for (;it!=itend;++it){
1468 	      unsigned & its=it->second;
1469 	      type_operator_plus_times_reduce((it1beg+it->first)->g,(it2beg+its)->g,g,reduce);
1470 	      // increment 2nd poly index of the elements of the top chain
1471 	      ++its;
1472 	      if (its!=v2s)
1473 		nouveau.push_back(*it);
1474 	    }
1475 #ifdef USTL
1476 	    ustl::pop_heap(heapbeg,heapend);
1477 #else
1478 	    std::pop_heap(heapbeg,heapend);
1479 #endif
1480 	    --heapend;
1481 	  }
1482 	  if (!is_zero(g))
1483 	    v.push_back(T_unsigned<T,U>(g,topu));
1484 	  // erase top node, then push each element of the incremented top chain
1485 	  it=nouveau.begin();
1486 	  itend=nouveau.end();
1487 	  U prevu=0; int previndex=-1;
1488 	  for (;it!=itend;++it){
1489 	    u=(it1beg+it->first)->u+(it2beg+it->second)->u;
1490 	    if (u==prevu && previndex>=0){
1491 	      vindex[previndex].push_back(*it);
1492 #ifdef HEAP_STATS
1493 	      ++count1;
1494 #endif
1495 	      continue;
1496 	    }
1497 	    prevu=u;
1498 	    // check if u is in the path to the root of the heap
1499 	    unsigned holeindex=unsigned(heapend-heapbeg),parentindex;
1500 	    if (holeindex && u==heapbeg->u){
1501 	      vindex[previndex=heapbeg->v].push_back(*it);
1502 #ifdef HEAP_STATS
1503 	      ++count1;
1504 #endif
1505 	      continue;
1506 	    }
1507 	    bool done=false;
1508 	    while (holeindex){
1509 	      parentindex=(holeindex-1) >> 1;
1510 	      U pu=(heapbeg+parentindex)->u;
1511 	      if (u<pu)
1512 		break;
1513 	      if (u==pu){
1514 		done=true;
1515 #ifdef HEAP_STATS
1516 		++count2;
1517 #endif
1518 		vindex[previndex=(heapbeg+parentindex)->v].push_back(*it);
1519 		break;
1520 	      }
1521 	      holeindex=parentindex;
1522 	    }
1523 	    if (!done){
1524 	      // not found, add a new node to the heap
1525 	      vindex[previndex=heapend->v].clear();
1526 	      vindex[previndex].push_back(*it);
1527 	      heapend->u=u;
1528 	      ++heapend;
1529 #ifdef USTL
1530 	      ustl::push_heap(heapbeg,heapend);
1531 #else
1532 	      std::push_heap(heapbeg,heapend);
1533 #endif
1534 	    }
1535 	  }
1536 	} // end for heapbeg!=heapend
1537 #ifdef HEAP_STATS
1538 	if (debug_infolevel>20)
1539 	  CERR << CLOCK() << " heap_mult, %age of chains" << count1/total << " " << count2/total << " " << '\n';
1540 #endif
1541 #if 0
1542 	for (int i=0;i<v1s;++i)
1543 	  CERR << vindex[i].size() << "," << vindex[i].capacity() << '\n';
1544 #endif
1545 	delete [] heap;
1546 	return;
1547       }
1548 #ifndef USTL
1549       if (heap_mult==-2){
1550 	// using multimap for f*g, seems slower than heap
1551 	typedef std::multimap< U,std::pair<unsigned,unsigned> > mmap;
1552 	mmap M;
1553 	typename mmap::iterator Mbeg,Mit=M.begin(),Mitbeg,Mend;
1554 	// fill M with (f_i,g_1)
1555 	for (it1=it1end-1;;){
1556 	  Mit=M.insert(Mit,std::pair<U,std::pair<unsigned,unsigned> >(it1->u+u2,std::pair<unsigned,unsigned>(unsigned(it1-it1beg),0)));
1557 	  if (it1==it1beg)
1558 	    break;
1559 	  --it1;
1560 	}
1561 	// find top degree coefficient of the product then
1562 	// replace top range degree elements or erase them
1563 	for (;!M.empty();){
1564 	  Mend=M.end();
1565 	  Mbeg=M.begin();
1566 	  Mitbeg=Mend;
1567 	  --Mitbeg;
1568 	  U deg=Mitbeg->first;
1569 	  T coeff;
1570 	  for (;Mitbeg!=Mbeg;--Mitbeg){
1571 	    if (Mitbeg->first!=deg)
1572 	      break;
1573 	  }
1574 	  if (Mitbeg->first!=deg)
1575 	    ++Mitbeg;
1576 	  Mit=Mitbeg;
1577 	  type_operator_reduce((it1beg+Mit->second.first)->g,(it2beg+Mit->second.second)->g,coeff,reduce);
1578 	  for (++Mit;Mit!=Mend;++Mit){
1579 	    type_operator_plus_times_reduce((it1beg+Mit->second.first)->g,(it2beg+Mit->second.second)->g,coeff,reduce);
1580 	  }
1581 	  if (!is_zero(coeff))
1582 	    v.push_back(T_unsigned<T,U>(coeff,deg));
1583 	  Mit=Mitbeg;
1584 	  std::vector< std::pair<unsigned,unsigned> > vp;
1585 	  for (;Mit!=Mend;++Mit){
1586 	    std::pair<unsigned,unsigned> p=Mit->second;
1587 	    ++p.second;
1588 	    if (p.second<v2s)
1589 	      vp.push_back(p);
1590 	  }
1591 	  M.erase(Mitbeg,Mend);
1592 	  std::vector< std::pair<unsigned,unsigned> > ::iterator vit=vp.begin(),vitend=vp.end();
1593 	  for (;vit!=vitend;++vit)
1594 	    M.insert(std::pair<U,std::pair<unsigned,unsigned> >((it1beg+vit->first)->u+(it2beg+vit->second)->u,*vit));
1595 	}
1596 	return;
1597       }
1598       if (heap_mult==-3){ // using map of U -> chains
1599 	typedef std::map< U,std::vector<std::pair<unsigned,unsigned> > > mmap;
1600 	std::vector< std::pair<unsigned,unsigned> >::iterator it,itend;
1601 	mmap M;
1602 	typename mmap::iterator Mit=M.begin();
1603 	// fill M with (f_i,g_1)
1604 	for (it1=it1end-1;;){
1605 	  Mit=M.insert(Mit,std::pair<U,std::vector< std::pair<unsigned,unsigned> > >(it1->u+u2,std::vector< std::pair<unsigned,unsigned> > (1, std::pair<unsigned,unsigned>(unsigned(it1-it1beg),0) )));
1606 	  if (it1==it1beg)
1607 	    break;
1608 	  --it1;
1609 	}
1610 	while (!M.empty()){
1611 	  Mit=M.end();
1612 	  --Mit;
1613 	  u=Mit->first;
1614 	  it=Mit->second.begin();
1615 	  itend=Mit->second.end();
1616 	  g=T(0);
1617 	  std::vector< std::pair<unsigned,unsigned> > nouveau;
1618 	  for (;it!=itend;++it){
1619 	    type_operator_plus_times_reduce((it1beg+it->first)->g,(it2beg+it->second)->g,g,reduce);
1620 	    ++it->second;
1621 	    if (it->second!=v2s)
1622 	      nouveau.push_back(*it);
1623 	  }
1624 	  if (!is_zero(g))
1625 	    v.push_back(T_unsigned<T,U>(g,u));
1626 	  M.erase(Mit);
1627 	  it=nouveau.begin();
1628 	  itend=nouveau.end();
1629 	  for (;it!=itend;++it){
1630 	    u=(it1beg+it->first)->u+(it2beg+it->second)->u;
1631 	    Mit=M.find(u);
1632 	    if (Mit!=M.end())
1633 	      Mit->second.push_back(std::pair<unsigned,unsigned>(it->first,it->second));
1634 	    else
1635 	      M[u]=std::vector< std::pair<unsigned,unsigned> >(1,std::pair<unsigned,unsigned>(it->first,it->second));
1636 	  }
1637 	}
1638 	return;
1639       }
1640 #endif
1641       // using heap
1642       u_pair_index<U> newelem, * heap = new u_pair_index<U>[v1s] ; // pointers to v2 monomials
1643       u_pair_index<U> * heap0, *heapbeg=heap,* heapend=heap+v1s, * heaplast=heap+v1s-1;
1644       for (it1=it1beg,heap0=heap;heap0!=heapend;++heap0,++it1){
1645 	*heap0=u_pair_index<U>(unsigned(it1-it1beg),0,it1->u+u2);
1646       }
1647       for (;heapbeg!=heapend;){
1648 	if (!v.empty() && v.back().u==heapbeg->u){
1649 	  type_operator_plus_times_reduce((it1beg+heapbeg->i1)->g,(it2beg+heapbeg->i2)->g,v.back().g,reduce);
1650 	  if ( is_zero(v.back().g) )
1651 	    v.pop_back();
1652 	}
1653 	else {
1654 	  type_operator_reduce((it1beg+heapbeg->i1)->g,(it2beg+heapbeg->i2)->g,g,reduce);
1655 	  v.push_back(T_unsigned<T,U>(g,heapbeg->u));
1656 	}
1657 	++heapbeg->i2;
1658 	if (heapbeg->i2==v2s){
1659 #ifdef USTL
1660 	  ustl::pop_heap(heapbeg,heapend);
1661 #else
1662 	  std::pop_heap(heapbeg,heapend);
1663 #endif
1664 	  // std::pop_heap(heapbeg,heapend);
1665 	  --heaplast;
1666 	  --heapend;
1667 	}
1668 	else {
1669 #ifdef USTL
1670 	  ustl::pop_heap(heapbeg,heapend);
1671 #else
1672 	  std::pop_heap(heapbeg,heapend);
1673 #endif
1674 	  // std::pop_heap(heapbeg,heapend);
1675 	  heaplast->u=(it1beg+heaplast->i1)->u+(it2beg+heaplast->i2)->u;
1676 #ifdef USTL
1677 	  ustl::push_heap(heapbeg,heapend);
1678 #else
1679 	  std::push_heap(heapbeg,heapend);
1680 #endif
1681 	  // in_out_heap(heapbeg,heapend-heapbeg,newelem);
1682 	}
1683       }
1684       delete [] heap;
1685     } // end if (heapmult)
1686     else {
1687 #ifdef HASH_MAP_NAMESPACE
1688       typedef HASH_MAP_NAMESPACE::hash_map< U,T,hash_function_unsigned_object > hash_prod ;
1689       hash_prod produit(possible_size); // try to avoid reallocation
1690       // cout << "hash " << CLOCK() << '\n';
1691 #else
1692 #ifdef USTL
1693       typedef ustl::map<U,T> hash_prod;
1694 #else
1695       typedef std::map<U,T> hash_prod;
1696 #endif
1697       // cout << "small map" << '\n';
1698       hash_prod produit;
1699 #endif
1700       typename hash_prod::iterator prod_it,prod_itend;
1701       for (;it1!=it1end;++it1){
1702 	g1=it1->g;
1703 	u1=it1->u;
1704 	if (!is_zero(reduce)){
1705 	  for (it2=it2beg;it2!=it2end;++it2){
1706 	    u=u1+it2->u;
1707 	    prod_it=produit.find(u);
1708 	    if (prod_it==produit.end())
1709 	      type_operator_reduce(g1,it2->g,produit[u],reduce); // g=g1*it2->g;
1710 	    else
1711 	      type_operator_plus_times_reduce(g1,it2->g,prod_it->second,reduce);
1712 	  }
1713 	}
1714 	else {
1715 	  for (it2=it2beg;it2!=it2end;++it2){
1716 	    u=u1+it2->u;
1717 	    prod_it=produit.find(u);
1718 	    if (prod_it==produit.end()){
1719 	      type_operator_times(g1,it2->g,produit[u]); // g=g1*it2->g;
1720 	    }
1721 	    else {
1722 	      type_operator_plus_times(g1,it2->g,prod_it->second);
1723 	      // g=g1*it2->g;
1724 	      // prod_it->second+=g;
1725 	    }
1726 	  }
1727 	}
1728       }
1729       T_unsigned<T,U> gu;
1730       prod_it=produit.begin(),prod_itend=produit.end();
1731       v.reserve(produit.size());
1732       for (;prod_it!=prod_itend;++prod_it){
1733 	if (!is_zero(gu.g=prod_it->second)){
1734 	  gu.u=prod_it->first;
1735 	  v.push_back(gu);
1736 	}
1737       }
1738       // CERR << "smallmult sort " << CLOCK() << '\n';
1739       sort(v.begin(),v.end());
1740       // CERR << "smallmult sort end " << CLOCK() << '\n';
1741     } // endif // HEAP_MULT
1742   }
1743 
1744 
1745   template<class T,class U,class V>
1746   struct triplet {
1747     T first;
1748     U second;
1749     V third;
triplettriplet1750     triplet (const T & f,const U & s,const V & t):first(f),second(s),third(t){};
triplettriplet1751     triplet():first(0),second(0),third(0){};
1752   };
1753 
1754   template<class T,class U,class R>
1755   struct threadmult_t {
1756     const std::vector< T_unsigned<T,U> > * v1ptr ;
1757     std::vector< typename std::vector< T_unsigned<T,U> >::const_iterator > * v1ptrs, * v2ptrs;
1758     std::vector< T_unsigned<T,U> > * vptr;
1759     U degdiv;
1760     unsigned current_deg;
1761     unsigned clock;
1762     R reduce;
1763     int status;
1764     bool use_heap;
1765     T * prod;
1766     std::vector< std::vector< triplet<unsigned,unsigned,int> > > * vindexptr;
1767     std::vector< std::vector< triplet<unsigned short,unsigned short,int> > > * vsmallindexptr;
1768     U_unsigned<U> * heapptr;
1769   };
1770 
1771 #if defined HAVE_PTHREAD_H && defined HAVE_LIBPTHREAD
1772 
do_threadmult(void * ptr)1773   template<class T,class U,class R> void * do_threadmult(void * ptr){
1774     threadmult_t<T,U,R> * argptr = (threadmult_t<T,U,R> *) ptr;
1775     argptr->status=1;
1776     argptr->clock=CLOCK();
1777     R reduce=argptr->reduce;
1778     const std::vector< T_unsigned<T,U> > * v1 = argptr->v1ptr;
1779     std::vector< typename std::vector< T_unsigned<T,U> >::const_iterator >  * v2ptrs = argptr->v2ptrs;
1780     std::vector< T_unsigned<T,U> > & v = *argptr->vptr;
1781     typename std::vector< T_unsigned<T,U> >::const_iterator it1=v1->begin(),it1beg=v1->begin(),it1end=v1->end(),it2beg,it2,it2end;
1782     T g1,g;
1783     U u1,u2,u,degdiv=argptr->degdiv;
1784     int d1=0,d2,v2deg=v2ptrs->size()-2,v1deg=argptr->v1ptrs->size()-2,d=argptr->current_deg;
1785     T * prod=argptr->prod;
1786     if (prod){
1787       for (int slice_d2=0;slice_d2<=v2deg;++slice_d2){
1788 	if (slice_d2>d) break;
1789 	d1=d-slice_d2;
1790 	if (d1>v1deg) continue;
1791 	it1beg=(*argptr->v1ptrs)[d1];
1792 	it1end=(*argptr->v1ptrs)[d1+1];
1793 	typename std::vector< T_unsigned<T,U> >::const_iterator slice_it2beg=(*v2ptrs)[slice_d2],slice_it2end=(*v2ptrs)[slice_d2+1];
1794 	const int slice_size=56;
1795 	for (; slice_it2beg<slice_it2end;slice_it2beg=it2end){
1796 	  it2beg=slice_it2beg;
1797 	  it2end=it2beg+slice_size;
1798 	  if (it2end>slice_it2end)
1799 	    it2end=slice_it2end;
1800 	  for (it1=it1beg;it1!=it1end;++it1){
1801 	    u1=it1->u;
1802 	    g1=it1->g;
1803 #if 1
1804 	    if ((it1+3)<it1end){
1805 	      // 4 monomials have same main degree: make the product simult
1806 	      T * prod0=prod+u1, * prod1=prod+(it1+1)->u,*prod2=prod+(it1+2)->u,*prod3=prod+(it1+3)->u;
1807 	      T g1_1=(it1+1)->g,g1_2=(it1+2)->g,g1_3=(it1+3)->g,g2;
1808 	      if (is_zero(reduce)){
1809 		it2end -= 4;
1810 		for (it2=it2beg;it2<=it2end;it2+=4){
1811 		  u2=it2->u;
1812 		  g2=it2->g;
1813 		  type_operator_plus_times(g1,g2,prod0[u2]);
1814 		  type_operator_plus_times(g1_1,g2,prod1[u2]);
1815 		  type_operator_plus_times(g1_2,g2,prod2[u2]);
1816 		  type_operator_plus_times(g1_3,g2,prod3[u2]);
1817 		  u2=(it2+1)->u;
1818 		  g2=(it2+1)->g;
1819 		  type_operator_plus_times(g1,g2,prod0[u2]);
1820 		  type_operator_plus_times(g1_1,g2,prod1[u2]);
1821 		  type_operator_plus_times(g1_2,g2,prod2[u2]);
1822 		  type_operator_plus_times(g1_3,g2,prod3[u2]);
1823 		  u2=(it2+2)->u;
1824 		  g2=(it2+2)->g;
1825 		  type_operator_plus_times(g1,g2,prod0[u2]);
1826 		  type_operator_plus_times(g1_1,g2,prod1[u2]);
1827 		  type_operator_plus_times(g1_2,g2,prod2[u2]);
1828 		  type_operator_plus_times(g1_3,g2,prod3[u2]);
1829 		  u2=(it2+3)->u;
1830 		  g2=(it2+3)->g;
1831 		  type_operator_plus_times(g1,g2,prod0[u2]);
1832 		  type_operator_plus_times(g1_1,g2,prod1[u2]);
1833 		  type_operator_plus_times(g1_2,g2,prod2[u2]);
1834 		  type_operator_plus_times(g1_3,g2,prod3[u2]);
1835 		}
1836 		it2end += 4;
1837 		for (;it2!=it2end;++it2){
1838 		  u2=it2->u;
1839 		  g2=it2->g;
1840 		  type_operator_plus_times(g1,g2,prod0[u2]);
1841 		  type_operator_plus_times(g1_1,g2,prod1[u2]);
1842 		  type_operator_plus_times(g1_2,g2,prod2[u2]);
1843 		  type_operator_plus_times(g1_3,g2,prod3[u2]);
1844 		}
1845 	      }
1846 	      else {
1847 		for (it2=it2beg;it2!=it2end;++it2){
1848 		  u2=it2->u;
1849 		  g2=it2->g;
1850 		  type_operator_plus_times_reduce(g1,g2,prod0[u2],reduce);
1851 		  type_operator_plus_times_reduce(g1_1,g2,prod1[u2],reduce);
1852 		  type_operator_plus_times_reduce(g1_2,g2,prod2[u2],reduce);
1853 		  type_operator_plus_times_reduce(g1_3,g2,prod3[u2],reduce);
1854 		}
1855 	      }
1856 	      it1 += 3;
1857 	      continue;
1858 	    }
1859 #endif
1860 	    T * prod0 = prod+u1;
1861 	    if (is_zero(reduce)){
1862 	      for (it2=it2beg;it2!=it2end;++it2){
1863 		type_operator_plus_times(g1,it2->g,prod0[it2->u]);
1864 		// prod_it->second += g;
1865 	      }
1866 	    }
1867 	    else {
1868 	      for (it2=it2beg;it2!=it2end;++it2){
1869 		type_operator_plus_times_reduce(g1,it2->g,prod0[it2->u],reduce);
1870 	      }
1871 	    }
1872 	  } // end it1 loop
1873 	} // end slice_it2 loop
1874       } // end slice_degree2 loop
1875       argptr->clock = CLOCK() - argptr->clock;
1876       argptr->status=2;
1877       return &v; // not used, all is stored in prod
1878     }
1879     // thread-heap multiplication
1880     if (//false
1881 	argptr->use_heap
1882 	){
1883       // using heap of chains
1884       // int v1s=it1end-it1,v2s=it2end-it2;
1885       std::vector< std::vector< triplet<unsigned,unsigned,int> > > * vindexptr=argptr->vindexptr;
1886       std::vector< std::vector< triplet<unsigned short,unsigned short,int> > > * vsmallindexptr=argptr->vsmallindexptr;
1887       bool smallindex=vsmallindexptr;
1888       U_unsigned<U> * heap = argptr->heapptr ; // pointers to v2 monomials
1889       U_unsigned<U> * heap0, *heapbeg=heap,* heapend;
1890 #if 0
1891       // initial fill of the heap
1892       int count=0;
1893       for (it1=it1beg,heap0=heap;it1!=it1end;++it1){
1894 	u1=it1->u;
1895 	d1=u1/degdiv;
1896 	if (d1>d) // first partial degree too large
1897 	  continue;
1898 	d2=v2deg-(d-d1);
1899 	if (d2<0) // first partial degree too small
1900 	  continue;
1901 	it2=(*v2ptrs)[d2]; // first monomial of second poly having a compatible partial degree
1902 	it2end=(*v2ptrs)[d2+1];
1903 	if (it2==it2end)
1904 	  continue;
1905 	u2=it2->u;
1906 	if (int(u2/degdiv+d1)!=d)
1907 	  continue;
1908 	if (smallindex){
1909 	  (*vsmallindexptr)[count].clear();
1910 	  (*vsmallindexptr)[count].push_back(triplet<unsigned short,unsigned short,int>(it1-it1beg,0,d2));
1911 	}
1912 	else {
1913 	  (*vindexptr)[count].clear();
1914 	  (*vindexptr)[count].push_back(triplet<unsigned,unsigned,int>(it1-it1beg,0,d2));
1915 	}
1916 	*heap0=U_unsigned<U>(u1+u2,count);
1917 	++heap0;
1918 	++count;
1919       }
1920       heapend=heap0;
1921       std::make_heap(heapbeg,heapend);
1922 #else
1923       // initial fill of the heap
1924       int count=0;
1925       heap0=heap;
1926       d1=it1->u/degdiv;
1927       int v1ptrpos=0;
1928       if (d1>d)
1929 	v1ptrpos=d1-d;
1930       it1=(*argptr->v1ptrs)[v1ptrpos];
1931       for (;it1!=it1end;){
1932 	u1=it1->u;
1933 	d1=u1/degdiv;
1934 	if (d1>d){ // first partial degree too large, should not happen
1935 	  while ((*argptr->v1ptrs)[v1ptrpos]<=it1)
1936 	    ++v1ptrpos;
1937 	  it1=(*argptr->v1ptrs)[v1ptrpos];
1938 	  continue;
1939 	}
1940 	d2=v2deg-(d-d1);
1941 	if (d2<0){ // first partial degree too small
1942 	  break;
1943 	  //++it1; continue;
1944 	}
1945 	it2=(*v2ptrs)[d2]; // first monomial of second poly having a compatible partial degree
1946 	it2end=(*v2ptrs)[d2+1];
1947 	if (it2==it2end){
1948 	  while ((*argptr->v1ptrs)[v1ptrpos]<=it1)
1949 	    ++v1ptrpos;
1950 	  it1=(*argptr->v1ptrs)[v1ptrpos];
1951 	  continue;
1952 	}
1953 	u2=it2->u;
1954 	if (int(u2/degdiv+d1)!=d){
1955 	  while ((*argptr->v1ptrs)[v1ptrpos]<=it1)
1956 	    ++v1ptrpos;
1957 	  it1=(*argptr->v1ptrs)[v1ptrpos];
1958 	  continue;
1959 	}
1960 	for (;it1!=it1end;++it1){
1961 	  u1=it1->u;
1962 	  if (u1<longlong(d1)*degdiv){
1963 	    while ((*argptr->v1ptrs)[v1ptrpos]<=it1)
1964 	      ++v1ptrpos;
1965 	    //CERR << it1->u << " | " << (*argptr->v1ptrs)[v1ptrpos]->u  << '\n';
1966 	    break;
1967 	  }
1968 	  if (smallindex){
1969 	    (*vsmallindexptr)[count].clear();
1970 	    (*vsmallindexptr)[count].push_back(triplet<unsigned short,unsigned short,int>(it1-it1beg,0,d2));
1971 	  }
1972 	  else {
1973 	    (*vindexptr)[count].clear();
1974 	    (*vindexptr)[count].push_back(triplet<unsigned,unsigned,int>(it1-it1beg,0,d2));
1975 	  }
1976 	  *heap0=U_unsigned<U>(u1+u2,count);
1977 	  ++heap0;
1978 	  ++count;
1979 	}
1980       } // end for (it1=it1beg...)
1981       heapend=heap0;
1982       std::make_heap(heapbeg,heapend);
1983 #endif
1984       if (smallindex){
1985 	std::vector< triplet<unsigned short,unsigned short,int> > nouveau;
1986 	for (;heapbeg!=heapend;){
1987 	  U topu=heapbeg->u;
1988 	  if (!v.empty() && v.back().u==heapbeg->u){
1989 	    g=v.back().g;
1990 	    v.pop_back();
1991 	  }
1992 	  else
1993 	    g=T(0);
1994 	  std::vector< triplet<unsigned short,unsigned short,int> >::iterator it,itend;
1995 	  std::vector< triplet<unsigned short,unsigned short,int> > * vptr;
1996 	  nouveau.clear();
1997 	  while (heapend!=heapbeg && topu==heapbeg->u){
1998 	    // add all elements of the top chain
1999 	    vptr = &(*vsmallindexptr)[heapbeg->v];
2000 	    it=vptr->begin();
2001 	    itend=vptr->end();
2002 	    for (;it!=itend;++it){
2003 	      it2beg=(*v2ptrs)[it->third];
2004 	      type_operator_plus_times_reduce((it1beg+it->first)->g,(it2beg+it->second)->g,g,reduce);
2005 	      // increment 2nd poly index of the elements of the top chain
2006 	      ++it->second;
2007 	      // check if it is still with a compatible partial degree
2008 	      if (it->second<(*v2ptrs)[it->third+1]-it2beg)
2009 		nouveau.push_back(*it);
2010 	    }
2011 #ifdef USTL
2012 	    ustl::pop_heap(heapbeg,heapend);
2013 #else
2014 	    std::pop_heap(heapbeg,heapend);
2015 #endif
2016 	    // std::pop_heap(heapbeg,heapend);
2017 	    --heapend;
2018 	  }
2019 	  if (!is_zero(g))
2020 	    v.push_back(T_unsigned<T,U>(g,topu));
2021 	  // erase top node, then push each element of the incremented top chain
2022 	  it=nouveau.begin();
2023 	  itend=nouveau.end();
2024 	  U prevu=0; int previndex=-1;
2025 	  for (;it!=itend;++it){
2026 	    u=(it1beg+it->first)->u+((*v2ptrs)[it->third]+it->second)->u;
2027 	    if (u==prevu && previndex>=0){
2028 	      vptr=&(*vsmallindexptr)[previndex];
2029 	      vptr->push_back(*it);
2030 	      continue;
2031 	    }
2032 	    prevu=u;
2033 	    // check if u is in the path to the root of the heap
2034 	    unsigned holeindex=heapend-heapbeg,parentindex;
2035 	    if (holeindex && u==heapbeg->u){
2036 	      vptr=&(*vsmallindexptr)[previndex=heapbeg->v];
2037 	      vptr->push_back(*it);
2038 	      continue;
2039 	    }
2040 	    bool done=false;
2041 	    while (holeindex){
2042 	      parentindex=(holeindex-1) >> 1;
2043 	      if (u<(heapbeg+parentindex)->u)
2044 		break;
2045 	      if (u==(heapbeg+parentindex)->u){
2046 		done=true;
2047 		vptr=&(*vsmallindexptr)[previndex=(heapbeg+parentindex)->v];
2048 		vptr->push_back(*it);
2049 		break;
2050 	      }
2051 	      holeindex=parentindex;
2052 	    }
2053 	    if (!done){
2054 	      // not found, add a new node to the heap
2055 	      vptr=&(*vsmallindexptr)[previndex=heapend->v];
2056 	      vptr->clear();
2057 	      vptr->push_back(*it);
2058 	      heapend->u=u;
2059 	      ++heapend;
2060 #ifdef USTL
2061 	      ustl::push_heap(heapbeg,heapend);
2062 #else
2063 	      std::push_heap(heapbeg,heapend);
2064 #endif
2065 	      // std::push_heap(heapbeg,heapend);
2066 	    }
2067 	  }
2068 	} // end for heapbeg!=heapend
2069       } // end smallindex
2070       else {
2071 	std::vector< triplet<unsigned,unsigned,int> > nouveau;
2072 	for (;heapbeg!=heapend;){
2073 	  U topu=heapbeg->u;
2074 	  if (!v.empty() && v.back().u==heapbeg->u){
2075 	    g=v.back().g;
2076 	    v.pop_back();
2077 	  }
2078 	  else
2079 	    g=T(0);
2080 	  std::vector< triplet<unsigned,unsigned,int> >::iterator it,itend;
2081 	  std::vector< triplet<unsigned,unsigned,int> > * vptr;
2082 	  nouveau.clear();
2083 	  while (heapend!=heapbeg && topu==heapbeg->u){
2084 	    // add all elements of the top chain
2085 	    vptr = &(*vindexptr)[heapbeg->v];
2086 	    it=vptr->begin();
2087 	    itend=vptr->end();
2088 	    for (;it!=itend;++it){
2089 	      it2beg=(*v2ptrs)[it->third];
2090 	      type_operator_plus_times_reduce((it1beg+it->first)->g,(it2beg+it->second)->g,g,reduce);
2091 	      // increment 2nd poly index of the elements of the top chain
2092 	      ++it->second;
2093 	      // check if it is still with a compatible partial degree
2094 	      if (it->second<(*v2ptrs)[it->third+1]-it2beg)
2095 		nouveau.push_back(*it);
2096 	    }
2097 #ifdef USTL
2098 	    ustl::pop_heap(heapbeg,heapend);
2099 #else
2100 	    std::pop_heap(heapbeg,heapend);
2101 #endif
2102 	    // std::pop_heap(heapbeg,heapend);
2103 	    --heapend;
2104 	  }
2105 	  if (!is_zero(g))
2106 	    v.push_back(T_unsigned<T,U>(g,topu));
2107 	  // erase top node, then push each element of the incremented top chain
2108 	  it=nouveau.begin();
2109 	  itend=nouveau.end();
2110 	  U prevu=0; int previndex=-1;
2111 	  for (;it!=itend;++it){
2112 	    u=(it1beg+it->first)->u+((*v2ptrs)[it->third]+it->second)->u;
2113 	    if (u==prevu && previndex>=0){
2114 	      vptr=&(*vindexptr)[previndex];
2115 	      vptr->push_back(*it);
2116 	      continue;
2117 	    }
2118 	    prevu=u;
2119 	    // check if u is in the path to the root of the heap
2120 	    unsigned holeindex=heapend-heapbeg,parentindex;
2121 	    if (holeindex && u==heapbeg->u){
2122 	      vptr=&(*vindexptr)[previndex=heapbeg->v];
2123 	      vptr->push_back(*it);
2124 	      continue;
2125 	    }
2126 	    bool done=false;
2127 	    while (holeindex){
2128 	      parentindex=(holeindex-1) >> 1;
2129 	      if (u<(heapbeg+parentindex)->u)
2130 		break;
2131 	      if (u==(heapbeg+parentindex)->u){
2132 		done=true;
2133 		vptr=&(*vindexptr)[previndex=(heapbeg+parentindex)->v];
2134 		vptr->push_back(*it);
2135 		break;
2136 	      }
2137 	      holeindex=parentindex;
2138 	    }
2139 	    if (!done){
2140 	      // not found, add a new node to the heap
2141 	      vptr=&(*vindexptr)[previndex=heapend->v];
2142 	      vptr->clear();
2143 	      vptr->push_back(*it);
2144 	      heapend->u=u;
2145 	      ++heapend;
2146 #ifdef USTL
2147 	      ustl::push_heap(heapbeg,heapend);
2148 #else
2149 	      std::push_heap(heapbeg,heapend);
2150 #endif
2151 	      // std::push_heap(heapbeg,heapend);
2152 	    }
2153 	  }
2154 	} // end for heapbeg!=heapend
2155       } // end els (smallindex)
2156       argptr->clock = CLOCK() - argptr->clock;
2157       argptr->status=2;
2158       return &v;
2159     }
2160 #ifdef HASH_MAP_NAMESPACE
2161     typedef HASH_MAP_NAMESPACE::hash_map< U,T,hash_function_unsigned_object > hash_prod ;
2162     hash_prod produit; // try to avoid reallocation
2163 #else
2164     typedef std::map<U,T> hash_prod;
2165     // cout << "small map" << '\n';
2166     hash_prod produit;
2167 #endif
2168     typename hash_prod::iterator prod_it,prod_itend;
2169     for (;it1!=it1end;++it1){
2170       u1=it1->u;
2171       d1=u1/degdiv;
2172       if (d1>d)
2173 	continue;
2174       d2=v2deg-(d-d1);
2175       if (d2<0) // degree of d1 incompatible
2176 	break;
2177       g1=it1->g;
2178       it2beg=(*v2ptrs)[d2];
2179       it2end=(*v2ptrs)[d2+1];
2180       if (!is_zero(reduce)){
2181 	for (it2=it2beg;it2!=it2end;++it2){
2182 	  u2=it2->u;
2183 	  u=u1+u2;
2184 	  prod_it=produit.find(u);
2185 	  if (prod_it==produit.end())
2186 	    type_operator_reduce(g1,it2->g,produit[u],reduce); // g=g1*it2->g;
2187 	  else
2188 	    type_operator_plus_times_reduce(g1,it2->g,prod_it->second,reduce);
2189 	}
2190       }
2191       else {
2192 	for (it2=it2beg;it2!=it2end;++it2){
2193 	  u2=it2->u;
2194 	  u=u1+u2;
2195 	  prod_it=produit.find(u);
2196 	  if (prod_it==produit.end()){
2197 	    type_operator_times(g1,it2->g,g); // g=g1*it2->g;
2198 	    produit[u]=g;
2199 	  }
2200 	  else
2201 	    type_operator_plus_times(g1,it2->g,prod_it->second);
2202 	  // prod_it->second += g;
2203 	}
2204       }
2205     }
2206     T_unsigned<T,U> gu;
2207     prod_it=produit.begin(),prod_itend=produit.end();
2208     v.clear();
2209     v.reserve(produit.size());
2210     for (;prod_it!=prod_itend;++prod_it){
2211       if (!is_zero(gu.g=prod_it->second)){
2212 	gu.u=prod_it->first;
2213 	v.push_back(gu);
2214       }
2215     }
2216     // CERR << "do_threadmult end " << CLOCK() << '\n';
2217     sort(v.begin(),v.end());
2218     // CERR << "do_threadmult sort end " << CLOCK() << '\n';
2219     argptr->clock = CLOCK() - argptr->clock;
2220     argptr->status = 2;
2221     return &v;
2222   }
2223 
2224   template<class T,class U,class R>
2225   bool threadmult(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,U degdiv,const R & reduce,size_t possible_size=100){
2226     if (!threads_allowed)
2227       return false;
2228     v.clear();
2229     if (v1.empty() || v2.empty())
2230       return true;
2231     size_t v1si=v1.size(),v2si=v2.size();
2232     double v1s=double(v1si),v2s=double(v2si);
2233     if (v2si<v1si)
2234       return threadmult<T,U>(v2,v1,v,degdiv,reduce,possible_size);
2235     unsigned threads_time=0;
2236 
2237     int nthreads=threads;
2238 
2239     if (nthreads<2)
2240       ; // return false;
2241     double v1v2=v1s*v2s;
2242     T * prod =0;
2243     U u1=v1.front().u, u2=v2.front().u;
2244     U u12=u1+u2; // size of the array for array multiplication
2245     // compare u12 and v1v2*ln(v1v2)
2246     if ( heap_mult>=0 && possible_size>100 && u12<512e6/sizeof(T) && u12<v1v2*std::log(double(possible_size))*2){
2247       if (debug_infolevel>20)
2248 	CERR << "array multiplication, v1 size " << v1s << " v2 size " << v2s << " u1+u2 " << u12 << '\n';
2249       // array multiplication
2250       prod = new T[u12+1];
2251       for (U u=0;u<=u12;++u)
2252 	prod[u]=T(0);
2253     }
2254     // if array multiplication is faster, set prod
2255     bool use_heap = (heap_mult<0) || (heap_mult>0 && v1v2>heap_mult);
2256     if (!prod && use_heap
2257 	&& nthreads<2// nthreads>1 //  //
2258 	) // multi-thread heap disabled because of locks by inserting in chains
2259       return false;
2260     if (debug_infolevel>20){
2261       CERR << "// " << CLOCK() << "using threaded " ;
2262       if (use_heap)
2263 	CERR << "heap";
2264       else
2265 	CERR << "hash";
2266       CERR << " multiplication" << '\n';
2267     }
2268     unsigned d2=v2.front().u/degdiv,deg1v=v1.front().u/degdiv+d2;
2269     int cur_deg=-1,prev_deg=d2;
2270     // initialize iterators to the degree beginning
2271     std::vector< typename std::vector< T_unsigned<T,U> >::const_iterator > v1it,v2it;
2272     typename std::vector< T_unsigned<T,U> >::const_iterator it=v2.begin(),itend=v2.end();
2273     for (v2it.push_back(it);it!=itend;++it){
2274       cur_deg=it->u/degdiv;
2275       if (cur_deg==prev_deg)
2276 	continue;
2277       for (int i=prev_deg-1;i>=cur_deg;--i){
2278 	v2it.push_back(it);
2279 	if (!i)
2280 	  break;
2281       }
2282       prev_deg=cur_deg;
2283     }
2284     for (int i=prev_deg-1;i>=0;--i)
2285       v2it.push_back(it);
2286     v2it.push_back(it);
2287     it=v1.begin();itend=v1.end(); prev_deg=v1.front().u/degdiv;
2288     for (v1it.push_back(it);it!=itend;++it){
2289       cur_deg=it->u/degdiv;
2290       if (cur_deg==prev_deg)
2291 	continue;
2292       for (int i=prev_deg-1;i>=cur_deg;--i){
2293 	v1it.push_back(it);
2294 	if (!i)
2295 	  break;
2296       }
2297       prev_deg=cur_deg;
2298     }
2299     for (int i=prev_deg-1;i>=0;--i)
2300       v1it.push_back(it);
2301     v1it.push_back(it);
2302     // degree of product wrt to the main variable
2303     // will launch deg1v+1 threads to compute each degree
2304     pthread_t tab[deg1v+1];
2305     // threadmult_t<T,U> arg[deg1v+1];
2306     threadmult_t<T,U,R> * arg=new threadmult_t<T,U,R>[deg1v+1];
2307     possible_size=0;
2308     int res=0;
2309     int i=deg1v;
2310     bool smallindex=(v1s<65535 && v2s<65535);
2311     if (debug_infolevel>20)
2312       CERR << CLOCK()*1e-6 << " product " << v1s << "*" << v2s << " smallindex " << smallindex << '\n';
2313     if (
2314 	// true ||
2315 	nthreads==1){
2316       U_unsigned<U> *heapptr=new U_unsigned<U>[v1si];
2317       std::vector< std::vector< triplet<unsigned,unsigned,int> > >* vindexptr=smallindex?0:(new std::vector< std::vector< triplet<unsigned,unsigned,int> > >(v1si));
2318       std::vector< std::vector< triplet<unsigned short,unsigned short,int> > >* vsmallindexptr=smallindex?(new std::vector< std::vector< triplet<unsigned short,unsigned short,int> > >(v1si)):0;
2319       for (;i>=0;--i){
2320 	arg[i].v1ptr=&v1;
2321 	arg[i].v1ptrs=&v1it;
2322 	arg[i].v2ptrs=&v2it;
2323 	arg[i].vptr = new std::vector< T_unsigned<T,U> >;
2324 	if (i!=int(deg1v))
2325 	  arg[i].vptr->reserve(arg[i+1].vptr->size());
2326 	arg[i].degdiv=degdiv;
2327 	arg[i].current_deg=i;
2328 	arg[i].reduce=reduce;
2329 	arg[i].status=0;
2330 	arg[i].prod=prod;
2331 	if (use_heap){
2332 	  arg[i].use_heap=use_heap;
2333 	  arg[i].heapptr=heapptr;
2334 	  arg[i].vindexptr=vindexptr;
2335 	  arg[i].vsmallindexptr=vsmallindexptr;
2336 	}
2337 	else {
2338 	  arg[i].use_heap=false;
2339 	  arg[i].heapptr=0;
2340 	  arg[i].vindexptr=0;
2341 	  arg[i].vsmallindexptr=0;
2342 	}
2343 	if (debug_infolevel>30)
2344 	  CERR << "Computing degree " << i << " " << CLOCK() << '\n';
2345 	do_threadmult<T,U,R>(&arg[i]);
2346 	threads_time += arg[i].clock;
2347 	possible_size += arg[i].vptr->size();
2348       }
2349       delete [] heapptr;
2350       if (vindexptr) delete vindexptr;
2351       if (vsmallindexptr) delete vsmallindexptr;
2352     } // end nthreads==1
2353     else {
2354 #if 1
2355       // FIXME find max possible size of heap and vindex/vsmallindex
2356       // instead of v1si
2357       unsigned d1=v1.begin()->u/degdiv;
2358       size_t v1si_tab[deg1v+1];
2359       for (int j=0;j<=deg1v;++j)
2360 	v1si_tab[j]=0;
2361       for (int deg1=d1;deg1>=0;--deg1){
2362 	size_t nmonom= v1it[(d1-deg1)+1]-v1it[d1-deg1];
2363 	for (int deg2=d2;deg2>=0;--deg2){
2364 	  if (v2it[d2-deg2+1]-v2it[d2-deg2])
2365 	    v1si_tab[deg1+deg2] += nmonom;
2366 	}
2367       }
2368       size_t v1si_eff=0;
2369       for (int j=0;j<=deg1v;++j){
2370 	if (v1si_eff<v1si_tab[j])
2371 	  v1si_eff=v1si_tab[j];
2372       }
2373       // end v1si_eff size determination
2374       if (debug_infolevel>20)
2375 	CERR << "effective heap size " << v1si_eff << ", v1si=" << v1si << '\n';
2376 #else
2377       size_t v1si_eff=v1si;
2378 #endif
2379       std::vector<int> in_progress;
2380       // create initials threads
2381       for (int j=0;i>=0 && j<nthreads;--i,++j){
2382 	in_progress.push_back(i);
2383 	arg[i].v1ptr=&v1;
2384 	arg[i].v1ptrs=&v1it;
2385 	arg[i].v2ptrs=&v2it;
2386 	arg[i].vptr = new std::vector< T_unsigned<T,U> >;
2387 	arg[i].degdiv=degdiv;
2388 	arg[i].current_deg=i;
2389 	arg[i].reduce=reduce;
2390 	arg[i].status=0;
2391 	arg[i].prod=prod;
2392 	if (use_heap){
2393 	  arg[i].use_heap=use_heap;
2394 	  arg[i].heapptr=new U_unsigned<U>[v1si_eff];
2395 	  arg[i].vindexptr=smallindex?0:(new std::vector< std::vector< triplet<unsigned,unsigned,int> > >(v1si_eff));
2396 	  arg[i].vsmallindexptr=smallindex?(new std::vector< std::vector< triplet<unsigned short,unsigned short,int> > >(v1si_eff)):0;
2397 	}
2398 	else {
2399 	  arg[i].use_heap=false;
2400 	  arg[i].heapptr=0;
2401 	  arg[i].vindexptr=0;
2402 	  arg[i].vsmallindexptr=0;
2403 	}
2404 	res=pthread_create(&tab[i],(pthread_attr_t *) NULL,do_threadmult<T,U,R>,(void *) &arg[i]);
2405 	if (res){
2406 	  // should cancel previous threads and delete created arg[i].vptr
2407 	  delete [] arg;
2408 	  return false;
2409 	}
2410       } // end initial thread creation
2411       // now wait for each thread and create replacement threads
2412       while (1){
2413 	int nb=in_progress.size();
2414 	int todo=0,towait=0;
2415 	for (int j=0;j<nb;++j){
2416 	  int k=in_progress[j];
2417 	  if (k>=0){
2418 	    int concurrent=arg[k].status;
2419 	    if (concurrent==2){
2420 	      void * ptr;
2421 	      pthread_join(tab[k],&ptr);
2422 	      threads_time += arg[k].clock;
2423 	      possible_size += arg[k].vptr->size();
2424 	      if (i>=0){
2425 		arg[i].v1ptr=&v1;
2426 		arg[i].v1ptrs=&v1it;
2427 		arg[i].v2ptrs=&v2it;
2428 		arg[i].vptr = new std::vector< T_unsigned<T,U> >;
2429 		arg[i].vptr->reserve((3*arg[k].vptr->size())/2); // ???
2430 		arg[i].degdiv=degdiv;
2431 		arg[i].current_deg=i;
2432 		arg[i].reduce=reduce;
2433 		arg[i].status=0;
2434 		arg[i].use_heap=use_heap;
2435 		arg[i].prod=prod;
2436 		arg[i].heapptr=arg[k].heapptr;
2437 		arg[i].vindexptr=arg[k].vindexptr;
2438 		arg[i].vsmallindexptr=arg[k].vsmallindexptr;
2439 		res=pthread_create(&tab[i],(pthread_attr_t *) NULL,do_threadmult<T,U,R>,(void *) &arg[i]);
2440 		if (res){
2441 		  // should cancel previous threads and delete created arg[i].vptr
2442 		  delete [] arg;
2443 		  return false;
2444 		}
2445 		in_progress[j]=i;
2446 		--i;
2447 		++todo;
2448 	      } // end if (i>=0)
2449 	      else
2450 		in_progress[j]=-1;
2451 	    } // if (concurrent!=2)
2452 	    else
2453 	      ++towait;
2454 	  } // end if (k>=0)
2455 	} // end for (j=0;j<=nb;++j)
2456 	if (!todo && !towait)
2457 	  break;
2458 	if (towait)
2459 	  usleep(1);
2460       } // end while(1)
2461       if (use_heap){
2462 	i=deg1v;
2463 	for (int j=0;i>=0 && j<nthreads;--i,++j){
2464 	  delete [] arg[i].heapptr;
2465 	  if (arg[i].vindexptr){
2466 	    if (debug_infolevel>21){
2467 	      CERR << "heap_mult vindex size/capacity for i=" << i << '\n';
2468 	      std::vector< std::vector< triplet<unsigned,unsigned,int> > > & v=*arg[i].vindexptr;
2469 	      for (int j=0;j<v.size();++j)
2470 		CERR << v[j].size() << " " << v[j].capacity() << '\n';
2471 	    }
2472 	    delete arg[i].vindexptr;
2473 	    arg[i].vindexptr=0;
2474 	  }
2475 	  if (arg[i].vsmallindexptr){
2476 	    if (debug_infolevel>21){
2477 	      CERR << "heap_mult vsmallindex size/capacity for i=" << i << '\n';
2478 	      std::vector< std::vector< triplet<unsigned short,unsigned short,int> > > & v=*arg[i].vsmallindexptr;
2479 	      for (int j=0;j<v.size();++j)
2480 		CERR << v[j].size() << " " << v[j].capacity() << '\n';
2481 	    }
2482 	    delete arg[i].vsmallindexptr;
2483 	    arg[i].vsmallindexptr=0;
2484 	  }
2485 	}
2486       }
2487     } // end else of if (nthreads==1)
2488     if (debug_infolevel>20)
2489       CERR << CLOCK()*1e-6 << "Begin copy " << '\n';
2490     // store to v
2491     if (prod){
2492       int n=0;
2493       for (U u=u12;;--u){
2494 	if (!is_zero(prod[u]))
2495 	  ++n;
2496 	if (!u)
2497 	  break;
2498       }
2499       v.reserve(n);
2500       for (U u=u12;;--u){
2501 	if (!is_zero(prod[u]))
2502 	  v.push_back(T_unsigned<T,U>(prod[u],u));
2503 	if (!u)
2504 	  break;
2505       }
2506       delete [] prod ;
2507       for (int i=deg1v;i>=0;--i){
2508 	delete arg[i].vptr;
2509       }
2510     }
2511     else {
2512       v.reserve(possible_size);
2513       for (int i=deg1v;i>=0;--i){
2514 	typename std::vector< T_unsigned<T,U> >::const_iterator it=arg[i].vptr->begin(),itend=arg[i].vptr->end();
2515 	for (;it!=itend;++it){
2516 	  v.push_back(*it);
2517 	}
2518 	delete arg[i].vptr;
2519       }
2520       /*
2521       v=std::vector< T_unsigned<T,U> >(possible_size);
2522       typename std::vector< T_unsigned<T,U> >::const_iterator jt=v.begin();
2523       for (int i=deg1v;i>=0;--i){
2524 	typename std::vector< T_unsigned<T,U> >::const_iterator it=arg[i].vptr->begin(),itend=arg[i].vptr->end();
2525 #ifdef VISUALC
2526 	for (;it!=itend;++jt,++it){
2527 	  *jt=*it;
2528 	}
2529 #else
2530 	memcpy(*(void **) &jt,*(void **)&it,(itend-it)*sizeof(T_unsigned<T,U>));
2531 	jt += (itend-it);
2532 #endif
2533 	delete arg[i].vptr;
2534       }
2535       */
2536     }
2537     if (debug_infolevel>20)
2538       CERR << CLOCK()*1e-6 << "End copy " << '\n';
2539     delete [] arg;
2540     return true;
2541   }
2542 
2543 #else // PTHREAD
2544   template<class T,class U,class R>
2545   bool threadmult(const std::vector< T_unsigned<T,U> > & v1,const std::vector< T_unsigned<T,U> > & v2,std::vector< T_unsigned<T,U> > & v,U degdiv,const R& reduce,size_t possible_size=100){
2546     return false;
2547   }
2548 
2549 #endif // PTHREAD
2550 
2551   template<class U>
partial_degrees2vars(const index_t & d,std::vector<U> & vars)2552   void partial_degrees2vars(const index_t & d,std::vector<U> & vars){
2553     int dim=int(d.size());
2554     vars[dim-1]=1;
2555     for (int i=dim-2;i>=0;--i){
2556       vars[i]=(1+d[i+1])*vars[i+1];
2557     }
2558   }
2559 
2560   template<class U>
partial_degrees(U u,const std::vector<U> & vars,index_t & res)2561   void partial_degrees(U u,const std::vector<U> & vars,index_t & res){
2562     for (int k=int(vars.size())-1;k>0;--k){
2563       U v=u%vars[k-1];
2564       res[k]=v/vars[k];
2565     }
2566     res[0]=u/vars[0];
2567   }
2568 
2569   template<class T,class U>
partial_degrees(const std::vector<T_unsigned<T,U>> & a,const std::vector<U> & vars,index_t & res)2570   void partial_degrees(const std::vector< T_unsigned<T,U> > & a,const std::vector<U> & vars,index_t & res){
2571     typename std::vector< T_unsigned<T,U> >::const_iterator it=a.begin(),itend=a.end();
2572     int dim=int(vars.size());
2573     res.clear(); res.resize(dim);
2574     index_t cur(dim);
2575     for (;it!=itend;++it){
2576       partial_degrees(it->u,vars,cur);
2577       index_lcm(res,cur,res);
2578     }
2579   }
2580 
2581   // convert u monomial from vars source to vars target, tmp is a temp index_t
2582   template<class U>
convert(U & u,const std::vector<U> & source,const std::vector<U> & target,index_t & tmp)2583   void convert(U & u,const std::vector<U> & source,const std::vector<U> & target,index_t & tmp){
2584     partial_degrees(u,source,tmp);
2585     u=0;
2586     for (int i=int(source.size())-1;i>=0;--i){
2587       u += target[i]*tmp[i];
2588     }
2589   }
2590 
2591   template<class T,class U>
convert(std::vector<T_unsigned<T,U>> & a,const std::vector<U> & source,const std::vector<U> & target)2592   void convert(std::vector< T_unsigned<T,U> > & a,const std::vector<U> & source,const std::vector<U> & target){
2593     index_t tmp(source.size());
2594     typename std::vector< T_unsigned<T,U> >::iterator it=a.begin(),itend=a.end();
2595     for (;it!=itend;++it){
2596       convert(it->u,source,target,tmp);
2597     }
2598   }
2599 
hashdivrem_finish_later(longlong a)2600   inline bool hashdivrem_finish_later(longlong a){ return false;}
hashdivrem_finish_later(double a)2601   inline bool hashdivrem_finish_later(double a){return false;}
hashdivrem_finish_later(int a)2602   inline bool hashdivrem_finish_later(int a){ return false; }
hashdivrem_finish_later(const vecteur & v)2603   inline bool hashdivrem_finish_later(const vecteur & v){ return false; }
hashdivrem_finish_later(const std::vector<int> & v)2604   inline bool hashdivrem_finish_later(const std::vector<int> & v){ return false; }
2605 #ifdef INT128
hashdivrem_finish_later(int128_t a)2606   inline bool hashdivrem_finish_later(int128_t a){return false;}
2607 #endif
2608 
hashdivrem_finish_later(const gen & a)2609   inline bool hashdivrem_finish_later(const gen & a){return true;}
hashdivrem_finish_later(const my_mpz & a)2610   inline bool hashdivrem_finish_later(const my_mpz & a){return true;}
2611 #ifdef HAVE_GMPXX_H
hashdivrem_finish_later(const mpz_class & a)2612   inline bool hashdivrem_finish_later(const mpz_class & a){return true;}
2613 #endif
2614 
2615   template<class U>
one_index_smaller(U u,U v,const std::vector<int> & varsshift)2616   inline bool one_index_smaller(U u,U v,const std::vector<int> & varsshift){
2617     if (u<v)
2618       return true;
2619     return false;
2620     /*
2621     // the code below is too slow
2622     std::vector<int>::const_iterator it=varsshift.begin(),itend=varsshift.end();
2623     for (;it!=itend;++it){
2624       int shift=*it;
2625       U u1=(u>>shift);
2626       U v1=(v>>shift);
2627       if (u1<v1)
2628 	return true;
2629       u -= (u1 << shift);
2630       v -= (v1 << shift);
2631     }
2632     return false;
2633     */
2634   }
2635 
2636   // #define HEAP_STATS
2637   // note that U may be of type vector of int or an int
2638   // + is used to multiply monomials and - to divide
2639   // / should return the quotient of the main variable exponent
2640   // > should return true if a monomial has main degree >
2641   // vars is the list of monomials x,y,z,etc. as translated in U type
2642   // quo_only==-3 means heap div (compute quo and rem)
2643   // quo_only==-2 means compute quotient only using heap div
2644   // quo_only==-1 heap quotient then guess between heap remainder
2645   //              or r=a-b*q must be done by caller (if returns 2)
2646   // quo_only==0 array division or univariate with hashmap
2647   // quo_only==1 means we want to check that b divides a
2648   // quo_only==2 means we know that b divides a and we search the cofactor
2649   // quo_only==3 array division if enough memory
2650   // quo_only>3 univariate division with hashmap or map
2651   // for coefficients monomial storage
2652   // returns 1 if ok, 2 if ok but remainder not computed, 0 or -1 otherwise
2653   template<class T,class U,class R>
2654   int hashdivrem(const std::vector< T_unsigned<T,U> > & a,const std::vector< T_unsigned<T,U> > & b,std::vector< T_unsigned<T,U> > & q,std::vector< T_unsigned<T,U> > & r,const std::vector<U> & vars,const R & reduce,double qmax,bool allowrational,int quo_only=0){
2655     // CERR << "hashdivrem dim " << vars.size() << " clock " << CLOCK() << '\n';
2656     q.clear();
2657     r.clear();
2658     if (a.empty()){
2659       return 1;
2660     }
2661     if (b.empty()){
2662       r=a;
2663       return 1;
2664     }
2665     U mainv=vars.front();
2666     unsigned mainvar=0;
2667     for (; (mainv >>= 1) ;++mainvar)
2668       ;
2669     U bu=b.front().u;
2670     int bdeg=int(bu >> mainvar);
2671     int adeg=int(a.front().u >> mainvar),rdeg;
2672     if (adeg<bdeg){
2673       r=a;
2674       return 1;
2675     }
2676     U rstop=U(bdeg) << mainvar;
2677     typename std::vector< T_unsigned<T,U> >::iterator it1,it1end;
2678     typename std::vector< T_unsigned<T,U> >::const_iterator it2,it2end,itbbeg=b.begin(),itbend=b.end(),ita=a.begin(),itaend=a.end(),cit,citend,qbeg;
2679     T binv=b.front().g;
2680     if (!is_zero(reduce))
2681       binv=invmod(binv,reduce);
2682     if (b.size()==1){
2683       for (cit=a.begin(),citend=a.end();cit!=citend;++cit){
2684 	if (rstop>cit->u)
2685 	  break;
2686 	if (cit->u<b.front().u)
2687 	  return 0;
2688 	T qn;
2689 	// qn=reduce?smod(cit->g*binv,reduce):cit->g/binv;
2690 	if (!is_zero(reduce))
2691 	  type_operator_reduce(cit->g,binv,qn,reduce);
2692 	else
2693 	  qn=cit->g/binv;
2694 	if (qmax!=0 && qn>qmax)
2695 	  return -1;
2696 	if (is_zero(reduce) && !allowrational && !is_zero(cit->g % binv))
2697 	  return 0;
2698 	q.push_back(T_unsigned<T,U>(qn,cit->u-b.front().u));
2699       }
2700       for (;cit!=citend;++cit)
2701 	r.push_back(*cit);
2702       // CERR << "hashdivrem end dim " << vars.size() << " clock " << CLOCK() << '\n';
2703       return 1;
2704     }
2705     unsigned as=unsigned(a.size()),bs=unsigned(b.size());
2706     double v1v2=double(as)*bs;
2707     // FIXME, if bdeg==0
2708     if (
2709 	(!quo_only || quo_only==3) &&
2710 	//quo_only==3 && //is_zero(reduce) &&
2711 	bdeg && as>=a.front().u/25. // array div disabled, probably too much memory used
2712 	&& heap_mult>=0 && a.front().u < 512e6/sizeof(T)){
2713       U umax=a.front().u,u;
2714       if (debug_infolevel>1)
2715 	CERR << CLOCK()*1e-6 << " array division, a size " << a.size() << " b size " << b.size() << " u " << umax << '\n';
2716       // array division
2717       T * rem = new T[unsigned(umax+1)];
2718       for (u=0;u<=umax;++u)
2719 	rem[u]=T(0);
2720       // find maincoeff of b
2721       std::vector< T_unsigned<T,U> > lcoeffb;
2722       for (cit=b.begin(),citend=b.end();cit!=citend;++cit){
2723 	register U u=cit->u;
2724 	if (rstop>u)
2725 	  break;
2726 	lcoeffb.push_back(T_unsigned<T,U>(cit->g,u-rstop));
2727       }
2728       // fill rem with a
2729       for (cit=a.begin(),citend=a.end();cit!=citend;++cit){
2730 	rem[cit->u]=cit->g;
2731       }
2732       std::vector< T_unsigned<T,U> > maincoeff,quo,tmp;
2733       for (rdeg=adeg;rdeg>=bdeg;--rdeg){
2734 	U ushift=U(rdeg) << mainvar;
2735 	maincoeff.clear();
2736 	quo.clear();
2737 	tmp.clear();
2738 	// find leading coeff of rem
2739 	for (;umax>=ushift;--umax){
2740 	  T & g=rem[umax];
2741 	  if (!is_zero(g))
2742 	    maincoeff.push_back(T_unsigned<T,U>(g,umax-ushift));
2743 	}
2744 	if (maincoeff.empty())
2745 	  continue;
2746 	ushift=U(rdeg-bdeg) << mainvar;
2747 	// divide maincoeff by lcoeff(b)
2748 	// this is done by recursion except when univariate
2749 	if (vars.size()==1){
2750 	  if (lcoeffb.size()!=1 || maincoeff.size()!=1)
2751 	    return 0;
2752 	  T res;
2753 	  if (!is_zero(reduce))
2754 	    type_operator_reduce(maincoeff.front().g,binv,res,reduce);// res=smod(maincoeff.front().g*binv,reduce);
2755 	  else {
2756 	    res=maincoeff.front().g/binv;
2757 	    if (qmax && res>qmax){
2758 	      delete [] rem;
2759 	      return -1;
2760 	    }
2761 	    if (!allowrational && !is_zero(maincoeff.front().g%binv) ){
2762 	      delete [] rem;
2763 	      return 0;
2764 	    }
2765 	  }
2766 	  quo.push_back(T_unsigned<T,U>(res,maincoeff.front().u+ushift));
2767 	  q.push_back(quo.back());
2768 	}
2769 	else {
2770 	  int recdivres=hashdivrem(maincoeff,lcoeffb,quo,tmp,std::vector<U>(vars.begin()+1,vars.end()),reduce,qmax,allowrational);
2771 	  if (recdivres<1){
2772 	    delete [] rem;
2773 	    return recdivres;
2774 	  }
2775 	  if (!tmp.empty()){
2776 	    delete [] rem;
2777 	    return 0;
2778 	  }
2779 	  for (it1=quo.begin(),it1end=quo.end();it1!=it1end;++it1){
2780 	    it1->u += ushift;
2781 	    q.push_back(*it1);
2782 	  }
2783 	}
2784 	// rem -= quo*b
2785 	for (cit=itbbeg;cit!=itbend;++cit){
2786 	  T g1=-cit->g;
2787 	  U u,u1=cit->u;
2788 	  // int deg1=u1/mainvar;
2789 	  if (!is_zero(reduce)){
2790 	    for (it2=quo.begin(),it2end=quo.end();it2!=it2end;++it2){
2791 	      u=u1+it2->u;
2792 	      register int deg = int(u >> mainvar); // deg=deg1+it2->u/mainvar;
2793 	      if (deg<rdeg){
2794 		type_operator_plus_times_reduce(g1,it2->g,rem[u],reduce);
2795 	      }
2796 	    }
2797 	  }
2798 	  else {
2799 	    for (it2=quo.begin(),it2end=quo.end();it2!=it2end;++it2){
2800 	      u=u1+it2->u;
2801 	      register int deg=int(u >> mainvar);
2802 	      if (deg<rdeg){
2803 		type_operator_plus_times(g1,it2->g,rem[u]);
2804 	      }
2805 	    }
2806 	  }
2807 	}
2808 	// end rem -= quo*b
2809       }
2810       // move rem to r
2811       for (;;--umax){
2812 	T & g=rem[umax];
2813 	if (!is_zero(g))
2814 	  r.push_back(T_unsigned<T,U>(g,umax));
2815 	if (umax==0)
2816 	  break;
2817       }
2818       delete [] rem;
2819       return 1;
2820     } // end array division
2821     bool use_heap=false &&
2822       (heap_mult>0
2823        && v1v2>=heap_mult
2824        );
2825 #if 1 // heap division
2826     if (heap_mult<0 || use_heap ||
2827 	(quo_only && quo_only<3)){
2828 	  // quo_only<3){
2829       bool norem=quo_only==2 || quo_only==-2;
2830       norem=false; // currently disabled, it's not clear it wins something
2831       std::vector<int> varsshift; // vars=2^varsshift
2832       if (norem){
2833 	for (size_t i=0;i<vars.size();++i){
2834 	  int j=sizeinbase2(vars[i])-1;
2835 	  if (vars[i]!=(U(1)<<j))
2836 	    norem=false;
2837 	  varsshift.push_back(j);
2838 	}
2839       }
2840 #ifdef HEAP_STATS
2841       unsigned chain=0,nochain=0,typeopreduce=0,nullq=0;
2842 #endif
2843       if (debug_infolevel>1)
2844 	CERR << CLOCK()*1e-6 << " heap division, a size " << a.size() << " b size " << b.size() << " vars " << vars << '\n';
2845       // heap division:
2846       // ita an iterator on a, initial value a.begin()
2847       // a heap with the current state of q*b, initialized to empty heap
2848       // loop:
2849       // compare degree of the iterator in a and heap top
2850       // if both are < rstop (degree of b) break
2851       // if equal: add monomial of a to heap top coeff, move a iterator
2852       // if a>: add term to q and to the heap by mult/dividing by binv
2853       // if heap >: ++ heaptop b iterator, add term to q and to the heap
2854       // after division loop:
2855       // finish the heap multiplication into r
2856       //
2857       // vindex[i] is the heap chain corresponding to index i
2858       // it contains pairs of index in g and q
2859       std::vector< std::vector< std::pair<unsigned,unsigned> > > vindex(bs) ; // ,std::vector< std::pair<unsigned,unsigned> >(4));
2860       U_unsigned<U> * heap = new U_unsigned<U>[bs], * heapbeg =heap, * heapend=heap ;
2861       T g;
2862       U heapu,u;
2863       // qnouveau is used to store new pairs of products g*q when the monomial in q is not yet known
2864       std::vector< std::pair<unsigned,unsigned> > qnouveau(bs);
2865       std::vector< std::pair<unsigned,unsigned> > nouveau;
2866       // initialize qnouveau to fill the heap when first quotient term computed
2867       for (unsigned int i=0;i<bs;++i){
2868 	qnouveau[i]=std::pair<unsigned,unsigned>(i,0);
2869 	*(heap+i)=U_unsigned<U>(0,i);
2870       }
2871       for (;;){
2872 	g=T(0);
2873 	// compare current position in a with heap top
2874 	if (heapbeg!=heapend){
2875 	  heapu=heapbeg->u;
2876 	  if (ita!=itaend && ita->u>heapu){
2877 	    if (ita->u>=bu){
2878 	      heapu=ita->u;
2879 	      g=ita->g;
2880 	      ++ita;
2881 	    }
2882 	  }
2883 	  else {
2884 	    if (heapu>=bu){
2885 	      // find all pairs having heapu as monomial
2886 	      std::vector< std::pair<unsigned,unsigned> >::iterator it,itend;
2887 	      nouveau.clear();
2888 	      while (heapend!=heapbeg && heapu==heapbeg->u){
2889 		//nouveau.clear();
2890 		// add all elements of the top chain
2891 		std::vector< std::pair<unsigned,unsigned> > & V=vindex[heapbeg->v];
2892 		it=V.begin();
2893 		itend=V.end();
2894 		qbeg=q.begin();
2895 		size_t qsize=q.size();
2896 		for (;it!=itend;++it){
2897 #ifdef HEAP_STATS
2898 		  ++typeopreduce;
2899 #endif
2900 		  unsigned & its=it->second;
2901 		  type_operator_plus_times_reduce((itbbeg+it->first)->g,(qbeg+its)->g,g,reduce);
2902 		  // increment 2nd poly index of the elements of the top chain
2903 		  ++its;
2904 		  if (its<qsize){
2905 		    nouveau.push_back(*it);
2906 		  }
2907 		  else // wait for computation of a new term of a before adding to the heap
2908 		    qnouveau.push_back(*it);
2909 		}
2910 		// erase top node,
2911 #ifdef USTL
2912 		ustl::pop_heap(heapbeg,heapend);
2913 #else
2914 		std::pop_heap(heapbeg,heapend);
2915 #endif
2916 		// std::pop_heap(heapbeg,heapend);
2917 		--heapend;
2918 	      } // while heapend!=heapbeg && heapu==
2919 	      {
2920 		// push each element of the incremented top chain
2921 		it=nouveau.begin();
2922 		itend=nouveau.end();
2923 		for (;it!=itend;++it) {
2924 		  u=(itbbeg+it->first)->u+(qbeg+it->second)->u;
2925 		  if (norem && one_index_smaller(u,bu,varsshift)) continue;
2926 		  // check if u is in the path to the root of the heap
2927 		  unsigned holeindex=unsigned(heapend-heapbeg),parentindex;
2928 		  if (holeindex && u==heapbeg->u){
2929 #ifdef HEAP_STATS
2930 		    ++chain;
2931 #endif
2932 		    vindex[heapbeg->v].push_back(*it);
2933 		    continue;
2934 		  }
2935 		  bool done=false;
2936 		  while (holeindex){
2937 		    parentindex=(holeindex-1) >> 1;
2938 		    U pu=(heapbeg+parentindex)->u;
2939 		    if (u<pu)
2940 		      break;
2941 		    if (u==pu){
2942 		      done=true;
2943 		      vindex[(heapbeg+parentindex)->v].push_back(*it);
2944 #ifdef HEAP_STATS
2945 		      ++chain;
2946 #endif
2947 		      break;
2948 		    }
2949 		    holeindex=parentindex;
2950 		  }
2951 		  if (!done){
2952 #ifdef HEAP_STATS
2953 		    ++nochain;
2954 #endif
2955 		    // not found, add a new node to the heap
2956 		    std::vector< std::pair<unsigned,unsigned> > & V=vindex[heapend->v];
2957 		    V.clear();
2958 		    V.push_back(*it);
2959 		    heapend->u=u;
2960 		    ++heapend;
2961 #ifdef USTL
2962 		    ustl::push_heap(heapbeg,heapend);
2963 #else
2964 		    std::push_heap(heapbeg,heapend);
2965 #endif
2966 		    // std::push_heap(heapbeg,heapend);
2967 		  }
2968 		} // end adding incremented pairs from nouveau
2969 	      } // end loop on monomial of the heap having the same u
2970 	      if (heapu==ita->u){
2971 		g = ita->g -g ; // FIXME must be reduced!
2972 		if (!is_zero(reduce))
2973 		  g = g % reduce;
2974 		++ita;
2975 	      }
2976 	      else
2977 		g=-g;
2978 	      if (is_zero(g)){
2979 #ifdef HEAP_STATS
2980 		++nullq;
2981 #endif
2982 		continue;
2983 	      }
2984 	    } // end if (heapu>=bu)
2985 	  } // end else ita->u>heapu
2986 	} // if heap non empty
2987 	else {
2988 	  if (ita!=itaend && (heapu=ita->u)>=bu){
2989 	    g=ita->g;
2990 	    ++ita;
2991 	  }
2992 	} // end if (!heap.empty())
2993 	if (is_zero(g))
2994 	  break;
2995 	if (is_zero(reduce) && !allowrational && !is_zero(g % binv)){
2996 	  delete [] heap;
2997 	  return 0;
2998 	}
2999 	// g=reduce?smod(g*binv,reduce):g/binv;
3000 	if (!is_zero(reduce))
3001 	  type_operator_reduce(g,binv,g,reduce);
3002 	else
3003 	  g=g/binv;
3004 	if (qmax && (g>qmax || -g>qmax)){
3005 	  delete [] heap;
3006 	  return -1;
3007 	}
3008 	// FIXME check that heapu has all components>=bu, otherwise should be in remainder
3009 	// new quotient term
3010 	q.push_back(T_unsigned<T,U>(g,heapu-bu));
3011 	// explore qnouveau and add terms to the heap
3012 	std::vector< std::pair<unsigned,unsigned> >::iterator it,itend;
3013 	it=qnouveau.begin();
3014 	itend=qnouveau.end();
3015 	U prevu=0; int previndex=-1;
3016 	for (;it!=itend;++it){
3017 	  if (!it->first) // leading term of b already taken in account
3018 	    continue;
3019 	  u=(itbbeg+it->first)->u+(q.begin()+it->second)->u;
3020 	  if (norem && one_index_smaller(u,bu,varsshift)) continue;
3021 	  if (u==prevu && previndex>=0){
3022 #ifdef HEAP_STATS
3023 	    ++chain;
3024 #endif
3025 	    vindex[previndex].push_back(*it);
3026 	    continue;
3027 	  }
3028 	  prevu=u;
3029 	  // check if u is in the path to the root of the heap
3030 	  unsigned holeindex=unsigned(heapend-heapbeg),parentindex;
3031 	  if (holeindex && u==heapbeg->u){
3032 #ifdef HEAP_STATS
3033 	    ++chain;
3034 #endif
3035 	    vindex[previndex=heapbeg->v].push_back(*it);
3036 	    continue;
3037 	  }
3038 	  bool done=false;
3039 	  while (holeindex){
3040 	    parentindex=(holeindex-1) >> 1;
3041 	    U pu=(heapbeg+parentindex)->u;
3042 	    if (u<pu)
3043 	      break;
3044 	    if (u==pu){
3045 #ifdef HEAP_STATS
3046 	      ++chain;
3047 #endif
3048 	      done=true;
3049 	      vindex[previndex=(heapbeg+parentindex)->v].push_back(*it);
3050 	      break;
3051 	    }
3052 	    holeindex=parentindex;
3053 	  }
3054 	  if (!done){
3055 #ifdef HEAP_STATS
3056 	    ++nochain;
3057 #endif
3058 	    // not found, add a new node to the heap
3059 	    std::vector< std::pair<unsigned,unsigned> > & V=vindex[(previndex=heapend->v)];
3060 	    V.clear();
3061 	    V.push_back(*it);
3062 	    heapend->u=u;
3063 	    ++heapend;
3064 #ifdef USTL
3065 	    ustl::push_heap(heapbeg,heapend);
3066 #else
3067 	    std::push_heap(heapbeg,heapend);
3068 #endif
3069 	    // std::push_heap(heapbeg,heapend);
3070 	  }
3071 	} // end adding incremented pairs from qnouveau
3072 	qnouveau.clear();
3073       } // for (;;)
3074 #ifdef HEAP_STATS
3075       if (debug_infolevel)
3076 	CERR << "chain " << chain << ", nochain " << nochain << ", type_op_reduce " << typeopreduce << " null quotients" << nullq << '\n';
3077 #endif
3078       // r still empty
3079       if (debug_infolevel>2)
3080 	CERR << CLOCK()*1e-6 << " Finished computing quotient, size " << q.size() << '\n' ;
3081       if (quo_only==2 || quo_only==-2){
3082 	delete [] heap;
3083 	return 1;
3084       }
3085       if (quo_only==-1 ){
3086 	// try to compare heap mult and array mult, return for array mult only
3087 	double qb=double(q.size())*b.size();
3088 	qb /= a.size();
3089 	if (debug_infolevel>1)
3090 	  CERR << CLOCK()*1e-6 << " qb=" << qb << '\n';
3091 	if (qb>100){
3092 	  // the coefficients might be not optimal (mpz_class instead of int)
3093 	  if (hashdivrem_finish_later(a.front().g)){
3094 	    delete [] heap;
3095 	    return 2;
3096 	  }
3097 	  // the monomials are not stored efficiently for array *, compress
3098 	  index_t adeg,bdeg,qdeg,bqdeg;
3099 	  partial_degrees(a,vars,adeg);
3100 	  partial_degrees(b,vars,bdeg);
3101 	  partial_degrees(q,vars,qdeg);
3102 	  bqdeg=bdeg+qdeg;
3103 	  bqdeg=index_lcm(bqdeg,adeg);
3104 	  std::vector<U> newvars(vars.size());
3105 	  partial_degrees2vars(bqdeg,newvars);
3106 	  std::vector< T_unsigned<T,U> > acopy(a),bcopy(b),qcopy(q),bq;
3107 	  convert(acopy,vars,newvars);
3108 	  convert(bcopy,vars,newvars);
3109 	  convert(qcopy,vars,newvars);
3110 	  if (debug_infolevel>1)
3111 	    CERR << CLOCK()*1e-6 << " compress monomials done" <<'\n';
3112 	  if (!threadmult(bcopy,qcopy,bq,newvars.front(),reduce,a.size()))
3113 	    smallmult(bcopy,qcopy,bq,reduce,as);
3114 	  if (!is_zero(reduce))
3115 	    smallsub(acopy,bq,r,reduce);
3116 	  else
3117 	    smallsub(acopy,bq,r);
3118 	  if (debug_infolevel>1)
3119 	    CERR << CLOCK()*1e-6 << " uncompress monomials" <<'\n';
3120 	  convert(r,newvars,vars);
3121 	  if (debug_infolevel>1)
3122 	    CERR << CLOCK()*1e-6 << " uncompress monomials end"<< '\n';
3123 	  delete [] heap;
3124 	  return 1;
3125 	}
3126       }
3127       // now q is computed, combine a and remaining product to r
3128       for (;heapbeg!=heapend;){
3129 	heapu=heapbeg->u;
3130 	if (ita!=itaend){
3131 	  if (ita->u>heapu){
3132 	    r.push_back(*ita);
3133 	    ++ita;
3134 	    continue;
3135 	  }
3136 	  if (ita->u<heapu)
3137 	    g=T(0);
3138 	  else {
3139 	    g=-ita->g; // opposite sign since we neg at the end
3140 	    ++ita;
3141 	  }
3142 	} // ita!=itaend
3143 	// add all terms from the heap with same monomial
3144 	while (heapbeg!=heapend && heapbeg->u==heapu){
3145 	  qnouveau.clear();
3146 	  std::vector< std::pair<unsigned,unsigned> >::iterator it,itend;
3147 	  it=vindex[heapbeg->v].begin();
3148 	  itend=vindex[heapbeg->v].end();
3149 	  for (;it!=itend;++it){
3150 	    unsigned & its=it->second;
3151 	    type_operator_plus_times_reduce((itbbeg+it->first)->g,(q.begin()+its)->g,g,reduce);
3152 	    // increment 2nd poly index of the elements of the top chain
3153 	    ++its;
3154 	    if (its<q.size())
3155 	      qnouveau.push_back(*it);
3156 	  }
3157 	  // erase top node,
3158 #ifdef USTL
3159 	  ustl::pop_heap(heapbeg,heapend);
3160 #else
3161 	  std::pop_heap(heapbeg,heapend);
3162 #endif
3163 	  // std::pop_heap(heapbeg,heapend);
3164 	  --heapend;
3165 	  // push each element of the incremented top chain
3166 	  it=qnouveau.begin();
3167 	  itend=qnouveau.end();
3168 	  U prevu=0; int previndex=-1;
3169 	  for (;it!=itend;++it){
3170 	    u=(itbbeg+it->first)->u+(q.begin()+it->second)->u;
3171 	    if (u==prevu && previndex>=0){
3172 	      vindex[previndex].push_back(*it);
3173 	      continue;
3174 	    }
3175 	    prevu=u;
3176 	    // check if u is in the path to the root of the heap
3177 	    unsigned holeindex=unsigned(heapend-heapbeg),parentindex;
3178 	    if (holeindex && u==heapbeg->u){
3179 	      vindex[(previndex=heapbeg->v)].push_back(*it);
3180 	      continue;
3181 	    }
3182 	    bool done=false;
3183 	    while (holeindex){
3184 	      parentindex=(holeindex-1) >> 1;
3185 	      U pu=(heapbeg+parentindex)->u;
3186 	      if (u<pu)
3187 		break;
3188 	      if (u==pu){
3189 		done=true;
3190 		vindex[(previndex=(heapbeg+parentindex)->v)].push_back(*it);
3191 		break;
3192 	      }
3193 	      holeindex=parentindex;
3194 	    }
3195 	    if (!done){
3196 	      // not found, add a new node to the heap
3197 	      std::vector< std::pair<unsigned,unsigned> > & V=vindex[(previndex=heapend->v)];
3198 	      V.clear();
3199 	      V.push_back(*it);
3200 	      heapend->u=u;
3201 	      ++heapend;
3202 #ifdef USTL
3203 	      ustl::push_heap(heapbeg,heapend);
3204 #else
3205 	      std::push_heap(heapbeg,heapend);
3206 #endif
3207 	      // std::push_heap(heapbeg,heapend);
3208 	    }
3209 	  } // end adding incremented pairs from nouveau
3210 	} // end while heapbeg!=heapend && heapbeg->u==heapu
3211 	// add -g to r
3212 	if (!is_zero(g))
3213 	  r.push_back(T_unsigned<T,U>(-g,heapu));
3214       } // end for (heapbeg!=heapend)
3215       for (;ita!=itaend;++ita)
3216 	r.push_back(*ita);
3217       delete [] heap;
3218       return 1;
3219     }
3220 #endif // heap division
3221 #ifdef HASH_MAP_NAMESPACE
3222     typedef HASH_MAP_NAMESPACE::hash_map< U,T,hash_function_unsigned_object> hash_prod ;
3223     std::vector< hash_prod > produit(adeg+1);
3224 #else
3225 #ifdef USTL
3226     typedef ustl::map<U,T> hash_prod;
3227 #else
3228     typedef std::map<U,T> hash_prod;
3229 #endif
3230     std::vector< hash_prod > produit(adeg+1);
3231 #endif
3232     typename hash_prod::iterator prod_it,prod_itend;
3233     hash_prod * hashptr;
3234     // find maincoeff of b
3235     std::vector< T_unsigned<T,U> > lcoeffb;
3236     for (cit=b.begin(),citend=b.end();cit!=citend;++cit){
3237       register U u=cit->u;
3238       if (rstop>u)
3239 	break;
3240       lcoeffb.push_back(T_unsigned<T,U>(cit->g,u-rstop));
3241     }
3242     // copy a to remainder
3243     std::vector< T_unsigned<T,U> > maincoeff,quo,tmp;
3244 #if 1
3245     // memory estimations
3246 #if 1 // def VISUALC
3247     size_t * produit_s=(size_t *) malloc((adeg+1)*sizeof(size_t));
3248     // size_t * produit_s=(size_t *) alloca((adeg+1)*sizeof(size_t));
3249 #else
3250     size_t produit_s[adeg+1];
3251 #endif
3252     for (int i=0;i<=adeg;++i)
3253       produit_s[i]=0;
3254     size_t curcoeffsize=0;
3255     for (cit=a.begin(),citend=a.end();cit!=citend;++cit){
3256       U u=cit->u;
3257       ++produit_s[unsigned(u >> mainvar)];
3258     }
3259     for (int i=0;i<=adeg;++i){
3260       produit.reserve(int(produit_s[i]*1.1));
3261       if (i>=bdeg && curcoeffsize<produit_s[i])
3262 	curcoeffsize=produit_s[i];
3263     }
3264 #if 1 // def VISUALC
3265     free(produit_s);
3266 #endif
3267     curcoeffsize = int(1.1*curcoeffsize);
3268     maincoeff.reserve(curcoeffsize);
3269     quo.reserve(curcoeffsize);
3270     q.reserve(curcoeffsize*2);
3271     tmp.reserve(curcoeffsize);
3272 #endif // memory estimates
3273     std::vector<U> vars2(vars.begin()+1,vars.end());
3274     // do it
3275     for (cit=a.begin(),citend=a.end();cit!=citend;++cit){
3276       U u=cit->u;
3277       produit[unsigned(u >> mainvar)][u]=cit->g;
3278     }
3279     for (rdeg=adeg;rdeg>=bdeg;--rdeg){
3280       if (debug_infolevel>20)
3281 	CERR << "hashdivrem degree " << rdeg << " " << CLOCK() << '\n';
3282       if (produit[rdeg].empty())
3283 	continue;
3284       // find degree of remainder and main coeff
3285       maincoeff.clear(); quo.clear(); tmp.clear();
3286       U ushift=U(rdeg) << mainvar;
3287       for (prod_it=produit[rdeg].begin(),prod_itend=produit[rdeg].end();prod_it!=prod_itend;++prod_it){
3288 	if (!is_zero(prod_it->second))
3289 	  maincoeff.push_back(T_unsigned<T,U>(prod_it->second,prod_it->first-ushift));
3290       }
3291       if (maincoeff.empty())
3292 	continue;
3293       sort(maincoeff.begin(),maincoeff.end());
3294       ushift=U(rdeg-bdeg) << mainvar;
3295       // divide maincoeff by lcoeff(b)
3296       // this is done by recursion except when univariate
3297       if (vars.size()==1){
3298 	if (lcoeffb.size()!=1 || maincoeff.size()!=1)
3299 	  return 0;
3300 	T res;
3301 	if (!is_zero(reduce))
3302 	  type_operator_reduce(maincoeff.front().g,binv,res,reduce);// res=smod(maincoeff.front().g*binv,reduce);
3303 	else {
3304 	  res=maincoeff.front().g/binv;
3305 	  if (qmax && res>qmax)
3306 	    return -1;
3307 	  if (!allowrational && !is_zero(maincoeff.front().g%binv) )
3308 	    return 0;
3309 	}
3310 	quo.push_back(T_unsigned<T,U>(res,maincoeff.front().u+ushift));
3311 	q.push_back(quo.back());
3312       }
3313       else {
3314 	int recdivres=hashdivrem(maincoeff,lcoeffb,quo,tmp,vars2,reduce,qmax,allowrational);
3315 	if (recdivres<1)
3316 	  return recdivres;
3317 	if (!tmp.empty())
3318 	  return 0;
3319 	for (it1=quo.begin(),it1end=quo.end();it1!=it1end;++it1){
3320 	  it1->u += ushift;
3321 	  q.push_back(*it1);
3322 	}
3323       }
3324       // remainder -= quo*b
3325       for (cit=itbbeg;cit!=itbend;++cit){
3326 	T g1=-cit->g;
3327 	U u,u1=cit->u;
3328 	// int deg1=u1/mainvar;
3329 	if (!is_zero(reduce)){
3330 	  for (it2=quo.begin(),it2end=quo.end();it2!=it2end;++it2){
3331 	    u=u1+it2->u;
3332 	    register int deg = int(u >> mainvar); // deg=deg1+it2->u/mainvar;
3333 	    if (deg<rdeg){
3334 	      hashptr = &produit[deg];
3335 	      prod_it=hashptr->find(u);
3336 	      if (prod_it==hashptr->end())
3337 		//(*hashptr)[u]=(g1*it2->g)%reduce;
3338 		type_operator_reduce(g1,it2->g,(*hashptr)[u],reduce);
3339 	      else {
3340 		// prod_it->second += g1*it2->g;
3341 		// prod_it->second %= reduce;
3342 		type_operator_plus_times_reduce(g1,it2->g,prod_it->second,reduce);
3343 		if (is_zero(prod_it->second)) hashptr->erase(prod_it);
3344 	      }
3345 	    }
3346 	  }
3347 	}
3348 	else {
3349 	  for (it2=quo.begin(),it2end=quo.end();it2!=it2end;++it2){
3350 	    u=u1+it2->u;
3351 	    register int deg=int(u >> mainvar);
3352 	    if (deg<rdeg){
3353 	      hashptr = &produit[deg];
3354 	      prod_it=hashptr->find(u);
3355 	      if (prod_it==hashptr->end()){
3356 		type_operator_times(g1,it2->g,(*hashptr)[u]);
3357 	      }
3358 	      else {
3359 		type_operator_plus_times(g1,it2->g,prod_it->second);
3360 		if (is_zero(prod_it->second)) hashptr->erase(prod_it);
3361 	      }
3362 	    }
3363 	  }
3364 	}
3365       }
3366       // end rem -= quo*b
3367     } // end for (redg=...)
3368 #if 0
3369     CERR << "dim " << vars.size() << ", curcoeffsize " << curcoeffsize << '\n' << "maincoeff " << maincoeff.size() << "," << maincoeff.capacity() << '\n' << "quo " << quo.size() << "," << quo.capacity() << '\n' << "q " << q.size() << "," << q.capacity() << '\n';
3370 #endif
3371     // copy remainder to r and sort
3372     unsigned rsize=0;
3373     for (int i=0;i<bdeg;++i)
3374       rsize += unsigned(produit[i].size());
3375     r.reserve(rsize);
3376     T_unsigned<T,U> gu;
3377     for (int i=bdeg-1;i>=0;--i){
3378       for (prod_it=produit[i].begin(),prod_itend=produit[i].end();prod_it!=prod_itend;++prod_it){
3379 	if (!is_zero(gu.g=prod_it->second)){
3380 	  gu.u=prod_it->first;
3381 	  r.push_back(gu);
3382 	}
3383       }
3384     }
3385     // IMPROVE: might do partial sort
3386     sort(r.begin(),r.end());
3387     return 1;
3388   }
3389 
3390 
3391 
3392   template<class T,class U>
is_content_trivially_1(const typename std::vector<T_unsigned<T,U>> & v,U mainvar)3393   bool is_content_trivially_1(const typename std::vector< T_unsigned<T,U> > & v,U mainvar){
3394 #ifdef HASH_MAP_NAMESPACE
3395     typedef HASH_MAP_NAMESPACE::hash_map< U,T,hash_function_unsigned_object > hash_prod ;
3396     hash_prod produit; // try to avoid reallocation
3397     // cout << "hash " << CLOCK() << '\n';
3398 #else
3399 #ifdef USTL
3400     typedef ustl::map<U,T> hash_prod;
3401 #else
3402     typedef std::map<U,T> hash_prod;
3403 #endif
3404     // cout << "small map" << '\n';
3405     hash_prod produit;
3406 #endif
3407     U outer_index,inner_index;
3408     typename std::vector< T_unsigned<T,U> >::const_iterator it=v.begin(),itend=v.end();
3409     for (;it!=itend;++it){
3410       outer_index=it->u % mainvar;
3411       inner_index=it->u/mainvar;
3412       typename hash_prod::iterator jt=produit.find(outer_index),jtend=produit.end();
3413       if (jt==jtend){
3414 	if (inner_index==0)
3415 	  return true;
3416 	produit[outer_index]=it->g;
3417       }
3418       else
3419 	jt->second += it->g;
3420     }
3421     return false;
3422   }
3423 
3424   template<class T,class U>
peval_x1_xn(typename std::vector<T_unsigned<T,U>>::const_iterator it,typename std::vector<T_unsigned<T,U>>::const_iterator itend,const typename std::vector<T> & v,const typename std::vector<U> & vars,const T & reduce)3425   T peval_x1_xn(
3426 		typename std::vector< T_unsigned<T,U> >::const_iterator it,
3427 		typename std::vector< T_unsigned<T,U> >::const_iterator itend,
3428 		const typename std::vector<T> & v,
3429 		const typename std::vector<U> & vars,
3430 		const T & reduce){
3431     if (vars.empty())
3432       return it->g;
3433     int dim=int(vars.size())-1,nterms;
3434     if (dim!=int(v.size())){
3435 #ifndef NO_STDEXCEPT
3436       throw(std::runtime_error("Invalid dimension"));
3437 #endif
3438       return T(0);
3439     }
3440     U mainvar=vars.front(),var2=vars.back(),uend,u,prevu;
3441     T x=v.back();
3442     typename std::vector< T_unsigned<T,U> >::const_iterator itstop;
3443     typename std::vector<U>::const_iterator jtbeg=vars.begin(),jtend=vars.end(),jt;
3444     ++jtbeg;
3445     --jtend;
3446     typename std::vector<T>::const_iterator ktbeg=v.begin(),kt;
3447     T ans=0,total=0;
3448     for (;it!=itend;){
3449       // group monomials with the same x1..xn-1
3450       prevu = it->u % mainvar;
3451       if (dim==1)
3452 	uend=0;
3453       else {
3454 	const U & varxn=vars[dim-1];
3455 	uend =(prevu/varxn)*varxn;
3456       }
3457       nterms = (prevu-uend)/var2;
3458       ans = it->g;
3459       if (nterms>2 && itend-it>nterms && (itstop=it+nterms)->u % mainvar==uend){
3460 	// dense case
3461 	for (;it!=itstop;){
3462 	  ++it;
3463 	  ans = (ans*x+it->g)%reduce;
3464 	}
3465 	++it;
3466       }
3467       else {
3468 	for (++it;it!=itend;++it){
3469 	  u = it->u %mainvar;
3470 	  if (u<uend)
3471 	    break;
3472 	  if (prevu-u==var2)
3473 	    ans = (ans*x+it->g)%reduce;
3474 	  else
3475 	    ans = (ans*powmod(x,(prevu-u)/var2,reduce)+it->g)%reduce;
3476 	  prevu=u;
3477 	}
3478 	ans = (ans*powmod(x,(prevu-uend)/var2,reduce))%reduce;
3479       }
3480       for (jt=jtbeg,kt=ktbeg;jt!=jtend;++jt,++kt){
3481 	// px=px*powmod(*kt,u / *jt,modulo);
3482 	ans = (ans*powmod(*kt,uend / *jt,reduce))%reduce;
3483 	uend = uend % *jt;
3484       }
3485       total = (total+ans) % reduce;
3486     }
3487     return total;
3488   }
3489 
3490   // eval p at x2...xn
3491   template<class T,class U>
peval_x2_xn(const typename std::vector<T_unsigned<T,U>> & p,const typename std::vector<T> & v,const std::vector<U> & vars,std::vector<T_unsigned<T,U>> & res,const T & reduce)3492   void peval_x2_xn(const typename std::vector< T_unsigned<T,U> > & p,
3493 		   const typename std::vector<T> & v,
3494 		   const std::vector<U> & vars,
3495 		   std::vector< T_unsigned<T,U> > & res,
3496 		   const T & reduce){
3497     U mainvar=vars.front(),deg1;
3498     res.clear();
3499     typename std::vector< T_unsigned<T,U> >::const_iterator it=p.begin(),itend=p.end(),it2;
3500     for (;it!=itend;){
3501       deg1=(it->u/mainvar)*mainvar;
3502       for (it2=it;it2!=itend;++it2){
3503 	if (it2->u<deg1)
3504 	  break;
3505       }
3506       T tmp=peval_x1_xn<T,U>(it,it2,v,vars,reduce);
3507       it=it2;
3508       if (!is_zero(tmp))
3509 	res.push_back(T_unsigned<T,U>(tmp,deg1));
3510     }
3511   }
3512 
3513 #ifndef NO_NAMESPACE_GIAC
3514 } // namespace giac
3515 #endif // NO_NAMESPACE_GIAC
3516 
3517 #endif // _GIAC_THREADED_H
3518