1 /*
2     Copyright (C) 2009 William Hart
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #ifdef __unix__
13 #include <unistd.h> /* sysconf */
14 #endif
15 
16 #if defined(_WIN32) || defined(WIN32)
17 #include <windows.h> /* GetSytemInfo */
18 #endif
19 
20 #if defined(_MSC_VER) && HAVE_PTHREAD
21 #include <atomic.h>
22 #endif
23 
24 #include <stdlib.h>
25 
26 #include <gmp.h>
27 #include "flint.h"
28 #include "fmpz.h"
29 
30 /* Always free larger mpz's to avoid wasting too much heap space */
31 #define FLINT_MPZ_MAX_CACHE_LIMBS 64
32 
33 #define PAGES_PER_BLOCK 16
34 
35 /* The number of new mpz's allocated at a time */
36 #define MPZ_BLOCK 64
37 
38 FLINT_TLS_PREFIX __mpz_struct ** mpz_free_arr = NULL;
39 FLINT_TLS_PREFIX ulong mpz_free_num = 0;
40 FLINT_TLS_PREFIX ulong mpz_free_alloc = 0;
41 
42 static slong flint_page_size;
43 static slong flint_mpz_structs_per_block;
44 static slong flint_page_mask;
45 
flint_get_page_size()46 slong flint_get_page_size()
47 {
48 #if defined(__unix__)
49    return sysconf(_SC_PAGESIZE);
50 #elif defined(_WIN32) || defined(WIN32)
51    SYSTEM_INFO si;
52    GetSystemInfo(&si);
53    return si.dwPageSize;
54 #else
55    return 4096;
56 #endif
57 }
58 
flint_align_ptr(void * ptr,slong size)59 void * flint_align_ptr(void * ptr, slong size)
60 {
61     slong mask = ~(size - 1);
62 
63     return (void *)((mask & (slong) ptr) + size);
64 }
65 
_fmpz_new_mpz(void)66 __mpz_struct * _fmpz_new_mpz(void)
67 {
68     if (mpz_free_num == 0) /* allocate more mpz's */
69     {
70         void * aligned_ptr, * ptr;
71 
72         slong i, j, num, block_size, skip;
73 
74         flint_page_size = flint_get_page_size();
75         block_size = PAGES_PER_BLOCK*flint_page_size;
76         flint_page_mask = ~(flint_page_size - 1);
77 
78         /* get new block */
79         ptr = flint_malloc(block_size + flint_page_size);
80 
81         /* align to page boundary */
82         aligned_ptr = flint_align_ptr(ptr, flint_page_size);
83 
84         /* set free count to zero and determine if this is the main thread */
85         ((fmpz_block_header_s *) ptr)->count = 0;
86 #if HAVE_PTHREAD
87         ((fmpz_block_header_s *) ptr)->thread = pthread_self();
88 #endif
89         /* how many __mpz_structs worth are dedicated to header, per page */
90         skip = (sizeof(fmpz_block_header_s) - 1)/sizeof(__mpz_struct) + 1;
91 
92         /* total number of number of __mpz_structs worth per page */
93         num = flint_page_size/sizeof(__mpz_struct);
94 
95         flint_mpz_structs_per_block = PAGES_PER_BLOCK*(num - skip);
96 
97         for (i = 0; i < PAGES_PER_BLOCK; i++)
98         {
99             __mpz_struct * page_ptr = (__mpz_struct *)((slong) aligned_ptr + i*flint_page_size);
100 
101             /* set pointer in each page to start of entire block */
102             ((fmpz_block_header_s *) page_ptr)->address = ptr;
103 
104             for (j = skip; j < num; j++)
105             {
106                 mpz_init2(page_ptr + j, 2*FLINT_BITS);
107 
108                 /*
109                    Cannot be lifted from loop due to possibility of
110                    gc calling _fmpz_clear_mpz during call to mpz_init_2
111                 */
112                 if (mpz_free_num >= mpz_free_alloc)
113                 {
114                     mpz_free_alloc = FLINT_MAX(mpz_free_num + 1, mpz_free_alloc * 2);
115                     mpz_free_arr = flint_realloc(mpz_free_arr, mpz_free_alloc * sizeof(__mpz_struct *));
116                 }
117 
118                 mpz_free_arr[mpz_free_num++] = page_ptr + j;
119             }
120         }
121     }
122 
123     return mpz_free_arr[--mpz_free_num];
124 }
125 
_fmpz_clear_mpz(fmpz f)126 void _fmpz_clear_mpz(fmpz f)
127 {
128     __mpz_struct * ptr = COEFF_TO_PTR(f);
129 
130     /* check free count for block is zero, else this mpz came from a thread */
131     fmpz_block_header_s * header_ptr = (fmpz_block_header_s *)((slong) ptr & flint_page_mask);
132 
133     header_ptr = (fmpz_block_header_s *) header_ptr->address;
134 
135     /* clean up if this is left over from another thread */
136 #if HAVE_PTHREAD
137     if (header_ptr->count != 0 || !pthread_equal(header_ptr->thread, pthread_self()))
138 #else
139     if (header_ptr->count != 0)
140 #endif
141     {
142         int new_count;
143 
144         mpz_clear(ptr);
145 
146 #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && HAVE_PTHREAD
147        new_count = __atomic_add_fetch(&(header_ptr->count), 1, __ATOMIC_SEQ_CST);
148 #elif defined(_MSC_VER) && HAVE_PTHREAD
149        new_count = atomic_add_fetch(&(header_ptr->count), 1);
150 #else
151        new_count = ++header_ptr->count;
152 #endif
153         if (new_count == flint_mpz_structs_per_block)
154 
155             flint_free(header_ptr);
156     } else
157     {
158         if (ptr->_mp_alloc > FLINT_MPZ_MAX_CACHE_LIMBS)
159             mpz_realloc2(ptr, 2*FLINT_BITS);
160 
161         if (mpz_free_num == mpz_free_alloc)
162         {
163             mpz_free_alloc = FLINT_MAX(64, mpz_free_alloc * 2);
164             mpz_free_arr = flint_realloc(mpz_free_arr, mpz_free_alloc * sizeof(__mpz_struct *));
165         }
166 
167         mpz_free_arr[mpz_free_num++] = ptr;
168     }
169 }
170 
_fmpz_cleanup_mpz_content(void)171 void _fmpz_cleanup_mpz_content(void)
172 {
173     ulong i;
174 
175     for (i = 0; i < mpz_free_num; i++)
176     {
177        int new_count;
178        fmpz_block_header_s * ptr;
179 
180        mpz_clear(mpz_free_arr[i]);
181 
182        /* update count of cleared mpz's for block */
183        ptr = (fmpz_block_header_s *)((slong) mpz_free_arr[i] & ~(flint_page_size - 1));
184 
185        ptr = (fmpz_block_header_s *) ptr->address;
186 
187 #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && HAVE_PTHREAD
188        new_count = __atomic_add_fetch(&(ptr->count), 1, __ATOMIC_SEQ_CST);
189 #elif defined(_MSC_VER) && HAVE_PTHREAD
190        new_count = atomic_add_fetch(&(ptr->count), 1);
191 #else /* may be a very small leak with pthreads */
192        new_count = ++ptr->count;
193 #endif
194        if (new_count == flint_mpz_structs_per_block)
195           flint_free(ptr);
196     }
197 
198     mpz_free_num = mpz_free_alloc = 0;
199 }
200 
_fmpz_cleanup(void)201 void _fmpz_cleanup(void)
202 {
203     _fmpz_cleanup_mpz_content();
204     flint_free(mpz_free_arr);
205     mpz_free_arr = NULL;
206 }
207 
_fmpz_promote(fmpz_t f)208 __mpz_struct * _fmpz_promote(fmpz_t f)
209 {
210     if (!COEFF_IS_MPZ(*f)) /* f is small so promote it first */
211     {
212         __mpz_struct * mpz_ptr = _fmpz_new_mpz();
213         (*f) = PTR_TO_COEFF(mpz_ptr);
214         return mpz_ptr;
215     }
216     else /* f is large already, just return the pointer */
217         return COEFF_TO_PTR(*f);
218 }
219 
_fmpz_promote_val(fmpz_t f)220 __mpz_struct * _fmpz_promote_val(fmpz_t f)
221 {
222     fmpz c = (*f);
223     if (!COEFF_IS_MPZ(c)) /* f is small so promote it */
224     {
225         __mpz_struct * mpz_ptr = _fmpz_new_mpz();
226         (*f) = PTR_TO_COEFF(mpz_ptr);
227         flint_mpz_set_si(mpz_ptr, c);
228         return mpz_ptr;
229     }
230     else /* f is large already, just return the pointer */
231         return COEFF_TO_PTR(c);
232 }
233 
_fmpz_demote_val(fmpz_t f)234 void _fmpz_demote_val(fmpz_t f)
235 {
236     __mpz_struct * mpz_ptr = COEFF_TO_PTR(*f);
237     int size = mpz_ptr->_mp_size;
238 
239     if (size == 1 || size == -1)
240     {
241         ulong uval = mpz_ptr->_mp_d[0];
242 
243         if (uval <= (ulong) COEFF_MAX)
244         {
245             _fmpz_clear_mpz(*f);
246             *f = size * (fmpz) uval;
247         }
248     }
249     else if (size == 0)  /* value is 0 */
250     {
251         _fmpz_clear_mpz(*f);
252         *f = 0;
253     }
254 
255     /* don't do anything if value has to be multi precision */
256 }
257 
_fmpz_init_readonly_mpz(fmpz_t f,const mpz_t z)258 void _fmpz_init_readonly_mpz(fmpz_t f, const mpz_t z)
259 {
260    __mpz_struct *ptr;
261    *f = WORD(0);
262    ptr = _fmpz_promote(f);
263 
264    mpz_clear(ptr);
265    *ptr = *z;
266 }
267 
_fmpz_clear_readonly_mpz(mpz_t z)268 void _fmpz_clear_readonly_mpz(mpz_t z)
269 {
270     if (((z->_mp_size == 1 || z->_mp_size == -1) && (z->_mp_d[0] <= COEFF_MAX))
271         || (z->_mp_size == 0))
272     {
273         mpz_clear(z);
274     }
275 }
276