/*
   Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
*/



#include "ndbd_malloc_impl.hpp"
#include <ndb_global.h>
#include <EventLogger.hpp>
#include <portlib/NdbMem.h>

#ifdef NDB_WIN
void *sbrk(int increment)
{
  return (void*)-1;
}
#endif

extern EventLogger * g_eventLogger;

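/*
 * Allocation strategy string consumed by do_malloc(): each character selects
 * the backend for the next attempt ('S'/'s' = sbrk, 'M'/'m' = malloc).
 * On failure, an upper-case method advances immediately to the next
 * character, while a lower-case method first retries with a roughly 10%
 * smaller request before advancing.
 */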
static int f_method_idx = 0;
#ifdef NDBD_MALLOC_METHOD_SBRK
static const char * f_method = "SMsm";
#else
static const char * f_method = "MSms";
#endif
#define MAX_CHUNKS 10

#ifdef VM_TRACE
#ifndef NDBD_RANDOM_START_PAGE
#define NDBD_RANDOM_START_PAGE
#endif
#endif

#ifdef NDBD_RANDOM_START_PAGE
static Uint32 g_random_start_page_id = 0;
#endif

/*
 * For multi-threaded ndbd, these calls are used for locking around
 * memory allocation operations.
 *
 * For single-threaded ndbd, they are no-ops (but still called, to avoid
 * having to compile this file twice).
 */
extern void mt_mem_manager_init();
extern void mt_mem_manager_lock();
extern void mt_mem_manager_unlock();

#define ZONE_LO 0
#define ZONE_HI 1

/**
 * POOL_RECORD_BITS == 13 => 32 - 13 = 19 bits for page
 */
#define ZONE_LO_BOUND (1u << 19)
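/*
 * Page ids below ZONE_LO_BOUND form ZONE_LO and fit in the 19 bits that the
 * pool record layout above leaves for a page id; everything at or above the
 * bound is ZONE_HI.  alloc(NDB_ZONE_ANY, ...) below tries ZONE_HI first and
 * falls back to ZONE_LO.
 */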

#include <NdbOut.hpp>

extern void ndbd_alloc_touch_mem(void * p, size_t sz, volatile Uint32 * watchCounter);

static
bool
do_malloc(Uint32 pages,
          InitChunk* chunk,
          Uint32 *watchCounter,
          void * baseaddress)
{
  pages += 1;
  void * ptr = 0;
  Uint32 sz = pages;

retry:
  if (watchCounter)
    *watchCounter = 9;

  char method = f_method[f_method_idx];
  switch(method){
  case 0:
    return false;
  case 'S':
  case 's':
  {
    ptr = 0;
    while (ptr == 0)
    {
      if (watchCounter)
        *watchCounter = 9;

      ptr = sbrk(sizeof(Alloc_page) * sz);

      if (ptr == (void*)-1)
      {
	if (method == 'S')
	{
	  f_method_idx++;
	  goto retry;
	}

	ptr = 0;
	sz = 1 + (9 * sz) / 10;
	if (pages >= 32 && sz < 32)
	{
	  sz = pages;
	  f_method_idx++;
	  goto retry;
	}
      }
      else if (UintPtr(ptr) < UintPtr(baseaddress))
      {
        /**
         * Unusable memory :(
         */
        ndbout_c("sbrk(%lluMb) => %p which is less than baseaddress!!",
                 Uint64((sizeof(Alloc_page) * sz) >> 20), ptr);
        f_method_idx++;
        goto retry;
      }
    }
    break;
  }
  case 'M':
  case 'm':
  {
    ptr = 0;
    while (ptr == 0)
    {
      if (watchCounter)
        *watchCounter = 9;

      ptr = malloc(sizeof(Alloc_page) * sz);
      if (UintPtr(ptr) < UintPtr(baseaddress))
      {
        ndbout_c("malloc(%lluMb) => %p which is less than baseaddress!!",
                 Uint64((sizeof(Alloc_page) * sz) >> 20), ptr);
        free(ptr);
        ptr = 0;
      }

      if (ptr == 0)
      {
	if (method == 'M')
	{
	  f_method_idx++;
	  goto retry;
	}

	sz = 1 + (9 * sz) / 10;
	if (pages >= 32 && sz < 32)
	{
	  f_method_idx++;
	  goto retry;
	}
      }
    }
    break;
  }
  default:
    return false;
  }

  chunk->m_cnt = sz;
  chunk->m_ptr = (Alloc_page*)ptr;
  const UintPtr align = sizeof(Alloc_page) - 1;
  if (UintPtr(ptr) & align)
  {
    chunk->m_cnt--;
    chunk->m_ptr = (Alloc_page*)((UintPtr(ptr) + align) & ~align);
  }

#ifdef UNIT_TEST
  ndbout_c("do_malloc(%d) -> %p %d", pages, ptr, chunk->m_cnt);
  if (1)
  {
    Uint32 sum = 0;
    Alloc_page* page = chunk->m_ptr;
    for (Uint32 i = 0; i<chunk->m_cnt; i++, page++)
    {
      page->m_data[0*1024] = 0;
      page->m_data[1*1024] = 0;
      page->m_data[2*1024] = 0;
      page->m_data[3*1024] = 0;
      page->m_data[4*1024] = 0;
      page->m_data[5*1024] = 0;
      page->m_data[6*1024] = 0;
      page->m_data[7*1024] = 0;
    }
  }
#endif

  return true;
}

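/**
 * Number of significant bits in input, i.e. floor(log2(input)) + 1, capped
 * at 16 (so ndb_log2(0) = 0, ndb_log2(1) = 1, ndb_log2(8) = 4,
 * ndb_log2(65535) = 16).  Used to pick a buddy free list for a page count.
 */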
Uint32
Ndbd_mem_manager::ndb_log2(Uint32 input)
{
  if (input > 65535)
    return 16;
  input = input | (input >> 8);
  input = input | (input >> 4);
  input = input | (input >> 2);
  input = input | (input >> 1);
  Uint32 output = (input & 0x5555) + ((input >> 1) & 0x5555);
  output = (output & 0x3333) + ((output >> 2) & 0x3333);
  output = output + (output >> 4);
  output = (output & 0xf) + ((output >> 8) & 0xf);
  return output;
}

Ndbd_mem_manager::Ndbd_mem_manager()
{
  m_base_page = 0;
  memset(m_buddy_lists, 0, sizeof(m_buddy_lists));
  memset(m_resource_limit, 0, sizeof(m_resource_limit));

  if (sizeof(Free_page_data) != (4 * (1 << FPD_2LOG)))
  {
    g_eventLogger->error("Invalid build, ndbd_malloc_impl.cpp:%d", __LINE__);
    abort();
  }
  mt_mem_manager_init();
}

void*
Ndbd_mem_manager::get_memroot() const
{
#ifdef NDBD_RANDOM_START_PAGE
  return (void*)(m_base_page - g_random_start_page_id);
#else
  return (void*)m_base_page;
#endif
}

/**
 * Resource 0 has the following semantics:
 *
 * m_min  - remaining reserved for other resources
 * m_curr - sum(m_curr) for other resources (i.e. total in use)
 * m_max  - total allocated from the OS
 *
 * Resource N has the following semantics:
 *
 * m_min  = reserved
 * m_curr = currently used
 * m_max  = max alloc, 0 = no limit
 */
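/**
 * A minimal setup sketch, mirroring the UNIT_TEST main() at the end of this
 * file (variable names here are illustrative only):
 *
 *   Resource_limit rl;
 *   rl.m_min = 0;  rl.m_max = total_pages;  rl.m_curr = 0;  rl.m_resource_id = 0;
 *   mem.set_resource_limit(rl);              // the shared/global pool
 *   rl.m_min = reserved_pages;  rl.m_max = 0;  rl.m_resource_id = 1;
 *   mem.set_resource_limit(rl);              // resource 1: reserved, no cap
 *   mem.init(NULL);                          // then allocate from the OS
 */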
void
Ndbd_mem_manager::set_resource_limit(const Resource_limit& rl)
{
  Uint32 id = rl.m_resource_id;
  assert(id < XX_RL_COUNT);

  Uint32 reserve = id ? rl.m_min : 0;
  mt_mem_manager_lock();
  Uint32 current_reserved = m_resource_limit[0].m_min;

  m_resource_limit[id] = rl;
  m_resource_limit[id].m_curr = 0;
  m_resource_limit[0].m_min = current_reserved + reserve;
  mt_mem_manager_unlock();
}

bool
Ndbd_mem_manager::get_resource_limit(Uint32 id, Resource_limit& rl) const
{
  if (id < XX_RL_COUNT)
  {
    mt_mem_manager_lock();
    rl = m_resource_limit[id];
    mt_mem_manager_unlock();
    return true;
  }
  return false;
}

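/**
 * Debug-only (VM_TRACE) invariant check: verifies that the per-resource
 * m_curr/m_min counters add up to the global accounting kept in resource 0.
 */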
static
inline
void
check_resource_limits(Resource_limit* rl)
{
#ifdef VM_TRACE
  Uint32 curr = 0;
  Uint32 res_alloc = 0;
  Uint32 shared_alloc = 0;
  Uint32 sumres = 0;
  for (Uint32 i = 1; i<XX_RL_COUNT; i++)
  {
    curr += rl[i].m_curr;
    sumres += rl[i].m_min;
    assert(rl[i].m_max == 0 || rl[i].m_curr <= rl[i].m_max);
    if (rl[i].m_curr > rl[i].m_min)
    {
      shared_alloc += rl[i].m_curr - rl[i].m_min;
      res_alloc += rl[i].m_min;
    }
    else
    {
      res_alloc += rl[i].m_curr;
    }
  }
  assert(curr == rl[0].m_curr);
  assert(res_alloc + shared_alloc == curr);
  assert(res_alloc <= sumres);
  assert(sumres == res_alloc + rl[0].m_min);
  assert(rl[0].m_curr <= rl[0].m_max);
#endif
}

int
cmp_chunk(const void * chunk_vptr_1, const void * chunk_vptr_2)
{
  InitChunk * ptr1 = (InitChunk*)chunk_vptr_1;
  InitChunk * ptr2 = (InitChunk*)chunk_vptr_2;
  if (ptr1->m_ptr < ptr2->m_ptr)
    return -1;
  if (ptr1->m_ptr > ptr2->m_ptr)
    return 1;
  assert(false);
  return 0;
}

bool
Ndbd_mem_manager::init(Uint32 *watchCounter, bool alloc_less_memory)
{
  assert(m_base_page == 0);

  if (watchCounter)
    *watchCounter = 9;

  Uint32 pages = 0;
  Uint32 max_page = 0;
  Uint32 reserved = m_resource_limit[0].m_min;
  if (m_resource_limit[0].m_max)
  {
    pages = m_resource_limit[0].m_max;
  }
  else
  {
    pages = m_resource_limit[0].m_min; // reserved
  }

  if (m_resource_limit[0].m_min == 0)
  {
    m_resource_limit[0].m_min = pages;
  }

  const Uint64 pg = Uint64(sizeof(Alloc_page));
  g_eventLogger->info("Ndbd_mem_manager::init(%d) min: %lluMb initial: %lluMb",
                      alloc_less_memory,
                      (pg*m_resource_limit[0].m_min)>>20,
                      (pg*pages) >> 20);

  if (pages == 0)
  {
    return false;
  }

#if SIZEOF_CHARP == 4
  Uint64 sum = (pg*pages);
  if (sum >= (Uint64(1) << 32))
  {
    g_eventLogger->error("Trying to allocate more than 4Gb with a 32-bit binary!!");
    return false;
  }
#endif

#ifdef NDBD_RANDOM_START_PAGE
  /**
   * In order to find bad users of page ids we add a random offset to the
   * page ids returned.  However, due to ZONE_LO that offset can't be too
   * large (since at get_page we don't know whether it's a HI/LO page).
   */
  Uint32 max_rand_start = ZONE_LO_BOUND - 1;
  if (max_rand_start > pages)
  {
    max_rand_start -= pages;
    if (max_rand_start > 0x10000)
      g_random_start_page_id = 0x10000 + (rand() % (max_rand_start - 0x10000));
    else if (max_rand_start)
      g_random_start_page_id = rand() % max_rand_start;

    assert(Uint64(pages) + Uint64(g_random_start_page_id) <= 0xFFFFFFFF);

    ndbout_c("using g_random_start_page_id: %u (%.8x)",
             g_random_start_page_id, g_random_start_page_id);
  }
#endif

  /**
   * Do malloc
   */
  Uint32 allocated = 0;
  while (m_unmapped_chunks.size() < MAX_CHUNKS && allocated < pages)
  {
    InitChunk chunk;
    memset(&chunk, 0, sizeof(chunk));

    if (do_malloc(pages - allocated, &chunk, watchCounter, m_base_page))
    {
      if (watchCounter)
        *watchCounter = 9;

      m_unmapped_chunks.push_back(chunk);
      allocated += chunk.m_cnt;
    }
    else
    {
      break;
    }
  }

  if (allocated < m_resource_limit[0].m_min)
  {
    g_eventLogger->
      error("Unable to alloc min memory from OS: min: %lldMb "
            " allocated: %lldMb",
            (Uint64)(sizeof(Alloc_page)*m_resource_limit[0].m_min) >> 20,
            (Uint64)(sizeof(Alloc_page)*allocated) >> 20);
    return false;
  }
  else if (allocated < pages)
  {
    g_eventLogger->
      warning("Unable to alloc requested memory from OS: min: %lldMb"
              " requested: %lldMb allocated: %lldMb",
              (Uint64)(sizeof(Alloc_page)*m_resource_limit[0].m_min)>>20,
              (Uint64)(sizeof(Alloc_page)*m_resource_limit[0].m_max)>>20,
              (Uint64)(sizeof(Alloc_page)*allocated)>>20);
    if (!alloc_less_memory)
      return false;
  }

  /**
   * Sort chunks...
   */
  qsort(m_unmapped_chunks.getBase(), m_unmapped_chunks.size(),
        sizeof(InitChunk), cmp_chunk);

  m_base_page = m_unmapped_chunks[0].m_ptr;

  for (Uint32 i = 0; i<m_unmapped_chunks.size(); i++)
  {
    UintPtr start = UintPtr(m_unmapped_chunks[i].m_ptr) - UintPtr(m_base_page);
    start >>= (2 + BMW_2LOG);
    assert((Uint64(start) >> 32) == 0);
    m_unmapped_chunks[i].m_start = Uint32(start);
    Uint64 last64 = start + m_unmapped_chunks[i].m_cnt;
    assert((last64 >> 32) == 0);
    Uint32 last = Uint32(last64);

    if (last > max_page)
      max_page = last;
  }

  m_resource_limit[0].m_resource_id = max_page;
  m_resource_limit[0].m_min = reserved;
  m_resource_limit[0].m_max = 0;

  return true;
}

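/**
 * Touch (and optionally mlock) the chunks obtained in init() and hand them
 * to the buddy allocator via grow().  If a zero-terminated resources[] list
 * is given, only enough pages to cover the m_min of those resources are
 * mapped; otherwise everything is mapped and, with memlock set, locked in
 * memory.
 */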
void
Ndbd_mem_manager::map(Uint32 * watchCounter, bool memlock, Uint32 resources[])
{
  Uint32 limit = ~(Uint32)0;
  Uint32 sofar = 0;

  if (resources != 0)
  {
    limit = 0;
    for (Uint32 i = 0; resources[i] ; i++)
    {
      limit += m_resource_limit[resources[i]].m_min;
    }
  }

  while (m_unmapped_chunks.size() && sofar < limit)
  {
    Uint32 remain = limit - sofar;

    unsigned idx = m_unmapped_chunks.size() - 1;
    InitChunk * chunk = &m_unmapped_chunks[idx];
    if (watchCounter)
      *watchCounter = 9;

    if (chunk->m_cnt > remain)
    {
      /**
       * Split chunk
       */
      Uint32 extra = chunk->m_cnt - remain;
      chunk->m_cnt = remain;

      InitChunk newchunk;
      newchunk.m_start = chunk->m_start + remain;
      newchunk.m_ptr = m_base_page + newchunk.m_start;
      newchunk.m_cnt = extra;
      m_unmapped_chunks.push_back(newchunk);

      // pointer could have changed after m_unmapped_chunks.push_back
      chunk = &m_unmapped_chunks[idx];
    }

    ndbd_alloc_touch_mem(chunk->m_ptr,
                         chunk->m_cnt * sizeof(Alloc_page),
                         watchCounter);

    if (memlock)
    {
      /**
       * memlock pages that I added...
       */
      if (watchCounter)
        *watchCounter = 9;

      /**
       * Don't memlock everything in one go,
       *   because then the process won't be killable
       */
      const Alloc_page * start = chunk->m_ptr;
      Uint32 cnt = chunk->m_cnt;
      while (cnt > 32768) // 1G
      {
        if (watchCounter)
          *watchCounter = 9;

        NdbMem_MemLock(start, 32768 * sizeof(Alloc_page));
        start += 32768;
        cnt -= 32768;
      }
      if (watchCounter)
        *watchCounter = 9;

      NdbMem_MemLock(start, cnt * sizeof(Alloc_page));
    }

    grow(chunk->m_start, chunk->m_cnt);
    sofar += chunk->m_cnt;

    m_unmapped_chunks.erase(idx);
  }

  mt_mem_manager_lock();
  check_resource_limits(m_resource_limit);
  mt_mem_manager_unlock();

  if (resources == 0 && memlock)
  {
    NdbMem_MemLockAll(1);
  }
}
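/**
 * Add the page range [start, start + cnt) to the allocator.  Ranges that
 * cross a bitmap region (BPP_2LOG pages) are split recursively.  The first
 * page of each new bitmap region becomes the allocation bitmap itself, the
 * last page of a region is always kept marked as empty, and whatever remains
 * is released into the buddy lists, split over ZONE_LO/ZONE_HI as needed.
 */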
void
Ndbd_mem_manager::grow(Uint32 start, Uint32 cnt)
{
  assert(cnt);
  Uint32 start_bmp = start >> BPP_2LOG;
  Uint32 last_bmp = (start + cnt - 1) >> BPP_2LOG;

#if SIZEOF_CHARP == 4
  assert(start_bmp == 0 && last_bmp == 0);
#endif

  if (start_bmp != last_bmp)
  {
    Uint32 tmp = ((start_bmp + 1) << BPP_2LOG) - start;
    grow(start, tmp);
    grow((start_bmp + 1) << BPP_2LOG, cnt - tmp);
    return;
  }

  if ((start + cnt) == ((start_bmp + 1) << BPP_2LOG))
  {
    cnt--; // last page is always marked as empty
  }

  for (Uint32 i = 0; i<m_used_bitmap_pages.size(); i++)
    if (m_used_bitmap_pages[i] == start_bmp)
      goto found;

  if (start != (start_bmp << BPP_2LOG))
  {

    ndbout_c("ndbd_malloc_impl.cpp:%d:grow(%d, %d) %d!=%d not using %uMb"
	     " - Unable to use due to bitmap pages misaligned!!",
	     __LINE__, start, cnt, start, (start_bmp << BPP_2LOG),
	     (cnt >> (20 - 15)));
    g_eventLogger->error("ndbd_malloc_impl.cpp:%d:grow(%d, %d) not using %uMb"
                         " - Unable to use due to bitmap pages misaligned!!",
                         __LINE__, start, cnt,
                         (cnt >> (20 - 15)));

    dump();
    return;
  }

#ifdef UNIT_TEST
  ndbout_c("creating bitmap page %d", start_bmp);
#endif

  {
    Alloc_page* bmp = m_base_page + start;
    memset(bmp, 0, sizeof(Alloc_page));
    cnt--;
    start++;
  }
  m_used_bitmap_pages.push_back(start_bmp);

found:
  if (cnt)
  {
    mt_mem_manager_lock();
    m_resource_limit[0].m_curr += cnt;
    m_resource_limit[0].m_max += cnt;
    if (start >= ZONE_LO_BOUND)
    {
      Uint64 mbytes = ((Uint64(cnt) * 32) + 1023) / 1024;
      ndbout_c("Adding %uMb to ZONE_HI (%u,%u)", (Uint32)mbytes, start, cnt);
      release(start, cnt);
    }
    else if (start + cnt <= ZONE_LO_BOUND)
    {
      Uint64 mbytes = ((Uint64(cnt)*32) + 1023) / 1024;
      ndbout_c("Adding %uMb to ZONE_LO (%u,%u)", (Uint32)mbytes, start, cnt);
      release(start, cnt);
    }
    else
    {
      Uint32 cnt0 = ZONE_LO_BOUND - start;
      Uint32 cnt1 = start + cnt - ZONE_LO_BOUND;
      Uint64 mbytes0 = ((Uint64(cnt0)*32) + 1023) / 1024;
      Uint64 mbytes1 = ((Uint64(cnt1)*32) + 1023) / 1024;
      ndbout_c("Adding %uMb to ZONE_LO (split %u,%u)", (Uint32)mbytes0,
               start, cnt0);
      ndbout_c("Adding %uMb to ZONE_HI (split %u,%u)", (Uint32)mbytes1,
               ZONE_LO_BOUND, cnt1);
      release(start, cnt0);
      release(ZONE_LO_BOUND, cnt1);
    }
    mt_mem_manager_unlock();
  }
}

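/**
 * Return pages to the free pool.  The pages are first marked free in the
 * bitmap (set()), then release_impl() coalesces the range with any free
 * neighbours found via check() before putting the merged block on the
 * appropriate buddy free list.
 */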
void
Ndbd_mem_manager::release(Uint32 start, Uint32 cnt)
{
  assert(m_resource_limit[0].m_curr >= cnt);
  assert(start);
  m_resource_limit[0].m_curr -= cnt;

  set(start, start+cnt-1);

  Uint32 zone = start < ZONE_LO_BOUND ? 0 : 1;
  release_impl(zone, start, cnt);
}

void
Ndbd_mem_manager::release_impl(Uint32 zone, Uint32 start, Uint32 cnt)
{
  assert(start);

  Uint32 test = check(start-1, start+cnt);
  if (start != ZONE_LO_BOUND && test & 1)
  {
    Free_page_data *fd = get_free_page_data(m_base_page + start - 1,
					    start - 1);
    Uint32 sz = fd->m_size;
    Uint32 left = start - sz;
    remove_free_list(zone, left, fd->m_list);
    cnt += sz;
    start = left;
  }

  Uint32 right = start + cnt;
  if (right != ZONE_LO_BOUND && test & 2)
  {
    Free_page_data *fd = get_free_page_data(m_base_page+right, right);
    Uint32 sz = fd->m_size;
    remove_free_list(zone, right, fd->m_list);
    cnt += sz;
  }

  insert_free_list(zone, start, cnt);
}

void
Ndbd_mem_manager::alloc(AllocZone zone,
                        Uint32* ret, Uint32 *pages, Uint32 min)
{
  if (zone == NDB_ZONE_ANY)
  {
    Uint32 save = * pages;
    alloc_impl(ZONE_HI, ret, pages, min);
    if (*pages)
      return;
    * pages = save;
  }

  alloc_impl(ZONE_LO, ret, pages, min);
}

void
Ndbd_mem_manager::alloc_impl(Uint32 zone,
                             Uint32* ret, Uint32 *pages, Uint32 min)
{
  Int32 i;
  Uint32 start;
  Uint32 cnt = * pages;
  Uint32 list = ndb_log2(cnt - 1);

  assert(cnt);
  assert(list <= 16);

  for (i = list; i < 16; i++)
  {
    if ((start = m_buddy_lists[zone][i]))
    {
/* ---------------------------------------------------------------- */
/*       PROPER AMOUNT OF PAGES WERE FOUND. NOW SPLIT THE FOUND      */
/*       AREA AND RETURN THE PART NOT NEEDED.                        */
/* ---------------------------------------------------------------- */

      Uint32 sz = remove_free_list(zone, start, i);
      Uint32 extra = sz - cnt;
      assert(sz >= cnt);
      if (extra)
      {
	insert_free_list(zone, start + cnt, extra);
	clear_and_set(start, start+cnt-1);
      }
      else
      {
	clear(start, start+cnt-1);
      }
      * ret = start;
      assert(m_resource_limit[0].m_curr + cnt <= m_resource_limit[0].m_max);
      return;
    }
  }

  /**
   * Could not find a block in the guaranteed list,
   *   search in the other lists...
   */

  Int32 min_list = ndb_log2(min - 1);
  assert((Int32)list >= min_list);
  for (i = list - 1; i >= min_list; i--)
  {
    if ((start = m_buddy_lists[zone][i]))
    {
      Uint32 sz = remove_free_list(zone, start, i);
      Uint32 extra = sz - cnt;
      if (sz > cnt)
      {
	insert_free_list(zone, start + cnt, extra);
	sz -= extra;
	clear_and_set(start, start+sz-1);
      }
      else
      {
	clear(start, start+sz-1);
      }

      * ret = start;
      * pages = sz;
      assert(m_resource_limit[0].m_curr + sz <= m_resource_limit[0].m_max);
      return;
    }
  }
  * pages = 0;
}

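/**
 * Free blocks live in m_buddy_lists[zone][list], where list is chosen by
 * ndb_log2() of the block size.  Each free block stores its Free_page_data
 * (size, list, prev/next links) in both its first and its last page, so a
 * released neighbour can find and unlink it in constant time when coalescing.
 */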
void
Ndbd_mem_manager::insert_free_list(Uint32 zone, Uint32 start, Uint32 size)
{
  Uint32 list = ndb_log2(size) - 1;
  Uint32 last = start + size - 1;

  Uint32 head = m_buddy_lists[zone][list];
  Free_page_data* fd_first = get_free_page_data(m_base_page+start,
						start);
  fd_first->m_list = list;
  fd_first->m_next = head;
  fd_first->m_prev = 0;
  fd_first->m_size = size;

  Free_page_data* fd_last = get_free_page_data(m_base_page+last, last);
  fd_last->m_list = list;
  fd_last->m_next = head;
  fd_last->m_prev = 0;
  fd_last->m_size = size;

  if (head)
  {
    Free_page_data* fd = get_free_page_data(m_base_page+head, head);
    assert(fd->m_prev == 0);
    assert(fd->m_list == list);
    fd->m_prev = start;
  }

  m_buddy_lists[zone][list] = start;
}

Uint32
Ndbd_mem_manager::remove_free_list(Uint32 zone, Uint32 start, Uint32 list)
{
  Free_page_data* fd = get_free_page_data(m_base_page+start, start);
  Uint32 size = fd->m_size;
  Uint32 next = fd->m_next;
  Uint32 prev = fd->m_prev;
  assert(fd->m_list == list);

  if (prev)
  {
    assert(m_buddy_lists[zone][list] != start);
    fd = get_free_page_data(m_base_page+prev, prev);
    assert(fd->m_next == start);
    assert(fd->m_list == list);
    fd->m_next = next;
  }
  else
  {
    assert(m_buddy_lists[zone][list] == start);
    m_buddy_lists[zone][list] = next;
  }

  if (next)
  {
    fd = get_free_page_data(m_base_page+next, next);
    assert(fd->m_list == list);
    assert(fd->m_prev == start);
    fd->m_prev = prev;
  }

  return size;
}

void
Ndbd_mem_manager::dump() const
{
  mt_mem_manager_lock();
  for (Uint32 zone = 0; zone < 2; zone ++)
  {
    for (Uint32 i = 0; i<16; i++)
    {
      printf(" list: %d - ", i);
      Uint32 head = m_buddy_lists[zone][i];
      while(head)
      {
        Free_page_data* fd = get_free_page_data(m_base_page+head, head);
        printf("[ i: %d prev %d next %d list %d size %d ] ",
               head, fd->m_prev, fd->m_next, fd->m_list, fd->m_size);
        head = fd->m_next;
      }
      printf("EOL\n");
    }

    for (Uint32 i = 0; i<XX_RL_COUNT; i++)
    {
      printf("ri: %d min: %d curr: %d max: %d\n",
             i,
             m_resource_limit[i].m_min,
             m_resource_limit[i].m_curr,
             m_resource_limit[i].m_max);
    }
  }
  mt_mem_manager_unlock();
}

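/**
 * Allocate a single page on behalf of resource `type`.  The page comes from
 * the resource's own reservation while m_curr < m_min; beyond that it is
 * taken from the shared pool, but only if the resource is still below its
 * m_max (0 = unlimited) and unreserved pages remain.  Returns the mapped
 * address, or 0 when the request cannot be satisfied.
 */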
void*
Ndbd_mem_manager::alloc_page(Uint32 type, Uint32* i, AllocZone zone)
{
  Uint32 idx = type & RG_MASK;
  assert(idx && idx < XX_RL_COUNT);
  mt_mem_manager_lock();
  Resource_limit tot = m_resource_limit[0];
  Resource_limit rl = m_resource_limit[idx];

  Uint32 cnt = 1;
  Uint32 res0 = (rl.m_curr < rl.m_min) ? 1 : 0;
  Uint32 limit = (rl.m_max == 0 || rl.m_curr < rl.m_max) ? 0 : 1; // Over limit
  Uint32 free = (tot.m_min + tot.m_curr < tot.m_max) ? 1 : 0; // Has free

  assert(tot.m_min >= res0);

  if (likely(res0 == 1 || (limit == 0 && free == 1)))
  {
    alloc(zone, i, &cnt, 1);
    if (likely(cnt))
    {
      m_resource_limit[0].m_curr = tot.m_curr + cnt;
      m_resource_limit[0].m_min = tot.m_min - res0;
      m_resource_limit[idx].m_curr = rl.m_curr + cnt;

      check_resource_limits(m_resource_limit);
      mt_mem_manager_unlock();
#ifdef NDBD_RANDOM_START_PAGE
      *i += g_random_start_page_id;
      return m_base_page + *i - g_random_start_page_id;
#else
      return m_base_page + *i;
#endif
    }
  }
  mt_mem_manager_unlock();
  return 0;
}

void
Ndbd_mem_manager::release_page(Uint32 type, Uint32 i)
{
  Uint32 idx = type & RG_MASK;
  assert(idx && idx < XX_RL_COUNT);
  mt_mem_manager_lock();
  Resource_limit tot = m_resource_limit[0];
  Resource_limit rl = m_resource_limit[idx];

#ifdef NDBD_RANDOM_START_PAGE
  i -= g_random_start_page_id;
#endif

  Uint32 sub = (rl.m_curr <= rl.m_min) ? 1 : 0; // Over min ?
  release(i, 1);
  m_resource_limit[0].m_curr = tot.m_curr - 1;
  m_resource_limit[0].m_min = tot.m_min + sub;
  m_resource_limit[idx].m_curr = rl.m_curr - 1;

  check_resource_limits(m_resource_limit);
  mt_mem_manager_unlock();
}

void
Ndbd_mem_manager::alloc_pages(Uint32 type, Uint32* i, Uint32 *cnt, Uint32 min)
{
  Uint32 idx = type & RG_MASK;
  assert(idx && idx < XX_RL_COUNT);
  mt_mem_manager_lock();
  Resource_limit tot = m_resource_limit[0];
  Resource_limit rl = m_resource_limit[idx];

  Uint32 req = *cnt;

  Uint32 max = rl.m_max - rl.m_curr;
  Uint32 res0 = rl.m_min - rl.m_curr;
  Uint32 free_shared = tot.m_max - (tot.m_min + tot.m_curr);

  Uint32 res1;
  if (rl.m_curr + req <= rl.m_min)
  {
    // all is reserved...
    res0 = req;
    res1 = 0;
  }
  else
  {
    req = rl.m_max ? max : req;
    res0 = (rl.m_curr > rl.m_min) ? 0 : res0;
    res1 = req - res0;

    if (unlikely(res1 > free_shared))
    {
      res1 = free_shared;
      req = res0 + res1;
    }
  }

  // req = pages to alloc
  // res0 = portion that comes from the reservation
  // res1 = portion that goes beyond the reservation
  assert (res0 + res1 == req);
  assert (tot.m_min >= res0);

  if (likely(req))
  {
    // Higher-order allocations can always use any zone
    alloc(NDB_ZONE_ANY, i, &req, min);
    * cnt = req;
    if (unlikely(req < res0)) // Got less than what was reserved :-(
    {
      res0 = req;
    }
    assert(tot.m_min >= res0);
    assert(tot.m_curr + req <= tot.m_max);

    m_resource_limit[0].m_curr = tot.m_curr + req;
    m_resource_limit[0].m_min = tot.m_min - res0;
    m_resource_limit[idx].m_curr = rl.m_curr + req;
    check_resource_limits(m_resource_limit);
    mt_mem_manager_unlock();
#ifdef NDBD_RANDOM_START_PAGE
    *i += g_random_start_page_id;
#endif
    return;
  }
  mt_mem_manager_unlock();
  * cnt = req;
#ifdef NDBD_RANDOM_START_PAGE
  *i += g_random_start_page_id;
#endif
  return;
}

void
Ndbd_mem_manager::release_pages(Uint32 type, Uint32 i, Uint32 cnt)
{
  Uint32 idx = type & RG_MASK;
  assert(idx && idx < XX_RL_COUNT);
  mt_mem_manager_lock();
  Resource_limit tot = m_resource_limit[0];
  Resource_limit rl = m_resource_limit[idx];

#ifdef NDBD_RANDOM_START_PAGE
  i -= g_random_start_page_id;
#endif

  release(i, cnt);

  Uint32 currnew = rl.m_curr - cnt;
  if (rl.m_curr > rl.m_min)
  {
    if (currnew < rl.m_min)
    {
      m_resource_limit[0].m_min = tot.m_min + (rl.m_min - currnew);
    }
  }
  else
  {
    m_resource_limit[0].m_min = tot.m_min + cnt;
  }
  m_resource_limit[0].m_curr = tot.m_curr - cnt;
  m_resource_limit[idx].m_curr = currnew;
  check_resource_limits(m_resource_limit);
  mt_mem_manager_unlock();
}

#ifdef UNIT_TEST

#include <Vector.hpp>
#include <NdbTick.h>

struct Chunk {
  Uint32 pageId;
  Uint32 pageCount;
};

struct Timer
{
  Uint64 sum;
  Uint32 cnt;

  Timer() { sum = cnt = 0;}

  struct timeval st;

  void start() {
    gettimeofday(&st, 0);
  }

  Uint64 calc_diff() {
    struct timeval st2;
    gettimeofday(&st2, 0);
    Uint64 diff = st2.tv_sec;
    diff -= st.tv_sec;
    diff *= 1000000;
    diff += st2.tv_usec;
    diff -= st.tv_usec;
    return diff;
  }

  void stop() {
    add(calc_diff());
  }

  void add(Uint64 diff) { sum += diff; cnt++;}

  void print(const char * title) const {
    float ps = sum;
    ps /= cnt;
    printf("%s %fus/call %lld %d\n", title, ps, sum, cnt);
  }
};

int
main(int argc, char** argv)
{
  int sz = 1*32768;
  int run_time = 30;
  if (argc > 1)
    sz = 32*atoi(argv[1]);

  if (argc > 2)
    run_time = atoi(argv[2]);

  char buf[255];
  Timer timer[4];
  printf("Starting module test of Page Manager %dMb %ds\n",
	 (sz >> 5), run_time);
  g_eventLogger->createConsoleHandler();
  g_eventLogger->setCategory("keso");
  g_eventLogger->enable(Logger::LL_ON, Logger::LL_INFO);
  g_eventLogger->enable(Logger::LL_ON, Logger::LL_CRITICAL);
  g_eventLogger->enable(Logger::LL_ON, Logger::LL_ERROR);
  g_eventLogger->enable(Logger::LL_ON, Logger::LL_WARNING);

#define DEBUG 0

  Ndbd_mem_manager mem;
  Resource_limit rl;
  rl.m_min = 0;
  rl.m_max = sz;
  rl.m_curr = 0;
  rl.m_resource_id = 0;
  mem.set_resource_limit(rl);
  rl.m_min = sz < 16384 ? sz : 16384;
  rl.m_max = 0;
  rl.m_resource_id = 1;
  mem.set_resource_limit(rl);

  mem.init(NULL);
  mem.dump();
  printf("pid: %d press enter to continue\n", getpid());
  fgets(buf, sizeof(buf), stdin);
  Vector<Chunk> chunks;
  time_t stop = time(0) + run_time;
  for(Uint32 i = 0; time(0) < stop; i++){
    //mem.dump();

    // Case
    Uint32 c = (rand() % 100);
    if (c < 50)
    {
      c = 0;
    }
    else if (c < 93)
    {
      c = 1;
    }
    else
    {
      c = 2;
    }

    Uint32 alloc = 1 + rand() % 3200;

    if(chunks.size() == 0 && c == 0)
    {
      c = 1 + rand() % 2;
    }

    if(DEBUG)
      printf("loop=%d ", i);
    switch(c){
    case 0:{ // Release
      const int ch = rand() % chunks.size();
      Chunk chunk = chunks[ch];
      chunks.erase(ch);
      timer[0].start();
      Uint64 start = NdbTick_CurrentMillisecond();
      mem.release(chunk.pageId, chunk.pageCount);
      timer[0].stop();
      if(DEBUG)
	printf(" release %d %d\n", chunk.pageId, chunk.pageCount);
    }
      break;
    case 2: { // Seize(n) - fail
      alloc += sz;
      // Fall through
    }
    case 1: { // Seize(n) (success)
      Chunk chunk;
      chunk.pageCount = alloc;
      if (DEBUG)
      {
	printf(" alloc %d -> ", alloc); fflush(stdout);
      }
      timer[0].start();
      mem.alloc(&chunk.pageId, &chunk.pageCount, 1);
      Uint64 diff = timer[0].calc_diff();

      if (DEBUG)
	printf("%d %d", chunk.pageId, chunk.pageCount);
      assert(chunk.pageCount <= alloc);
      if(chunk.pageCount != 0){
	chunks.push_back(chunk);
	if(chunk.pageCount != alloc) {
	  timer[2].add(diff);
	  if (DEBUG)
	    printf(" -  Tried to allocate %d - only allocated %d - free: %d",
		   alloc, chunk.pageCount, 0);
	}
	else
	{
	  timer[1].add(diff);
	}
      } else {
	timer[3].add(diff);
	if (DEBUG)
	  printf("  Failed to alloc %d pages with %d pages free",
		 alloc, 0);
      }
      if (DEBUG)
	printf("\n");
    }
      break;
    }
  }
  if (!DEBUG)
    while(chunks.size() > 0){
      Chunk chunk = chunks.back();
      mem.release(chunk.pageId, chunk.pageCount);
      chunks.erase(chunks.size() - 1);
    }

  const char *title[] = {
    "release   ",
    "alloc full",
    "alloc part",
    "alloc fail"
  };
  for(Uint32 i = 0; i<4; i++)
    timer[i].print(title[i]);

  mem.dump();
}

template class Vector<Chunk>;

#endif

template class Vector<InitChunk>;