1 /*
2 Copyright (c) 2005, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #ifndef SUPER_POOL_HPP
26 #define SUPER_POOL_HPP
27
28 #include <ndb_global.h>
29
30 #include <pc.hpp>
31 #include <ErrorReporter.hpp>
32
33 #define JAM_FILE_ID 281
34
35
36 /*
37 * SuperPool - super pool for record pools (abstract class)
38 *
39 * Documents: SuperPool GroupPool RecordPool<T>
40 *
41 * SUPER POOL
42 *
43 * A "super pool" is a shared pool of pages of fixed size. A "record
44 * pool" is a pool of records of fixed size. One super pool instance is
45 * used by a number of record pools to allocate their memory. A special
46 * case is a "page pool" where a record is a simple page whose size
47 * divides super pool page size.
48 *
49 * A record pool allocates memory in pages. Thus each used page is
50 * associated with one record pool and one record type. The records on
51 * a page form an array starting at start of page. Thus each record has
52 * an index within the page. Any last partial record which does not fit
53 * on the page is disregarded.
54 *
55 * I-VALUE
56 *
57 * The old "i-p" principle is kept. A reference to a super pool page or
58 * record is stored as an "i-value" from which the record pointer "p" is
59 * computed. In super pool the i-value is a Uint32 with two parts:
60 *
61 * - "ip" index of page within super pool (high "pageBits")
62 * - "ir" index of record within page (low "recBits")
63 *
64 * At most 16 recBits are used, the rest are zero.
65 *
66 * The translation between "ip" and page address is described in next
67 * section. Once page address is known, the record address is found
68 * from "ir" in the obvious way.
69 *
70 * One advantage of i-value is that it can be verified. The level of
71 * verification can depend on compile options.
72 *
73 * - "v1" check i-value specifies valid page
74 * - "v2" check record type matches page type, see below
75 * - "v3" check record is in use
76 * - "v4" check unused record is unmodified
77 *
78 * Another advantage of a 32-bit i-value is that it extends the space of
79 * 32-bit addressable records on a 64-bit platform.
80 *
81 * MEMORY ROOT
82 *
83 * This super pool requires a "memory root" i.e. a memory address such
84 * that the index of a page "ip" satisfies
85 *
86 * page address = memory root + (signed)ip * page size
87 *
88 * This is possible on all platforms, provided that the memory root and
89 * all pages are either on the heap or on the stack, in order to keep
90 * the size of "ip" reasonably small.
91 *
92 * The cast (signed)ip is done as integer of pageBits bits. "ip" has
93 * same sign bit as i-value "i" so (signed)ip = (Int32)i >> recBits.
94 *
95 * RESERVED I-VALUES
96 *
97 * RNIL is 0xffffff00 (signed -256). It is used everywhere in NDB as
98 * "null pointer" i.e. as an i-value which does not point to a record.
99 * In addition the signed values -255 to -1 are reserved for use by the
100 * application.
101 *
102 * An i-value with all "ir" bits set is used as terminator in free
103 * record list. Unlike RNIL, it still has valid page bits "ip".
104 *
105 * Following restrictions avoid hitting the reserved values:
106 *
107 * - pageBits is <= 30
108 * - the maximum "ip" value 2^pageBits-1 (signed -1) is not used
109 * - the maximum "ir" value 2^recBits-1 is not used
110 *
111 * PAGE ENTRIES
112 *
113 * Each super pool page has a "page entry". It contains:
114 *
115 * - page type
116 * - i-value of first free record on page
117 * - page use count, to see if page can be freed
118 * - pointers (as i-values) to next and previous page in list
119 *
120 * Page entry cannot be stored on the page itself since this prevents
121 * aligning pages to OS block size and the use of BATs for page pools in
122 * NDB. For now the implementation provides an array of page entries
123 * with place for all potential (2^pageBits) entries.
124 *
125 * PAGE TYPE
126 *
127 * Page type is unique to the record pool using the super pool. It is
128 * assigned in record pool constructor. Page type zero means that the
129 * page is free i.e. not allocated to a record pool.
130 *
131 * Each "i-p" conversion checks ("v2") that the record belongs to same
132 * pool as the page. This check is much more common than page or record
133 * allocation. To make it cache effective, there is a separate page
134 * type array. It truncates type to one non-zero byte.
135 *
136 * GROUP POOL
137 *
138 * Each record pool belongs to a group. The group specifies minimum
139 * size or memory percentage the group must be able to allocate. The
140 * sum of the minimum sizes of group pools is normally smaller than
141 * super pool size. This provides unclaimed memory which a group can
142 * use temporarily to allocate more than its minimum.
143 *
144 * The record pools within a group compete freely for the available
145 * memory within the group.
146 *
147 * Typical exmaple is group of all metadata pools. The group allows
148 * specifying the memory to reserve for metadata, without having to
149 * specify number of tables, attributes, indexes, triggers, etc.
150 *
151 * PAGE LISTS
152 *
153 * Super pool has free page list. Each group pool uses it to allocate
154 * its own free page list. And each record pool within the group uses
155 * the group's free list to allocate its pages.
156 *
157 * A page allocated to a record pool has a use count i.e. number of used
158 * records. When use count drops to zero the page can be returned to
159 * the group. This is not necessarily done at once.
160 *
161 * The list of free records in a record pool has two levels. There are
162 * available pages (some free) and a singly linked free list within the
163 * page. A page allocated to record pool is on one of 4 lists:
164 *
165 * - free page (all free, available, could be returned to group)
166 * - busy page (some free, some used, available)
167 * - full page (none free)
168 * - current page (list of one), see below
169 *
170 * Some usage types (temporary pools) may never free records. They pay
171 * a small penalty for the extra overhead.
172 *
173 * RECORD POOL
174 *
175 * A pool of records which allocates its memory from a super pool
176 * instance via a group pool. There are 3 basic operations:
177 *
178 * - getPtr - translate i-value to pointer-to-record p
179 * - seize - allocate record
180 * - release - free record
181 *
182 * CURRENT PAGE
183 *
184 * getPtr is a fast computation which does not touch the page entry.
185 * For seize (and release) there is a small optimization.
186 *
187 * The "current page" is the page of latest seize. It is unlinked from
188 * its normal list and the free record pointer is stored under record
189 * pool instance.
190 *
191 * The page remains current until there is a seize and the page is full.
192 * Then the real page entry and its list membership are updated, and
193 * a new page is made current.
194 *
195 * This implies that each (active) record pool allocates at least one
196 * page which is never returned to the group.
197 *
198 * PAGE POLICY
199 *
200 * A group pool returns its "excess" (above minimum) free pages to the
201 * super pool immediately.
202 *
203 * Allocating a new page to a record pool is expensive due to free list
204 * setup. Therefore a record pool should not always return empty pages
205 * to the group. Policies:
206 *
207 * - "pp1" never return empty page to the group
208 * - "pp2" always return empty (non-current) page to the group
209 * - "pp3" simple hysteresis
210 *
211 * Last one "pp3" is used. It works as follows:
212 *
213 * When a page becomes free, check if number of free records exceeds
214 * some fixed fraction of all records. If it does, move all free pages
215 * to the group. Current page is ignored in the check.
216 *
217 * TODO
218 *
219 * Define abstract class SuperAlloc. Make SuperPool a concrete class
220 * with SuperAlloc instance in ctor. Replace HeapPool by HeapAlloc.
221 */
222
223 // Types forward.
224 class GroupPool;
225
226 class SuperPool {
227 public:
228 // Type of i-value, used to reference both pages and records.
229 typedef Uint32 PtrI;
230
231 // Page entry.
232 struct PageEnt {
233 PageEnt();
234 Uint16 m_pageType; // zero if not in record pool
235 Uint16 m_useCount; // used records on the page
236 PtrI m_freeRecI; // first free record on the page
237 PtrI m_nextPageI;
238 PtrI m_prevPageI;
239 };
240
241 // Doubly-linked list of page entries.
242 struct PageList {
243 PageList();
244 PageList(PtrI pageI);
245 PtrI m_headPageI;
246 PtrI m_tailPageI;
247 Uint32 m_pageCount;
248 };
249
250 // Constructor. Gives page size in bytes (must be power of 2) and
251 // number of bits to use for page index "ip" in i-value.
252 SuperPool(Uint32 pageSize, Uint32 pageBits);
253
254 // Destructor.
255 virtual ~SuperPool() = 0;
256
257 // Move all pages from second list to end of first list.
258 void movePages(PageList& pl1, PageList& pl2);
259
260 // Add page to beginning of page list.
261 void addHeadPage(PageList& pl, PtrI pageI);
262
263 // Add page to end of page list.
264 void addTailPage(PageList& pl, PtrI pageI);
265
266 // Remove any page from page list.
267 void removePage(PageList& pl, PtrI pageI);
268
269 // Translate i-value ("ri" ignored) to page entry.
270 PageEnt& getPageEnt(PtrI pageI);
271
272 // Translate i-value ("ri" ignored) to page address.
273 void* getPageP(PtrI pageI);
274
275 // Translate page address to i-value. Address must be page-aligned to
276 // memory root. Returns RNIL if "ip" range exceeded.
277 PtrI getPageI(void* pageP);
278
279 // Record pool info.
280 struct RecInfo {
281 RecInfo(GroupPool& gp, Uint32 recSize);
282 GroupPool& m_groupPool;
283 Uint32 m_recSize;
284 Uint16 m_recType;
285 Uint16 m_maxPerPage;
286 PtrI m_freeRecI; // first free record on current page
287 Uint32 m_useCount; // used records excluding current page
288 PageList m_pageList[3]; // 0-free 1-busy 2-full
289 Uint16 m_hyX; // hysteresis fraction x/y in "pp3"
290 Uint16 m_hyY;
291 };
292
293 // Translate i-value to record address.
294 void* getRecP(PtrI recI, RecInfo& ri);
295
296 // Count records on page free list.
297 Uint32 getFreeCount(RecInfo& ri, PtrI freeRecPtrI);
298
299 // Compute total number of pages in pool.
300 Uint32 getRecPageCount(RecInfo& ri);
301
302 // Compute total number of records (used or not) in pool.
303 Uint32 getRecTotCount(RecInfo& ri);
304
305 // Compute total number of used records in pool.
306 Uint32 getRecUseCount(RecInfo& ri);
307
308 // Compute record pool page list index (0,1,2).
309 Uint32 getRecPageList(RecInfo& ri, PageEnt& pe);
310
311 // Add current page.
312 void addCurrPage(RecInfo& ri, PtrI pageI);
313
314 // Remove current page.
315 void removeCurrPage(RecInfo& ri);
316
317 // Get page with some free records and make it current. Takes head of
318 // used or free list, or else gets free page from group pool.
319 bool getAvailPage(RecInfo& ri);
320
321 // Get free page from group pool and add it to record pool free list.
322 // This is an expensive subroutine of getAvailPage(RecInfo&):
323 PtrI getFreePage(RecInfo& ri);
324
325 // Get free detached (not on list) page from group pool.
326 PtrI getFreePage(GroupPool& gp);
327
328 // Get free detached page from super pool.
329 PtrI getFreePage();
330
331 // Get new free detached page from the implementation.
332 virtual PtrI getNewPage() = 0;
333
334 // Initialize free list etc. Subroutine of getFreePage(RecInfo&).
335 void initFreePage(RecInfo& ri, PtrI pageI);
336
337 // Release record which is not on current page.
338 void releaseNotCurrent(RecInfo& ri, PtrI recI);
339
340 // Free pages from record pool according to page policy.
341 void freeRecPages(RecInfo& ri);
342
343 // Free all pages in record pool.
344 void freeAllRecPages(RecInfo& ri, bool force);
345
346 // Set pool size parameters in pages. Call allocMemory() for changes
347 // (such as extra mallocs) to take effect.
348 void setInitPages(Uint32 initPages);
349 void setIncrPages(Uint32 incrPages);
350 void setMaxPages(Uint32 maxPages);
351
352 // Get number of pages reserved by all groups.
353 Uint32 getGpMinPages();
354
355 // Get number of pages reserved to a group.
356 Uint32 getMinPages(GroupPool& gp);
357
358 // Get max number of pages a group can try to allocate.
359 Uint32 getMaxPages(GroupPool& gp);
360
361 // Allocate more memory according to current parameters. Returns
362 // false if no new memory was allocated. Otherwise returns true,
363 // even if the amount allocated was less than requested.
364 virtual bool allocMemory() = 0;
365
366 // Debugging.
367 void verify(RecInfo& ri);
368 void verifyPageList(PageList& pl);
369
370 // Super pool parameters.
371 const Uint32 m_pageSize;
372 const Uint16 m_pageBits;
373 const Uint16 m_recBits;
374 const Uint32 m_recMask;
375 // Implementation must set up these 3 pointers.
376 void* m_memRoot;
377 PageEnt* m_pageEnt;
378 Uint8* m_pageType;
379 // Free page list.
380 PageList m_freeList;
381 // Free pages and sizes.
382 Uint32 m_initPages;
383 Uint32 m_incrPages;
384 Uint32 m_maxPages;
385 Uint32 m_totPages;
386 Uint32 m_typeCount;
387 // Reserved and allocated by group pools.
388 Uint32 m_groupMinPct;
389 Uint32 m_groupMinPages;
390 Uint32 m_groupTotPages;
391 };
392
393 inline SuperPool::PageEnt&
getPageEnt(PtrI pageI)394 SuperPool::getPageEnt(PtrI pageI)
395 {
396 Uint32 ip = pageI >> m_recBits;
397 return m_pageEnt[ip];
398 }
399
400 inline void*
getPageP(PtrI ptrI)401 SuperPool::getPageP(PtrI ptrI)
402 {
403 Int32 ip = (Int32)ptrI >> m_recBits;
404 return (Uint8*)m_memRoot + ip * (my_ptrdiff_t)m_pageSize;
405 }
406
407 inline void*
getRecP(PtrI ptrI,RecInfo & ri)408 SuperPool::getRecP(PtrI ptrI, RecInfo& ri)
409 {
410 Uint32 ip = ptrI >> m_recBits;
411 assert(m_pageType[ip] == (ri.m_recType & 0xFF));
412 Uint32 ir = ptrI & m_recMask;
413 return (Uint8*)getPageP(ptrI) + ir * ri.m_recSize;
414 }
415
416 /*
417 * GroupPool - subset of a super pool pages (concrete class)
418 */
419
420 class GroupPool {
421 public:
422 // Types.
423 typedef SuperPool::PageList PageList;
424
425 // Constructor.
426 GroupPool(SuperPool& sp);
427
428 // Destructor.
429 ~GroupPool();
430
431 // Set minimum pct reserved in super pool.
432 void setMinPct(Uint32 resPct);
433
434 // Set minimum pages reserved in super pool.
435 void setMinPages(Uint32 resPages);
436
437 SuperPool& m_superPool;
438 Uint32 m_minPct;
439 Uint32 m_minPages;
440 Uint32 m_totPages;
441 PageList m_freeList;
442 };
443
444 /*
445 * RecordPool - record pool using one super pool instance (template)
446 */
447
448 template <class T>
449 class RecordPool {
450 public:
451 // Constructor.
452 RecordPool(GroupPool& gp);
453
454 // Destructor.
455 ~RecordPool();
456
457 // Update pointer ptr.p according to i-value ptr.i.
458 void getPtr(Ptr<T>& ptr);
459
460 // Allocate record from the pool.
461 bool seize(Ptr<T>& ptr);
462
463 // Return record to the pool.
464 void release(Ptr<T>& ptr);
465
466 // todo variants of basic methods
467
468 // Return all pages to group pool. The force flag is required if
469 // there are any used records.
470 void freeAllRecPages(bool force);
471
472 SuperPool& m_superPool;
473 SuperPool::RecInfo m_recInfo;
474 };
475
476 template <class T>
477 inline
RecordPool(GroupPool & gp)478 RecordPool<T>::RecordPool(GroupPool& gp) :
479 m_superPool(gp.m_superPool),
480 m_recInfo(gp, sizeof(T))
481 {
482 }
483
484 template <class T>
485 inline
~RecordPool()486 RecordPool<T>::~RecordPool()
487 {
488 freeAllRecPages(true);
489 }
490
491 template <class T>
492 inline void
getPtr(Ptr<T> & ptr)493 RecordPool<T>::getPtr(Ptr<T>& ptr)
494 {
495 void* recP = m_superPool.getRecP(ptr.i, m_recInfo);
496 ptr.p = static_cast<T*>(recP);
497 }
498
499 template <class T>
500 inline bool
seize(Ptr<T> & ptr)501 RecordPool<T>::seize(Ptr<T>& ptr)
502 {
503 SuperPool& sp = m_superPool;
504 SuperPool::RecInfo& ri = m_recInfo;
505 Uint32 recMask = sp.m_recMask;
506 // get current page
507 if ((ri.m_freeRecI & recMask) != recMask ||
508 sp.getAvailPage(ri)) {
509 SuperPool::PtrI recI = ri.m_freeRecI;
510 void* recP = sp.getRecP(recI, ri);
511 ri.m_freeRecI = *(Uint32*)recP;
512 ptr.i = recI;
513 ptr.p = static_cast<T*>(recP);
514 return true;
515 }
516 ptr.i = RNIL;
517 ptr.p = 0;
518 return false;
519 }
520
521 template <class T>
522 inline void
release(Ptr<T> & ptr)523 RecordPool<T>::release(Ptr<T>& ptr)
524 {
525 SuperPool& sp = m_superPool;
526 SuperPool::RecInfo& ri = m_recInfo;
527 SuperPool::PtrI recI = ptr.i;
528 Uint32 recMask = sp.m_recMask;
529 // check if current page
530 if ((recI & ~recMask) == (ri.m_freeRecI & ~recMask)) {
531 void* recP = sp.getRecP(recI, ri);
532 *(Uint32*)recP = ri.m_freeRecI;
533 ri.m_freeRecI = recI;
534 } else {
535 sp.releaseNotCurrent(ri, recI);
536 }
537 ptr.i = RNIL;
538 ptr.p = 0;
539 }
540
541 template <class T>
542 inline void
freeAllRecPages(bool force)543 RecordPool<T>::freeAllRecPages(bool force)
544 {
545 SuperPool& sp = m_superPool;
546 sp.freeAllRecPages(m_recInfo, force);
547 }
548
549 /*
550 * HeapPool - SuperPool on heap (concrete class)
551 *
552 * A super pool based on malloc with memory root on the heap. This
553 * pool type has 2 realistic uses:
554 *
555 * - a small pool with only initial malloc and pageBits set to match
556 * - the big pool from which all heap allocations are done
557 *
558 * A smart malloc may break "ip" limit by using different VM areas for
559 * different sized requests. For this reason malloc is done in units of
560 * increment size if possible. Memory root is set to the page-aligned
561 * address from first page malloc.
562 */
563
564 class HeapPool : public SuperPool {
565 public:
566 // Describes malloc area. The areas are kept in singly linked list.
567 // There is a list head and pointers to current and last area.
568 struct Area {
569 Area();
570 Area* m_nextArea;
571 PtrI m_firstPageI;
572 Uint32 m_currPage;
573 void* m_memory; // from malloc
574 void* m_pages; // page-aligned pages
575 Uint32 m_numPages; // number of pages
576 };
577
578 // Constructor.
579 HeapPool(Uint32 pageSize, Uint32 pageBits);
580
581 // Destructor.
582 virtual ~HeapPool();
583
584 // Get new page from current area.
585 virtual PtrI getNewPage();
586
587 // Allocate fixed arrays.
588 bool allocInit();
589
590 // Allocate array of aligned pages.
591 bool allocArea(Area* ap, Uint32 tryPages);
592
593 // Allocate memory.
allocMemory()594 virtual bool allocMemory() { return allocMemoryImpl();}
595 bool allocMemoryImpl();
596
597 // List of malloc areas.
598 Area m_areaHead;
599 Area* m_currArea;
600 Area* m_lastArea;
601 };
602
603
604 #undef JAM_FILE_ID
605
606 #endif
607