
/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#include <nxt_main.h>


#define NXT_MEM_ZONE_PAGE_FREE      0
/*
 * The page has never been allocated before, so it should be filled
 * with junk on its first allocation if memory debugging is enabled.
 */
#define NXT_MEM_ZONE_PAGE_FRESH     1

/* The entire page is currently used; there are no chunks inside the page. */
#define NXT_MEM_ZONE_PAGE_USED      2


typedef struct nxt_mem_zone_page_s  nxt_mem_zone_page_t;

struct nxt_mem_zone_page_s {
    /*
     * The size of the page's chunks if the value is greater than or equal
     * to 16.  Otherwise it marks the page state: NXT_MEM_ZONE_PAGE_FREE,
     * NXT_MEM_ZONE_PAGE_FRESH, or NXT_MEM_ZONE_PAGE_USED.
     */
    uint16_t               size;

    /* The number of free chunks in a chunked page. */
    uint16_t               chunks;

    union {
        /* A chunk bitmap if the number of chunks is not greater than 32. */
        uint8_t            map[4];
        /*
         * In the first page of an allocation the count is the number of
         * successive occupied pages.  In the subsequent occupied pages and
         * in all free pages the count is zero, because the number of
         * successive free pages is stored in the free block size that
         * resides at the beginning of the first free page.
         */
        uint32_t           count;
    } u;

    /* Used for the slot list of pages with free chunks. */
    nxt_mem_zone_page_t    *next;

    /*
     * Used to link all pages, including free, chunked, and occupied pages,
     * so that free pages can be coalesced.
     */
    nxt_queue_link_t       link;
};
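

/*
 * Illustrative example (not part of the original source): a descriptor with
 * size == 64 describes a chunked page split into 64-byte chunks, and
 * "chunks" counts its free chunks; a descriptor with
 * size == NXT_MEM_ZONE_PAGE_USED that starts a three-page allocation has
 * u.count == 3, while the two descriptors that follow it have u.count == 0.
 */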


typedef struct {
    uint32_t               size;
    uint32_t               chunks;
    uint32_t               start;
    uint32_t               map_size;
    nxt_mem_zone_page_t    *pages;
} nxt_mem_zone_slot_t;


typedef struct {
    NXT_RBTREE_NODE        (node);
    uint32_t               size;
} nxt_mem_zone_free_block_t;


struct nxt_mem_zone_s {
    nxt_thread_spinlock_t  lock;
    nxt_mem_zone_page_t    *pages;
    nxt_mem_zone_page_t    sentinel_page;
    nxt_rbtree_t           free_pages;

    uint32_t               page_size_shift;
    uint32_t               page_size_mask;
    uint32_t               max_chunk_size;
    uint32_t               small_bitmap_min_size;

    u_char                 *start;
    u_char                 *end;

    nxt_mem_zone_slot_t    slots[];
};


#define                                                                       \
nxt_mem_zone_page_addr(zone, page)                                            \
    (void *) (zone->start + ((page - zone->pages) << zone->page_size_shift))


#define                                                                       \
nxt_mem_zone_addr_page(zone, addr)                                            \
    &zone->pages[((u_char *) addr - zone->start) >> zone->page_size_shift]


#define                                                                       \
nxt_mem_zone_page_is_free(page)                                               \
    (page->size < NXT_MEM_ZONE_PAGE_USED)


#define                                                                       \
nxt_mem_zone_page_is_chunked(page)                                            \
    (page->size >= 16)


#define                                                                       \
nxt_mem_zone_page_bitmap(zone, slot)                                          \
    (slot->size < zone->small_bitmap_min_size)


#define                                                                       \
nxt_mem_zone_set_chunk_free(map, chunk)                                       \
    map[chunk / 8] &= ~(0x80 >> (chunk & 7))


#define                                                                       \
nxt_mem_zone_chunk_is_free(map, chunk)                                        \
    ((map[chunk / 8] & (0x80 >> (chunk & 7))) == 0)


#define                                                                       \
nxt_mem_zone_fresh_junk(p, size)                                              \
    nxt_memset((p), 0xA5, size)


#define                                                                       \
nxt_mem_zone_free_junk(p, size)                                               \
    nxt_memset((p), 0x5A, size)

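
/*
 * Illustrative example (not part of the original source), assuming a
 * 4096-byte zone page size (page_size_shift == 12): for the descriptor
 * &zone->pages[3], nxt_mem_zone_page_addr() yields zone->start + 3 * 4096,
 * and nxt_mem_zone_addr_page() maps any address inside that page back to
 * the same descriptor.  In a chunk bitmap, chunk 9 is bit 0x40 of map[1],
 * so nxt_mem_zone_set_chunk_free(map, 9) clears that bit and
 * nxt_mem_zone_chunk_is_free(map, 9) then evaluates to true.
 */
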

static uint32_t nxt_mem_zone_pages(u_char *start, size_t zone_size,
    nxt_uint_t page_size);
static void *nxt_mem_zone_slots_init(nxt_mem_zone_t *zone,
    nxt_uint_t page_size);
static void nxt_mem_zone_slot_init(nxt_mem_zone_slot_t *slot,
    nxt_uint_t page_size);
static intptr_t nxt_mem_zone_rbtree_compare(nxt_rbtree_node_t *node1,
    nxt_rbtree_node_t *node2);
static void *nxt_mem_zone_alloc_small(nxt_mem_zone_t *zone,
    nxt_mem_zone_slot_t *slot, size_t size);
static nxt_uint_t nxt_mem_zone_alloc_chunk(uint8_t *map, nxt_uint_t offset,
    nxt_uint_t size);
static void *nxt_mem_zone_alloc_large(nxt_mem_zone_t *zone, size_t alignment,
    size_t size);
static nxt_mem_zone_page_t *nxt_mem_zone_alloc_pages(nxt_mem_zone_t *zone,
    size_t alignment, uint32_t pages);
static nxt_mem_zone_free_block_t *
    nxt_mem_zone_find_free_block(nxt_mem_zone_t *zone, nxt_rbtree_node_t *node,
    uint32_t alignment, uint32_t pages);
static const char *nxt_mem_zone_free_chunk(nxt_mem_zone_t *zone,
    nxt_mem_zone_page_t *page, void *p);
static void nxt_mem_zone_free_pages(nxt_mem_zone_t *zone,
    nxt_mem_zone_page_t *page, nxt_uint_t count);


static nxt_log_moderation_t  nxt_mem_zone_log_moderation = {
    NXT_LOG_ALERT, 2, "mem_zone_alloc() failed, not enough memory",
    NXT_LOG_MODERATION
};


nxt_mem_zone_t *
nxt_mem_zone_init(u_char *start, size_t zone_size, nxt_uint_t page_size)
{
    uint32_t                   pages;
    nxt_uint_t                 n;
    nxt_mem_zone_t             *zone;
    nxt_mem_zone_page_t        *page;
    nxt_mem_zone_free_block_t  *block;

    if (nxt_slow_path((page_size & (page_size - 1)) != 0)) {
        nxt_thread_log_alert("mem zone page size must be a power of 2");
        return NULL;
    }

    pages = nxt_mem_zone_pages(start, zone_size, page_size);
    if (pages == 0) {
        return NULL;
    }

    zone = (nxt_mem_zone_t *) start;

    /* nxt_mem_zone_slots_init() returns the address just after all slots. */
    page = nxt_mem_zone_slots_init(zone, page_size);

    zone->pages = page;

    for (n = 0; n < pages; n++) {
        page[n].size = NXT_MEM_ZONE_PAGE_FRESH;
    }

    /*
     * A special sentinel page entry marked as used does not correspond
     * to a real page.  The entry simplifies the check of neighbour queue
     * nodes in nxt_mem_zone_free_pages().
     */
    zone->sentinel_page.size = NXT_MEM_ZONE_PAGE_USED;
    nxt_queue_sentinel(&zone->sentinel_page.link);
    nxt_queue_insert_after(&zone->sentinel_page.link, &page->link);

    /* The rbtree of free pages. */

    nxt_rbtree_init(&zone->free_pages, nxt_mem_zone_rbtree_compare);

    block = (nxt_mem_zone_free_block_t *) zone->start;
    block->size = pages;

    nxt_rbtree_insert(&zone->free_pages, &block->node);

    return zone;
}
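

#if 0

/*
 * Illustrative usage sketch, not part of the original source.  The zone is
 * carved out of a plain static buffer here; any mapped region, such as a
 * shared memory segment, works the same way.  The 1024-byte zone page size
 * is an arbitrary example value; it only has to be a power of 2.
 */
static void
nxt_mem_zone_usage_example(void)
{
    void            *p;
    nxt_mem_zone_t  *zone;

    static u_char   buffer[64 * 1024];

    zone = nxt_mem_zone_init(buffer, sizeof(buffer), 1024);
    if (zone == NULL) {
        return;
    }

    /* A small allocation is served from a chunked page. */
    p = nxt_mem_zone_zalloc(zone, 40);

    if (p != NULL) {
        nxt_mem_zone_free(zone, p);
    }

    /* A large aligned allocation is served from whole pages. */
    p = nxt_mem_zone_align(zone, 4096, 8192);

    if (p != NULL) {
        nxt_mem_zone_free(zone, p);
    }
}

#endif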


static uint32_t
nxt_mem_zone_pages(u_char *start, size_t zone_size, nxt_uint_t page_size)
{
    u_char          *end;
    size_t          reserved;
    nxt_uint_t      n, pages, size, chunks, last;
    nxt_mem_zone_t  *zone;

    /*
     * Count all distinct maximum chunk sizes that a zone page can be
     * split into, with a minimum step of 16 bytes.
     */
    last = page_size / 16;
    n = 0;
    size = 32;

    do {
        chunks = page_size / size;

        if (last != chunks) {
            last = chunks;
            n++;
        }

        size += 16;

    } while (chunks > 1);

    /*
     * Find the number of usable zone pages, excluding the zone bookkeeping
     * data, the slots, and the page entries.
     */
    reserved = sizeof(nxt_mem_zone_t) + (n * sizeof(nxt_mem_zone_slot_t));

    end = nxt_trunc_ptr(start + zone_size, page_size);
    zone_size = end - start;

    pages = (zone_size - reserved) / (page_size + sizeof(nxt_mem_zone_page_t));

    if (reserved > zone_size || pages == 0) {
        nxt_thread_log_alert("mem zone size is too small: %uz", zone_size);
        return 0;
    }

    reserved += pages * sizeof(nxt_mem_zone_page_t);
    nxt_memzero(start, reserved);

    zone = (nxt_mem_zone_t *) start;

    zone->start = nxt_align_ptr(start + reserved, page_size);
    zone->end = end;

    nxt_thread_log_debug("mem zone pages: %uD, unused:%z", pages,
                         end - (zone->start + pages * page_size));

    /*
     * If a chunk size is less than zone->small_bitmap_min_size bytes,
     * a page's chunk bitmap is larger than 32 bits and the bitmap is
     * placed at the start of the page.
     */
    zone->small_bitmap_min_size = page_size / 32;

    zone->page_size_mask = page_size - 1;
    zone->max_chunk_size = page_size / 2;

    n = zone->max_chunk_size;

    do {
        zone->page_size_shift++;
        n /= 2;
    } while (n != 0);

    return (uint32_t) pages;
}
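

/*
 * Illustrative numbers, not part of the original source: with a 4096-byte
 * zone page size the function above sets max_chunk_size to 2048,
 * page_size_mask to 4095, page_size_shift to 12, and small_bitmap_min_size
 * to 128, so slots with chunks smaller than 128 bytes keep their bitmap at
 * the start of the page.
 */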


static void *
nxt_mem_zone_slots_init(nxt_mem_zone_t *zone, nxt_uint_t page_size)
{
    nxt_uint_t           n, size, chunks;
    nxt_mem_zone_slot_t  *slot;

    slot = zone->slots;

    slot[0].chunks = page_size / 16;
    slot[0].size = 16;

    n = 0;
    size = 32;

    for ( ;; ) {
        chunks = page_size / size;

        if (slot[n].chunks != chunks) {

            nxt_mem_zone_slot_init(&slot[n], page_size);

            nxt_thread_log_debug(
                           "mem zone size:%uD chunks:%uD start:%uD map:%uD",
                           slot[n].size, slot[n].chunks + 1,
                           slot[n].start, slot[n].map_size);

            n++;

            if (chunks == 1) {
                return &slot[n];
            }
        }

        slot[n].chunks = chunks;
        slot[n].size = size;
        size += 16;
    }
}


static void
nxt_mem_zone_slot_init(nxt_mem_zone_slot_t *slot, nxt_uint_t page_size)
{
    /*
     * Calculate the number of bytes required to store the chunk bitmap
     * and align it to 4 bytes.
     */
    slot->map_size = nxt_align_size(((slot->chunks + 7) / 8), 4);

    /*
     * If the zone page size is not evenly divisible by the chunk size,
     * there is surplus space which can be used for the chunk bitmap.
     */
    slot->start = page_size - slot->chunks * slot->size;

    /* slot->chunks is kept one less than the actual number of chunks. */
    slot->chunks--;

    if (slot->map_size > 4) {
        /* The page's chunk bitmap is placed at the start of the page. */

        if (slot->start < slot->map_size) {
            /*
             * There is no surplus space, or the space is too small
             * for the chunk bitmap, so use the first chunks.
             */
            if (slot->size < slot->map_size) {
                /* The first chunks are occupied by the bitmap. */
                slot->chunks -= slot->map_size / slot->size;
                slot->start = nxt_align_size(slot->map_size, 16);

            } else {
                /* The first chunk is occupied by the bitmap. */
                slot->chunks--;
                slot->start = slot->size;
            }
        }
    }
}
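

/*
 * Illustrative layout, not part of the original source, assuming a
 * 4096-byte zone page size.  For the 16-byte slot, the 256 chunks need a
 * 32-byte bitmap and there is no surplus space, so the bitmap occupies the
 * first two chunks: start becomes 32 and 254 chunks remain usable
 * (slot->chunks is stored as 253).  For the 512-byte slot, the 8 chunks fit
 * the 4-byte bitmap held in the page descriptor itself, so start stays 0
 * and all 8 chunks are usable (slot->chunks is stored as 7).
 */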


/*
 * Round up to the next highest power of 2.  The algorithm is
 * described in "Bit Twiddling Hacks" by Sean Eron Anderson.
 */

nxt_inline uint32_t
nxt_next_highest_power_of_two(uint32_t n)
{
    n--;
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
    n++;

    return n;
}
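

/*
 * Illustrative trace, not part of the original source: for n == 5 the
 * decrement gives 4 (binary 100), the shifted ORs smear it into 7 (binary
 * 111), and the final increment returns 8.  An exact power of two such as
 * 8 maps to itself.
 */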


static intptr_t
nxt_mem_zone_rbtree_compare(nxt_rbtree_node_t *node1, nxt_rbtree_node_t *node2)
{
    u_char                     *start1, *end1, *start2, *end2;
    uint32_t                   n, size, size1, size2;
    nxt_mem_zone_free_block_t  *block1, *block2;

    block1 = (nxt_mem_zone_free_block_t *) node1;
    block2 = (nxt_mem_zone_free_block_t *) node2;

    size1 = block1->size;
    size2 = block2->size;

    /*
     * This subtraction does not overflow if the number of pages in a free
     * block is below 2^31-1.  This allows blocks of up to 128G even if the
     * zone page size is just 64 bytes.
     */
    n = size1 - size2;

    if (n != 0) {
        return n;
    }

    /*
     * Sort equally sized blocks by their capability to satisfy an aligned
     * allocation, with the alignment equal to the block size rounded down
     * to the previous highest power of 2.
     */

    /* Round the size down to the previous highest power of two. */
    size = nxt_next_highest_power_of_two(size1) >> 1;

    /* Align the blocks' start and end to the rounded size. */
    start1 = nxt_align_ptr(block1, size);
    end1 = nxt_trunc_ptr((u_char *) block1 + size1, size);

    start2 = nxt_align_ptr(block2, size);
    end2 = nxt_trunc_ptr((u_char *) block2 + size2, size);

    return (end1 - start1) - (end2 - start2);
}


void *
nxt_mem_zone_zalloc(nxt_mem_zone_t *zone, size_t size)
{
    void  *p;

    p = nxt_mem_zone_align(zone, 1, size);

    if (nxt_fast_path(p != NULL)) {
        nxt_memzero(p, size);
    }

    return p;
}


void *
nxt_mem_zone_align(nxt_mem_zone_t *zone, size_t alignment, size_t size)
{
    void                 *p;
    nxt_mem_zone_slot_t  *slot;

    if (nxt_slow_path(((alignment - 1) & alignment) != 0)) {
        /* The alignment must be a power of 2. */
        return NULL;
    }

    if (size <= zone->max_chunk_size && alignment <= zone->max_chunk_size) {
        /* All chunks are aligned to 16. */

        if (alignment > 16) {
            /*
             * Chunks whose size is a power of 2 are aligned to that size.
             * So the allocation size should be increased to the next
             * highest power of two.  This can waste memory, but the main
             * consumer of aligned allocations is lvlhsh, which anyway
             * allocates memory with the alignment equal to the size.
             */
            size = nxt_next_highest_power_of_two(size);
            size = nxt_max(size, alignment);
        }
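
        /*
         * Illustrative example, not part of the original source: with a
         * 4096-byte zone page size, a request for 40 bytes with 64-byte
         * alignment is rounded up to nxt_next_highest_power_of_two(40) == 64
         * and then to nxt_max(64, 64) == 64, so it is served from the
         * 64-byte slot.
         */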

        /*
         * Find a zone slot with the appropriate chunk size.
         * This operation can be performed without holding the lock.
         */
        for (slot = zone->slots; slot->size < size; slot++) { /* void */ }

        nxt_thread_log_debug("mem zone alloc: @%uz:%uz chunk:%uD",
                             alignment, size, slot->size);

        nxt_thread_spin_lock(&zone->lock);

        p = nxt_mem_zone_alloc_small(zone, slot, size);

    } else {

        nxt_thread_log_debug("mem zone alloc: @%uz:%uz", alignment, size);

        nxt_thread_spin_lock(&zone->lock);

        p = nxt_mem_zone_alloc_large(zone, alignment, size);
    }

    nxt_thread_spin_unlock(&zone->lock);

    if (nxt_fast_path(p != NULL)) {
        nxt_thread_log_debug("mem zone alloc: %p", p);

    } else {
        nxt_log_alert_moderate(&nxt_mem_zone_log_moderation, nxt_thread_log(),
                    "nxt_mem_zone_alloc(%uz, %uz) failed, not enough memory",
                    alignment, size);
    }

    return p;
}


static void *
nxt_mem_zone_alloc_small(nxt_mem_zone_t *zone, nxt_mem_zone_slot_t *slot,
    size_t size)
{
    u_char               *p;
    uint8_t              *map;
    nxt_mem_zone_page_t  *page;

    page = slot->pages;

    if (nxt_fast_path(page != NULL)) {

        p = nxt_mem_zone_page_addr(zone, page);

        if (nxt_mem_zone_page_bitmap(zone, slot)) {
            /* The page's chunk bitmap is placed at the start of the page. */
            map = p;

        } else {
            map = page->u.map;
        }

        p += nxt_mem_zone_alloc_chunk(map, slot->start, slot->size);

        page->chunks--;

        if (page->chunks == 0) {
            /*
             * Remove the full page from the zone slot's list of pages
             * with free chunks.
             */
            slot->pages = page->next;
#if (NXT_DEBUG)
            page->next = NULL;
#endif
        }

        return p;
    }

    page = nxt_mem_zone_alloc_pages(zone, 1, 1);

    if (nxt_fast_path(page != NULL)) {

        slot->pages = page;

        page->size = slot->size;
        /* slot->chunks is already one less than the actual number. */
        page->chunks = slot->chunks;
        page->u.count = 0;
        page->next = NULL;

        p = nxt_mem_zone_page_addr(zone, page);

        if (nxt_mem_zone_page_bitmap(zone, slot)) {
            /* The page's chunk bitmap is placed at the start of the page. */
            map = p;
            nxt_memzero(map, slot->map_size);

        } else {
            map = page->u.map;
        }

        /* Mark the first chunk as busy. */
        map[0] = 0x80;

        return p + slot->start;
    }

    return NULL;
}


static nxt_uint_t
nxt_mem_zone_alloc_chunk(uint8_t *map, nxt_uint_t offset, nxt_uint_t size)
{
    uint8_t     mask;
    nxt_uint_t  n;

    n = 0;

    /* The page must have at least one free chunk. */

    for ( ;; ) {
        /* The bitmap is always aligned to uint32_t. */

        if (*(uint32_t *) &map[n] != 0xFFFFFFFF) {

            do {
                if (map[n] != 0xFF) {

                    mask = 0x80;

                    do {
                        if ((map[n] & mask) == 0) {
                            /* The free chunk is found. */
                            map[n] |= mask;
                            return offset;
                        }

                        offset += size;
                        mask >>= 1;

                    } while (mask != 0);

                } else {
                    /* Fast-forward: all 8 chunks are occupied. */
                    offset += size * 8;
                }

                n++;

            } while (n % 4 != 0);

        } else {
            /* Fast-forward: all 32 chunks are occupied. */
            offset += size * 32;
            n += 4;
        }
    }
}
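

/*
 * Illustrative example, not part of the original source: with offset == 32
 * and size == 16, a bitmap whose first bytes are 0xFF and 0xBF makes the
 * scan skip the first eight occupied chunks (offset becomes 160), find the
 * clear 0x40 bit in the second byte, mark it busy, and return 176, i.e.
 * the chunk that starts 176 bytes into the page.
 */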


static void *
nxt_mem_zone_alloc_large(nxt_mem_zone_t *zone, size_t alignment, size_t size)
{
    uint32_t             pages;
    nxt_mem_zone_page_t  *page;

    pages = (size + zone->page_size_mask) >> zone->page_size_shift;

    page = nxt_mem_zone_alloc_pages(zone, alignment, pages);

    if (nxt_fast_path(page != NULL)) {
        return nxt_mem_zone_page_addr(zone, page);
    }

    return NULL;
}


static nxt_mem_zone_page_t *
nxt_mem_zone_alloc_pages(nxt_mem_zone_t *zone, size_t alignment, uint32_t pages)
{
    u_char                     *p;
    size_t                     prev_size;
    uint32_t                   prev_pages, node_pages, next_pages;
    nxt_uint_t                 n;
    nxt_mem_zone_page_t        *prev_page, *page, *next_page;
    nxt_mem_zone_free_block_t  *block, *next_block;

    block = nxt_mem_zone_find_free_block(zone,
                                         nxt_rbtree_root(&zone->free_pages),
                                         alignment, pages);

    if (nxt_slow_path(block == NULL)) {
        return NULL;
    }

    node_pages = block->size;

    nxt_rbtree_delete(&zone->free_pages, &block->node);

    p = nxt_align_ptr(block, alignment);
    page = nxt_mem_zone_addr_page(zone, p);

    prev_size = p - (u_char *) block;

    if (prev_size != 0) {
        prev_pages = prev_size >>= zone->page_size_shift;
        node_pages -= prev_pages;

        block->size = prev_pages;
        nxt_rbtree_insert(&zone->free_pages, &block->node);

        prev_page = nxt_mem_zone_addr_page(zone, block);
        nxt_queue_insert_after(&prev_page->link, &page->link);
    }

    next_pages = node_pages - pages;

    if (next_pages != 0) {
        next_page = &page[pages];
        next_block = nxt_mem_zone_page_addr(zone, next_page);
        next_block->size = next_pages;

        nxt_rbtree_insert(&zone->free_pages, &next_block->node);
        nxt_queue_insert_after(&page->link, &next_page->link);
    }

    /* Go through pages after all rbtree operations to not trash CPU cache. */

    page[0].u.count = pages;

    for (n = 0; n < pages; n++) {

        if (page[n].size == NXT_MEM_ZONE_PAGE_FRESH) {
            nxt_mem_zone_fresh_junk(nxt_mem_zone_page_addr(zone, &page[n]),
                                    zone->page_size_mask + 1);
        }

        page[n].size = NXT_MEM_ZONE_PAGE_USED;
    }

    return page;
}


/*
 * Free blocks are sorted by size and, if the sizes are equal, by their
 * aligned allocation capability.  The former criterion is just a comparison
 * with the requested size and could be used for an iterative search.  The
 * latter criterion cannot be tested using only the requested size and
 * alignment, so a recursive in-order tree traversal is required to find
 * a suitable free block.  nxt_mem_zone_find_free_block() uses only the
 * recursive in-order tree traversal because the slowest part of the
 * algorithm is the CPU cache misses anyway.  Besides, the last tail
 * recursive call may be optimized by the compiler into an iterative search.
 */

static nxt_mem_zone_free_block_t *
nxt_mem_zone_find_free_block(nxt_mem_zone_t *zone, nxt_rbtree_node_t *node,
    uint32_t alignment, uint32_t pages)
{
    u_char                     *aligned, *end;
    nxt_mem_zone_free_block_t  *block, *free_block;

    if (node == nxt_rbtree_sentinel(&zone->free_pages)) {
        return NULL;
    }

    block = (nxt_mem_zone_free_block_t *) node;

    if (pages <= block->size) {

        free_block = nxt_mem_zone_find_free_block(zone, block->node.left,
                                                  alignment, pages);
        if (free_block != NULL) {
            return free_block;
        }

        aligned = nxt_align_ptr(block, alignment);

        if (pages == block->size) {
            if (aligned == (u_char *) block) {
                /* Exact match. */
                return block;
            }

        } else {  /* pages < block->size */
            aligned += pages << zone->page_size_shift;
            end = nxt_pointer_to(block, block->size << zone->page_size_shift);

            if (aligned <= end) {
                return block;
            }
        }
    }

    return nxt_mem_zone_find_free_block(zone, block->node.right,
                                        alignment, pages);
}


void
nxt_mem_zone_free(nxt_mem_zone_t *zone, void *p)
{
    nxt_uint_t           count;
    const char           *err;
    nxt_mem_zone_page_t  *page;

    nxt_thread_log_debug("mem zone free: %p", p);

    if (nxt_fast_path(zone->start <= (u_char *) p
                      && (u_char *) p < zone->end))
    {
        page = nxt_mem_zone_addr_page(zone, p);

        nxt_thread_spin_lock(&zone->lock);

        if (nxt_mem_zone_page_is_chunked(page)) {
            err = nxt_mem_zone_free_chunk(zone, page, p);

        } else if (nxt_slow_path(nxt_mem_zone_page_is_free(page))) {
            err = "page is already free";
        } else if (nxt_slow_path((uintptr_t) p & zone->page_size_mask) != 0) {
            err = "invalid pointer to chunk";

        } else {
            count = page->u.count;

            if (nxt_fast_path(count != 0)) {
                nxt_mem_zone_free_junk(p, count * (zone->page_size_mask + 1));
                nxt_mem_zone_free_pages(zone, page, count);
                err = NULL;

            } else {
                /* Not the first allocated page. */
                err = "pointer to wrong page";
            }
        }

        nxt_thread_spin_unlock(&zone->lock);

    } else {
        err = "pointer is out of zone";
    }

    if (nxt_slow_path(err != NULL)) {
        nxt_thread_log_alert("nxt_mem_zone_free(%p): %s", p, err);
    }
}


static const char *
nxt_mem_zone_free_chunk(nxt_mem_zone_t *zone, nxt_mem_zone_page_t *page,
    void *p)
{
    u_char               *map;
    uint32_t             size, offset, chunk;
    nxt_mem_zone_page_t  *pg, **ppg;
    nxt_mem_zone_slot_t  *slot;

    size = page->size;

    /* Find a zone slot with the appropriate chunk size. */
    for (slot = zone->slots; slot->size < size; slot++) { /* void */ }

    offset = (uintptr_t) p & zone->page_size_mask;
    offset -= slot->start;

    chunk = offset / size;

    if (nxt_slow_path(offset != chunk * size)) {
        return "pointer to wrong chunk";
    }

    if (nxt_mem_zone_page_bitmap(zone, slot)) {
        /* The page's chunk bitmap is placed at the start of the page. */
        map = (u_char *) ((uintptr_t) p & ~((uintptr_t) zone->page_size_mask));

    } else {
        map = page->u.map;
    }

    if (nxt_mem_zone_chunk_is_free(map, chunk)) {
        return "chunk is already free";
    }

    nxt_mem_zone_set_chunk_free(map, chunk);

    nxt_mem_zone_free_junk(p, page->size);

    if (page->chunks == 0) {
        page->chunks = 1;

        /* Add the page to the head of the slot's free-chunk page list. */
        page->next = slot->pages;
        slot->pages = page;

    } else if (page->chunks != slot->chunks) {
        page->chunks++;

    } else {

        if (map != page->u.map) {
            nxt_mem_zone_free_junk(map, slot->map_size);
        }

        /*
         * All chunks are free: remove the page from the slot's list of
         * pages with free chunks and add the page to the free pages tree.
         */
        ppg = &slot->pages;

        for (pg = slot->pages; pg != NULL; pg = pg->next) {

            if (pg == page) {
                *ppg = page->next;
                break;
            }

            ppg = &pg->next;
        }

        nxt_mem_zone_free_pages(zone, page, 1);
    }

    return NULL;
}


static void
nxt_mem_zone_free_pages(nxt_mem_zone_t *zone, nxt_mem_zone_page_t *page,
    nxt_uint_t count)
{
    nxt_mem_zone_page_t        *prev_page, *next_page;
    nxt_mem_zone_free_block_t  *block, *prev_block, *next_block;

    page->size = NXT_MEM_ZONE_PAGE_FREE;
    page->chunks = 0;
    page->u.count = 0;
    page->next = NULL;

    nxt_memzero(&page[1], (count - 1) * sizeof(nxt_mem_zone_page_t));

    next_page = nxt_queue_link_data(page->link.next, nxt_mem_zone_page_t, link);

    if (nxt_mem_zone_page_is_free(next_page)) {

        /* Coalesce with the next free pages. */

        nxt_queue_remove(&next_page->link);
        nxt_memzero(next_page, sizeof(nxt_mem_zone_page_t));

        next_block = nxt_mem_zone_page_addr(zone, next_page);
        count += next_block->size;
        nxt_rbtree_delete(&zone->free_pages, &next_block->node);
    }

    prev_page = nxt_queue_link_data(page->link.prev, nxt_mem_zone_page_t, link);

    if (nxt_mem_zone_page_is_free(prev_page)) {

        /* Coalesce with the previous free pages. */

        nxt_queue_remove(&page->link);

        prev_block = nxt_mem_zone_page_addr(zone, prev_page);
        count += prev_block->size;
        nxt_rbtree_delete(&zone->free_pages, &prev_block->node);

        prev_block->size = count;
        nxt_rbtree_insert(&zone->free_pages, &prev_block->node);

        return;
    }

    block = nxt_mem_zone_page_addr(zone, page);
    block->size = count;
    nxt_rbtree_insert(&zone->free_pages, &block->node);
}
958