1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 2011-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 /*
22  * Description: Scheduler specific pre-allocators. Each scheduler
23  *              thread allocates memory in its own private chunk of
24  *              memory. Memory blocks deallocated by remote
25  *              schedulers (or other threads) are passed back to
26  *              the chunk owner via a lock-free data structure.
27  *
28  * Author: 	Rickard Green
29  */
30 
31 #ifndef ERTS_SCHED_SPEC_PRE_ALLOC_H__
32 #define ERTS_SCHED_SPEC_PRE_ALLOC_H__
33 
34 
35 #undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
36 #define ERL_THR_PROGRESS_TSD_TYPE_ONLY
37 #include "erl_thr_progress.h"
38 #undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
39 
40 #ifdef DEBUG
41 #define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P)				\
42 do {									\
43     ASSERT((void *) (C) < (void *) (P));				\
44     ASSERT((void *) (P)							\
45 	   < (void *) (((char *) (C)) + (A)->chunks_mem_size));		\
46 } while (0)
47 #else
48 #define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P)
49 #endif
50 
51 #ifdef DEBUG
52 extern Uint ERTS_WRITE_UNLIKELY(erts_no_schedulers);
53 #endif
54 
55 #define ERTS_SSPA_FORCE_THR_CHECK_PROGRESS 10
56 #define ERTS_SSPA_MAX_GET_NEW_LOCAL 5
57 
/*
 * Per-allocator-instance state. All chunks live in one contiguous
 * memory area [start, end); chunk i starts at
 * start + i * chunks_mem_size (see erts_sspa_cix2chunk()).
 */
typedef struct {
    char *start;		/* first byte of the chunk area */
    char *end;			/* one past the last byte of the chunk area */
    int chunks_mem_size;	/* size in bytes of each (aligned) chunk */
    int nthreads;		/* number of chunks == number of owner threads */

    /* Used only by thread variant: */
    erts_tsd_key_t tsd_key;	/* thread-specific-data key for a thread's chunk index */
    erts_atomic_t id_generator;	/* source of unique per-thread ids/indices */
} erts_sspa_data_t;
68 
/*
 * Link field overlaid on a free block. The same memory is read either
 * atomically (next_atmc, while the block is on the lock-free remote
 * free queue) or as a plain pointer (next_ptr, while it is on the
 * owner's local free-list).
 */
typedef union erts_sspa_blk_t_ erts_sspa_blk_t;
union erts_sspa_blk_t_ {
    erts_atomic_t next_atmc;	/* next link, atomic view (remote queue) */
    erts_sspa_blk_t *next_ptr;	/* next link, plain view (local list) */
};
74 
/*
 * Singly-linked free-list accessed only by the chunk-owning thread
 * (see erts_sspa_alloc()/erts_sspa_free()).
 */
typedef struct {
    erts_sspa_blk_t *first;	/* list head; blocks are popped here */
    erts_sspa_blk_t *last;	/* list tail; NULL when the list is empty */
    int cnt;			/* current number of blocks on the list */
    int lim;			/* low-water mark: at or below this, remote
				 * frees are reclaimed (erts_sspa_alloc()) */
} erts_sspa_local_freelist_t;
81 
/*
 * Tail end of a chunk's lock-free remote free queue; written by
 * threads other than the chunk owner (kept on its own cache line, see
 * erts_sspa_chunk_header_t).
 */
typedef struct {
    erts_sspa_blk_t marker;	/* dummy block; NOTE(review): presumably keeps
				 * the queue non-empty — confirm against the
				 * .c implementation */
    erts_atomic_t last;		/* atomic pointer to the last enqueued block */
    erts_atomic_t um_refc[2];	/* NOTE(review): looks like reference counts
				 * for "unreferenced memory", switched via
				 * um_refc_ix — verify in implementation */
    erts_atomic32_t um_refc_ix;	/* index selecting the active um_refc entry */
} erts_sspa_tail_t;
88 
/*
 * Header of one per-thread chunk: a remote-write tail, an owner-only
 * head for draining the remote queue, and the owner's local free-list.
 */
typedef struct {
    /*
     * This structure needs to be cache line aligned for best
     * performance.
     */
    union {
	/* Modified by threads returning memory to this chunk */
	erts_sspa_tail_t data;
	char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_tail_t))];
    } tail;
    /*
     * Everything below this point is *only* accessed by the
     * thread owning this chunk.
     */
    struct {
	int no_thr_progress_check;	/* allocations since last forced check;
					 * capped at
					 * ERTS_SSPA_FORCE_THR_CHECK_PROGRESS
					 * (see erts_sspa_alloc()) */
	int used_marker;		/* non-zero while tail.data.marker is
					 * on the queue — TODO confirm */
	erts_sspa_blk_t *first;		/* head of the dequeued remote-free
					 * queue — managed by
					 * erts_sspa_process_remote_frees() */
	erts_sspa_blk_t *unref_end;	/* end of the known-unreferenced part */
	struct {
	    /* State for the next thread-progress-delimited reclaim step. */
	    ErtsThrPrgrVal thr_progress;
	    int thr_progress_reached;
	    int um_refc_ix;
	    erts_sspa_blk_t *unref_end;
	} next;
    } head;
    erts_sspa_local_freelist_t local;	/* owner-only free-list */
} erts_sspa_chunk_header_t;
117 
/*
 * One chunk: a cache-line-aligned header followed by the block
 * storage. data is a C89-style variable-length trailing array
 * (declared [1]; actual size set at allocation time).
 */
typedef struct {
    union {
	erts_sspa_chunk_header_t header;
	char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(
		sizeof(erts_sspa_chunk_header_t))];
    } aligned;
    char data[1];	/* start of the pre-allocated blocks */
} erts_sspa_chunk_t;
126 
127 #ifdef DEBUG
128 ERTS_GLB_INLINE void
129 check_local_list(erts_sspa_chunk_header_t *chdr);
130 
131 #if ERTS_GLB_INLINE_INCL_FUNC_DEF
132 ERTS_GLB_INLINE void
check_local_list(erts_sspa_chunk_header_t * chdr)133 check_local_list(erts_sspa_chunk_header_t *chdr)
134 {
135     erts_sspa_blk_t *blk;
136     int n = 0;
137     for (blk = chdr->local.first; blk; blk = blk->next_ptr)
138 	n++;
139     ASSERT(n == chdr->local.cnt);
140 }
141 #endif
142 #define ERTS_SSPA_DBG_CHK_LCL(CHDR) check_local_list((CHDR))
143 #else
144 #define ERTS_SSPA_DBG_CHK_LCL(CHDR)
145 #endif
146 
/* Create a pre-allocator instance; blk_sz is the block size in bytes,
 * nthreads the number of per-thread chunks, name an identifying label.
 * NOTE(review): pa_size is presumably the number of pre-allocated
 * blocks per chunk — confirm against the .c implementation. */
erts_sspa_data_t *erts_sspa_create(size_t blk_sz,
				   int pa_size,
                                   int nthreads,
                                   const char* name);
/* Push blk onto chdr's lock-free remote free queue; called by threads
 * that do not own the chunk (see erts_sspa_free()). cinit is the
 * caller/owner chunk-index distance passed from erts_sspa_free(). */
void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
			   erts_sspa_blk_t *blk,
			   int cinit);
/* Owner-side reclaim of remotely freed blocks; returns a block for the
 * caller (old_res if it already had one). May return NULL (see use in
 * erts_sspa_alloc()). */
erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr,
						erts_sspa_blk_t *old_res);
156 
/* Inline API; definitions below under ERTS_GLB_INLINE_INCL_FUNC_DEF. */
ERTS_GLB_INLINE erts_sspa_chunk_t *erts_sspa_cix2chunk(erts_sspa_data_t *data,
						       int cix);
ERTS_GLB_INLINE int erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr);
ERTS_GLB_INLINE char *erts_sspa_alloc(erts_sspa_data_t *data, int cix);
ERTS_GLB_INLINE int erts_sspa_free(erts_sspa_data_t *data, int cix, char *blk);
162 
163 #if ERTS_GLB_INLINE_INCL_FUNC_DEF
164 
165 ERTS_GLB_INLINE erts_sspa_chunk_t *
erts_sspa_cix2chunk(erts_sspa_data_t * data,int cix)166 erts_sspa_cix2chunk(erts_sspa_data_t *data, int cix)
167 {
168     ASSERT(0 <= cix && cix < data->nthreads);
169     return (erts_sspa_chunk_t *) (data->start + cix*data->chunks_mem_size);
170 }
171 
172 ERTS_GLB_INLINE int
erts_sspa_ptr2cix(erts_sspa_data_t * data,void * ptr)173 erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr)
174 {
175     int cix;
176     size_t diff;
177     if ((char *) ptr < data->start || data->end <= (char *) ptr)
178 	return -1;
179     diff = ((char *) ptr) - data->start;
180     cix = (int) diff / data->chunks_mem_size;
181     ASSERT(0 <= cix && cix < data->nthreads);
182     return cix;
183 }
184 
/*
 * Allocate one block from the chunk with index cix (the calling
 * thread's own chunk). Pops from the owner-only local free-list; when
 * the list drops to its low-water mark (local.lim) remotely freed
 * blocks are reclaimed via erts_sspa_process_remote_frees().
 *
 * Returns a pointer to a block, or NULL when both the local list and
 * the reclaimable remote frees are exhausted (the reclaim path may
 * return NULL; the caller must handle it).
 */
ERTS_GLB_INLINE char *
erts_sspa_alloc(erts_sspa_data_t *data, int cix)
{
    erts_sspa_chunk_t *chnk;
    erts_sspa_chunk_header_t *chdr;
    erts_sspa_blk_t *res;
    /* Account this time as "alloc" in microstate accounting. */
    ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_ALLOC);

    chnk = erts_sspa_cix2chunk(data, cix);
    chdr = &chnk->aligned.header;
    res = chdr->local.first;
    ERTS_SSPA_DBG_CHK_LCL(chdr);
    if (res) {
	/* Pop the head of the local free-list. */
	ERTS_SSPA_DBG_CHK_LCL(chdr);
	chdr->local.first = res->next_ptr;
	chdr->local.cnt--;
	if (!chdr->local.first)
	    chdr->local.last = NULL;
	ERTS_SSPA_DBG_CHK_LCL(chdr);
    }
    if (chdr->local.cnt <= chdr->local.lim) {
	/* Running low (or empty: res may be NULL here) — drain the
	 * remote free queue; it hands back res or a reclaimed block. */
	res = erts_sspa_process_remote_frees(chdr, res);
        ERTS_MSACC_POP_STATE_M_X();
        return (char*) res;
    }
    else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS)
	/* Count allocations between forced thread-progress checks;
	 * saturates at ERTS_SSPA_FORCE_THR_CHECK_PROGRESS. */
	chdr->head.no_thr_progress_check++;
    /* cnt > lim implies the list was non-empty, so res is valid. */
    ASSERT(res);
    ERTS_MSACC_POP_STATE_M_X();
    return (char *) res;
}
216 
217 ERTS_GLB_INLINE int
erts_sspa_free(erts_sspa_data_t * data,int cix,char * cblk)218 erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk)
219 {
220     erts_sspa_chunk_t *chnk;
221     erts_sspa_chunk_header_t *chdr;
222     erts_sspa_blk_t *blk = (erts_sspa_blk_t *) cblk;
223     int chnk_cix = erts_sspa_ptr2cix(data, blk);
224 
225     if (chnk_cix < 0)
226 	return 0;
227 
228     chnk = erts_sspa_cix2chunk(data, chnk_cix);
229     chdr = &chnk->aligned.header;
230     if (chnk_cix != cix) {
231 	/* Remote chunk */
232 	erts_sspa_remote_free(chdr, blk, chnk_cix - cix);
233     }
234     else {
235 	/* Local chunk */
236 	ERTS_SSPA_DBG_CHK_LCL(chdr);
237 	blk->next_ptr = chdr->local.first;
238 	chdr->local.first = blk;
239 	if (!chdr->local.last)
240 	    chdr->local.last = blk;
241 	chdr->local.cnt++;
242 	ERTS_SSPA_DBG_CHK_LCL(chdr);
243     }
244 
245     return 1;
246 }
247 
248 #endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
249 
250 
251 #endif /* ERTS_SCHED_SPEC_PRE_ALLOC_H__ */
252