1 /*
2 Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <ndb_global.h>
26 #include "tuppage.hpp"
27
28 /**
29 * Fix pages maintain a double linked list of free entries
30 *
31 * Var pages has a directory where each entry is
32 * [ C(1), F(1), L(15), P(15) ]
33 * C is chain bit, (is it a full tuple or just chain)
34 * F is free bit
35 * If true, L is prev free entry (in directory)
36 * P is next free entry (in directory)
37 * else
38 * L is len of entry
39 * P is pos of entry
40 */
41
42 Uint32
alloc_record()43 Tup_fixsize_page::alloc_record()
44 {
45 assert(free_space);
46 Uint32 page_idx = next_free_index;
47 assert(page_idx + 1 < DATA_WORDS);
48
49 Uint32 prev = m_data[page_idx] >> 16;
50 Uint32 next = m_data[page_idx] & 0xFFFF;
51
52 assert(prev == 0xFFFF);
53 assert(m_data[page_idx + 1] == FREE_RECORD);
54
55 m_data[page_idx + 1] = 0;
56 if (next != 0xFFFF)
57 {
58 assert(free_space > 1);
59 Uint32 nextP = m_data[next];
60 assert((nextP >> 16) == page_idx);
61 m_data[next] = 0xFFFF0000 | (nextP & 0xFFFF);
62 }
63 else
64 {
65 assert(free_space == 1);
66 }
67
68 next_free_index = next;
69 free_space--;
70 return page_idx;
71 }
72
73 Uint32
alloc_record(Uint32 page_idx)74 Tup_fixsize_page::alloc_record(Uint32 page_idx)
75 {
76 assert(page_idx + 1 < DATA_WORDS);
77 if (likely(free_space && m_data[page_idx + 1] == FREE_RECORD))
78 {
79 Uint32 prev = m_data[page_idx] >> 16;
80 Uint32 next = m_data[page_idx] & 0xFFFF;
81
82 assert(prev != 0xFFFF || (next_free_index == page_idx));
83 if (prev == 0xFFFF)
84 {
85 next_free_index = next;
86 }
87 else
88 {
89 Uint32 prevP = m_data[prev];
90 m_data[prev] = (prevP & 0xFFFF0000) | next;
91 }
92
93 if (next != 0xFFFF)
94 {
95 Uint32 nextP = m_data[next];
96 m_data[next] = (prev << 16) | (nextP & 0xFFFF);
97 }
98 free_space --;
99 m_data[page_idx + 1] = 0;
100 return page_idx;
101 }
102 return ~0;
103 }
104
105 Uint32
free_record(Uint32 page_idx)106 Tup_fixsize_page::free_record(Uint32 page_idx)
107 {
108 Uint32 next = next_free_index;
109
110 assert(page_idx + 1 < DATA_WORDS);
111 assert(m_data[page_idx + 1] != FREE_RECORD);
112
113 if (next == 0xFFFF)
114 {
115 assert(free_space == 0);
116 }
117 else
118 {
119 assert(free_space);
120 assert(next + 1 < DATA_WORDS);
121 Uint32 nextP = m_data[next];
122 assert((nextP >> 16) == 0xFFFF);
123 m_data[next] = (page_idx << 16) | (nextP & 0xFFFF);
124 assert(m_data[next + 1] == FREE_RECORD);
125 }
126
127 next_free_index = page_idx;
128 m_data[page_idx] = 0xFFFF0000 | next;
129 m_data[page_idx + 1] = FREE_RECORD;
130
131 return ++free_space;
132 }
133
134 void
init()135 Tup_varsize_page::init()
136 {
137 free_space= DATA_WORDS - 1;
138 high_index= 1;
139 insert_pos= 0;
140 next_free_index= END_OF_FREE_LIST;
141 m_page_header.m_page_type = File_formats::PT_Tup_varsize_page;
142 }
143
144 Uint32
alloc_record(Uint32 page_idx,Uint32 alloc_size,Tup_varsize_page * temp)145 Tup_varsize_page::alloc_record(Uint32 page_idx, Uint32 alloc_size,
146 Tup_varsize_page* temp)
147 {
148 assert(page_idx); // 0 is not allowed
149 Uint32 free = free_space;
150 Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
151 Uint32 free_list = next_free_index;
152
153 if (page_idx < high_index)
154 {
155 Uint32 *ptr = get_index_ptr(page_idx);
156 Uint32 word = *ptr;
157
158 if (unlikely((free < alloc_size) || ! (word & FREE)))
159 {
160 return ~0;
161 }
162
163 if (alloc_size >= largest_size)
164 {
165 /*
166 We can't fit this segment between the insert position and the end of
167 the index entries. We will pack the page so that all free space
168 exists between the insert position and the end of the index entries.
169 */
170 reorg(temp);
171 }
172
173 Uint32 next = (word & NEXT_MASK) >> NEXT_SHIFT;
174 Uint32 prev = (word & PREV_MASK) >> PREV_SHIFT;
175
176 if (next != END_OF_FREE_LIST)
177 {
178 Uint32 * next_ptr = get_index_ptr(next);
179 Uint32 next_word = * next_ptr;
180 * next_ptr = (next_word & ~PREV_MASK) | (prev << PREV_SHIFT);
181 }
182
183 if (prev != END_OF_FREE_LIST)
184 {
185 Uint32 * prev_ptr = get_index_ptr(prev);
186 Uint32 prev_word = * prev_ptr;
187 * prev_ptr = (prev_word & ~NEXT_MASK) | (next << NEXT_SHIFT);
188 }
189 else
190 {
191 assert(next_free_index == page_idx);
192 next_free_index = next;
193 }
194
195 * ptr = insert_pos + (alloc_size << LEN_SHIFT);
196 free -= alloc_size;
197 }
198 else
199 {
200 /**
201 * We need to expand directory
202 */
203 Uint32 hi = high_index;
204 Uint32 expand = (page_idx + 1 - hi);
205 Uint32 size = alloc_size + expand;
206 if (unlikely(size > free))
207 {
208 return ~0;
209 }
210
211 if (size >= largest_size)
212 {
213 /*
214 We can't fit this segment between the insert position and the end of
215 the index entries. We will pack the page so that all free space
216 exists between the insert position and the end of the index entries.
217 */
218 reorg(temp);
219 }
220
221 Uint32 *ptr = m_data + DATA_WORDS - hi;
222 if (page_idx == hi)
223 {
224 * ptr = insert_pos + (alloc_size << LEN_SHIFT);
225 }
226 else
227 {
228 if (free_list != END_OF_FREE_LIST)
229 {
230 Uint32 * prev_ptr = get_index_ptr(free_list);
231 Uint32 prev_word = * prev_ptr;
232 * prev_ptr = (prev_word & ~PREV_MASK) | (hi << PREV_SHIFT);
233 }
234
235 for (; hi < page_idx;)
236 {
237 * ptr-- = FREE | (free_list << NEXT_SHIFT) | ((hi+1) << PREV_SHIFT);
238 free_list = hi++;
239 }
240
241 * ptr++ = insert_pos + (alloc_size << LEN_SHIFT);
242 * ptr = ((* ptr) & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
243
244 next_free_index = hi - 1;
245 }
246 high_index = hi + 1;
247 free -= size;
248 }
249
250 free_space = free;
251 insert_pos += alloc_size;
252
253 return page_idx;
254 }
255
256 Uint32
alloc_record(Uint32 alloc_size,Tup_varsize_page * temp,Uint32 chain)257 Tup_varsize_page::alloc_record(Uint32 alloc_size,
258 Tup_varsize_page* temp, Uint32 chain)
259 {
260 assert(free_space >= alloc_size);
261 Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
262 if (alloc_size >= largest_size) {
263 /*
264 We can't fit this segment between the insert position and the end of
265 the index entries. We will pack the page so that all free space
266 exists between the insert position and the end of the index entries.
267 */
268 reorg(temp);
269 largest_size= DATA_WORDS - (insert_pos + high_index);
270 }
271 assert(largest_size > alloc_size);
272
273 Uint32 page_idx;
274 if (next_free_index == END_OF_FREE_LIST) {
275 /*
276 We are out of free index slots. We will extend the array of free
277 slots
278 */
279 page_idx= high_index++;
280 free_space--;
281 } else {
282 // Pick an empty slot among the index entries
283 page_idx= next_free_index;
284 assert((get_index_word(page_idx) & FREE) == FREE);
285 assert(((get_index_word(page_idx) & PREV_MASK) >> PREV_SHIFT) ==
286 END_OF_FREE_LIST);
287 next_free_index= (get_index_word(page_idx) & NEXT_MASK) >> NEXT_SHIFT;
288 assert(next_free_index);
289 if (next_free_index != END_OF_FREE_LIST)
290 {
291 Uint32 *ptr = get_index_ptr(next_free_index);
292 Uint32 word = *ptr;
293 * ptr = (word & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
294 }
295 }
296
297 assert(chain == 0 || chain == CHAIN);
298 * get_index_ptr(page_idx) = insert_pos + chain + (alloc_size << LEN_SHIFT);
299
300 insert_pos += alloc_size;
301 free_space -= alloc_size;
302 //ndbout_c("%p->alloc_record(%d%s) -> %d", this,alloc_size, (chain ? " CHAIN" : ""),page_idx);
303 return page_idx;
304 }
305
306 Uint32
free_record(Uint32 page_idx,Uint32 chain)307 Tup_varsize_page::free_record(Uint32 page_idx, Uint32 chain)
308 {
309 //ndbout_c("%p->free_record(%d%s)", this, page_idx, (chain ? " CHAIN": ""));
310 Uint32 *index_ptr= get_index_ptr(page_idx);
311 Uint32 index_word= * index_ptr;
312 Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
313 Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
314 assert(chain == 0 || chain == CHAIN);
315 assert((index_word & CHAIN) == chain);
316 #ifdef VM_TRACE
317 memset(m_data + entry_pos, 0xF2, 4*entry_len);
318 #endif
319 if (page_idx + 1 == high_index) {
320 /*
321 We are removing the last in the entry list. We could potentially
322 have several free entries also before this. To take that into account
323 we will rebuild the free list and thus compress it and update the
324 free space accordingly.
325 */
326 rebuild_index(index_ptr);
327 } else {
328 if (next_free_index != END_OF_FREE_LIST)
329 {
330 Uint32 *ptr = get_index_ptr(next_free_index);
331 Uint32 word = *ptr;
332 assert(((word & PREV_MASK) >> PREV_SHIFT) == END_OF_FREE_LIST);
333 * ptr = (word & ~PREV_MASK) | (page_idx << PREV_SHIFT);
334 }
335 * index_ptr= FREE | next_free_index | (END_OF_FREE_LIST << PREV_SHIFT);
336 next_free_index= page_idx;
337 assert(next_free_index);
338 }
339
340 free_space+= entry_len;
341 // If we're the "last" entry, decrease insert_pos
342 insert_pos -= (entry_pos + entry_len == insert_pos ? entry_len : 0);
343
344 return free_space;
345 }
346
347 void
rebuild_index(Uint32 * index_ptr)348 Tup_varsize_page::rebuild_index(Uint32* index_ptr)
349 {
350 Uint32 empty= 1;
351 Uint32 *end= m_data + DATA_WORDS;
352
353 /**
354 * Scan until you find first non empty index pos
355 */
356 for(index_ptr++; index_ptr < end; index_ptr++)
357 if((* index_ptr) & FREE)
358 empty++;
359 else
360 break;
361
362 if(index_ptr == end)
363 {
364 // Totally free page
365 high_index = 1;
366 free_space += empty;
367 next_free_index = END_OF_FREE_LIST;
368 return;
369 }
370
371 Uint32 next= END_OF_FREE_LIST;
372 Uint32 dummy;
373 Uint32 *prev_ptr = &dummy;
374 for(index_ptr++; index_ptr < end; index_ptr++)
375 {
376 if ((* index_ptr) & FREE)
377 {
378 * index_ptr= FREE | next;
379 next= Uint32(end - index_ptr);
380 * prev_ptr |= (next << PREV_SHIFT);
381 prev_ptr = index_ptr;
382 }
383 }
384
385 * prev_ptr |= (END_OF_FREE_LIST << PREV_SHIFT);
386
387 high_index -= empty;
388 free_space += empty;
389 next_free_index= next;
390 assert(next_free_index);
391 }
392
393 void
reorg(Tup_varsize_page * copy_page)394 Tup_varsize_page::reorg(Tup_varsize_page* copy_page)
395 {
396 Uint32 new_insert_pos= 0;
397 Uint32 old_insert_pos= insert_pos;
398
399 // Copy key data part of page to a temporary page.
400 memcpy(copy_page->m_data, m_data, 4*old_insert_pos);
401 assert(high_index > 0);
402 Uint32* index_ptr= get_index_ptr(high_index-1);
403 Uint32 *end_of_page= m_data + DATA_WORDS;
404 for (; index_ptr < end_of_page; index_ptr++)
405 {
406 Uint32 index_word= * index_ptr;
407 Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
408 if (!(index_word & FREE) && entry_len)
409 {
410 /*
411 We found an index item that needs to be packed.
412 We will update the index entry and copy the data to the page.
413 */
414 Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
415 assert(entry_pos + entry_len <= old_insert_pos);
416 assert(new_insert_pos + entry_len <= old_insert_pos);
417 * index_ptr= (new_insert_pos << POS_SHIFT) + (index_word & ~POS_MASK);
418 memcpy(m_data+new_insert_pos, copy_page->m_data+entry_pos, 4*entry_len);
419
420 new_insert_pos += entry_len;
421 }
422 }
423 insert_pos= new_insert_pos;
424 }
425
426 NdbOut&
operator <<(NdbOut & out,const Tup_varsize_page & page)427 operator<< (NdbOut& out, const Tup_varsize_page& page)
428 {
429 out << "[ Varpage " << &page << ": free: " << page.free_space
430 << " (" << (page.DATA_WORDS - (page.insert_pos + page.high_index + 1)) << ")"
431 << " insert_pos: " << page.insert_pos
432 << " high_index: " << page.high_index
433 << " index: " << flush;
434
435 const Uint32 *index_ptr= page.m_data+page.DATA_WORDS-1;
436 for(Uint32 i = 1; i<page.high_index; i++, index_ptr--)
437 {
438 out << " [ " << i;
439 if(! (*index_ptr & page.FREE))
440 out << " pos: " << ((* index_ptr & page.POS_MASK) >> page.POS_SHIFT)
441 << " len: " << ((* index_ptr & page.LEN_MASK) >> page.LEN_SHIFT)
442 << ((* index_ptr & page.CHAIN) ? " CHAIN " : " ")
443 << "]" << flush;
444 else
445 out << " FREE ]" << flush;
446 }
447
448 out << " free list: " << flush;
449 Uint32 next= page.next_free_index;
450 while(next != page.END_OF_FREE_LIST)
451 {
452 out << next << " " << flush;
453 next= ((* (page.m_data+page.DATA_WORDS-next)) & page.NEXT_MASK) >> page.NEXT_SHIFT;
454 }
455 out << "]";
456 return out;
457 }
458
459 NdbOut&
operator <<(NdbOut & out,const Tup_fixsize_page & page)460 operator<< (NdbOut& out, const Tup_fixsize_page& page)
461 {
462 out << "[ Fixpage " << &page
463 << ": frag_page: " << page.frag_page_id
464 << " page_no: " << page.m_page_no
465 << " file_no: " << page.m_file_no
466 << " table: " << page.m_table_id
467 << " fragment: " << page.m_fragment_id
468 << " uncommitted_used_space: " << page.uncommitted_used_space
469 << " free: " << page.free_space;
470
471 out << " free list: " << hex << page.next_free_index << " " << flush;
472 #if 0
473 Uint32 startTuple = page.next_free_index >> 16;
474 Uint32 cnt = 0;
475 Uint32 next= startTuple;
476 while((next & 0xFFFF) != 0xFFFF)
477 {
478 cnt++;
479 out << dec << "(" << (next & 0xFFFF) << " " << hex << next << ") " << flush;
480 assert(page.m_data[(next & 0xFFFF) + 1] == Dbtup::Tuple_header::FREE);
481 next= * (page.m_data + ( next & 0xFFFF ));
482 }
483 assert(cnt == page.free_space);
484 #endif
485 out << "]";
486 return out;
487 }
488