1 /*
2 Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <ndb_global.h>
26 #include "tuppage.hpp"
27
28 #define JAM_FILE_ID 427
29
30
31 /**
32 * Fix pages maintain a double linked list of free entries
33 *
34 * Var pages has a directory where each entry is
35 * [ C(1), F(1), L(15), P(15) ]
36 * C is chain bit, (is it a full tuple or just chain)
37 * F is free bit
38 * If true, L is prev free entry (in directory)
39 * P is next free entry (in directory)
40 * else
41 * L is len of entry
42 * P is pos of entry
43 */
44
45 Uint32
alloc_record()46 Tup_fixsize_page::alloc_record()
47 {
48 assert(free_space);
49 Uint32 page_idx = next_free_index;
50 assert(page_idx + 1 < DATA_WORDS);
51
52 #ifdef VM_TRACE
53 Uint32 prev = m_data[page_idx] >> 16;
54 #endif
55 Uint32 next = m_data[page_idx] & 0xFFFF;
56
57 assert(prev == 0xFFFF);
58 assert(m_data[page_idx + 1] == FREE_RECORD);
59
60 m_data[page_idx + 1] = 0;
61 if (next != 0xFFFF)
62 {
63 assert(free_space > 1);
64 Uint32 nextP = m_data[next];
65 assert((nextP >> 16) == page_idx);
66 m_data[next] = 0xFFFF0000 | (nextP & 0xFFFF);
67 }
68 else
69 {
70 assert(free_space == 1);
71 }
72
73 next_free_index = next;
74 free_space--;
75 return page_idx;
76 }
77
78 Uint32
alloc_record(Uint32 page_idx)79 Tup_fixsize_page::alloc_record(Uint32 page_idx)
80 {
81 assert(page_idx + 1 < DATA_WORDS);
82 if (likely(free_space && m_data[page_idx + 1] == FREE_RECORD))
83 {
84 Uint32 prev = m_data[page_idx] >> 16;
85 Uint32 next = m_data[page_idx] & 0xFFFF;
86
87 assert(prev != 0xFFFF || (next_free_index == page_idx));
88 if (prev == 0xFFFF)
89 {
90 next_free_index = next;
91 }
92 else
93 {
94 Uint32 prevP = m_data[prev];
95 m_data[prev] = (prevP & 0xFFFF0000) | next;
96 }
97
98 if (next != 0xFFFF)
99 {
100 Uint32 nextP = m_data[next];
101 m_data[next] = (prev << 16) | (nextP & 0xFFFF);
102 }
103 free_space --;
104 m_data[page_idx + 1] = 0;
105 return page_idx;
106 }
107 return ~0;
108 }
109
110 Uint32
free_record(Uint32 page_idx)111 Tup_fixsize_page::free_record(Uint32 page_idx)
112 {
113 Uint32 next = next_free_index;
114
115 assert(page_idx + 1 < DATA_WORDS);
116 assert(m_data[page_idx + 1] != FREE_RECORD);
117
118 if (next == 0xFFFF)
119 {
120 assert(free_space == 0);
121 }
122 else
123 {
124 assert(free_space);
125 assert(next + 1 < DATA_WORDS);
126 Uint32 nextP = m_data[next];
127 assert((nextP >> 16) == 0xFFFF);
128 m_data[next] = (page_idx << 16) | (nextP & 0xFFFF);
129 assert(m_data[next + 1] == FREE_RECORD);
130 }
131
132 next_free_index = page_idx;
133 m_data[page_idx] = 0xFFFF0000 | next;
134 m_data[page_idx + 1] = FREE_RECORD;
135
136 return ++free_space;
137 }
138
139 void
init()140 Tup_varsize_page::init()
141 {
142 free_space= DATA_WORDS - 1;
143 high_index= 1;
144 insert_pos= 0;
145 next_free_index= END_OF_FREE_LIST;
146 m_page_header.m_page_type = File_formats::PT_Tup_varsize_page;
147 }
148
149 Uint32
alloc_record(Uint32 page_idx,Uint32 alloc_size,Tup_varsize_page * temp)150 Tup_varsize_page::alloc_record(Uint32 page_idx, Uint32 alloc_size,
151 Tup_varsize_page* temp)
152 {
153 assert(page_idx); // 0 is not allowed
154 Uint32 free = free_space;
155 Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
156 Uint32 free_list = next_free_index;
157
158 if (page_idx < high_index)
159 {
160 Uint32 *ptr = get_index_ptr(page_idx);
161 Uint32 word = *ptr;
162
163 if (unlikely((free < alloc_size) || ! (word & FREE)))
164 {
165 return ~0;
166 }
167
168 if (alloc_size >= largest_size)
169 {
170 /*
171 We can't fit this segment between the insert position and the end of
172 the index entries. We will pack the page so that all free space
173 exists between the insert position and the end of the index entries.
174 */
175 reorg(temp);
176 }
177
178 Uint32 next = (word & NEXT_MASK) >> NEXT_SHIFT;
179 Uint32 prev = (word & PREV_MASK) >> PREV_SHIFT;
180
181 if (next != END_OF_FREE_LIST)
182 {
183 Uint32 * next_ptr = get_index_ptr(next);
184 Uint32 next_word = * next_ptr;
185 * next_ptr = (next_word & ~PREV_MASK) | (prev << PREV_SHIFT);
186 }
187
188 if (prev != END_OF_FREE_LIST)
189 {
190 Uint32 * prev_ptr = get_index_ptr(prev);
191 Uint32 prev_word = * prev_ptr;
192 * prev_ptr = (prev_word & ~NEXT_MASK) | (next << NEXT_SHIFT);
193 }
194 else
195 {
196 assert(next_free_index == page_idx);
197 next_free_index = next;
198 }
199
200 * ptr = insert_pos + (alloc_size << LEN_SHIFT);
201 free -= alloc_size;
202 }
203 else
204 {
205 /**
206 * We need to expand directory
207 */
208 Uint32 hi = high_index;
209 Uint32 expand = (page_idx + 1 - hi);
210 Uint32 size = alloc_size + expand;
211 if (unlikely(size > free))
212 {
213 return ~0;
214 }
215
216 if (size >= largest_size)
217 {
218 /*
219 We can't fit this segment between the insert position and the end of
220 the index entries. We will pack the page so that all free space
221 exists between the insert position and the end of the index entries.
222 */
223 reorg(temp);
224 }
225
226 Uint32 *ptr = m_data + DATA_WORDS - hi;
227 if (page_idx == hi)
228 {
229 * ptr = insert_pos + (alloc_size << LEN_SHIFT);
230 }
231 else
232 {
233 if (free_list != END_OF_FREE_LIST)
234 {
235 Uint32 * prev_ptr = get_index_ptr(free_list);
236 Uint32 prev_word = * prev_ptr;
237 * prev_ptr = (prev_word & ~PREV_MASK) | (hi << PREV_SHIFT);
238 }
239
240 for (; hi < page_idx;)
241 {
242 * ptr-- = FREE | (free_list << NEXT_SHIFT) | ((hi+1) << PREV_SHIFT);
243 free_list = hi++;
244 }
245
246 * ptr++ = insert_pos + (alloc_size << LEN_SHIFT);
247 * ptr = ((* ptr) & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
248
249 next_free_index = hi - 1;
250 }
251 high_index = hi + 1;
252 free -= size;
253 }
254
255 free_space = free;
256 insert_pos += alloc_size;
257
258 return page_idx;
259 }
260
261 Uint32
alloc_record(Uint32 alloc_size,Tup_varsize_page * temp,Uint32 chain)262 Tup_varsize_page::alloc_record(Uint32 alloc_size,
263 Tup_varsize_page* temp, Uint32 chain)
264 {
265 assert(free_space >= alloc_size);
266 Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
267 if (alloc_size >= largest_size) {
268 /*
269 We can't fit this segment between the insert position and the end of
270 the index entries. We will pack the page so that all free space
271 exists between the insert position and the end of the index entries.
272 */
273 reorg(temp);
274 largest_size= DATA_WORDS - (insert_pos + high_index);
275 }
276 assert(largest_size > alloc_size);
277
278 Uint32 page_idx;
279 if (next_free_index == END_OF_FREE_LIST) {
280 /*
281 We are out of free index slots. We will extend the array of free
282 slots
283 */
284 page_idx= high_index++;
285 free_space--;
286 } else {
287 // Pick an empty slot among the index entries
288 page_idx= next_free_index;
289 assert((get_index_word(page_idx) & FREE) == FREE);
290 assert(((get_index_word(page_idx) & PREV_MASK) >> PREV_SHIFT) ==
291 END_OF_FREE_LIST);
292 next_free_index= (get_index_word(page_idx) & NEXT_MASK) >> NEXT_SHIFT;
293 assert(next_free_index);
294 if (next_free_index != END_OF_FREE_LIST)
295 {
296 Uint32 *ptr = get_index_ptr(next_free_index);
297 Uint32 word = *ptr;
298 * ptr = (word & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
299 }
300 }
301
302 assert(chain == 0 || chain == CHAIN);
303 * get_index_ptr(page_idx) = insert_pos + chain + (alloc_size << LEN_SHIFT);
304
305 insert_pos += alloc_size;
306 free_space -= alloc_size;
307 //ndbout_c("%p->alloc_record(%d%s) -> %d", this,alloc_size, (chain ? " CHAIN" : ""),page_idx);
308 return page_idx;
309 }
310
311 Uint32
free_record(Uint32 page_idx,Uint32 chain)312 Tup_varsize_page::free_record(Uint32 page_idx, Uint32 chain)
313 {
314 //ndbout_c("%p->free_record(%d%s)", this, page_idx, (chain ? " CHAIN": ""));
315 Uint32 *index_ptr= get_index_ptr(page_idx);
316 Uint32 index_word= * index_ptr;
317 Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
318 Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
319 assert(chain == 0 || chain == CHAIN);
320 assert((index_word & CHAIN) == chain);
321 #ifdef VM_TRACE
322 memset(m_data + entry_pos, 0xF2, 4*entry_len);
323 #endif
324 if (page_idx + 1 == high_index) {
325 /*
326 We are removing the last in the entry list. We could potentially
327 have several free entries also before this. To take that into account
328 we will rebuild the free list and thus compress it and update the
329 free space accordingly.
330 */
331 rebuild_index(index_ptr);
332 } else {
333 if (next_free_index != END_OF_FREE_LIST)
334 {
335 Uint32 *ptr = get_index_ptr(next_free_index);
336 Uint32 word = *ptr;
337 assert(((word & PREV_MASK) >> PREV_SHIFT) == END_OF_FREE_LIST);
338 * ptr = (word & ~PREV_MASK) | (page_idx << PREV_SHIFT);
339 }
340 * index_ptr= FREE | next_free_index | (END_OF_FREE_LIST << PREV_SHIFT);
341 next_free_index= page_idx;
342 assert(next_free_index);
343 }
344
345 free_space+= entry_len;
346 // If we're the "last" entry, decrease insert_pos
347 insert_pos -= (entry_pos + entry_len == insert_pos ? entry_len : 0);
348
349 return free_space;
350 }
351
352 void
rebuild_index(Uint32 * index_ptr)353 Tup_varsize_page::rebuild_index(Uint32* index_ptr)
354 {
355 Uint32 empty= 1;
356 Uint32 *end= m_data + DATA_WORDS;
357
358 /**
359 * Scan until you find first non empty index pos
360 */
361 for(index_ptr++; index_ptr < end; index_ptr++)
362 if((* index_ptr) & FREE)
363 empty++;
364 else
365 break;
366
367 if(index_ptr == end)
368 {
369 // Totally free page
370 high_index = 1;
371 free_space += empty;
372 next_free_index = END_OF_FREE_LIST;
373 return;
374 }
375
376 Uint32 next= END_OF_FREE_LIST;
377 Uint32 dummy;
378 Uint32 *prev_ptr = &dummy;
379 for(index_ptr++; index_ptr < end; index_ptr++)
380 {
381 if ((* index_ptr) & FREE)
382 {
383 * index_ptr= FREE | next;
384 next= Uint32(end - index_ptr);
385 * prev_ptr |= (next << PREV_SHIFT);
386 prev_ptr = index_ptr;
387 }
388 }
389
390 * prev_ptr |= (END_OF_FREE_LIST << PREV_SHIFT);
391
392 high_index -= empty;
393 free_space += empty;
394 next_free_index= next;
395 assert(next_free_index);
396 }
397
398 void
reorg(Tup_varsize_page * copy_page)399 Tup_varsize_page::reorg(Tup_varsize_page* copy_page)
400 {
401 Uint32 new_insert_pos= 0;
402 Uint32 old_insert_pos= insert_pos;
403
404 // Copy key data part of page to a temporary page.
405 memcpy(copy_page->m_data, m_data, 4*old_insert_pos);
406 assert(high_index > 0);
407 Uint32* index_ptr= get_index_ptr(high_index-1);
408 Uint32 *end_of_page= m_data + DATA_WORDS;
409 for (; index_ptr < end_of_page; index_ptr++)
410 {
411 Uint32 index_word= * index_ptr;
412 Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
413 if (!(index_word & FREE) && entry_len)
414 {
415 /*
416 We found an index item that needs to be packed.
417 We will update the index entry and copy the data to the page.
418 */
419 Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
420 assert(entry_pos + entry_len <= old_insert_pos);
421 assert(new_insert_pos + entry_len <= old_insert_pos);
422 * index_ptr= (new_insert_pos << POS_SHIFT) + (index_word & ~POS_MASK);
423 memcpy(m_data+new_insert_pos, copy_page->m_data+entry_pos, 4*entry_len);
424
425 new_insert_pos += entry_len;
426 }
427 }
428 insert_pos= new_insert_pos;
429 }
430
431 NdbOut&
operator <<(NdbOut & out,const Tup_varsize_page & page)432 operator<< (NdbOut& out, const Tup_varsize_page& page)
433 {
434 out << "[ Varpage " << &page << ": free: " << page.free_space
435 << " (" << (page.DATA_WORDS - (page.insert_pos + page.high_index + 1)) << ")"
436 << " insert_pos: " << page.insert_pos
437 << " high_index: " << page.high_index
438 << " index: " << flush;
439
440 const Uint32 *index_ptr= page.m_data+page.DATA_WORDS-1;
441 for(Uint32 i = 1; i<page.high_index; i++, index_ptr--)
442 {
443 out << " [ " << i;
444 if(! (*index_ptr & page.FREE))
445 out << " pos: " << ((* index_ptr & page.POS_MASK) >> page.POS_SHIFT)
446 << " len: " << ((* index_ptr & page.LEN_MASK) >> page.LEN_SHIFT)
447 << ((* index_ptr & page.CHAIN) ? " CHAIN " : " ")
448 << "]" << flush;
449 else
450 out << " FREE ]" << flush;
451 }
452
453 out << " free list: " << flush;
454 Uint32 next= page.next_free_index;
455 while(next != page.END_OF_FREE_LIST)
456 {
457 out << next << " " << flush;
458 next= ((* (page.m_data+page.DATA_WORDS-next)) & page.NEXT_MASK) >> page.NEXT_SHIFT;
459 }
460 out << "]";
461 return out;
462 }
463
464 NdbOut&
operator <<(NdbOut & out,const Tup_fixsize_page & page)465 operator<< (NdbOut& out, const Tup_fixsize_page& page)
466 {
467 out << "[ Fixpage " << &page
468 << ": frag_page: " << page.frag_page_id
469 << " page_no: " << page.m_page_no
470 << " file_no: " << page.m_file_no
471 << " table: " << page.m_table_id
472 << " fragment: " << page.m_fragment_id
473 << " uncommitted_used_space: " << page.uncommitted_used_space
474 << " free: " << page.free_space;
475
476 out << " free list: " << hex << page.next_free_index << " " << flush;
477 #if 0
478 Uint32 startTuple = page.next_free_index >> 16;
479 Uint32 cnt = 0;
480 Uint32 next= startTuple;
481 while((next & 0xFFFF) != 0xFFFF)
482 {
483 cnt++;
484 out << dec << "(" << (next & 0xFFFF) << " " << hex << next << ") " << flush;
485 assert(page.m_data[(next & 0xFFFF) + 1] == Dbtup::Tuple_header::FREE);
486 next= * (page.m_data + ( next & 0xFFFF ));
487 }
488 assert(cnt == page.free_space);
489 #endif
490 out << "]";
491 return out;
492 }
493