1 /*
2    Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include <ndb_global.h>
26 #include "tuppage.hpp"
27 
28 #define JAM_FILE_ID 427
29 
30 
31 /**
32  * Fix pages maintain a double linked list of free entries
33  *
34  * Var pages has a directory where each entry is
35  * [ C(1), F(1), L(15), P(15) ]
36  *   C is chain bit, (is it a full tuple or just chain)
37  *   F is free bit
38  *     If true, L is prev free entry (in directory)
39  *              P is next free entry (in directory)
40  *     else
41  *              L is len of entry
42  *              P is pos of entry
43  */
44 
45 Uint32
alloc_record()46 Tup_fixsize_page::alloc_record()
47 {
48   assert(free_space);
49   Uint32 page_idx = next_free_index;
50   assert(page_idx + 1 < DATA_WORDS);
51 
52 #ifdef VM_TRACE
53   Uint32 prev = m_data[page_idx] >> 16;
54 #endif
55   Uint32 next = m_data[page_idx] & 0xFFFF;
56 
57   assert(prev == 0xFFFF);
58   assert(m_data[page_idx + 1] == FREE_RECORD);
59 
60   m_data[page_idx + 1] = 0;
61   if (next != 0xFFFF)
62   {
63     assert(free_space > 1);
64     Uint32 nextP = m_data[next];
65     assert((nextP >> 16) == page_idx);
66     m_data[next] = 0xFFFF0000 | (nextP & 0xFFFF);
67   }
68   else
69   {
70     assert(free_space == 1);
71   }
72 
73   next_free_index = next;
74   free_space--;
75   return page_idx;
76 }
77 
78 Uint32
alloc_record(Uint32 page_idx)79 Tup_fixsize_page::alloc_record(Uint32 page_idx)
80 {
81   assert(page_idx + 1 < DATA_WORDS);
82   if (likely(free_space && m_data[page_idx + 1] == FREE_RECORD))
83   {
84     Uint32 prev = m_data[page_idx] >> 16;
85     Uint32 next = m_data[page_idx] & 0xFFFF;
86 
87     assert(prev != 0xFFFF || (next_free_index == page_idx));
88     if (prev == 0xFFFF)
89     {
90       next_free_index = next;
91     }
92     else
93     {
94       Uint32 prevP = m_data[prev];
95       m_data[prev] = (prevP & 0xFFFF0000) | next;
96     }
97 
98     if (next != 0xFFFF)
99     {
100       Uint32 nextP = m_data[next];
101       m_data[next] = (prev << 16) | (nextP & 0xFFFF);
102     }
103     free_space --;
104     m_data[page_idx + 1] = 0;
105     return page_idx;
106   }
107   return ~0;
108 }
109 
110 Uint32
free_record(Uint32 page_idx)111 Tup_fixsize_page::free_record(Uint32 page_idx)
112 {
113   Uint32 next = next_free_index;
114 
115   assert(page_idx + 1 < DATA_WORDS);
116   assert(m_data[page_idx + 1] != FREE_RECORD);
117 
118   if (next == 0xFFFF)
119   {
120     assert(free_space == 0);
121   }
122   else
123   {
124     assert(free_space);
125     assert(next + 1 < DATA_WORDS);
126     Uint32 nextP = m_data[next];
127     assert((nextP >> 16) == 0xFFFF);
128     m_data[next] = (page_idx << 16) | (nextP & 0xFFFF);
129     assert(m_data[next + 1] == FREE_RECORD);
130   }
131 
132   next_free_index = page_idx;
133   m_data[page_idx] = 0xFFFF0000 | next;
134   m_data[page_idx + 1] = FREE_RECORD;
135 
136   return ++free_space;
137 }
138 
139 void
init()140 Tup_varsize_page::init()
141 {
142   free_space= DATA_WORDS - 1;
143   high_index= 1;
144   insert_pos= 0;
145   next_free_index= END_OF_FREE_LIST;
146   m_page_header.m_page_type = File_formats::PT_Tup_varsize_page;
147 }
148 
149 Uint32
alloc_record(Uint32 page_idx,Uint32 alloc_size,Tup_varsize_page * temp)150 Tup_varsize_page::alloc_record(Uint32 page_idx, Uint32 alloc_size,
151 			       Tup_varsize_page* temp)
152 {
153   assert(page_idx); // 0 is not allowed
154   Uint32 free = free_space;
155   Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
156   Uint32 free_list = next_free_index;
157 
158   if (page_idx < high_index)
159   {
160     Uint32 *ptr = get_index_ptr(page_idx);
161     Uint32 word = *ptr;
162 
163     if (unlikely((free < alloc_size) || ! (word & FREE)))
164     {
165       return ~0;
166     }
167 
168     if (alloc_size >= largest_size)
169     {
170       /*
171 	We can't fit this segment between the insert position and the end of
172 	the index entries. We will pack the page so that all free space
173 	exists between the insert position and the end of the index entries.
174       */
175       reorg(temp);
176     }
177 
178     Uint32 next = (word & NEXT_MASK) >> NEXT_SHIFT;
179     Uint32 prev = (word & PREV_MASK) >> PREV_SHIFT;
180 
181     if (next != END_OF_FREE_LIST)
182     {
183       Uint32 * next_ptr = get_index_ptr(next);
184       Uint32 next_word = * next_ptr;
185       * next_ptr = (next_word & ~PREV_MASK) | (prev << PREV_SHIFT);
186     }
187 
188     if (prev != END_OF_FREE_LIST)
189     {
190       Uint32 * prev_ptr = get_index_ptr(prev);
191       Uint32 prev_word = * prev_ptr;
192       * prev_ptr = (prev_word & ~NEXT_MASK) | (next << NEXT_SHIFT);
193     }
194     else
195     {
196       assert(next_free_index == page_idx);
197       next_free_index = next;
198     }
199 
200     * ptr = insert_pos + (alloc_size << LEN_SHIFT);
201     free -= alloc_size;
202   }
203   else
204   {
205     /**
206      * We need to expand directory
207      */
208     Uint32 hi = high_index;
209     Uint32 expand = (page_idx + 1 - hi);
210     Uint32 size = alloc_size + expand;
211     if (unlikely(size > free))
212     {
213       return ~0;
214     }
215 
216     if (size >= largest_size)
217     {
218       /*
219 	We can't fit this segment between the insert position and the end of
220 	the index entries. We will pack the page so that all free space
221 	exists between the insert position and the end of the index entries.
222       */
223       reorg(temp);
224     }
225 
226     Uint32 *ptr = m_data + DATA_WORDS - hi;
227     if (page_idx == hi)
228     {
229       * ptr = insert_pos + (alloc_size << LEN_SHIFT);
230     }
231     else
232     {
233       if (free_list != END_OF_FREE_LIST)
234       {
235 	Uint32 * prev_ptr = get_index_ptr(free_list);
236 	Uint32 prev_word = * prev_ptr;
237 	* prev_ptr = (prev_word & ~PREV_MASK) | (hi << PREV_SHIFT);
238       }
239 
240       for (; hi < page_idx;)
241       {
242 	* ptr-- = FREE | (free_list << NEXT_SHIFT) | ((hi+1) << PREV_SHIFT);
243 	free_list = hi++;
244       }
245 
246       * ptr++ = insert_pos + (alloc_size << LEN_SHIFT);
247       * ptr = ((* ptr) & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
248 
249       next_free_index = hi - 1;
250     }
251     high_index = hi + 1;
252     free -= size;
253   }
254 
255   free_space = free;
256   insert_pos += alloc_size;
257 
258   return page_idx;
259 }
260 
261 Uint32
alloc_record(Uint32 alloc_size,Tup_varsize_page * temp,Uint32 chain)262 Tup_varsize_page::alloc_record(Uint32 alloc_size,
263 			       Tup_varsize_page* temp, Uint32 chain)
264 {
265   assert(free_space >= alloc_size);
266   Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
267   if (alloc_size >= largest_size) {
268     /*
269       We can't fit this segment between the insert position and the end of
270       the index entries. We will pack the page so that all free space
271       exists between the insert position and the end of the index entries.
272     */
273     reorg(temp);
274     largest_size= DATA_WORDS - (insert_pos + high_index);
275   }
276   assert(largest_size > alloc_size);
277 
278   Uint32 page_idx;
279   if (next_free_index == END_OF_FREE_LIST) {
280     /*
281       We are out of free index slots. We will extend the array of free
282       slots
283     */
284     page_idx= high_index++;
285     free_space--;
286   } else {
287     // Pick an empty slot among the index entries
288     page_idx= next_free_index;
289     assert((get_index_word(page_idx) & FREE) == FREE);
290     assert(((get_index_word(page_idx) & PREV_MASK) >> PREV_SHIFT) ==
291 	   END_OF_FREE_LIST);
292     next_free_index= (get_index_word(page_idx) & NEXT_MASK) >> NEXT_SHIFT;
293     assert(next_free_index);
294     if (next_free_index != END_OF_FREE_LIST)
295     {
296       Uint32 *ptr = get_index_ptr(next_free_index);
297       Uint32 word = *ptr;
298       * ptr = (word & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
299     }
300   }
301 
302   assert(chain == 0 || chain == CHAIN);
303   * get_index_ptr(page_idx) = insert_pos + chain + (alloc_size << LEN_SHIFT);
304 
305   insert_pos += alloc_size;
306   free_space -= alloc_size;
307   //ndbout_c("%p->alloc_record(%d%s) -> %d", this,alloc_size, (chain ? " CHAIN" : ""),page_idx);
308   return page_idx;
309 }
310 
311 Uint32
free_record(Uint32 page_idx,Uint32 chain)312 Tup_varsize_page::free_record(Uint32 page_idx, Uint32 chain)
313 {
314   //ndbout_c("%p->free_record(%d%s)", this, page_idx, (chain ? " CHAIN": ""));
315   Uint32 *index_ptr= get_index_ptr(page_idx);
316   Uint32 index_word= * index_ptr;
317   Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
318   Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
319   assert(chain == 0 || chain == CHAIN);
320   assert((index_word & CHAIN) == chain);
321 #ifdef VM_TRACE
322   memset(m_data + entry_pos, 0xF2, 4*entry_len);
323 #endif
324   if (page_idx + 1 == high_index) {
325     /*
326       We are removing the last in the entry list. We could potentially
327       have several free entries also before this. To take that into account
328       we will rebuild the free list and thus compress it and update the
329       free space accordingly.
330     */
331     rebuild_index(index_ptr);
332   } else {
333     if (next_free_index != END_OF_FREE_LIST)
334     {
335       Uint32 *ptr = get_index_ptr(next_free_index);
336       Uint32 word = *ptr;
337       assert(((word & PREV_MASK) >> PREV_SHIFT) == END_OF_FREE_LIST);
338       * ptr = (word & ~PREV_MASK) | (page_idx << PREV_SHIFT);
339     }
340     * index_ptr= FREE | next_free_index | (END_OF_FREE_LIST << PREV_SHIFT);
341     next_free_index= page_idx;
342     assert(next_free_index);
343   }
344 
345   free_space+= entry_len;
346   // If we're the "last" entry, decrease insert_pos
347   insert_pos -= (entry_pos + entry_len == insert_pos ? entry_len : 0);
348 
349   return free_space;
350 }
351 
352 void
rebuild_index(Uint32 * index_ptr)353 Tup_varsize_page::rebuild_index(Uint32* index_ptr)
354 {
355   Uint32 empty= 1;
356   Uint32 *end= m_data + DATA_WORDS;
357 
358   /**
359    * Scan until you find first non empty index pos
360    */
361   for(index_ptr++; index_ptr < end; index_ptr++)
362     if((* index_ptr) & FREE)
363       empty++;
364     else
365       break;
366 
367   if(index_ptr == end)
368   {
369     // Totally free page
370     high_index = 1;
371     free_space += empty;
372     next_free_index = END_OF_FREE_LIST;
373     return;
374   }
375 
376   Uint32 next= END_OF_FREE_LIST;
377   Uint32 dummy;
378   Uint32 *prev_ptr = &dummy;
379   for(index_ptr++; index_ptr < end; index_ptr++)
380   {
381     if ((* index_ptr) & FREE)
382     {
383       * index_ptr= FREE | next;
384       next= Uint32(end - index_ptr);
385       * prev_ptr |= (next << PREV_SHIFT);
386       prev_ptr = index_ptr;
387     }
388   }
389 
390   * prev_ptr |= (END_OF_FREE_LIST << PREV_SHIFT);
391 
392   high_index -= empty;
393   free_space += empty;
394   next_free_index= next;
395   assert(next_free_index);
396 }
397 
398 void
reorg(Tup_varsize_page * copy_page)399 Tup_varsize_page::reorg(Tup_varsize_page* copy_page)
400 {
401   Uint32 new_insert_pos= 0;
402   Uint32 old_insert_pos= insert_pos;
403 
404   // Copy key data part of page to a temporary page.
405   memcpy(copy_page->m_data, m_data, 4*old_insert_pos);
406   assert(high_index > 0);
407   Uint32* index_ptr= get_index_ptr(high_index-1);
408   Uint32 *end_of_page= m_data + DATA_WORDS;
409   for (; index_ptr < end_of_page; index_ptr++)
410   {
411     Uint32 index_word= * index_ptr;
412     Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
413     if (!(index_word & FREE) && entry_len)
414     {
415       /*
416 	We found an index item that needs to be packed.
417 	We will update the index entry and copy the data to the page.
418       */
419       Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
420       assert(entry_pos + entry_len <= old_insert_pos);
421       assert(new_insert_pos + entry_len <= old_insert_pos);
422       * index_ptr= (new_insert_pos << POS_SHIFT) + (index_word & ~POS_MASK);
423       memcpy(m_data+new_insert_pos, copy_page->m_data+entry_pos, 4*entry_len);
424 
425       new_insert_pos += entry_len;
426     }
427   }
428   insert_pos= new_insert_pos;
429 }
430 
431 NdbOut&
operator <<(NdbOut & out,const Tup_varsize_page & page)432 operator<< (NdbOut& out, const Tup_varsize_page& page)
433 {
434   out << "[ Varpage " << &page << ": free: " << page.free_space
435       << " (" << (page.DATA_WORDS - (page.insert_pos + page.high_index + 1)) << ")"
436       << " insert_pos: " << page.insert_pos
437       << " high_index: " << page.high_index
438       << " index: " << flush;
439 
440   const Uint32 *index_ptr= page.m_data+page.DATA_WORDS-1;
441   for(Uint32 i = 1; i<page.high_index; i++, index_ptr--)
442   {
443     out << " [ " << i;
444     if(! (*index_ptr & page.FREE))
445       out << " pos: " << ((* index_ptr & page.POS_MASK) >> page.POS_SHIFT)
446 	  << " len: " << ((* index_ptr & page.LEN_MASK) >> page.LEN_SHIFT)
447 	  << ((* index_ptr & page.CHAIN) ? " CHAIN " : " ")
448 	  << "]" << flush;
449     else
450       out << " FREE ]" << flush;
451   }
452 
453   out << " free list: " << flush;
454   Uint32 next= page.next_free_index;
455   while(next != page.END_OF_FREE_LIST)
456   {
457     out << next << " " << flush;
458     next= ((* (page.m_data+page.DATA_WORDS-next)) & page.NEXT_MASK) >> page.NEXT_SHIFT;
459   }
460   out << "]";
461   return out;
462 }
463 
464 NdbOut&
operator <<(NdbOut & out,const Tup_fixsize_page & page)465 operator<< (NdbOut& out, const Tup_fixsize_page& page)
466 {
467   out << "[ Fixpage " << &page
468       << ": frag_page: " << page.frag_page_id
469       << " page_no: " << page.m_page_no
470       << " file_no: " << page.m_file_no
471       << " table: " << page.m_table_id
472       << " fragment: " << page.m_fragment_id
473       << " uncommitted_used_space: " << page.uncommitted_used_space
474       << " free: " << page.free_space;
475 
476   out << " free list: " << hex << page.next_free_index << " " << flush;
477 #if 0
478   Uint32 startTuple = page.next_free_index >> 16;
479   Uint32 cnt = 0;
480   Uint32 next= startTuple;
481   while((next & 0xFFFF) != 0xFFFF)
482   {
483     cnt++;
484     out << dec << "(" << (next & 0xFFFF) << " " << hex << next << ") " << flush;
485     assert(page.m_data[(next & 0xFFFF) + 1] == Dbtup::Tuple_header::FREE);
486     next= * (page.m_data + ( next & 0xFFFF ));
487   }
488   assert(cnt == page.free_space);
489 #endif
490   out << "]";
491   return out;
492 }
493