1 /*
2    Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include <ndb_global.h>
26 #include "tuppage.hpp"
27 
28 /**
29  * Fix pages maintain a double linked list of free entries
30  *
31  * Var pages has a directory where each entry is
32  * [ C(1), F(1), L(15), P(15) ]
33  *   C is chain bit, (is it a full tuple or just chain)
34  *   F is free bit
35  *     If true, L is prev free entry (in directory)
36  *              P is next free entry (in directory)
37  *     else
38  *              L is len of entry
39  *              P is pos of entry
40  */
41 
42 Uint32
alloc_record()43 Tup_fixsize_page::alloc_record()
44 {
45   assert(free_space);
46   Uint32 page_idx = next_free_index;
47   assert(page_idx + 1 < DATA_WORDS);
48 
49   Uint32 prev = m_data[page_idx] >> 16;
50   Uint32 next = m_data[page_idx] & 0xFFFF;
51 
52   assert(prev == 0xFFFF);
53   assert(m_data[page_idx + 1] == FREE_RECORD);
54 
55   m_data[page_idx + 1] = 0;
56   if (next != 0xFFFF)
57   {
58     assert(free_space > 1);
59     Uint32 nextP = m_data[next];
60     assert((nextP >> 16) == page_idx);
61     m_data[next] = 0xFFFF0000 | (nextP & 0xFFFF);
62   }
63   else
64   {
65     assert(free_space == 1);
66   }
67 
68   next_free_index = next;
69   free_space--;
70   return page_idx;
71 }
72 
73 Uint32
alloc_record(Uint32 page_idx)74 Tup_fixsize_page::alloc_record(Uint32 page_idx)
75 {
76   assert(page_idx + 1 < DATA_WORDS);
77   if (likely(free_space && m_data[page_idx + 1] == FREE_RECORD))
78   {
79     Uint32 prev = m_data[page_idx] >> 16;
80     Uint32 next = m_data[page_idx] & 0xFFFF;
81 
82     assert(prev != 0xFFFF || (next_free_index == page_idx));
83     if (prev == 0xFFFF)
84     {
85       next_free_index = next;
86     }
87     else
88     {
89       Uint32 prevP = m_data[prev];
90       m_data[prev] = (prevP & 0xFFFF0000) | next;
91     }
92 
93     if (next != 0xFFFF)
94     {
95       Uint32 nextP = m_data[next];
96       m_data[next] = (prev << 16) | (nextP & 0xFFFF);
97     }
98     free_space --;
99     m_data[page_idx + 1] = 0;
100     return page_idx;
101   }
102   return ~0;
103 }
104 
105 Uint32
free_record(Uint32 page_idx)106 Tup_fixsize_page::free_record(Uint32 page_idx)
107 {
108   Uint32 next = next_free_index;
109 
110   assert(page_idx + 1 < DATA_WORDS);
111   assert(m_data[page_idx + 1] != FREE_RECORD);
112 
113   if (next == 0xFFFF)
114   {
115     assert(free_space == 0);
116   }
117   else
118   {
119     assert(free_space);
120     assert(next + 1 < DATA_WORDS);
121     Uint32 nextP = m_data[next];
122     assert((nextP >> 16) == 0xFFFF);
123     m_data[next] = (page_idx << 16) | (nextP & 0xFFFF);
124     assert(m_data[next + 1] == FREE_RECORD);
125   }
126 
127   next_free_index = page_idx;
128   m_data[page_idx] = 0xFFFF0000 | next;
129   m_data[page_idx + 1] = FREE_RECORD;
130 
131   return ++free_space;
132 }
133 
134 void
init()135 Tup_varsize_page::init()
136 {
137   free_space= DATA_WORDS - 1;
138   high_index= 1;
139   insert_pos= 0;
140   next_free_index= END_OF_FREE_LIST;
141   m_page_header.m_page_type = File_formats::PT_Tup_varsize_page;
142 }
143 
144 Uint32
alloc_record(Uint32 page_idx,Uint32 alloc_size,Tup_varsize_page * temp)145 Tup_varsize_page::alloc_record(Uint32 page_idx, Uint32 alloc_size,
146 			       Tup_varsize_page* temp)
147 {
148   assert(page_idx); // 0 is not allowed
149   Uint32 free = free_space;
150   Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
151   Uint32 free_list = next_free_index;
152 
153   if (page_idx < high_index)
154   {
155     Uint32 *ptr = get_index_ptr(page_idx);
156     Uint32 word = *ptr;
157 
158     if (unlikely((free < alloc_size) || ! (word & FREE)))
159     {
160       return ~0;
161     }
162 
163     if (alloc_size >= largest_size)
164     {
165       /*
166 	We can't fit this segment between the insert position and the end of
167 	the index entries. We will pack the page so that all free space
168 	exists between the insert position and the end of the index entries.
169       */
170       reorg(temp);
171     }
172 
173     Uint32 next = (word & NEXT_MASK) >> NEXT_SHIFT;
174     Uint32 prev = (word & PREV_MASK) >> PREV_SHIFT;
175 
176     if (next != END_OF_FREE_LIST)
177     {
178       Uint32 * next_ptr = get_index_ptr(next);
179       Uint32 next_word = * next_ptr;
180       * next_ptr = (next_word & ~PREV_MASK) | (prev << PREV_SHIFT);
181     }
182 
183     if (prev != END_OF_FREE_LIST)
184     {
185       Uint32 * prev_ptr = get_index_ptr(prev);
186       Uint32 prev_word = * prev_ptr;
187       * prev_ptr = (prev_word & ~NEXT_MASK) | (next << NEXT_SHIFT);
188     }
189     else
190     {
191       assert(next_free_index == page_idx);
192       next_free_index = next;
193     }
194 
195     * ptr = insert_pos + (alloc_size << LEN_SHIFT);
196     free -= alloc_size;
197   }
198   else
199   {
200     /**
201      * We need to expand directory
202      */
203     Uint32 hi = high_index;
204     Uint32 expand = (page_idx + 1 - hi);
205     Uint32 size = alloc_size + expand;
206     if (unlikely(size > free))
207     {
208       return ~0;
209     }
210 
211     if (size >= largest_size)
212     {
213       /*
214 	We can't fit this segment between the insert position and the end of
215 	the index entries. We will pack the page so that all free space
216 	exists between the insert position and the end of the index entries.
217       */
218       reorg(temp);
219     }
220 
221     Uint32 *ptr = m_data + DATA_WORDS - hi;
222     if (page_idx == hi)
223     {
224       * ptr = insert_pos + (alloc_size << LEN_SHIFT);
225     }
226     else
227     {
228       if (free_list != END_OF_FREE_LIST)
229       {
230 	Uint32 * prev_ptr = get_index_ptr(free_list);
231 	Uint32 prev_word = * prev_ptr;
232 	* prev_ptr = (prev_word & ~PREV_MASK) | (hi << PREV_SHIFT);
233       }
234 
235       for (; hi < page_idx;)
236       {
237 	* ptr-- = FREE | (free_list << NEXT_SHIFT) | ((hi+1) << PREV_SHIFT);
238 	free_list = hi++;
239       }
240 
241       * ptr++ = insert_pos + (alloc_size << LEN_SHIFT);
242       * ptr = ((* ptr) & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
243 
244       next_free_index = hi - 1;
245     }
246     high_index = hi + 1;
247     free -= size;
248   }
249 
250   free_space = free;
251   insert_pos += alloc_size;
252 
253   return page_idx;
254 }
255 
256 Uint32
alloc_record(Uint32 alloc_size,Tup_varsize_page * temp,Uint32 chain)257 Tup_varsize_page::alloc_record(Uint32 alloc_size,
258 			       Tup_varsize_page* temp, Uint32 chain)
259 {
260   assert(free_space >= alloc_size);
261   Uint32 largest_size= DATA_WORDS - (insert_pos + high_index);
262   if (alloc_size >= largest_size) {
263     /*
264       We can't fit this segment between the insert position and the end of
265       the index entries. We will pack the page so that all free space
266       exists between the insert position and the end of the index entries.
267     */
268     reorg(temp);
269     largest_size= DATA_WORDS - (insert_pos + high_index);
270   }
271   assert(largest_size > alloc_size);
272 
273   Uint32 page_idx;
274   if (next_free_index == END_OF_FREE_LIST) {
275     /*
276       We are out of free index slots. We will extend the array of free
277       slots
278     */
279     page_idx= high_index++;
280     free_space--;
281   } else {
282     // Pick an empty slot among the index entries
283     page_idx= next_free_index;
284     assert((get_index_word(page_idx) & FREE) == FREE);
285     assert(((get_index_word(page_idx) & PREV_MASK) >> PREV_SHIFT) ==
286 	   END_OF_FREE_LIST);
287     next_free_index= (get_index_word(page_idx) & NEXT_MASK) >> NEXT_SHIFT;
288     assert(next_free_index);
289     if (next_free_index != END_OF_FREE_LIST)
290     {
291       Uint32 *ptr = get_index_ptr(next_free_index);
292       Uint32 word = *ptr;
293       * ptr = (word & ~PREV_MASK) | (END_OF_FREE_LIST << PREV_SHIFT);
294     }
295   }
296 
297   assert(chain == 0 || chain == CHAIN);
298   * get_index_ptr(page_idx) = insert_pos + chain + (alloc_size << LEN_SHIFT);
299 
300   insert_pos += alloc_size;
301   free_space -= alloc_size;
302   //ndbout_c("%p->alloc_record(%d%s) -> %d", this,alloc_size, (chain ? " CHAIN" : ""),page_idx);
303   return page_idx;
304 }
305 
306 Uint32
free_record(Uint32 page_idx,Uint32 chain)307 Tup_varsize_page::free_record(Uint32 page_idx, Uint32 chain)
308 {
309   //ndbout_c("%p->free_record(%d%s)", this, page_idx, (chain ? " CHAIN": ""));
310   Uint32 *index_ptr= get_index_ptr(page_idx);
311   Uint32 index_word= * index_ptr;
312   Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
313   Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
314   assert(chain == 0 || chain == CHAIN);
315   assert((index_word & CHAIN) == chain);
316 #ifdef VM_TRACE
317   memset(m_data + entry_pos, 0xF2, 4*entry_len);
318 #endif
319   if (page_idx + 1 == high_index) {
320     /*
321       We are removing the last in the entry list. We could potentially
322       have several free entries also before this. To take that into account
323       we will rebuild the free list and thus compress it and update the
324       free space accordingly.
325     */
326     rebuild_index(index_ptr);
327   } else {
328     if (next_free_index != END_OF_FREE_LIST)
329     {
330       Uint32 *ptr = get_index_ptr(next_free_index);
331       Uint32 word = *ptr;
332       assert(((word & PREV_MASK) >> PREV_SHIFT) == END_OF_FREE_LIST);
333       * ptr = (word & ~PREV_MASK) | (page_idx << PREV_SHIFT);
334     }
335     * index_ptr= FREE | next_free_index | (END_OF_FREE_LIST << PREV_SHIFT);
336     next_free_index= page_idx;
337     assert(next_free_index);
338   }
339 
340   free_space+= entry_len;
341   // If we're the "last" entry, decrease insert_pos
342   insert_pos -= (entry_pos + entry_len == insert_pos ? entry_len : 0);
343 
344   return free_space;
345 }
346 
347 void
rebuild_index(Uint32 * index_ptr)348 Tup_varsize_page::rebuild_index(Uint32* index_ptr)
349 {
350   Uint32 empty= 1;
351   Uint32 *end= m_data + DATA_WORDS;
352 
353   /**
354    * Scan until you find first non empty index pos
355    */
356   for(index_ptr++; index_ptr < end; index_ptr++)
357     if((* index_ptr) & FREE)
358       empty++;
359     else
360       break;
361 
362   if(index_ptr == end)
363   {
364     // Totally free page
365     high_index = 1;
366     free_space += empty;
367     next_free_index = END_OF_FREE_LIST;
368     return;
369   }
370 
371   Uint32 next= END_OF_FREE_LIST;
372   Uint32 dummy;
373   Uint32 *prev_ptr = &dummy;
374   for(index_ptr++; index_ptr < end; index_ptr++)
375   {
376     if ((* index_ptr) & FREE)
377     {
378       * index_ptr= FREE | next;
379       next= Uint32(end - index_ptr);
380       * prev_ptr |= (next << PREV_SHIFT);
381       prev_ptr = index_ptr;
382     }
383   }
384 
385   * prev_ptr |= (END_OF_FREE_LIST << PREV_SHIFT);
386 
387   high_index -= empty;
388   free_space += empty;
389   next_free_index= next;
390   assert(next_free_index);
391 }
392 
393 void
reorg(Tup_varsize_page * copy_page)394 Tup_varsize_page::reorg(Tup_varsize_page* copy_page)
395 {
396   Uint32 new_insert_pos= 0;
397   Uint32 old_insert_pos= insert_pos;
398 
399   // Copy key data part of page to a temporary page.
400   memcpy(copy_page->m_data, m_data, 4*old_insert_pos);
401   assert(high_index > 0);
402   Uint32* index_ptr= get_index_ptr(high_index-1);
403   Uint32 *end_of_page= m_data + DATA_WORDS;
404   for (; index_ptr < end_of_page; index_ptr++)
405   {
406     Uint32 index_word= * index_ptr;
407     Uint32 entry_len= (index_word & LEN_MASK) >> LEN_SHIFT;
408     if (!(index_word & FREE) && entry_len)
409     {
410       /*
411 	We found an index item that needs to be packed.
412 	We will update the index entry and copy the data to the page.
413       */
414       Uint32 entry_pos= (index_word & POS_MASK) >> POS_SHIFT;
415       assert(entry_pos + entry_len <= old_insert_pos);
416       assert(new_insert_pos + entry_len <= old_insert_pos);
417       * index_ptr= (new_insert_pos << POS_SHIFT) + (index_word & ~POS_MASK);
418       memcpy(m_data+new_insert_pos, copy_page->m_data+entry_pos, 4*entry_len);
419 
420       new_insert_pos += entry_len;
421     }
422   }
423   insert_pos= new_insert_pos;
424 }
425 
426 NdbOut&
operator <<(NdbOut & out,const Tup_varsize_page & page)427 operator<< (NdbOut& out, const Tup_varsize_page& page)
428 {
429   out << "[ Varpage " << &page << ": free: " << page.free_space
430       << " (" << (page.DATA_WORDS - (page.insert_pos + page.high_index + 1)) << ")"
431       << " insert_pos: " << page.insert_pos
432       << " high_index: " << page.high_index
433       << " index: " << flush;
434 
435   const Uint32 *index_ptr= page.m_data+page.DATA_WORDS-1;
436   for(Uint32 i = 1; i<page.high_index; i++, index_ptr--)
437   {
438     out << " [ " << i;
439     if(! (*index_ptr & page.FREE))
440       out << " pos: " << ((* index_ptr & page.POS_MASK) >> page.POS_SHIFT)
441 	  << " len: " << ((* index_ptr & page.LEN_MASK) >> page.LEN_SHIFT)
442 	  << ((* index_ptr & page.CHAIN) ? " CHAIN " : " ")
443 	  << "]" << flush;
444     else
445       out << " FREE ]" << flush;
446   }
447 
448   out << " free list: " << flush;
449   Uint32 next= page.next_free_index;
450   while(next != page.END_OF_FREE_LIST)
451   {
452     out << next << " " << flush;
453     next= ((* (page.m_data+page.DATA_WORDS-next)) & page.NEXT_MASK) >> page.NEXT_SHIFT;
454   }
455   out << "]";
456   return out;
457 }
458 
459 NdbOut&
operator <<(NdbOut & out,const Tup_fixsize_page & page)460 operator<< (NdbOut& out, const Tup_fixsize_page& page)
461 {
462   out << "[ Fixpage " << &page
463       << ": frag_page: " << page.frag_page_id
464       << " page_no: " << page.m_page_no
465       << " file_no: " << page.m_file_no
466       << " table: " << page.m_table_id
467       << " fragment: " << page.m_fragment_id
468       << " uncommitted_used_space: " << page.uncommitted_used_space
469       << " free: " << page.free_space;
470 
471   out << " free list: " << hex << page.next_free_index << " " << flush;
472 #if 0
473   Uint32 startTuple = page.next_free_index >> 16;
474   Uint32 cnt = 0;
475   Uint32 next= startTuple;
476   while((next & 0xFFFF) != 0xFFFF)
477   {
478     cnt++;
479     out << dec << "(" << (next & 0xFFFF) << " " << hex << next << ") " << flush;
480     assert(page.m_data[(next & 0xFFFF) + 1] == Dbtup::Tuple_header::FREE);
481     next= * (page.m_data + ( next & 0xFFFF ));
482   }
483   assert(cnt == page.free_space);
484 #endif
485   out << "]";
486   return out;
487 }
488