1 /*
2 Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 /*
26 This handler was developed by Mikael Ronstrom for version 5.1 of MySQL.
27 It is an abstraction layer on top of other handlers such as MyISAM,
28 InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
29 be handled by a storage engine. The current example of this is NDB
30 Cluster that has internally handled partitioning. This has benefits in
31 that many loops needed in the partition handler can be avoided.
32
33 Partitioning has an inherent feature which in some cases is positive and
34 in some cases is negative. It splits the data into chunks. This makes
35 the data more manageable, queries can easily be parallelised towards the
36 parts and indexes are split such that there are less levels in the
37 index trees. The inherent disadvantage is that to use a split index
38 one has to scan all index parts which is ok for large queries but for
39 small queries it can be a disadvantage.
40
41 Partitioning lays the foundation for more manageable databases that are
42 extremely large. It does also lay the foundation for more parallelism
43 in the execution of queries. This functionality will grow with later
44 versions of MySQL.
45
46 You can enable it in your build by doing the following during your build
47 process:
48 ./configure --with-partition
49
50 The partition is set up to use table locks. It implements a partition "SHARE"
51 that is inserted into a hash by table name. You can use this to store
52 information of state that any partition handler object will be able to see
53 if it is using the same table.
54
55 Please read the object definition in ha_partition.h before reading the rest
56 of this file.
57 */
58
59 #include "sql_priv.h"
60 #include "sql_parse.h" // append_file_to_dir
61 #include "binlog.h" // mysql_bin_log
62
63 #ifdef WITH_PARTITION_STORAGE_ENGINE
64 #include "ha_partition.h"
65 #include "sql_table.h" // tablename_to_filename
66 #include "key.h"
67 #include "sql_plugin.h"
68 #include "sql_partition.h"
69 #include "sql_show.h" // append_identifier
70 #include "sql_admin.h" // SQL_ADMIN_MSG_TEXT_SIZE
71
72 #include "debug_sync.h"
73
74 using std::min;
75 using std::max;
76
77
78 /* First 4 bytes in the .par file is the number of 32-bit words in the file */
79 #define PAR_WORD_SIZE 4
80 /* offset to the .par file checksum */
81 #define PAR_CHECKSUM_OFFSET 4
82 /* offset to the total number of partitions */
83 #define PAR_NUM_PARTS_OFFSET 8
84 /* offset to the engines array */
85 #define PAR_ENGINES_OFFSET 12
86 #define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | \
87 HA_REC_NOT_IN_SEQ | \
88 HA_CAN_REPAIR)
89 #define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
90 HA_CAN_FULLTEXT | \
91 HA_DUPLICATE_POS | \
92 HA_CAN_SQL_HANDLER | \
93 HA_CAN_INSERT_DELAYED | \
94 HA_READ_BEFORE_WRITE_REMOVAL)
95 static const char *ha_par_ext= ".par";
96
97 /****************************************************************************
98 MODULE create/delete handler object
99 ****************************************************************************/
100
101 static handler *partition_create_handler(handlerton *hton,
102 TABLE_SHARE *share,
103 MEM_ROOT *mem_root);
104 static uint partition_flags();
105 static uint alter_table_flags(uint flags);
106
107
108 /****************************************************************************
109 Check whether the partition column order changes after alter
110 ****************************************************************************/
111 static bool check_partition_column_order(List<Create_field> *create_list,
112 Field** field_arary);
113
114 #ifdef HAVE_PSI_INTERFACE
115 PSI_mutex_key key_partition_auto_inc_mutex;
116
117 static PSI_mutex_info all_partition_mutexes[]=
118 {
119 { &key_partition_auto_inc_mutex, "Partition_share::auto_inc_mutex", 0}
120 };
121
init_partition_psi_keys(void)122 static void init_partition_psi_keys(void)
123 {
124 const char* category= "partition";
125 int count;
126
127 count= array_elements(all_partition_mutexes);
128 mysql_mutex_register(category, all_partition_mutexes, count);
129 }
130 #endif /* HAVE_PSI_INTERFACE */
131
partition_initialize(void * p)132 static int partition_initialize(void *p)
133 {
134
135 handlerton *partition_hton;
136 partition_hton= (handlerton *)p;
137
138 partition_hton->state= SHOW_OPTION_YES;
139 partition_hton->db_type= DB_TYPE_PARTITION_DB;
140 partition_hton->create= partition_create_handler;
141 partition_hton->partition_flags= partition_flags;
142 partition_hton->alter_table_flags= alter_table_flags;
143 partition_hton->flags= HTON_NOT_USER_SELECTABLE |
144 HTON_HIDDEN |
145 HTON_TEMPORARY_NOT_SUPPORTED;
146 #ifdef HAVE_PSI_INTERFACE
147 init_partition_psi_keys();
148 #endif
149 return 0;
150 }
151
152
153 /**
154 Initialize and allocate space for partitions shares.
155
156 @param num_parts Number of partitions to allocate storage for.
157
158 @return Operation status.
159 @retval true Failure (out of memory).
160 @retval false Success.
161 */
162
init(uint num_parts)163 bool Partition_share::init(uint num_parts)
164 {
165 DBUG_ENTER("Partition_share::init");
166 mysql_mutex_init(key_partition_auto_inc_mutex,
167 &auto_inc_mutex,
168 MY_MUTEX_INIT_FAST);
169 auto_inc_initialized= false;
170 partition_name_hash_initialized= false;
171 next_auto_inc_val= 0;
172 partitions_share_refs= new Parts_share_refs;
173 if (!partitions_share_refs)
174 DBUG_RETURN(true);
175 if (partitions_share_refs->init(num_parts))
176 {
177 delete partitions_share_refs;
178 DBUG_RETURN(true);
179 }
180 DBUG_RETURN(false);
181 }
182
183
184 /*
185 Create new partition handler
186
187 SYNOPSIS
188 partition_create_handler()
189 table Table object
190
191 RETURN VALUE
192 New partition object
193 */
194
partition_create_handler(handlerton * hton,TABLE_SHARE * share,MEM_ROOT * mem_root)195 static handler *partition_create_handler(handlerton *hton,
196 TABLE_SHARE *share,
197 MEM_ROOT *mem_root)
198 {
199 ha_partition *file= new (mem_root) ha_partition(hton, share);
200 if (file && file->initialize_partition(mem_root))
201 {
202 delete file;
203 file= 0;
204 }
205 return file;
206 }
207
208 /*
209 HA_CAN_PARTITION:
210 Used by storage engines that can handle partitioning without this
211 partition handler
212 (Partition, NDB)
213
214 HA_CAN_UPDATE_PARTITION_KEY:
215 Set if the handler can update fields that are part of the partition
216 function.
217
218 HA_CAN_PARTITION_UNIQUE:
219 Set if the handler can handle unique indexes where the fields of the
220 unique key are not part of the fields of the partition function. Thus
221 a unique key can be set on all fields.
222
223 HA_USE_AUTO_PARTITION
224 Set if the handler sets all tables to be partitioned by default.
225 */
226
/* Capabilities of this partition handler (see flag docs above). */
static uint partition_flags()
{
  return HA_CAN_PARTITION;
}
231
/* ALTER TABLE capabilities: supports partition functions and fast partition changes. */
static uint alter_table_flags(uint flags MY_ATTRIBUTE((unused)))
{
  return (HA_PARTITION_FUNCTION_SUPPORTED |
          HA_FAST_CHANGE_PARTITION);
}
237
/**
  Check whether the table's fields appear in the same relative order in the
  new (post-ALTER) create list.

  NOTE: the create-list iterator is intentionally NOT restarted for each
  table field.  It only ever moves forward, so every field in field_arary
  must be found *after* the previous one in create_list; otherwise the
  iterator runs off the end (new_field == NULL) and we report an order
  change.

  @param create_list  List of fields after ALTER.
  @param field_arary  NULL-terminated array of the table's current fields.
                      (sic: parameter name kept as in the declaration)

  @return true if the order is unchanged (INPLACE allowed), false otherwise.
*/
static bool check_partition_column_order(List<Create_field> *create_list,
                                         Field** field_arary)
{

  Field **f_ptr;
  List_iterator_fast<Create_field> new_field_it;
  Create_field *new_field= NULL;
  new_field_it.init(*create_list);

  for (f_ptr= field_arary ; *f_ptr; f_ptr++)
  {
    /* Scan forward (never backward) for the matching create field. */
    while ((new_field= new_field_it++))
    {
      if (new_field->field == *f_ptr)
        break;
    }
    /* Iterator exhausted: this field comes earlier than the previous one. */
    if (!new_field)
      break;
  }

  if (!new_field)
  {
    /* Not same order, INPLACE cannot be allowed!*/
    return false;
  }
  return true;
}
265
266 const uint32 ha_partition::NO_CURRENT_PART_ID= NOT_A_PARTITION_ID;
267
268 /*
269 Constructor method
270
271 SYNOPSIS
272 ha_partition()
273 table Table object
274
275 RETURN VALUE
276 NONE
277 */
278
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
  :handler(hton, share)
{
  DBUG_ENTER("ha_partition::ha_partition(table)");
  /* Bring all members to a defined state; no partition info yet. */
  init_handler_variables();
  DBUG_VOID_RETURN;
}
286
287
288 /*
289 Constructor method
290
291 SYNOPSIS
292 ha_partition()
293 part_info Partition info
294
295 RETURN VALUE
296 NONE
297 */
298
ha_partition::ha_partition(handlerton *hton, partition_info *part_info)
  :handler(hton, NULL)
{
  DBUG_ENTER("ha_partition::ha_partition(part_info)");
  DBUG_ASSERT(part_info);
  init_handler_variables();
  /* Partition info supplied up front: this handler creates the table. */
  m_part_info= part_info;
  m_create_handler= TRUE;
  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
  DBUG_VOID_RETURN;
}
310
311 /**
312 ha_partition constructor method used by ha_partition::clone()
313
314 @param hton Handlerton (partition_hton)
315 @param share Table share object
316 @param part_info_arg partition_info to use
317 @param clone_arg ha_partition to clone
318 @param clone_mem_root_arg MEM_ROOT to use
319
320 @return New partition handler
321 */
322
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share,
                           partition_info *part_info_arg,
                           ha_partition *clone_arg,
                           MEM_ROOT *clone_mem_root_arg)
  :handler(hton, share)
{
  DBUG_ENTER("ha_partition::ha_partition(clone)");
  init_handler_variables();
  m_part_info= part_info_arg;
  m_create_handler= TRUE;
  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
  /* Remember the source handler and its MEM_ROOT; open() uses them. */
  m_is_clone_of= clone_arg;
  m_clone_mem_root= clone_mem_root_arg;
  /* The clone shares state with the original handler. */
  part_share= clone_arg->part_share;
  m_tot_parts= clone_arg->m_tot_parts;
  m_pkey_is_clustered= clone_arg->primary_key_is_clustered();
  DBUG_VOID_RETURN;
}
341
342 /*
343 Initialize handler object
344
345 SYNOPSIS
346 init_handler_variables()
347
348 RETURN VALUE
349 NONE
350 */
351
/**
  Reset every member to a well-defined initial state.

  Called from all constructors, so nothing constructor-specific may be
  assumed here; constructors overwrite individual members afterwards.
*/
void ha_partition::init_handler_variables()
{
  /* Scan/index state */
  active_index= MAX_KEY;
  m_mode= 0;
  m_open_test_lock= 0;
  /* .par file buffer and per-partition handler arrays */
  m_file_buffer= NULL;
  m_name_buffer_ptr= NULL;
  m_engine_array= NULL;
  m_file= NULL;
  m_file_tot_parts= 0;
  m_reorged_file= NULL;
  m_new_file= NULL;
  m_reorged_parts= 0;
  m_added_file= NULL;
  m_tot_parts= 0;
  m_pkey_is_clustered= 0;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_scan_value= 2;
  m_ref_length= 0;
  m_part_spec.end_part= NO_CURRENT_PART_ID;
  m_index_scan_type= partition_no_index_scan;
  m_start_key.key= NULL;
  m_start_key.length= 0;
  m_myisam= FALSE;
  m_innodb= FALSE;
  /* Extra-cache bookkeeping (HA_EXTRA_CACHE handling) */
  m_extra_cache= FALSE;
  m_extra_cache_size= 0;
  m_extra_prepare_for_update= FALSE;
  m_extra_cache_part_id= NO_CURRENT_PART_ID;
  m_handler_status= handler_not_initialized;
  m_low_byte_first= 1;
  m_part_field_array= NULL;
  m_ordered_rec_buffer= NULL;
  m_top_entry= NO_CURRENT_PART_ID;
  m_rec_length= 0;
  m_last_part= 0;
  m_rec0= 0;
  m_err_rec= NULL;
  m_curr_key_info[0]= NULL;
  m_curr_key_info[1]= NULL;
  m_part_func_monotonicity_info= NON_MONOTONIC;
  auto_increment_lock= FALSE;
  auto_increment_safe_stmt_log_lock= FALSE;
  /*
    this allows blackhole to work properly
  */
  m_num_locks= 0;
  m_part_info= NULL;
  m_create_handler= FALSE;
  m_is_sub_partitioned= 0;
  m_is_clone_of= NULL;
  m_clone_mem_root= NULL;
  part_share= NULL;
  m_new_partitions_share_refs.empty();
  m_part_ids_sorted_by_num_of_records= NULL;
  m_sec_sort_by_rowid= false;

#ifdef DONT_HAVE_TO_BE_INITALIZED
  m_start_key.flag= 0;
  m_ordered= TRUE;
#endif
}
414
415
/** @return the storage engine name of the underlying partitions. */
const char *ha_partition::table_type() const
{
  // we can do this since we only support a single engine type
  return m_file[0]->table_type();
}
421
422
423 /*
424 Destructor method
425
426 SYNOPSIS
427 ~ha_partition()
428
429 RETURN VALUE
430 NONE
431 */
432
~ha_partition()433 ha_partition::~ha_partition()
434 {
435 DBUG_ENTER("ha_partition::~ha_partition()");
436 if (m_new_partitions_share_refs.elements)
437 m_new_partitions_share_refs.delete_elements();
438 if (m_file != NULL)
439 {
440 uint i;
441 for (i= 0; i < m_tot_parts; i++)
442 delete m_file[i];
443 }
444 destroy_record_priority_queue();
445 my_free(m_part_ids_sorted_by_num_of_records);
446
447 clear_handler_file();
448 DBUG_VOID_RETURN;
449 }
450
451
452 /*
453 Initialize partition handler object
454
455 SYNOPSIS
456 initialize_partition()
457 mem_root Allocate memory through this
458
459 RETURN VALUE
460 1 Error
461 0 Success
462
463 DESCRIPTION
464
465 The partition handler is only a layer on top of other engines. Thus it
466 can't really perform anything without the underlying handlers. Thus we
467 add this method as part of the allocation of a handler object.
468
469 1) Allocation of underlying handlers
470 If we have access to the partition info we will allocate one handler
471 instance for each partition.
472 2) Allocation without partition info
473 The cases where we don't have access to this information is when called
474 in preparation for delete_table and rename_table and in that case we
475 only need to set HA_FILE_BASED. In that case we will use the .par file
476 that contains information about the partitions and their engines and
477 the names of each partition.
478 3) Table flags initialisation
479 We need also to set table flags for the partition handler. This is not
480 static since it depends on what storage engines are used as underlying
481 handlers.
482 The table flags is set in this routine to simulate the behaviour of a
483 normal storage engine
484 The flag HA_FILE_BASED will be set independent of the underlying handlers
485 4) Index flags initialisation
486 When knowledge exists on the indexes it is also possible to initialize the
487 index flags. Again the index flags must be initialized by using the under-
488 lying handlers since this is storage engine dependent.
489 The flag HA_READ_ORDER will be reset for the time being to indicate no
490 ordered output is available from partition handler indexes. Later a merge
491 sort will be performed using the underlying handlers.
492 5) primary_key_is_clustered, has_transactions and low_byte_first is
493 calculated here.
494
495 */
496
initialize_partition(MEM_ROOT * mem_root)497 bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
498 {
499 handler **file_array, *file;
500 ulonglong check_table_flags;
501 DBUG_ENTER("ha_partition::initialize_partition");
502
503 if (m_create_handler)
504 {
505 m_tot_parts= m_part_info->get_tot_partitions();
506 DBUG_ASSERT(m_tot_parts > 0);
507 if (new_handlers_from_part_info(mem_root))
508 DBUG_RETURN(1);
509 }
510 else if (!table_share || !table_share->normalized_path.str)
511 {
512 /*
513 Called with dummy table share (delete, rename and alter table).
514 Don't need to set-up anything.
515 */
516 DBUG_RETURN(0);
517 }
518 else if (get_from_handler_file(table_share->normalized_path.str,
519 mem_root, false))
520 {
521 my_error(ER_FAILED_READ_FROM_PAR_FILE, MYF(0));
522 DBUG_RETURN(1);
523 }
524 /*
525 We create all underlying table handlers here. We do it in this special
526 method to be able to report allocation errors.
527
528 Set up low_byte_first, primary_key_is_clustered and
529 has_transactions since they are called often in all kinds of places,
530 other parameters are calculated on demand.
531 Verify that all partitions have the same table_flags.
532 */
533 check_table_flags= m_file[0]->ha_table_flags();
534 m_low_byte_first= m_file[0]->low_byte_first();
535 m_pkey_is_clustered= TRUE;
536 file_array= m_file;
537 do
538 {
539 file= *file_array;
540 if (m_low_byte_first != file->low_byte_first())
541 {
542 // Cannot have handlers with different endian
543 my_error(ER_MIX_HANDLER_ERROR, MYF(0));
544 DBUG_RETURN(1);
545 }
546 if (!file->primary_key_is_clustered())
547 m_pkey_is_clustered= FALSE;
548 if (check_table_flags != file->ha_table_flags())
549 {
550 my_error(ER_MIX_HANDLER_ERROR, MYF(0));
551 DBUG_RETURN(1);
552 }
553 } while (*(++file_array));
554 m_handler_status= handler_initialized;
555 DBUG_RETURN(0);
556 }
557
558 /****************************************************************************
559 MODULE meta data changes
560 ****************************************************************************/
561 /*
562 Delete a table
563
564 SYNOPSIS
565 delete_table()
566 name Full path of table name
567
568 RETURN VALUE
569 >0 Error
570 0 Success
571
572 DESCRIPTION
573 Used to delete a table. By the time delete_table() has been called all
574 opened references to this table will have been closed (and your globally
575 shared references released. The variable name will just be the name of
576 the table. You will need to remove any files you have created at this
577 point.
578
579 If you do not implement this, the default delete_table() is called from
580 handler.cc and it will delete all files with the file extentions returned
581 by bas_ext().
582
583 Called from handler.cc by delete_table and ha_create_table(). Only used
584 during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
585 the storage engine.
586 */
587
int ha_partition::delete_table(const char *name)
{
  DBUG_ENTER("ha_partition::delete_table");

  /* Shared delete/rename helper; a NULL "to" name means delete. */
  DBUG_RETURN(del_ren_table(name, NULL));
}
594
595
596 /*
597 Rename a table
598
599 SYNOPSIS
600 rename_table()
601 from Full path of old table name
602 to Full path of new table name
603
604 RETURN VALUE
605 >0 Error
606 0 Success
607
608 DESCRIPTION
609 Renames a table from one name to another from alter table call.
610
611 If you do not implement this, the default rename_table() is called from
612 handler.cc and it will rename all files with the file extentions returned
613 by bas_ext().
614
615 Called from sql_table.cc by mysql_rename_table().
616 */
617
int ha_partition::rename_table(const char *from, const char *to)
{
  DBUG_ENTER("ha_partition::rename_table");

  /* Shared delete/rename helper; a non-NULL "to" name means rename. */
  DBUG_RETURN(del_ren_table(from, to));
}
624
625
626 /*
627 Create the handler file (.par-file)
628
629 SYNOPSIS
630 create_handler_files()
631 name Full path of table name
632 create_info Create info generated for CREATE TABLE
633
634 RETURN VALUE
635 >0 Error
636 0 Success
637
638 DESCRIPTION
639 create_handler_files is called to create any handler specific files
640 before opening the file with openfrm to later call ::create on the
641 file object.
642 In the partition handler this is used to store the names of partitions
643 and types of engines in the partitions.
644 */
645
create_handler_files(const char * path,const char * old_path,int action_flag,HA_CREATE_INFO * create_info)646 int ha_partition::create_handler_files(const char *path,
647 const char *old_path,
648 int action_flag,
649 HA_CREATE_INFO *create_info)
650 {
651 DBUG_ENTER("ha_partition::create_handler_files()");
652
653 /*
654 We need to update total number of parts since we might write the handler
655 file as part of a partition management command
656 */
657 if (action_flag == CHF_DELETE_FLAG ||
658 action_flag == CHF_RENAME_FLAG)
659 {
660 char name[FN_REFLEN];
661 char old_name[FN_REFLEN];
662
663 strxmov(name, path, ha_par_ext, NullS);
664 strxmov(old_name, old_path, ha_par_ext, NullS);
665 if ((action_flag == CHF_DELETE_FLAG &&
666 mysql_file_delete(key_file_partition, name, MYF(MY_WME))) ||
667 (action_flag == CHF_RENAME_FLAG &&
668 mysql_file_rename(key_file_partition, old_name, name, MYF(MY_WME))))
669 {
670 DBUG_RETURN(TRUE);
671 }
672 }
673 else if (action_flag == CHF_CREATE_FLAG)
674 {
675 if (create_handler_file(path))
676 {
677 my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
678 DBUG_RETURN(1);
679 }
680 }
681 DBUG_RETURN(0);
682 }
683
684
685 /*
686 Create a partitioned table
687
688 SYNOPSIS
689 create()
690 name Full path of table name
691 table_arg Table object
692 create_info Create info generated for CREATE TABLE
693
694 RETURN VALUE
695 >0 Error
696 0 Success
697
698 DESCRIPTION
699 create() is called to create a table. The variable name will have the name
700 of the table. When create() is called you do not need to worry about
701 opening the table. Also, the FRM file will have already been created so
702 adjusting create_info will not do you any good. You can overwrite the frm
703 file at this point if you wish to change the table definition, but there
704 are no methods currently provided for doing that.
705
706 Called from handler.cc by ha_create_table().
707 */
708
create(const char * name,TABLE * table_arg,HA_CREATE_INFO * create_info)709 int ha_partition::create(const char *name, TABLE *table_arg,
710 HA_CREATE_INFO *create_info)
711 {
712 int error= 0;
713 char name_buff[FN_REFLEN + 1], name_lc_buff[FN_REFLEN + 1];
714 char *name_buffer_ptr;
715 const char *path;
716 uint i;
717 List_iterator_fast <partition_element> part_it(m_part_info->partitions);
718 partition_element *part_elem;
719 handler **file, **abort_file;
720 DBUG_ENTER("ha_partition::create");
721
722 DBUG_ASSERT(*fn_rext((char*)name) == '\0');
723
724 /* Not allowed to create temporary partitioned tables */
725 if (create_info && create_info->options & HA_LEX_CREATE_TMP_TABLE)
726 {
727 my_error(ER_PARTITION_NO_TEMPORARY, MYF(0));
728 DBUG_RETURN(TRUE);
729 }
730
731 if (get_from_handler_file(name, ha_thd()->mem_root, false))
732 DBUG_RETURN(TRUE);
733 DBUG_ASSERT(m_file_buffer);
734 DBUG_PRINT("enter", ("name: (%s)", name));
735 name_buffer_ptr= m_name_buffer_ptr;
736 file= m_file;
737 /*
738 Since ha_partition has HA_FILE_BASED, it must alter underlying table names
739 if they do not have HA_FILE_BASED and lower_case_table_names == 2.
740 See Bug#37402, for Mac OS X.
741 The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
742 Using the first partitions handler, since mixing handlers is not allowed.
743 */
744 path= get_canonical_filename(*file, name, name_lc_buff);
745 for (i= 0; i < m_part_info->num_parts; i++)
746 {
747 part_elem= part_it++;
748 if (m_is_sub_partitioned)
749 {
750 uint j;
751 List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
752 for (j= 0; j < m_part_info->num_subparts; j++)
753 {
754 part_elem= sub_it++;
755 if ((error= create_partition_name(name_buff, path, name_buffer_ptr,
756 NORMAL_PART_NAME, FALSE)))
757 goto create_error;
758
759 if ((error= set_up_table_before_create(table_arg, name_buff,
760 create_info, part_elem)) ||
761 ((error= (*file)->ha_create(name_buff, table_arg, create_info))))
762 goto create_error;
763
764 name_buffer_ptr= strend(name_buffer_ptr) + 1;
765 file++;
766 }
767 }
768 else
769 {
770 if ((create_partition_name(name_buff, path, name_buffer_ptr,
771 NORMAL_PART_NAME, FALSE)))
772 goto create_error;
773
774 if ((error= set_up_table_before_create(table_arg, name_buff,
775 create_info, part_elem)) ||
776 ((error= (*file)->ha_create(name_buff, table_arg, create_info))))
777 goto create_error;
778
779 name_buffer_ptr= strend(name_buffer_ptr) + 1;
780 file++;
781 }
782 }
783 DBUG_RETURN(0);
784
785 create_error:
786 name_buffer_ptr= m_name_buffer_ptr;
787 for (abort_file= file, file= m_file; file < abort_file; file++)
788 {
789 if (!create_partition_name(name_buff, path, name_buffer_ptr, NORMAL_PART_NAME,
790 FALSE))
791 (void) (*file)->ha_delete_table((const char*) name_buff);
792 name_buffer_ptr= strend(name_buffer_ptr) + 1;
793 }
794 handler::delete_table(name);
795 DBUG_RETURN(error);
796 }
797
798
799 /*
800 Drop partitions as part of ALTER TABLE of partitions
801
802 SYNOPSIS
803 drop_partitions()
804 path Complete path of db and table name
805
806 RETURN VALUE
807 >0 Failure
808 0 Success
809
810 DESCRIPTION
811 Use part_info object on handler object to deduce which partitions to
812 drop (each partition has a state attached to it)
813 */
814
int ha_partition::drop_partitions(const char *path)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  char part_name_buff[FN_REFLEN + 1];
  uint num_parts= m_part_info->partitions.elements;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint name_variant;
  int ret_error;
  int error= 0;
  DBUG_ENTER("ha_partition::drop_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   part_name_buff)));
  /*
    Errors are accumulated into "error" instead of returned immediately, so
    every droppable partition is attempted even if one fails.
  */
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_DROPPED)
    {
      handler *file;
      /*
        This part is to be dropped, meaning the part or all its subparts.
      */
      name_variant= NORMAL_PART_NAME;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint j= 0, part;
        do
        {
          partition_element *sub_elem= sub_it++;
          /* Subpartition handlers are laid out part-major in m_file. */
          part= i * num_subparts + j;
          if ((ret_error= create_subpartition_name(part_name_buff, path,
                                                   part_elem->partition_name,
                                                   sub_elem->partition_name,
                                                   name_variant)))
            error= ret_error;

          file= m_file[part];
          DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff));
          if ((ret_error= file->ha_delete_table(part_name_buff)))
            error= ret_error;
          /* Mark the DDL log entry done so crash recovery skips it. */
          if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
            error= 1;
        } while (++j < num_subparts);
      }
      else
      {
        if ((ret_error= create_partition_name(part_name_buff, path,
                                              part_elem->partition_name,
                                              name_variant, TRUE)))
          error= ret_error;

        file= m_file[i];
        DBUG_PRINT("info", ("Drop partition %s", part_name_buff));
        if ((ret_error= file->ha_delete_table(part_name_buff)))
          error= ret_error;
        /* Mark the DDL log entry done so crash recovery skips it. */
        if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
          error= 1;
      }
      if (part_elem->part_state == PART_IS_CHANGED)
        part_elem->part_state= PART_NORMAL;
      else
        part_elem->part_state= PART_IS_DROPPED;
    }
  } while (++i < num_parts);
  /* Persist the deactivated DDL log entries. */
  (void) sync_ddl_log();
  DBUG_RETURN(error);
}
888
889
890 /*
891 Rename partitions as part of ALTER TABLE of partitions
892
893 SYNOPSIS
894 rename_partitions()
895 path Complete path of db and table name
896
897 RETURN VALUE
898 TRUE Failure
899 FALSE Success
900
901 DESCRIPTION
902 When reorganising partitions, adding hash partitions and coalescing
903 partitions it can be necessary to rename partitions while holding
904 an exclusive lock on the table.
905 Which partitions to rename is given by state of partitions found by the
906 partition info struct referenced from the handler object
907 */
908
rename_partitions(const char * path)909 int ha_partition::rename_partitions(const char *path)
910 {
911 List_iterator<partition_element> part_it(m_part_info->partitions);
912 List_iterator<partition_element> temp_it(m_part_info->temp_partitions);
913 char part_name_buff[FN_REFLEN];
914 char norm_name_buff[FN_REFLEN];
915 uint num_parts= m_part_info->partitions.elements;
916 uint part_count= 0;
917 uint num_subparts= m_part_info->num_subparts;
918 uint i= 0;
919 uint j= 0;
920 int error= 0;
921 int ret_error;
922 uint temp_partitions= m_part_info->temp_partitions.elements;
923 handler *file;
924 partition_element *part_elem, *sub_elem;
925 DBUG_ENTER("ha_partition::rename_partitions");
926
927 /*
928 Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
929 We use m_file[0] as long as all partitions have the same storage engine.
930 */
931 DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
932 norm_name_buff)));
933
934 DEBUG_SYNC(ha_thd(), "before_rename_partitions");
935 if (temp_partitions)
936 {
937 /*
938 These are the reorganised partitions that have already been copied.
939 We delete the partitions and log the delete by inactivating the
940 delete log entry in the table log. We only need to synchronise
941 these writes before moving to the next loop since there is no
942 interaction among reorganised partitions, they cannot have the
943 same name.
944 */
945 do
946 {
947 part_elem= temp_it++;
948 if (m_is_sub_partitioned)
949 {
950 List_iterator<partition_element> sub_it(part_elem->subpartitions);
951 j= 0;
952 do
953 {
954 sub_elem= sub_it++;
955 file= m_reorged_file[part_count++];
956 if ((ret_error= create_subpartition_name(norm_name_buff, path,
957 part_elem->partition_name,
958 sub_elem->partition_name,
959 NORMAL_PART_NAME)))
960 error= ret_error;
961
962 DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
963 if ((ret_error= file->ha_delete_table(norm_name_buff)))
964 error= ret_error;
965 else if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
966 error= 1;
967 else
968 sub_elem->log_entry= NULL; /* Indicate success */
969 } while (++j < num_subparts);
970 }
971 else
972 {
973 file= m_reorged_file[part_count++];
974 if ((ret_error= create_partition_name(norm_name_buff, path,
975 part_elem->partition_name,
976 NORMAL_PART_NAME, TRUE)))
977 error= ret_error;
978
979 DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
980 if ((ret_error= file->ha_delete_table(norm_name_buff)))
981 error= ret_error;
982 else if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
983 error= 1;
984 else
985 part_elem->log_entry= NULL; /* Indicate success */
986 }
987 } while (++i < temp_partitions);
988 (void) sync_ddl_log();
989 }
990 i= 0;
991 do
992 {
993 /*
994 When state is PART_IS_CHANGED it means that we have created a new
995 TEMP partition that is to be renamed to normal partition name and
996 we are to delete the old partition with currently the normal name.
997
998 We perform this operation by
999 1) Delete old partition with normal partition name
1000 2) Signal this in table log entry
1001 3) Synch table log to ensure we have consistency in crashes
1002 4) Rename temporary partition name to normal partition name
1003 5) Signal this to table log entry
1004 It is not necessary to synch the last state since a new rename
1005 should not corrupt things if there was no temporary partition.
1006
1007 The only other parts we need to cater for are new parts that
1008 replace reorganised parts. The reorganised parts were deleted
1009 by the code above that goes through the temp_partitions list.
1010 Thus the synch above makes it safe to simply perform step 4 and 5
1011 for those entries.
1012 */
1013 part_elem= part_it++;
1014 if (part_elem->part_state == PART_IS_CHANGED ||
1015 part_elem->part_state == PART_TO_BE_DROPPED ||
1016 (part_elem->part_state == PART_IS_ADDED && temp_partitions))
1017 {
1018 if (m_is_sub_partitioned)
1019 {
1020 List_iterator<partition_element> sub_it(part_elem->subpartitions);
1021 uint part;
1022
1023 j= 0;
1024 do
1025 {
1026 sub_elem= sub_it++;
1027 part= i * num_subparts + j;
1028 if ((ret_error= create_subpartition_name(norm_name_buff, path,
1029 part_elem->partition_name,
1030 sub_elem->partition_name,
1031 NORMAL_PART_NAME)))
1032 error= ret_error;
1033
1034 if (part_elem->part_state == PART_IS_CHANGED)
1035 {
1036 file= m_reorged_file[part_count++];
1037 DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
1038 if ((ret_error= file->ha_delete_table(norm_name_buff)))
1039 error= ret_error;
1040 else if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
1041 error= 1;
1042 (void) sync_ddl_log();
1043 }
1044 file= m_new_file[part];
1045 if ((ret_error= create_subpartition_name(part_name_buff, path,
1046 part_elem->partition_name,
1047 sub_elem->partition_name,
1048 TEMP_PART_NAME)))
1049 error= ret_error;
1050
1051 DBUG_PRINT("info", ("Rename subpartition from %s to %s",
1052 part_name_buff, norm_name_buff));
1053 if ((ret_error= file->ha_rename_table(part_name_buff,
1054 norm_name_buff)))
1055 error= ret_error;
1056 else if (deactivate_ddl_log_entry(sub_elem->log_entry->entry_pos))
1057 error= 1;
1058 else
1059 sub_elem->log_entry= NULL;
1060 } while (++j < num_subparts);
1061 }
1062 else
1063 {
1064 if ((ret_error= create_partition_name(norm_name_buff, path,
1065 part_elem->partition_name,
1066 NORMAL_PART_NAME, TRUE)))
1067 error= ret_error;
1068
1069 if (part_elem->part_state == PART_IS_CHANGED)
1070 {
1071 file= m_reorged_file[part_count++];
1072 DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
1073 if ((ret_error= file->ha_delete_table(norm_name_buff)))
1074 error= ret_error;
1075 else if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
1076 error= 1;
1077 (void) sync_ddl_log();
1078 }
1079 file= m_new_file[i];
1080 if ((error= create_partition_name(part_name_buff, path,
1081 part_elem->partition_name,
1082 TEMP_PART_NAME, TRUE)))
1083 error= ret_error;
1084 DBUG_PRINT("info", ("Rename partition from %s to %s",
1085 part_name_buff, norm_name_buff));
1086 if ((ret_error= file->ha_rename_table(part_name_buff,
1087 norm_name_buff)))
1088 error= ret_error;
1089 else if (deactivate_ddl_log_entry(part_elem->log_entry->entry_pos))
1090 error= 1;
1091 else
1092 part_elem->log_entry= NULL;
1093 }
1094 }
1095 } while (++i < num_parts);
1096 (void) sync_ddl_log();
1097 DBUG_RETURN(error);
1098 }
1099
1100
/*
  Admin-operation codes passed as 'flag' to handle_opt_partitions() and
  handle_opt_part().  They double as indexes into opt_op_name below,
  so the two lists must be kept in sync (index 0 is unused).
*/
#define OPTIMIZE_PARTS 1
#define ANALYZE_PARTS 2
#define CHECK_PARTS   3
#define REPAIR_PARTS 4
#define ASSIGN_KEYCACHE_PARTS 5
#define PRELOAD_KEYS_PARTS 6

/* Human-readable operation names, indexed by the *_PARTS codes above. */
static const char *opt_op_name[]= {NULL,
                                   "optimize", "analyze", "check", "repair",
                                   "assign_to_keycache", "preload_keys"};
1111
1112 /*
1113 Optimize table
1114
1115 SYNOPSIS
1116 optimize()
1117 thd Thread object
1118 check_opt Check/analyze/repair/optimize options
1119
1120 RETURN VALUES
1121 >0 Error
1122 0 Success
1123 */
1124
optimize(THD * thd,HA_CHECK_OPT * check_opt)1125 int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt)
1126 {
1127 DBUG_ENTER("ha_partition::optimize");
1128
1129 DBUG_RETURN(handle_opt_partitions(thd, check_opt, OPTIMIZE_PARTS));
1130 }
1131
1132
1133 /*
1134 Analyze table
1135
1136 SYNOPSIS
1137 analyze()
1138 thd Thread object
1139 check_opt Check/analyze/repair/optimize options
1140
1141 RETURN VALUES
1142 >0 Error
1143 0 Success
1144 */
1145
analyze(THD * thd,HA_CHECK_OPT * check_opt)1146 int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt)
1147 {
1148 DBUG_ENTER("ha_partition::analyze");
1149
1150 DBUG_RETURN(handle_opt_partitions(thd, check_opt, ANALYZE_PARTS));
1151 }
1152
1153
1154 /*
1155 Check table
1156
1157 SYNOPSIS
1158 check()
1159 thd Thread object
1160 check_opt Check/analyze/repair/optimize options
1161
1162 RETURN VALUES
1163 >0 Error
1164 0 Success
1165 */
1166
check(THD * thd,HA_CHECK_OPT * check_opt)1167 int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt)
1168 {
1169 DBUG_ENTER("ha_partition::check");
1170
1171 DBUG_RETURN(handle_opt_partitions(thd, check_opt, CHECK_PARTS));
1172 }
1173
1174
1175 /*
1176 Repair table
1177
1178 SYNOPSIS
1179 repair()
1180 thd Thread object
1181 check_opt Check/analyze/repair/optimize options
1182
1183 RETURN VALUES
1184 >0 Error
1185 0 Success
1186 */
1187
repair(THD * thd,HA_CHECK_OPT * check_opt)1188 int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt)
1189 {
1190 DBUG_ENTER("ha_partition::repair");
1191
1192 DBUG_RETURN(handle_opt_partitions(thd, check_opt, REPAIR_PARTS));
1193 }
1194
1195 /**
1196 Assign to keycache
1197
1198 @param thd Thread object
1199 @param check_opt Check/analyze/repair/optimize options
1200
1201 @return
1202 @retval >0 Error
1203 @retval 0 Success
1204 */
1205
assign_to_keycache(THD * thd,HA_CHECK_OPT * check_opt)1206 int ha_partition::assign_to_keycache(THD *thd, HA_CHECK_OPT *check_opt)
1207 {
1208 DBUG_ENTER("ha_partition::assign_to_keycache");
1209
1210 DBUG_RETURN(handle_opt_partitions(thd, check_opt, ASSIGN_KEYCACHE_PARTS));
1211 }
1212
1213
1214 /**
1215 Preload to keycache
1216
1217 @param thd Thread object
1218 @param check_opt Check/analyze/repair/optimize options
1219
1220 @return
1221 @retval >0 Error
1222 @retval 0 Success
1223 */
1224
preload_keys(THD * thd,HA_CHECK_OPT * check_opt)1225 int ha_partition::preload_keys(THD *thd, HA_CHECK_OPT *check_opt)
1226 {
1227 DBUG_ENTER("ha_partition::preload_keys");
1228
1229 DBUG_RETURN(handle_opt_partitions(thd, check_opt, PRELOAD_KEYS_PARTS));
1230 }
1231
1232
1233 /*
1234 Handle optimize/analyze/check/repair of one partition
1235
1236 SYNOPSIS
1237 handle_opt_part()
1238 thd Thread object
1239 check_opt Options
1240 file Handler object of partition
1241 flag Optimize/Analyze/Check/Repair flag
1242
1243 RETURN VALUE
1244 >0 Failure
1245 0 Success
1246 */
1247
handle_opt_part(THD * thd,HA_CHECK_OPT * check_opt,uint part_id,uint flag)1248 int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt,
1249 uint part_id, uint flag)
1250 {
1251 int error;
1252 handler *file= m_file[part_id];
1253 DBUG_ENTER("handle_opt_part");
1254 DBUG_PRINT("enter", ("flag = %u", flag));
1255
1256 if (flag == OPTIMIZE_PARTS)
1257 error= file->ha_optimize(thd, check_opt);
1258 else if (flag == ANALYZE_PARTS)
1259 error= file->ha_analyze(thd, check_opt);
1260 else if (flag == CHECK_PARTS)
1261 {
1262 error= file->ha_check(thd, check_opt);
1263 if (!error ||
1264 error == HA_ADMIN_ALREADY_DONE ||
1265 error == HA_ADMIN_NOT_IMPLEMENTED)
1266 {
1267 if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1268 error= check_misplaced_rows(part_id, false);
1269 }
1270 }
1271 else if (flag == REPAIR_PARTS)
1272 {
1273 error= file->ha_repair(thd, check_opt);
1274 if (!error ||
1275 error == HA_ADMIN_ALREADY_DONE ||
1276 error == HA_ADMIN_NOT_IMPLEMENTED)
1277 {
1278 if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1279 error= check_misplaced_rows(part_id, true);
1280 }
1281 }
1282 else if (flag == ASSIGN_KEYCACHE_PARTS)
1283 error= file->assign_to_keycache(thd, check_opt);
1284 else if (flag == PRELOAD_KEYS_PARTS)
1285 error= file->preload_keys(thd, check_opt);
1286 else
1287 {
1288 DBUG_ASSERT(FALSE);
1289 error= 1;
1290 }
1291 if (error == HA_ADMIN_ALREADY_DONE)
1292 error= 0;
1293 DBUG_RETURN(error);
1294 }
1295
1296
1297 /*
1298 print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE
1299 (modelled after mi_check_print_msg)
1300 TODO: move this into the handler, or rewrite mysql_admin_table.
1301 */
1302 static bool print_admin_msg(THD* thd, uint len,
1303 const char* msg_type,
1304 const char* db_name, const char* table_name,
1305 const char* op_name, const char *fmt, ...)
1306 ATTRIBUTE_FORMAT(printf, 7, 8);
print_admin_msg(THD * thd,uint len,const char * msg_type,const char * db_name,const char * table_name,const char * op_name,const char * fmt,...)1307 static bool print_admin_msg(THD* thd, uint len,
1308 const char* msg_type,
1309 const char* db_name, const char* table_name,
1310 const char* op_name, const char *fmt, ...)
1311 {
1312 va_list args;
1313 Protocol *protocol= thd->protocol;
1314 uint length;
1315 uint msg_length;
1316 char name[NAME_LEN*2+2];
1317 char *msgbuf;
1318 bool error= true;
1319
1320 if (!(msgbuf= (char*) my_malloc(len, MYF(0))))
1321 return true;
1322 va_start(args, fmt);
1323 msg_length= my_vsnprintf(msgbuf, len, fmt, args);
1324 va_end(args);
1325 if (msg_length >= (len - 1))
1326 goto err;
1327 msgbuf[len - 1] = 0; // healthy paranoia
1328
1329
1330 if (!thd->vio_ok())
1331 {
1332 sql_print_error("%s", msgbuf);
1333 goto err;
1334 }
1335
1336 length=(uint) (strxmov(name, db_name, ".", table_name,NullS) - name);
1337 /*
1338 TODO: switch from protocol to push_warning here. The main reason we didn't
1339 it yet is parallel repair. Due to following trace:
1340 mi_check_print_msg/push_warning/sql_alloc/my_pthread_getspecific_ptr.
1341
1342 Also we likely need to lock mutex here (in both cases with protocol and
1343 push_warning).
1344 */
1345 DBUG_PRINT("info",("print_admin_msg: %s, %s, %s, %s", name, op_name,
1346 msg_type, msgbuf));
1347 protocol->prepare_for_resend();
1348 protocol->store(name, length, system_charset_info);
1349 protocol->store(op_name, system_charset_info);
1350 protocol->store(msg_type, system_charset_info);
1351 protocol->store(msgbuf, msg_length, system_charset_info);
1352 if (protocol->write())
1353 {
1354 sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
1355 msgbuf);
1356 goto err;
1357 }
1358 error= false;
1359 err:
1360 my_free(msgbuf);
1361 return error;
1362 }
1363
1364
1365 /*
1366 Handle optimize/analyze/check/repair of partitions
1367
1368 SYNOPSIS
1369 handle_opt_partitions()
1370 thd Thread object
1371 check_opt Options
1372 flag Optimize/Analyze/Check/Repair flag
1373
1374 RETURN VALUE
1375 >0 Failure
1376 0 Success
1377 */
1378
handle_opt_partitions(THD * thd,HA_CHECK_OPT * check_opt,uint flag)1379 int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
1380 uint flag)
1381 {
1382 List_iterator<partition_element> part_it(m_part_info->partitions);
1383 uint num_parts= m_part_info->num_parts;
1384 uint num_subparts= m_part_info->num_subparts;
1385 uint i= 0;
1386 int error;
1387 DBUG_ENTER("ha_partition::handle_opt_partitions");
1388 DBUG_PRINT("enter", ("flag= %u", flag));
1389
1390 do
1391 {
1392 partition_element *part_elem= part_it++;
1393 /*
1394 when ALTER TABLE <CMD> PARTITION ...
1395 it should only do named partitions, otherwise all partitions
1396 */
1397 if (!(thd->lex->alter_info.flags & Alter_info::ALTER_ADMIN_PARTITION) ||
1398 part_elem->part_state == PART_ADMIN)
1399 {
1400 if (m_is_sub_partitioned)
1401 {
1402 List_iterator<partition_element> subpart_it(part_elem->subpartitions);
1403 partition_element *sub_elem;
1404 uint j= 0, part;
1405 do
1406 {
1407 sub_elem= subpart_it++;
1408 part= i * num_subparts + j;
1409 DBUG_PRINT("info", ("Optimize subpartition %u (%s)",
1410 part, sub_elem->partition_name));
1411 if ((error= handle_opt_part(thd, check_opt, part, flag)))
1412 {
1413 /* print a line which partition the error belongs to */
1414 if (error != HA_ADMIN_NOT_IMPLEMENTED &&
1415 error != HA_ADMIN_ALREADY_DONE &&
1416 error != HA_ADMIN_TRY_ALTER)
1417 {
1418 print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
1419 table_share->db.str, table->alias,
1420 opt_op_name[flag],
1421 "Subpartition %s returned error",
1422 sub_elem->partition_name);
1423 }
1424 /* reset part_state for the remaining partitions */
1425 do
1426 {
1427 if (part_elem->part_state == PART_ADMIN)
1428 part_elem->part_state= PART_NORMAL;
1429 } while ((part_elem= part_it++));
1430 DBUG_RETURN(error);
1431 }
1432 } while (++j < num_subparts);
1433 }
1434 else
1435 {
1436 DBUG_PRINT("info", ("Optimize partition %u (%s)", i,
1437 part_elem->partition_name));
1438 if ((error= handle_opt_part(thd, check_opt, i, flag)))
1439 {
1440 /* print a line which partition the error belongs to */
1441 if (error != HA_ADMIN_NOT_IMPLEMENTED &&
1442 error != HA_ADMIN_ALREADY_DONE &&
1443 error != HA_ADMIN_TRY_ALTER)
1444 {
1445 print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
1446 table_share->db.str, table->alias,
1447 opt_op_name[flag], "Partition %s returned error",
1448 part_elem->partition_name);
1449 }
1450 /* reset part_state for the remaining partitions */
1451 do
1452 {
1453 if (part_elem->part_state == PART_ADMIN)
1454 part_elem->part_state= PART_NORMAL;
1455 } while ((part_elem= part_it++));
1456 DBUG_RETURN(error);
1457 }
1458 }
1459 part_elem->part_state= PART_NORMAL;
1460 }
1461 } while (++i < num_parts);
1462 DBUG_RETURN(FALSE);
1463 }
1464
1465
1466 /**
1467 @brief Check and repair the table if neccesary
1468
1469 @param thd Thread object
1470
1471 @retval TRUE Error/Not supported
1472 @retval FALSE Success
1473
1474 @note Called if open_table_from_share fails and ::is_crashed().
1475 */
1476
check_and_repair(THD * thd)1477 bool ha_partition::check_and_repair(THD *thd)
1478 {
1479 handler **file= m_file;
1480 DBUG_ENTER("ha_partition::check_and_repair");
1481
1482 do
1483 {
1484 if ((*file)->ha_check_and_repair(thd))
1485 DBUG_RETURN(TRUE);
1486 } while (*(++file));
1487 DBUG_RETURN(FALSE);
1488 }
1489
1490
1491 /**
1492 @breif Check if the table can be automatically repaired
1493
1494 @retval TRUE Can be auto repaired
1495 @retval FALSE Cannot be auto repaired
1496 */
1497
auto_repair() const1498 bool ha_partition::auto_repair() const
1499 {
1500 DBUG_ENTER("ha_partition::auto_repair");
1501
1502 /*
1503 As long as we only support one storage engine per table,
1504 we can use the first partition for this function.
1505 */
1506 DBUG_RETURN(m_file[0]->auto_repair());
1507 }
1508
1509
1510 /**
1511 @breif Check if the table is crashed
1512
1513 @retval TRUE Crashed
1514 @retval FALSE Not crashed
1515 */
1516
is_crashed() const1517 bool ha_partition::is_crashed() const
1518 {
1519 handler **file= m_file;
1520 DBUG_ENTER("ha_partition::is_crashed");
1521
1522 do
1523 {
1524 if ((*file)->is_crashed())
1525 DBUG_RETURN(TRUE);
1526 } while (*(++file));
1527 DBUG_RETURN(FALSE);
1528 }
1529
1530
1531 /*
1532 Prepare by creating a new partition
1533
1534 SYNOPSIS
1535 prepare_new_partition()
1536 table Table object
1537 create_info Create info from CREATE TABLE
1538 file Handler object of new partition
1539 part_name partition name
1540
1541 RETURN VALUE
1542 >0 Error
1543 0 Success
1544 */
1545
prepare_new_partition(TABLE * tbl,HA_CREATE_INFO * create_info,handler * file,const char * part_name,partition_element * p_elem,uint disable_non_uniq_indexes)1546 int ha_partition::prepare_new_partition(TABLE *tbl,
1547 HA_CREATE_INFO *create_info,
1548 handler *file, const char *part_name,
1549 partition_element *p_elem,
1550 uint disable_non_uniq_indexes)
1551 {
1552 int error;
1553 DBUG_ENTER("prepare_new_partition");
1554
1555 /*
1556 This call to set_up_table_before_create() is done for an alter table.
1557 So this may be the second time around for this partition_element,
1558 depending on how many partitions and subpartitions there were before,
1559 and how many there are now.
1560 The first time, on the CREATE, data_file_name and index_file_name
1561 came from the parser. They did not have the file name attached to
1562 the end. But if this partition is less than the total number of
1563 previous partitions, it's data_file_name has the filename attached.
1564 So we need to take the partition filename off if it exists.
1565 That file name may be different from part_name, which will be
1566 attached in append_file_to_dir().
1567 */
1568 truncate_partition_filename(p_elem->data_file_name);
1569 truncate_partition_filename(p_elem->index_file_name);
1570
1571 if ((error= set_up_table_before_create(tbl, part_name, create_info, p_elem)))
1572 goto error_create;
1573
1574 if ((error= file->ha_create(part_name, tbl, create_info)))
1575 {
1576 /*
1577 Added for safety, InnoDB reports HA_ERR_FOUND_DUPP_KEY
1578 if the table/partition already exists.
1579 If we return that error code, then print_error would try to
1580 get_dup_key on a non-existing partition.
1581 So return a more reasonable error code.
1582 */
1583 if (error == HA_ERR_FOUND_DUPP_KEY)
1584 error= HA_ERR_TABLE_EXIST;
1585 goto error_create;
1586 }
1587 DBUG_PRINT("info", ("partition %s created", part_name));
1588 if ((error= file->ha_open(tbl, part_name, m_mode,
1589 m_open_test_lock | HA_OPEN_NO_PSI_CALL)))
1590 goto error_open;
1591 DBUG_PRINT("info", ("partition %s opened", part_name));
1592
1593 /*
1594 Note: if you plan to add another call that may return failure,
1595 better to do it before external_lock() as cleanup_new_partition()
1596 assumes that external_lock() is last call that may fail here.
1597 Otherwise see description for cleanup_new_partition().
1598 */
1599 if ((error= file->ha_external_lock(ha_thd(), F_WRLCK)))
1600 goto error_external_lock;
1601 DBUG_PRINT("info", ("partition %s external locked", part_name));
1602
1603 if (disable_non_uniq_indexes)
1604 file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
1605
1606 DBUG_RETURN(0);
1607 error_external_lock:
1608 (void) file->ha_close();
1609 error_open:
1610 (void) file->ha_delete_table(part_name);
1611 error_create:
1612 DBUG_RETURN(error);
1613 }
1614
1615
1616 /*
1617 Cleanup by removing all created partitions after error
1618
1619 SYNOPSIS
1620 cleanup_new_partition()
1621 part_count Number of partitions to remove
1622
1623 RETURN VALUE
1624 NONE
1625
1626 DESCRIPTION
1627 This function is called immediately after prepare_new_partition() in
1628 case the latter fails.
1629
1630 In prepare_new_partition() last call that may return failure is
1631 external_lock(). That means if prepare_new_partition() fails,
1632 partition does not have external lock. Thus no need to call
1633 external_lock(F_UNLCK) here.
1634
1635 TODO:
1636 We must ensure that in the case that we get an error during the process
1637 that we call external_lock with F_UNLCK, close the table and delete the
1638 table in the case where we have been successful with prepare_handler.
1639 We solve this by keeping an array of successful calls to prepare_handler
1640 which can then be used to undo the call.
1641 */
1642
cleanup_new_partition(uint part_count)1643 void ha_partition::cleanup_new_partition(uint part_count)
1644 {
1645 DBUG_ENTER("ha_partition::cleanup_new_partition");
1646
1647 if (m_added_file)
1648 {
1649 THD *thd= ha_thd();
1650 handler **file= m_added_file;
1651 while ((part_count > 0) && (*file))
1652 {
1653 (*file)->ha_external_lock(thd, F_UNLCK);
1654 (*file)->ha_close();
1655
1656 /* Leave the (*file)->ha_delete_table(part_name) to the ddl-log */
1657
1658 file++;
1659 part_count--;
1660 }
1661 m_added_file= NULL;
1662 }
1663 DBUG_VOID_RETURN;
1664 }
1665
1666 /*
1667 Implement the partition changes defined by ALTER TABLE of partitions
1668
1669 SYNOPSIS
1670 change_partitions()
1671 create_info HA_CREATE_INFO object describing all
1672 fields and indexes in table
1673 path Complete path of db and table name
1674 out: copied Output parameter where number of copied
1675 records are added
1676 out: deleted Output parameter where number of deleted
1677 records are added
1678 pack_frm_data Reference to packed frm file
1679 pack_frm_len Length of packed frm file
1680
1681 RETURN VALUE
1682 >0 Failure
1683 0 Success
1684
1685 DESCRIPTION
1686 Add and copy if needed a number of partitions, during this operation
1687 no other operation is ongoing in the server. This is used by
1688 ADD PARTITION all types as well as by REORGANIZE PARTITION. For
1689 one-phased implementations it is used also by DROP and COALESCE
1690 PARTITIONs.
1691 One-phased implementation needs the new frm file, other handlers will
1692 get zero length and a NULL reference here.
1693 */
1694
change_partitions(HA_CREATE_INFO * create_info,const char * path,ulonglong * const copied,ulonglong * const deleted,const uchar * pack_frm_data MY_ATTRIBUTE ((unused)),size_t pack_frm_len MY_ATTRIBUTE ((unused)))1695 int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
1696 const char *path,
1697 ulonglong * const copied,
1698 ulonglong * const deleted,
1699 const uchar *pack_frm_data
1700 MY_ATTRIBUTE((unused)),
1701 size_t pack_frm_len
1702 MY_ATTRIBUTE((unused)))
1703 {
1704 List_iterator<partition_element> part_it(m_part_info->partitions);
1705 List_iterator <partition_element> t_it(m_part_info->temp_partitions);
1706 char part_name_buff[FN_REFLEN + 1];
1707 uint num_parts= m_part_info->partitions.elements;
1708 uint num_subparts= m_part_info->num_subparts;
1709 uint i= 0;
1710 uint num_remain_partitions, part_count, orig_count;
1711 handler **new_file_array;
1712 int error= 1;
1713 bool first;
1714 uint temp_partitions= m_part_info->temp_partitions.elements;
1715 THD *thd= ha_thd();
1716 DBUG_ENTER("ha_partition::change_partitions");
1717
1718 /*
1719 Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
1720 We use m_file[0] as long as all partitions have the same storage engine.
1721 */
1722 DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
1723 part_name_buff)));
1724 m_reorged_parts= 0;
1725 if (!m_part_info->is_sub_partitioned())
1726 num_subparts= 1;
1727
1728 /*
1729 Step 1:
1730 Calculate number of reorganised partitions and allocate space for
1731 their handler references.
1732 */
1733 if (temp_partitions)
1734 {
1735 m_reorged_parts= temp_partitions * num_subparts;
1736 }
1737 else
1738 {
1739 do
1740 {
1741 partition_element *part_elem= part_it++;
1742 if (part_elem->part_state == PART_CHANGED ||
1743 part_elem->part_state == PART_REORGED_DROPPED)
1744 {
1745 m_reorged_parts+= num_subparts;
1746 }
1747 } while (++i < num_parts);
1748 }
1749 if (m_reorged_parts &&
1750 !(m_reorged_file= (handler**)sql_calloc(sizeof(handler*)*
1751 (m_reorged_parts + 1))))
1752 {
1753 mem_alloc_error(sizeof(handler*)*(m_reorged_parts+1));
1754 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1755 }
1756
1757 /*
1758 Step 2:
1759 Calculate number of partitions after change and allocate space for
1760 their handler references.
1761 */
1762 num_remain_partitions= 0;
1763 if (temp_partitions)
1764 {
1765 num_remain_partitions= num_parts * num_subparts;
1766 }
1767 else
1768 {
1769 part_it.rewind();
1770 i= 0;
1771 do
1772 {
1773 partition_element *part_elem= part_it++;
1774 if (part_elem->part_state == PART_NORMAL ||
1775 part_elem->part_state == PART_TO_BE_ADDED ||
1776 part_elem->part_state == PART_CHANGED)
1777 {
1778 num_remain_partitions+= num_subparts;
1779 }
1780 } while (++i < num_parts);
1781 }
1782 if (!(new_file_array= (handler**)sql_calloc(sizeof(handler*)*
1783 (2*(num_remain_partitions + 1)))))
1784 {
1785 mem_alloc_error(sizeof(handler*)*2*(num_remain_partitions+1));
1786 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1787 }
1788 m_added_file= &new_file_array[num_remain_partitions + 1];
1789
1790 /*
1791 Step 3:
1792 Fill m_reorged_file with handler references and NULL at the end
1793 */
1794 if (m_reorged_parts)
1795 {
1796 i= 0;
1797 part_count= 0;
1798 first= TRUE;
1799 part_it.rewind();
1800 do
1801 {
1802 partition_element *part_elem= part_it++;
1803 if (part_elem->part_state == PART_CHANGED ||
1804 part_elem->part_state == PART_REORGED_DROPPED)
1805 {
1806 memcpy((void*)&m_reorged_file[part_count],
1807 (void*)&m_file[i*num_subparts],
1808 sizeof(handler*)*num_subparts);
1809 part_count+= num_subparts;
1810 }
1811 else if (first && temp_partitions &&
1812 part_elem->part_state == PART_TO_BE_ADDED)
1813 {
1814 /*
1815 When doing an ALTER TABLE REORGANIZE PARTITION a number of
1816 partitions is to be reorganised into a set of new partitions.
1817 The reorganised partitions are in this case in the temp_partitions
1818 list. We copy all of them in one batch and thus we only do this
1819 until we find the first partition with state PART_TO_BE_ADDED
1820 since this is where the new partitions go in and where the old
1821 ones used to be.
1822 */
1823 first= FALSE;
1824 DBUG_ASSERT(((i*num_subparts) + m_reorged_parts) <= m_file_tot_parts);
1825 memcpy((void*)m_reorged_file, &m_file[i*num_subparts],
1826 sizeof(handler*)*m_reorged_parts);
1827 }
1828 } while (++i < num_parts);
1829 }
1830
1831 /*
1832 Step 4:
1833 Fill new_array_file with handler references. Create the handlers if
1834 needed.
1835 */
1836 i= 0;
1837 part_count= 0;
1838 orig_count= 0;
1839 first= TRUE;
1840 part_it.rewind();
1841 do
1842 {
1843 partition_element *part_elem= part_it++;
1844 if (part_elem->part_state == PART_NORMAL)
1845 {
1846 DBUG_ASSERT(orig_count + num_subparts <= m_file_tot_parts);
1847 memcpy((void*)&new_file_array[part_count], (void*)&m_file[orig_count],
1848 sizeof(handler*)*num_subparts);
1849 part_count+= num_subparts;
1850 orig_count+= num_subparts;
1851 }
1852 else if (part_elem->part_state == PART_CHANGED ||
1853 part_elem->part_state == PART_TO_BE_ADDED)
1854 {
1855 uint j= 0;
1856 Parts_share_refs *p_share_refs;
1857 /*
1858 The Handler_shares for each partition's handler can be allocated
1859 within this handler, since there will not be any more instances of the
1860 new partitions, until the table is reopened after the ALTER succeeded.
1861 */
1862 p_share_refs= new Parts_share_refs;
1863 if (!p_share_refs)
1864 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1865 if (p_share_refs->init(num_subparts))
1866 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1867 if (m_new_partitions_share_refs.push_back(p_share_refs))
1868 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1869 do
1870 {
1871 handler **new_file= &new_file_array[part_count++];
1872 if (!(*new_file=
1873 get_new_handler(table->s,
1874 thd->mem_root,
1875 part_elem->engine_type)))
1876 {
1877 mem_alloc_error(sizeof(handler));
1878 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1879 }
1880 if ((*new_file)->set_ha_share_ref(&p_share_refs->ha_shares[j]))
1881 {
1882 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1883 }
1884 } while (++j < num_subparts);
1885 if (part_elem->part_state == PART_CHANGED)
1886 orig_count+= num_subparts;
1887 else if (temp_partitions && first)
1888 {
1889 orig_count+= (num_subparts * temp_partitions);
1890 first= FALSE;
1891 }
1892 }
1893 } while (++i < num_parts);
1894 first= FALSE;
1895 /*
1896 Step 5:
1897 Create the new partitions and also open, lock and call external_lock
1898 on them to prepare them for copy phase and also for later close
1899 calls
1900 */
1901
1902 /*
1903 Before creating new partitions check whether indexes are disabled
1904 in the partitions.
1905 */
1906
1907 uint disable_non_uniq_indexes = indexes_are_disabled();
1908
1909 i= 0;
1910 part_count= 0;
1911 part_it.rewind();
1912 do
1913 {
1914 partition_element *part_elem= part_it++;
1915 if (part_elem->part_state == PART_TO_BE_ADDED ||
1916 part_elem->part_state == PART_CHANGED)
1917 {
1918 /*
1919 A new partition needs to be created PART_TO_BE_ADDED means an
1920 entirely new partition and PART_CHANGED means a changed partition
1921 that will still exist with either more or less data in it.
1922 */
1923 uint name_variant= NORMAL_PART_NAME;
1924 if (part_elem->part_state == PART_CHANGED ||
1925 (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions))
1926 name_variant= TEMP_PART_NAME;
1927 if (m_part_info->is_sub_partitioned())
1928 {
1929 List_iterator<partition_element> sub_it(part_elem->subpartitions);
1930 uint j= 0, part;
1931 do
1932 {
1933 partition_element *sub_elem= sub_it++;
1934 if ((error= create_subpartition_name(part_name_buff, path,
1935 part_elem->partition_name,
1936 sub_elem->partition_name,
1937 name_variant)))
1938 {
1939 cleanup_new_partition(part_count);
1940 DBUG_RETURN(error);
1941 }
1942
1943 part= i * num_subparts + j;
1944 DBUG_PRINT("info", ("Add subpartition %s", part_name_buff));
1945 if ((error= prepare_new_partition(table, create_info,
1946 new_file_array[part],
1947 (const char *)part_name_buff,
1948 sub_elem,
1949 disable_non_uniq_indexes)))
1950 {
1951 cleanup_new_partition(part_count);
1952 DBUG_RETURN(error);
1953 }
1954
1955 m_added_file[part_count++]= new_file_array[part];
1956 } while (++j < num_subparts);
1957 }
1958 else
1959 {
1960 if ((error= create_partition_name(part_name_buff, path,
1961 part_elem->partition_name,
1962 name_variant, TRUE)))
1963 {
1964 cleanup_new_partition(part_count);
1965 DBUG_RETURN(error);
1966 }
1967
1968 DBUG_PRINT("info", ("Add partition %s", part_name_buff));
1969 if ((error= prepare_new_partition(table, create_info,
1970 new_file_array[i],
1971 (const char *)part_name_buff,
1972 part_elem,
1973 disable_non_uniq_indexes)))
1974 {
1975 cleanup_new_partition(part_count);
1976 DBUG_RETURN(error);
1977 }
1978
1979 m_added_file[part_count++]= new_file_array[i];
1980 }
1981 }
1982 } while (++i < num_parts);
1983
1984 /*
1985 Step 6:
1986 State update to prepare for next write of the frm file.
1987 */
1988 i= 0;
1989 part_it.rewind();
1990 do
1991 {
1992 partition_element *part_elem= part_it++;
1993 if (part_elem->part_state == PART_TO_BE_ADDED)
1994 part_elem->part_state= PART_IS_ADDED;
1995 else if (part_elem->part_state == PART_CHANGED)
1996 part_elem->part_state= PART_IS_CHANGED;
1997 else if (part_elem->part_state == PART_REORGED_DROPPED)
1998 part_elem->part_state= PART_TO_BE_DROPPED;
1999 } while (++i < num_parts);
2000 for (i= 0; i < temp_partitions; i++)
2001 {
2002 partition_element *part_elem= t_it++;
2003 DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED);
2004 part_elem->part_state= PART_TO_BE_DROPPED;
2005 }
2006 m_new_file= new_file_array;
2007 if ((error= copy_partitions(copied, deleted)))
2008 {
2009 /*
2010 Close and unlock the new temporary partitions.
2011 They will later be deleted through the ddl-log.
2012 */
2013 cleanup_new_partition(part_count);
2014 }
2015 DBUG_RETURN(error);
2016 }
2017
2018
2019 /*
2020 Copy partitions as part of ALTER TABLE of partitions
2021
2022 SYNOPSIS
2023 copy_partitions()
2024 out:copied Number of records copied
2025 out:deleted Number of records deleted
2026
2027 RETURN VALUE
2028 >0 Error code
2029 0 Success
2030
2031 DESCRIPTION
2032 change_partitions has done all the preparations, now it is time to
2033 actually copy the data from the reorganised partitions to the new
2034 partitions.
2035 */
2036
copy_partitions(ulonglong * const copied,ulonglong * const deleted)2037 int ha_partition::copy_partitions(ulonglong * const copied,
2038 ulonglong * const deleted)
2039 {
2040 uint reorg_part= 0;
2041 int result= 0;
2042 longlong func_value;
2043 DBUG_ENTER("ha_partition::copy_partitions");
2044
2045 if (m_part_info->linear_hash_ind)
2046 {
2047 if (m_part_info->part_type == HASH_PARTITION)
2048 set_linear_hash_mask(m_part_info, m_part_info->num_parts);
2049 else
2050 set_linear_hash_mask(m_part_info, m_part_info->num_subparts);
2051 }
2052
2053 while (reorg_part < m_reorged_parts)
2054 {
2055 handler *file= m_reorged_file[reorg_part];
2056 uint32 new_part;
2057
2058 late_extra_cache(reorg_part);
2059 if ((result= file->ha_rnd_init(1)))
2060 goto init_error;
2061 while (TRUE)
2062 {
2063 if ((result= file->ha_rnd_next(m_rec0)))
2064 {
2065 if (result == HA_ERR_RECORD_DELETED)
2066 continue; //Probably MyISAM
2067 if (result != HA_ERR_END_OF_FILE)
2068 goto error;
2069 /*
2070 End-of-file reached, break out to continue with next partition or
2071 end the copy process.
2072 */
2073 break;
2074 }
2075 /* Found record to insert into new handler */
2076 if (m_part_info->get_partition_id(m_part_info, &new_part,
2077 &func_value))
2078 {
2079 /*
2080 This record is in the original table but will not be in the new
2081 table since it doesn't fit into any partition any longer due to
2082 changed partitioning ranges or list values.
2083 */
2084 (*deleted)++;
2085 }
2086 else
2087 {
2088 THD *thd= ha_thd();
2089 /* Copy record to new handler */
2090 (*copied)++;
2091 tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
2092 result= m_new_file[new_part]->ha_write_row(m_rec0);
2093 reenable_binlog(thd);
2094 if (result)
2095 goto error;
2096 }
2097 }
2098 late_extra_no_cache(reorg_part);
2099 file->ha_rnd_end();
2100 reorg_part++;
2101 }
2102 DBUG_RETURN(FALSE);
2103 error:
2104 m_reorged_file[reorg_part]->ha_rnd_end();
2105 init_error:
2106 DBUG_RETURN(result);
2107 }
2108
2109 /*
2110 Update create info as part of ALTER TABLE
2111
2112 SYNOPSIS
2113 update_create_info()
2114 create_info Create info from ALTER TABLE
2115
2116 RETURN VALUE
2117 NONE
2118
2119 DESCRIPTION
    Forward this handler call to the storage engine for each
2121 partition handler. The data_file_name for each partition may
2122 need to be reset if the tablespace was moved. Use a dummy
2123 HA_CREATE_INFO structure and transfer necessary data.
2124 */
2125
/*
  Update create info as part of ALTER TABLE: fill in the table-level
  auto_increment value and collect per-partition DATA/INDEX DIRECTORY
  settings from the underlying handlers.

  @param[in,out] create_info  Create info from ALTER TABLE
*/
void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_partition::update_create_info");

  /*
    Fix for bug#38751, some engines needs info-calls in ALTER.
    Archive need this since it flushes in ::info.
    HA_STATUS_AUTO is optimized so it will not always be forwarded
    to all partitions, but HA_STATUS_VARIABLE will.
  */
  info(HA_STATUS_VARIABLE);

  info(HA_STATUS_AUTO);

  /* Only expose our auto_increment when the user did not specify one. */
  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
    create_info->auto_increment_value= stats.auto_increment_value;

  /*
    DATA DIRECTORY and INDEX DIRECTORY are never applied to the whole
    partitioned table, only its parts.
  */
  /*
    data_file_name == (const char*) -1 is used here as a marker that the
    call originates from ALTER TABLE (set by the caller).
  */
  my_bool from_alter = (create_info->data_file_name == (const char*) -1);
  create_info->data_file_name= create_info->index_file_name = NULL;

  /*
    We do not need to update the individual partition DATA DIRECTORY settings
    since they can be changed by ALTER TABLE ... REORGANIZE PARTITIONS.
  */
  if (from_alter)
    DBUG_VOID_RETURN;

  /*
    send Handler::update_create_info() to the storage engine for each
    partition that currently has a handler object. Using a dummy
    HA_CREATE_INFO structure to collect DATA and INDEX DIRECTORYs.
  */

  List_iterator<partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem, *sub_elem;
  uint num_subparts= m_part_info->num_subparts;
  uint num_parts = num_subparts ? m_file_tot_parts / num_subparts
                                : m_file_tot_parts;
  HA_CREATE_INFO dummy_info;
  memset(&dummy_info, 0, sizeof(dummy_info));

  /*
    Since update_create_info() can be called from mysql_prepare_alter_table()
    when not all handlers are set up, we look for that condition first.
    If all handlers are not available, do not call update_create_info for any.
  */
  uint i, j, part;
  /* First pass: verify that every (sub)partition has a handler object. */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (!part_elem)
      DBUG_VOID_RETURN;
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        if (!sub_elem)
          DBUG_VOID_RETURN;
        part= i * num_subparts + j;
        if (part >= m_file_tot_parts || !m_file[part])
          DBUG_VOID_RETURN;
      }
    }
    else
    {
      if (!m_file[i])
        DBUG_VOID_RETURN;
    }
  }
  part_it.rewind();

  /*
    Second pass: collect the DATA/INDEX DIRECTORY reported by each InnoDB
    (sub)partition handler into the corresponding partition element.
  */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    DBUG_ASSERT(part_elem);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        DBUG_ASSERT(sub_elem);
        part= i * num_subparts + j;
        DBUG_ASSERT(part < m_file_tot_parts && m_file[part]);
        if (ha_legacy_type(m_file[part]->ht) == DB_TYPE_INNODB)
        {
          dummy_info.data_file_name= dummy_info.index_file_name = NULL;
          m_file[part]->update_create_info(&dummy_info);

          /* Overwrite (possibly with NULL) whenever either side has a value. */
          if (dummy_info.data_file_name || sub_elem->data_file_name)
          {
            sub_elem->data_file_name = (char*) dummy_info.data_file_name;
          }
          if (dummy_info.index_file_name || sub_elem->index_file_name)
          {
            sub_elem->index_file_name = (char*) dummy_info.index_file_name;
          }
        }
      }
    }
    else
    {
      DBUG_ASSERT(m_file[i]);
      if (ha_legacy_type(m_file[i]->ht) == DB_TYPE_INNODB)
      {
        dummy_info.data_file_name= dummy_info.index_file_name= NULL;
        m_file[i]->update_create_info(&dummy_info);
        if (dummy_info.data_file_name || part_elem->data_file_name)
        {
          part_elem->data_file_name = (char*) dummy_info.data_file_name;
        }
        if (dummy_info.index_file_name || part_elem->index_file_name)
        {
          part_elem->index_file_name = (char*) dummy_info.index_file_name;
        }
      }
    }
  }
  DBUG_VOID_RETURN;
}
2252
2253
2254 /**
2255 Change the internal TABLE_SHARE pointer
2256
2257 @param table_arg TABLE object
2258 @param share New share to use
2259
2260 @note Is used in error handling in ha_delete_table.
2261 All handlers should exist (lock_partitions should not be used)
2262 */
2263
change_table_ptr(TABLE * table_arg,TABLE_SHARE * share)2264 void ha_partition::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2265 {
2266 handler **file_array;
2267 table= table_arg;
2268 table_share= share;
2269 /*
2270 m_file can be NULL when using an old cached table in DROP TABLE, when the
2271 table just has REMOVED PARTITIONING, see Bug#42438
2272 */
2273 if (m_file)
2274 {
2275 file_array= m_file;
2276 DBUG_ASSERT(*file_array);
2277 do
2278 {
2279 (*file_array)->change_table_ptr(table_arg, share);
2280 } while (*(++file_array));
2281 }
2282
2283 if (m_added_file && m_added_file[0])
2284 {
2285 /* if in middle of a drop/rename etc */
2286 file_array= m_added_file;
2287 do
2288 {
2289 (*file_array)->change_table_ptr(table_arg, share);
2290 } while (*(++file_array));
2291 }
2292 }
2293
2294 /*
2295 Change comments specific to handler
2296
2297 SYNOPSIS
2298 update_table_comment()
2299 comment Original comment
2300
2301 RETURN VALUE
2302 new comment
2303
2304 DESCRIPTION
2305 No comment changes so far
2306 */
2307
update_table_comment(const char * comment)2308 char *ha_partition::update_table_comment(const char *comment)
2309 {
2310 return (char*) comment; /* Nothing to change */
2311 }
2312
2313
2314 /**
2315 Handle delete and rename table
2316
2317 @param from Full path of old table
2318 @param to Full path of new table
2319
2320 @return Operation status
2321 @retval >0 Error
2322 @retval 0 Success
2323
2324 @note Common routine to handle delete_table and rename_table.
2325 The routine uses the partition handler file to get the
2326 names of the partition instances. Both these routines
2327 are called after creating the handler without table
2328 object and thus the file is needed to discover the
2329 names of the partitions and the underlying storage engines.
2330 */
2331
/**
  Handle delete and rename table.

  @param from  Full path of old table
  @param to    Full path of new table, or NULL for a delete

  @return Operation status
    @retval >0  Error
    @retval 0   Success

  @note Common routine to handle delete_table and rename_table.
        The routine uses the partition handler file to get the
        names of the partition instances. Both these routines
        are called after creating the handler without table
        object and thus the file is needed to discover the
        names of the partitions and the underlying storage engines.
*/
int ha_partition::del_ren_table(const char *from, const char *to)
{
  int save_error= 0;
  int error= HA_ERR_INTERNAL_ERROR;
  char from_buff[FN_REFLEN + 1], to_buff[FN_REFLEN + 1], from_lc_buff[FN_REFLEN + 1],
       to_lc_buff[FN_REFLEN + 1], buff[FN_REFLEN + 1];
  char *name_buffer_ptr;
  const char *from_path;
  const char *to_path= NULL;
  uint i;
  handler **file, **abort_file;
  DBUG_ENTER("ha_partition::del_ren_table");

  fn_format(buff,from, "", ha_par_ext, MY_APPEND_EXT);
  /* Check if the par file exists */
  if (my_access(buff,F_OK))
  {
    /*
      If the .par file does not exist, return HA_ERR_NO_SUCH_TABLE,
      This will signal to the caller that it can remove the .frm
      file.
    */
    error= HA_ERR_NO_SUCH_TABLE;
    DBUG_RETURN(error);
  }

  /* Parse the .par file to learn partition names and engines. */
  if (get_from_handler_file(from, ha_thd()->mem_root, false))
    DBUG_RETURN(error);
  DBUG_ASSERT(m_file_buffer);
  DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)"));
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  from_path= get_canonical_filename(*file, from, from_lc_buff);
  if (to != NULL)
    to_path= get_canonical_filename(*file, to, to_lc_buff);
  /* Rename or delete each partition file in turn. */
  i= 0;
  do
  {
    if ((error= create_partition_name(from_buff, from_path, name_buffer_ptr,
                                      NORMAL_PART_NAME, FALSE)))
      goto rename_error;

    if (to != NULL)
    {						// Rename branch
      if ((error= create_partition_name(to_buff, to_path, name_buffer_ptr,
                                        NORMAL_PART_NAME, FALSE)))
        goto rename_error;

      error= (*file)->ha_rename_table(from_buff, to_buff);
      if (error)
        goto rename_error;
    }
    else						// delete branch
    {
      /* Delete errors are remembered but do not abort the loop. */
      error= (*file)->ha_delete_table(from_buff);
    }
    /* Partition names are stored NULL-separated in the name buffer. */
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
    if (error)
      save_error= error;
    i++;
  } while (*(++file));

  if (to == NULL)
  {
    DBUG_EXECUTE_IF("crash_before_deleting_par_file", DBUG_SUICIDE(););

    /* Delete the .par file. If error, break.*/
    if ((error= handler::delete_table(from)))
      DBUG_RETURN(error);

    DBUG_EXECUTE_IF("crash_after_deleting_par_file", DBUG_SUICIDE(););
  }

  if (to != NULL)
  {
    if ((error= handler::rename_table(from, to)))
    {
      /* Try to revert everything, ignore errors */
      (void) handler::rename_table(to, from);
      goto rename_error;
    }
  }
  DBUG_RETURN(save_error);
rename_error:
  /*
    Undo the renames done so far: walk the handlers that already succeeded
    (m_file up to, but not including, the failing one) and rename back.
    NOTE(review): this path is also reachable from the delete branch where
    to_path is NULL — verify create_partition_name tolerates a NULL path.
  */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    /* Revert the rename, back from 'to' to the original 'from' */
    if (!create_partition_name(from_buff, from_path, name_buffer_ptr,
                               NORMAL_PART_NAME, FALSE))
      if (!create_partition_name(to_buff, to_path, name_buffer_ptr,
                                 NORMAL_PART_NAME, FALSE))
        /* Ignore error here */
        (void) (*file)->ha_rename_table(to_buff, from_buff);
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  DBUG_RETURN(error);
}
2437
2438
2439 /**
2440 Set up table share object before calling create on underlying handler
2441
2442 @param table Table object
2443 @param info Create info
2444 @param part_elem[in,out] Pointer to used partition_element, searched if NULL
2445
2446 @return status
2447 @retval TRUE Error
2448 @retval FALSE Success
2449
2450 @details
2451 Set up
2452 1) Comment on partition
2453 2) MAX_ROWS, MIN_ROWS on partition
2454 3) Index file name on partition
2455 4) Data file name on partition
2456 */
2457
/**
  Set up table share object before calling create on underlying handler.

  Copies MAX_ROWS/MIN_ROWS from the partition element into the share and
  appends the partition file name to the per-partition INDEX/DATA DIRECTORY
  paths, storing the results into the create info.

  @param tbl                        Table object
  @param partition_name_with_path   Path ending in the partition file name
  @param info                       Create info (index/data file names set)
  @param part_elem[in,out]          partition_element to read settings from

  @return Operation status
    @retval 0    Success
    @retval !=0  Error (1 if part_elem is NULL, else append_file_to_dir error)
*/
int ha_partition::set_up_table_before_create(TABLE *tbl,
                    const char *partition_name_with_path,
                    HA_CREATE_INFO *info,
                    partition_element *part_elem)
{
  int error= 0;
  const char *partition_name;
  THD *thd= ha_thd();
  DBUG_ENTER("set_up_table_before_create");

  DBUG_ASSERT(part_elem);

  if (!part_elem)
    DBUG_RETURN(1);
  tbl->s->max_rows= part_elem->part_max_rows;
  tbl->s->min_rows= part_elem->part_min_rows;
  /*
    NOTE(review): strrchr can return NULL if the path contains no
    FN_LIBCHAR; partition_name+1 below assumes callers always pass a
    path with a directory separator — confirm against callers.
  */
  partition_name= strrchr(partition_name_with_path, FN_LIBCHAR);
  if ((part_elem->index_file_name &&
      (error= append_file_to_dir(thd,
                                 (const char**)&part_elem->index_file_name,
                                 partition_name+1))) ||
      (part_elem->data_file_name &&
      (error= append_file_to_dir(thd,
                                 (const char**)&part_elem->data_file_name,
                                 partition_name+1))))
  {
    DBUG_RETURN(error);
  }
  info->index_file_name= part_elem->index_file_name;
  info->data_file_name= part_elem->data_file_name;
  DBUG_RETURN(0);
}
2490
2491
2492 /*
2493 Add two names together
2494
2495 SYNOPSIS
2496 name_add()
2497 out:dest Destination string
2498 first_name First name
2499 sec_name Second name
2500
2501 RETURN VALUE
2502 >0 Error
2503 0 Success
2504
2505 DESCRIPTION
2506 Routine used to add two names with '_' in between then. Service routine
2507 to create_handler_file
2508 Include the NULL in the count of characters since it is needed as separator
2509 between the partition names.
2510 */
2511
name_add(char * dest,const char * first_name,const char * sec_name)2512 static uint name_add(char *dest, const char *first_name, const char *sec_name)
2513 {
2514 return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1;
2515 }
2516
2517
2518 /**
2519 Create the special .par file
2520
2521 @param name Full path of table name
2522
2523 @return Operation status
2524 @retval FALSE Error code
2525 @retval TRUE Success
2526
2527 @note
2528 Method used to create handler file with names of partitions, their
2529 engine types and the number of partitions.
2530 */
2531
/**
  Create the special .par file.

  @param name  Full path of table name

  @return Operation status
    @retval TRUE   Error
    @retval FALSE  Success

  @note
    Method used to create handler file with names of partitions, their
    engine types and the number of partitions.
*/
bool ha_partition::create_handler_file(const char *name)
{
  partition_element *part_elem, *subpart_elem;
  uint i, j, part_name_len, subpart_name_len;
  uint tot_partition_words, tot_name_len, num_parts;
  uint tot_parts= 0;
  uint tot_len_words, tot_len_byte, chksum, tot_name_words;
  char *name_buffer_ptr;
  uchar *file_buffer, *engine_array;
  bool result= TRUE;
  char file_name[FN_REFLEN];
  char part_name[FN_REFLEN];
  char subpart_name[FN_REFLEN];
  File file;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("create_handler_file");

  num_parts= m_part_info->partitions.elements;
  DBUG_PRINT("info", ("table name = %s, num_parts = %u", name,
                      num_parts));
  /*
    First pass: count the partitions that will be present in the new table
    (PART_NORMAL/PART_TO_BE_ADDED/PART_CHANGED) and the total byte length
    of their NULL-separated file-system names.
  */
  tot_name_len= 0;
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    tablename_to_filename(part_elem->partition_name, part_name,
                          FN_REFLEN);
    part_name_len= strlen(part_name);
    if (!m_is_sub_partitioned)
    {
      /* +1 for the terminating NULL used as name separator. */
      tot_name_len+= part_name_len + 1;
      tot_parts++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(subpart_elem->partition_name,
                              subpart_name,
                              FN_REFLEN);
        subpart_name_len= strlen(subpart_name);
        /* +5 covers the "#SP#" infix plus the terminating NULL. */
        tot_name_len+= part_name_len + subpart_name_len + 5;
        tot_parts++;
      }
    }
  }
  /*
     File format:
     Length in words              4 byte
     Checksum                     4 byte
     Total number of partitions   4 byte
     Array of engine types        n * 4 bytes where
     n = (m_tot_parts + 3)/4
     Length of name part in bytes 4 bytes
     (Names in filename format)
     Name part                    m * 4 bytes where
     m = ((length_name_part + 3)/4)*4

     All padding bytes are zeroed
  */
  tot_partition_words= (tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  tot_name_words= (tot_name_len + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  /* 4 static words (tot words, checksum, tot partitions, name length) */
  tot_len_words= 4 + tot_partition_words + tot_name_words;
  tot_len_byte= PAR_WORD_SIZE * tot_len_words;
  /* MY_ZEROFILL guarantees padding bytes (and the checksum word) are zero. */
  if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL))))
    DBUG_RETURN(TRUE);
  engine_array= (file_buffer + PAR_ENGINES_OFFSET);
  name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE
                            + PAR_WORD_SIZE);
  /* Second pass: write one engine byte and one name per (sub)partition. */
  part_it.rewind();
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    if (!m_is_sub_partitioned)
    {
      tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN);
      name_buffer_ptr= strmov(name_buffer_ptr, part_name)+1;
      *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
      DBUG_PRINT("info", ("engine: %u", *engine_array));
      engine_array++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(part_elem->partition_name, part_name,
                              FN_REFLEN);
        tablename_to_filename(subpart_elem->partition_name, subpart_name,
                              FN_REFLEN);
        name_buffer_ptr+= name_add(name_buffer_ptr,
                                   part_name,
                                   subpart_name);
        *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type);
        DBUG_PRINT("info", ("engine: %u", *engine_array));
        engine_array++;
      }
    }
  }
  /*
    XOR-checksum over all words. The checksum word itself is still zero at
    this point, so after storing the checksum the XOR of the whole file
    becomes zero — which is what read_par_file() verifies.
  */
  chksum= 0;
  int4store(file_buffer, tot_len_words);
  int4store(file_buffer + PAR_NUM_PARTS_OFFSET, tot_parts);
  int4store(file_buffer + PAR_ENGINES_OFFSET +
            (tot_partition_words * PAR_WORD_SIZE),
            tot_name_len);
  for (i= 0; i < tot_len_words; i++)
    chksum^= uint4korr(file_buffer + PAR_WORD_SIZE * i);
  int4store(file_buffer + PAR_CHECKSUM_OFFSET, chksum);
  /*
    Add .par extension to the file name.
    Create and write and close file
    to be used at open, delete_table and rename_table
  */
  fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT);
  if ((file= mysql_file_create(key_file_partition,
                               file_name, CREATE_MODE, O_RDWR | O_TRUNC,
                               MYF(MY_WME))) >= 0)
  {
    result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte,
                             MYF(MY_WME | MY_NABP)) != 0;
    (void) mysql_file_close(file, MYF(0));
  }
  else
    result= TRUE;
  my_free(file_buffer);
  DBUG_RETURN(result);
}
2670
2671
2672 /**
2673 Clear handler variables and free some memory
2674 */
2675
clear_handler_file()2676 void ha_partition::clear_handler_file()
2677 {
2678 if (m_engine_array)
2679 {
2680 plugin_unlock_list(NULL, m_engine_array, m_tot_parts);
2681 my_free(m_engine_array);
2682 m_engine_array= NULL;
2683 }
2684 if (m_file_buffer)
2685 {
2686 my_free(m_file_buffer);
2687 m_file_buffer= NULL;
2688 }
2689 }
2690
2691
2692 /**
2693 Create underlying handler objects
2694
2695 @param mem_root Allocate memory through this
2696
2697 @return Operation status
2698 @retval TRUE Error
2699 @retval FALSE Success
2700 */
2701
create_handlers(MEM_ROOT * mem_root)2702 bool ha_partition::create_handlers(MEM_ROOT *mem_root)
2703 {
2704 uint i;
2705 uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
2706 handlerton *hton0;
2707 DBUG_ENTER("create_handlers");
2708
2709 if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
2710 DBUG_RETURN(TRUE);
2711 m_file_tot_parts= m_tot_parts;
2712 memset(m_file, 0, alloc_len);
2713 for (i= 0; i < m_tot_parts; i++)
2714 {
2715 handlerton *hton= plugin_data(m_engine_array[i], handlerton*);
2716 if (!(m_file[i]= get_new_handler(table_share, mem_root, hton)))
2717 DBUG_RETURN(TRUE);
2718 DBUG_PRINT("info", ("engine_type: %u", hton->db_type));
2719 }
2720 /* For the moment we only support partition over the same table engine */
2721 hton0= plugin_data(m_engine_array[0], handlerton*);
2722 if (hton0 == myisam_hton)
2723 {
2724 DBUG_PRINT("info", ("MyISAM"));
2725 m_myisam= TRUE;
2726 }
2727 /* INNODB may not be compiled in... */
2728 else if (ha_legacy_type(hton0) == DB_TYPE_INNODB)
2729 {
2730 DBUG_PRINT("info", ("InnoDB"));
2731 m_innodb= TRUE;
2732 }
2733 DBUG_RETURN(FALSE);
2734 }
2735
2736
2737 /*
2738 Create underlying handler objects from partition info
2739
2740 SYNOPSIS
2741 new_handlers_from_part_info()
2742 mem_root Allocate memory through this
2743
2744 RETURN VALUE
2745 TRUE Error
2746 FALSE Success
2747 */
2748
/*
  Create underlying handler objects from partition info.

  SYNOPSIS
    new_handlers_from_part_info()
    mem_root		Allocate memory through this

  RETURN VALUE
    TRUE                  Error
    FALSE                 Success
*/
bool ha_partition::new_handlers_from_part_info(MEM_ROOT *mem_root)
{
  uint i, j, part_count;
  partition_element *part_elem;
  uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("ha_partition::new_handlers_from_part_info");

  /* One extra slot keeps m_file NULL-terminated. */
  if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
  {
    mem_alloc_error(alloc_len);
    goto error_end;
  }
  m_file_tot_parts= m_tot_parts;
  memset(m_file, 0, alloc_len);
  DBUG_ASSERT(m_part_info->num_parts > 0);

  i= 0;
  part_count= 0;
  /*
    Don't know the size of the underlying storage engine, invent a number of
    bytes allocated for error message if allocation fails
  */
  do
  {
    part_elem= part_it++;
    if (m_is_sub_partitioned)
    {
      /* Subpartitioned: one handler per subpartition of every partition. */
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
	if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
                                                    part_elem->engine_type)))
          goto error;
	DBUG_PRINT("info", ("engine_type: %u",
                   (uint) ha_legacy_type(part_elem->engine_type)));
      }
    }
    else
    {
      if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
                                                  part_elem->engine_type)))
        goto error;
      DBUG_PRINT("info", ("engine_type: %u",
                 (uint) ha_legacy_type(part_elem->engine_type)));
    }
  } while (++i < m_part_info->num_parts);
  /* part_elem still points at the last partition after the loop. */
  if (part_elem->engine_type == myisam_hton)
  {
    DBUG_PRINT("info", ("MyISAM"));
    m_myisam= TRUE;
  }
  DBUG_RETURN(FALSE);
error:
  mem_alloc_error(sizeof(handler));
error_end:
  DBUG_RETURN(TRUE);
}
2806
2807
2808 /**
2809 Read the .par file to get the partitions engines and names
2810
  @param name  Name of table file (without extension)
2812
2813 @return Operation status
2814 @retval true Failure
2815 @retval false Success
2816
2817 @note On success, m_file_buffer is allocated and must be
  freed by the caller. m_name_buffer_ptr and m_tot_parts are also set.
2819 */
2820
/**
  Read the .par file to get the partitions engines and names.

  @param name  Name of table file (without extension)

  @return Operation status
    @retval true   Failure
    @retval false  Success

  @note On success, m_file_buffer is allocated and must be
  freed by the caller. m_name_buffer_ptr and m_tot_parts are also set.
*/
bool ha_partition::read_par_file(const char *name)
{
  char buff[FN_REFLEN], *tot_name_len_offset, *buff_p= buff;
  File file;
  char *file_buffer;
  uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
  DBUG_ENTER("ha_partition::read_par_file");
  DBUG_PRINT("enter", ("table name: '%s'", name));

  /* Already read and parsed: nothing to do. */
  if (m_file_buffer)
    DBUG_RETURN(false);
  fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT);

  /* Following could be done with mysql_file_stat to read in whole file */
  if ((file= mysql_file_open(key_file_partition,
                             buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
    DBUG_RETURN(TRUE);
  /* First word of the file is its total length in 4-byte words. */
  if (mysql_file_read(file, (uchar *) &buff[0], PAR_WORD_SIZE, MYF(MY_NABP)))
    goto err1;
  len_words= uint4korr(buff_p);
  len_bytes= PAR_WORD_SIZE * len_words;
  /* Rewind and read the whole file into one buffer. */
  if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
    goto err1;
  if (!(file_buffer= (char*) my_malloc(len_bytes, MYF(0))))
    goto err1;
  if (mysql_file_read(file, (uchar *) file_buffer, len_bytes, MYF(MY_NABP)))
    goto err2;

  /*
    The stored checksum word makes the XOR of all words zero for an intact
    file (see create_handler_file()), so any non-zero result means corruption.
  */
  chksum= 0;
  for (i= 0; i < len_words; i++)
    chksum ^= uint4korr((file_buffer) + PAR_WORD_SIZE * i);
  if (chksum)
    goto err2;
  m_tot_parts= uint4korr((file_buffer) + PAR_NUM_PARTS_OFFSET);
  DBUG_PRINT("info", ("No of parts = %u", m_tot_parts));
  tot_partition_words= (m_tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;

  tot_name_len_offset= file_buffer + PAR_ENGINES_OFFSET +
                       PAR_WORD_SIZE * tot_partition_words;
  tot_name_words= (uint4korr(tot_name_len_offset) + PAR_WORD_SIZE - 1) /
                  PAR_WORD_SIZE;
  /*
    Verify the total length = tot size word, checksum word, num parts word +
    engines array + name length word + name array.
  */
  if (len_words != (tot_partition_words + tot_name_words + 4))
    goto err2;
  (void) mysql_file_close(file, MYF(0));
  m_file_buffer= file_buffer;          // Will be freed in clear_handler_file()
  m_name_buffer_ptr= tot_name_len_offset + PAR_WORD_SIZE;

  DBUG_RETURN(false);

err2:
  my_free(file_buffer);
err1:
  (void) mysql_file_close(file, MYF(0));
  DBUG_RETURN(true);
}
2880
2881
2882 /**
2883 Setup m_engine_array
2884
2885 @param mem_root MEM_ROOT to use for allocating new handlers
2886
2887 @return Operation status
2888 @retval false Success
2889 @retval true Failure
2890 */
2891
setup_engine_array(MEM_ROOT * mem_root)2892 bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
2893 {
2894 uint i;
2895 uchar *buff;
2896 handlerton **engine_array, *first_engine;
2897 enum legacy_db_type db_type, first_db_type;
2898
2899 DBUG_ASSERT(!m_file);
2900 DBUG_ENTER("ha_partition::setup_engine_array");
2901 engine_array= (handlerton **) my_alloca(m_tot_parts * sizeof(handlerton*));
2902 if (!engine_array)
2903 DBUG_RETURN(true);
2904
2905 buff= (uchar *) (m_file_buffer + PAR_ENGINES_OFFSET);
2906 first_db_type= (enum legacy_db_type) buff[0];
2907 first_engine= ha_resolve_by_legacy_type(ha_thd(), first_db_type);
2908 if (!first_engine)
2909 goto err;
2910
2911 if (!(m_engine_array= (plugin_ref*)
2912 my_malloc(m_tot_parts * sizeof(plugin_ref), MYF(MY_WME))))
2913 goto err;
2914
2915 for (i= 0; i < m_tot_parts; i++)
2916 {
2917 db_type= (enum legacy_db_type) buff[i];
2918 if (db_type != first_db_type)
2919 {
2920 DBUG_PRINT("error", ("partition %u engine %d is not same as "
2921 "first partition %d", i, db_type,
2922 (int) first_db_type));
2923 DBUG_ASSERT(0);
2924 clear_handler_file();
2925 goto err;
2926 }
2927 m_engine_array[i]= ha_lock_engine(NULL, first_engine);
2928 if (!m_engine_array[i])
2929 {
2930 clear_handler_file();
2931 goto err;
2932 }
2933 }
2934
2935 my_afree((gptr) engine_array);
2936
2937 if (create_handlers(mem_root))
2938 {
2939 clear_handler_file();
2940 DBUG_RETURN(true);
2941 }
2942
2943 DBUG_RETURN(false);
2944
2945 err:
2946 my_afree((gptr) engine_array);
2947 DBUG_RETURN(true);
2948 }
2949
2950
2951 /**
2952 Get info about partition engines and their names from the .par file
2953
2954 @param name Full path of table name
2955 @param mem_root Allocate memory through this
2956 @param is_clone If it is a clone, don't create new handlers
2957
2958 @return Operation status
2959 @retval true Error
2960 @retval false Success
2961
2962 @note Open handler file to get partition names, engine types and number of
2963 partitions.
2964 */
2965
get_from_handler_file(const char * name,MEM_ROOT * mem_root,bool is_clone)2966 bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root,
2967 bool is_clone)
2968 {
2969 DBUG_ENTER("ha_partition::get_from_handler_file");
2970 DBUG_PRINT("enter", ("table name: '%s'", name));
2971
2972 if (m_file_buffer)
2973 DBUG_RETURN(false);
2974
2975 if (read_par_file(name))
2976 DBUG_RETURN(true);
2977
2978 if (!is_clone && setup_engine_array(mem_root))
2979 DBUG_RETURN(true);
2980
2981 DBUG_RETURN(false);
2982 }
2983
2984
2985 /****************************************************************************
2986 MODULE open/close object
2987 ****************************************************************************/
2988
2989 /**
2990 Get the partition name.
2991
2992 @param part Struct containing name and length
2993 @param[out] length Length of the name
2994
2995 @return Partition name
2996 */
2997
get_part_name(PART_NAME_DEF * part,size_t * length,my_bool not_used MY_ATTRIBUTE ((unused)))2998 static uchar *get_part_name(PART_NAME_DEF *part, size_t *length,
2999 my_bool not_used MY_ATTRIBUTE((unused)))
3000 {
3001 *length= part->length;
3002 return part->partition_name;
3003 }
3004
3005
3006 /**
3007 Insert a partition name in the partition_name_hash.
3008
3009 @param name Name of partition
3010 @param part_id Partition id (number)
3011 @param is_subpart Set if the name belongs to a subpartition
3012
3013 @return Operation status
3014 @retval true Failure
    @retval false  Success
3016 */
3017
/**
  Insert a partition name in the partition_name_hash.

  @param name        Name of partition
  @param part_id     Partition id (number)
  @param is_subpart  Set if the name belongs to a subpartition

  @return Operation status
    @retval true   Failure
    @retval false  Success
*/
bool ha_partition::insert_partition_name_in_hash(const char *name, uint part_id,
                                                 bool is_subpart)
{
  PART_NAME_DEF *part_def;
  uchar *part_name;
  uint part_name_length;
  DBUG_ENTER("ha_partition::insert_partition_name_in_hash");
  /*
    Calculate and store the length here, to avoid doing it when
    searching the hash.
  */
  part_name_length= strlen(name);
  /*
    Must use memory that lives as long as table_share.
    Freed in the Partition_share destructor.
    Since we use my_multi_malloc, then my_free(part_def) will also free
    part_name, as a part of my_hash_free.
  */
  if (!my_multi_malloc(MY_WME,
                       &part_def, sizeof(PART_NAME_DEF),
                       &part_name, part_name_length + 1,
                       NULL))
    DBUG_RETURN(true);
  /* Copy including the terminating NULL. */
  memcpy(part_name, name, part_name_length + 1);
  part_def->partition_name= part_name;
  part_def->length= part_name_length;
  part_def->part_id= part_id;
  part_def->is_subpart= is_subpart;
  if (my_hash_insert(&part_share->partition_name_hash, (uchar *) part_def))
  {
    /* Insert failed (e.g. duplicate name); release the combined block. */
    my_free(part_def);
    DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}
3053
3054
3055 /**
3056 Populate the partition_name_hash in part_share.
3057 */
3058
/**
  Populate the partition_name_hash in part_share.

  Maps every partition name (and, if subpartitioned, every subpartition
  name) to its partition id. Initialization is done at most once per
  table share, guarded by the shared-ha-data lock.

  @return Operation status
    @retval true   Failure
    @retval false  Success
*/
bool ha_partition::populate_partition_name_hash()
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_is_sub_partitioned ? m_part_info->num_subparts : 1;
  uint tot_names;
  uint i= 0;
  DBUG_ASSERT(part_share);

  DBUG_ENTER("ha_partition::populate_partition_name_hash");

  /*
    partition_name_hash is only set once and never changed
    -> OK to check without locking.
  */

  if (part_share->partition_name_hash_initialized)
    DBUG_RETURN(false);
  lock_shared_ha_data();
  /* Re-check under the lock: another handler may have won the race. */
  if (part_share->partition_name_hash_initialized)
  {
    unlock_shared_ha_data();
    DBUG_RETURN(false);
  }
  /* Subpartitioned tables also store one entry per parent partition name. */
  tot_names= m_is_sub_partitioned ? m_tot_parts + num_parts : num_parts;
  if (my_hash_init(&part_share->partition_name_hash,
                   system_charset_info, tot_names, 0, 0,
                   (my_hash_get_key) get_part_name,
                   my_free, HASH_UNIQUE))
  {
    unlock_shared_ha_data();
    DBUG_RETURN(TRUE);
  }

  do
  {
    partition_element *part_elem= part_it++;
    DBUG_ASSERT(part_elem->part_state == PART_NORMAL);
    if (part_elem->part_state == PART_NORMAL)
    {
      /* A partition name maps to the id of its first subpartition. */
      if (insert_partition_name_in_hash(part_elem->partition_name,
                                        i * num_subparts, false))
        goto err;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element>
                                    subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0;
        do
        {
          sub_elem= subpart_it++;
          if (insert_partition_name_in_hash(sub_elem->partition_name,
                                            i * num_subparts + j, true))
            goto err;

        } while (++j < num_subparts);
      }
    }
  } while (++i < num_parts);

  part_share->partition_name_hash_initialized= true;
  unlock_shared_ha_data();

  DBUG_RETURN(FALSE);
err:
  /* my_hash_free also frees the inserted entries via the my_free callback. */
  my_hash_free(&part_share->partition_name_hash);
  unlock_shared_ha_data();

  DBUG_RETURN(TRUE);
}
3130
3131
3132 /**
3133 Set Handler_share pointer and allocate Handler_share pointers
3134 for each partition and set those.
3135
3136 @param ha_share_arg Where to store/retrieve the Partitioning_share pointer
3137 to be shared by all instances of the same table.
3138
3139 @return Operation status
3140 @retval true Failure
    @retval false  Success
3142 */
3143
set_ha_share_ref(Handler_share ** ha_share_arg)3144 bool ha_partition::set_ha_share_ref(Handler_share **ha_share_arg)
3145 {
3146 Handler_share **ha_shares;
3147 uint i;
3148 DBUG_ENTER("ha_partition::set_ha_share_ref");
3149
3150 DBUG_ASSERT(!part_share);
3151 DBUG_ASSERT(table_share);
3152 DBUG_ASSERT(!m_is_clone_of);
3153 DBUG_ASSERT(m_tot_parts);
3154 if (handler::set_ha_share_ref(ha_share_arg))
3155 DBUG_RETURN(true);
3156 if (!(part_share= get_share()))
3157 DBUG_RETURN(true);
3158 DBUG_ASSERT(part_share->partitions_share_refs);
3159 DBUG_ASSERT(part_share->partitions_share_refs->num_parts >= m_tot_parts);
3160 ha_shares= part_share->partitions_share_refs->ha_shares;
3161 for (i= 0; i < m_tot_parts; i++)
3162 {
3163 if (m_file[i]->set_ha_share_ref(&ha_shares[i]))
3164 DBUG_RETURN(true);
3165 }
3166 DBUG_RETURN(false);
3167 }
3168
3169
3170 /**
3171 Get the PARTITION_SHARE for the table.
3172
3173 @return Operation status
3174 @retval true Error
3175 @retval false Success
3176
3177 @note Gets or initializes the Partition_share object used by partitioning.
3178 The Partition_share is used for handling the auto_increment etc.
3179 */
3180
get_share()3181 Partition_share *ha_partition::get_share()
3182 {
3183 Partition_share *tmp_share;
3184 DBUG_ENTER("ha_partition::get_share");
3185 DBUG_ASSERT(table_share);
3186
3187 lock_shared_ha_data();
3188 if (!(tmp_share= static_cast<Partition_share*>(get_ha_share_ptr())))
3189 {
3190 tmp_share= new Partition_share;
3191 if (!tmp_share)
3192 goto err;
3193 if (tmp_share->init(m_tot_parts))
3194 {
3195 delete tmp_share;
3196 tmp_share= NULL;
3197 goto err;
3198 }
3199 set_ha_share_ptr(static_cast<Handler_share*>(tmp_share));
3200 }
3201 err:
3202 unlock_shared_ha_data();
3203 DBUG_RETURN(tmp_share);
3204 }
3205
3206
3207
3208 /**
3209 Helper function for freeing all internal bitmaps.
3210 */
3211
void ha_partition::free_partition_bitmaps()
{
  /*
    Free the four bitmaps allocated by init_partition_bitmaps().
    (The previous comment here said "Initialize..."; it was a copy-paste
    from init_partition_bitmaps() — this function only frees.)
  */
  bitmap_free(&m_bulk_insert_started);
  bitmap_free(&m_locked_partitions);
  bitmap_free(&m_partitions_to_reset);
  bitmap_free(&m_key_not_found_partitions);
}
3220
3221
3222 /**
3223 Helper function for initializing all internal bitmaps.
3224 */
3225
init_partition_bitmaps()3226 bool ha_partition::init_partition_bitmaps()
3227 {
3228 DBUG_ENTER("ha_partition::init_partition_bitmaps");
3229 /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
3230 if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
3231 DBUG_RETURN(true);
3232 bitmap_clear_all(&m_bulk_insert_started);
3233
3234 /* Initialize the bitmap we use to keep track of locked partitions */
3235 if (bitmap_init(&m_locked_partitions, NULL, m_tot_parts, FALSE))
3236 {
3237 bitmap_free(&m_bulk_insert_started);
3238 DBUG_RETURN(true);
3239 }
3240 bitmap_clear_all(&m_locked_partitions);
3241
3242 /*
3243 Initialize the bitmap we use to keep track of partitions which may have
3244 something to reset in ha_reset().
3245 */
3246 if (bitmap_init(&m_partitions_to_reset, NULL, m_tot_parts, FALSE))
3247 {
3248 bitmap_free(&m_bulk_insert_started);
3249 bitmap_free(&m_locked_partitions);
3250 DBUG_RETURN(true);
3251 }
3252 bitmap_clear_all(&m_partitions_to_reset);
3253
3254 /*
3255 Initialize the bitmap we use to keep track of partitions which returned
3256 HA_ERR_KEY_NOT_FOUND from index_read_map.
3257 */
3258 if (bitmap_init(&m_key_not_found_partitions, NULL, m_tot_parts, FALSE))
3259 {
3260 bitmap_free(&m_bulk_insert_started);
3261 bitmap_free(&m_locked_partitions);
3262 bitmap_free(&m_partitions_to_reset);
3263 DBUG_RETURN(true);
3264 }
3265 bitmap_clear_all(&m_key_not_found_partitions);
3266 m_key_not_found= false;
3267 /* Initialize the bitmap for read/lock_partitions */
3268 if (!m_is_clone_of)
3269 {
3270 DBUG_ASSERT(!m_clone_mem_root);
3271 if (m_part_info->set_partition_bitmaps(NULL))
3272 {
3273 free_partition_bitmaps();
3274 DBUG_RETURN(true);
3275 }
3276 }
3277 DBUG_RETURN(false);
3278 }
3279
3280
3281 /*
3282 Open handler object
3283
3284 SYNOPSIS
3285 open()
3286 name Full path of table name
3287 mode Open mode flags
3288 test_if_locked ?
3289
3290 RETURN VALUE
3291 >0 Error
3292 0 Success
3293
3294 DESCRIPTION
3295 Used for opening tables. The name will be the name of the file.
3296 A table is opened when it needs to be opened. For instance
3297 when a request comes in for a select on the table (tables are not
3298 open and closed for each request, they are cached).
3299
3300 Called from handler.cc by handler::ha_open(). The server opens all tables
3301 by calling ha_open() which then calls the handler specific open().
3302 */
3303
open(const char * name,int mode,uint test_if_locked)3304 int ha_partition::open(const char *name, int mode, uint test_if_locked)
3305 {
3306 char *name_buffer_ptr;
3307 int error= HA_ERR_INITIALIZATION;
3308 handler **file;
3309 char name_buff[FN_REFLEN + 1];
3310 ulonglong check_table_flags;
3311 DBUG_ENTER("ha_partition::open");
3312
3313 DBUG_ASSERT(table->s == table_share);
3314 ref_length= 0;
3315 m_mode= mode;
3316 m_open_test_lock= test_if_locked;
3317 m_part_field_array= m_part_info->full_part_field_array;
3318 if (get_from_handler_file(name, &table->mem_root, MY_TEST(m_is_clone_of)))
3319 DBUG_RETURN(error);
3320 name_buffer_ptr= m_name_buffer_ptr;
3321 if (populate_partition_name_hash())
3322 {
3323 DBUG_RETURN(HA_ERR_INITIALIZATION);
3324 }
3325 m_start_key.length= 0;
3326 m_rec0= table->record[0];
3327 legacy_db_type db_type = ha_legacy_type(m_part_info->default_engine_type);
3328 if(db_type == DB_TYPE_HEAP)
3329 {
3330 m_rec_length= table_share->rec_buff_length;
3331 }
3332 else {
3333 m_rec_length= table_share->rec_buff_length;
3334 }
3335 DBUG_ASSERT(db_type != DB_TYPE_UNKNOWN);
3336
3337 if (!m_part_ids_sorted_by_num_of_records)
3338 {
3339 if (!(m_part_ids_sorted_by_num_of_records=
3340 (uint32*) my_malloc(m_tot_parts * sizeof(uint32), MYF(MY_WME))))
3341 DBUG_RETURN(error);
3342 uint32 i;
3343 /* Initialize it with all partition ids. */
3344 for (i= 0; i < m_tot_parts; i++)
3345 m_part_ids_sorted_by_num_of_records[i]= i;
3346 }
3347
3348 if (init_partition_bitmaps())
3349 DBUG_RETURN(error);
3350
3351 DBUG_ASSERT(m_part_info);
3352
3353 if (m_is_clone_of)
3354 {
3355 uint i, alloc_len;
3356 DBUG_ASSERT(m_clone_mem_root);
3357 /* Allocate an array of handler pointers for the partitions handlers. */
3358 alloc_len= (m_tot_parts + 1) * sizeof(handler*);
3359 if (!(m_file= (handler **) alloc_root(m_clone_mem_root, alloc_len)))
3360 {
3361 error= HA_ERR_INITIALIZATION;
3362 goto err_alloc;
3363 }
3364 memset(m_file, 0, alloc_len);
3365 /*
3366 Populate them by cloning the original partitions. This also opens them.
3367 Note that file->ref is allocated too.
3368 */
3369 file= m_is_clone_of->m_file;
3370 for (i= 0; i < m_tot_parts; i++)
3371 {
3372 if ((error= create_partition_name(name_buff, name, name_buffer_ptr,
3373 NORMAL_PART_NAME, FALSE)))
3374 {
3375 file= &m_file[i];
3376 goto err_handler;
3377 }
3378
3379 /* ::clone() will also set ha_share from the original. */
3380 if (!(m_file[i]= file[i]->clone(name_buff, m_clone_mem_root)))
3381 {
3382 error= HA_ERR_INITIALIZATION;
3383 file= &m_file[i];
3384 goto err_handler;
3385 }
3386 name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
3387 }
3388 }
3389 else
3390 {
3391 file= m_file;
3392 do
3393 {
3394 if ((error= create_partition_name(name_buff, name, name_buffer_ptr,
3395 NORMAL_PART_NAME, FALSE)))
3396 goto err_handler;
3397
3398 if ((error= (*file)->ha_open(table, name_buff, mode,
3399 test_if_locked | HA_OPEN_NO_PSI_CALL)))
3400 goto err_handler;
3401 if (m_file == file)
3402 m_num_locks= (*file)->lock_count();
3403 DBUG_ASSERT(m_num_locks == (*file)->lock_count());
3404 name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
3405 } while (*(++file));
3406 }
3407
3408 file= m_file;
3409 ref_length= (*file)->ref_length;
3410 check_table_flags= (((*file)->ha_table_flags() &
3411 ~(PARTITION_DISABLED_TABLE_FLAGS)) |
3412 (PARTITION_ENABLED_TABLE_FLAGS));
3413 while (*(++file))
3414 {
3415 /* MyISAM can have smaller ref_length for partitions with MAX_ROWS set */
3416 set_if_bigger(ref_length, ((*file)->ref_length));
3417 /*
3418 Verify that all partitions have the same set of table flags.
3419 Mask all flags that partitioning enables/disables.
3420 */
3421 if (check_table_flags != (((*file)->ha_table_flags() &
3422 ~(PARTITION_DISABLED_TABLE_FLAGS)) |
3423 (PARTITION_ENABLED_TABLE_FLAGS)))
3424 {
3425 error= HA_ERR_INITIALIZATION;
3426 /* set file to last handler, so all of them are closed */
3427 file = &m_file[m_tot_parts - 1];
3428 goto err_handler;
3429 }
3430 }
3431 key_used_on_scan= m_file[0]->key_used_on_scan;
3432 implicit_emptied= m_file[0]->implicit_emptied;
3433 /*
3434 Add 2 bytes for partition id in position ref length.
3435 ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
3436 */
3437 ref_length+= PARTITION_BYTES_IN_POS;
3438 m_ref_length= ref_length;
3439
3440 /*
3441 Release buffer read from .par file. It will not be reused again after
3442 being opened once.
3443 */
3444 clear_handler_file();
3445
3446 /*
3447 Some handlers update statistics as part of the open call. This will in
3448 some cases corrupt the statistics of the partition handler and thus
3449 to ensure we have correct statistics we call info from open after
3450 calling open on all individual handlers.
3451 */
3452 m_handler_status= handler_opened;
3453 if (m_part_info->part_expr)
3454 m_part_func_monotonicity_info=
3455 m_part_info->part_expr->get_monotonicity_info();
3456 else if (m_part_info->list_of_part_fields)
3457 m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
3458 info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
3459 DBUG_RETURN(0);
3460
3461 err_handler:
3462 DEBUG_SYNC(ha_thd(), "partition_open_error");
3463 while (file-- != m_file)
3464 (*file)->ha_close();
3465 err_alloc:
3466 free_partition_bitmaps();
3467
3468 DBUG_RETURN(error);
3469 }
3470
3471
/*
  Disabled since it is not possible to prune yet.
  Without pruning, every statement that uses a table from the table cache
  would need to rebind/unbind every partition. It would also use as many
  PSI_tables as there are partitions.
*/
3478 #ifdef HAVE_M_PSI_PER_PARTITION
unbind_psi()3479 void ha_partition::unbind_psi()
3480 {
3481 uint i;
3482
3483 DBUG_ENTER("ha_partition::unbind_psi");
3484 handler::unbind_psi();
3485 for (i= 0; i < m_tot_parts; i++)
3486 {
3487 DBUG_ASSERT(m_file[i] != NULL);
3488 m_file[i]->unbind_psi();
3489 }
3490 DBUG_VOID_RETURN;
3491 }
3492
rebind_psi()3493 void ha_partition::rebind_psi()
3494 {
3495 uint i;
3496
3497 DBUG_ENTER("ha_partition::rebind_psi");
3498 handler::rebind_psi();
3499 for (i= 0; i < m_tot_parts; i++)
3500 {
3501 DBUG_ASSERT(m_file[i] != NULL);
3502 m_file[i]->rebind_psi();
3503 }
3504 DBUG_VOID_RETURN;
3505 }
3506 #endif /* HAVE_M_PSI_PER_PARTITION */
3507
3508
3509 /**
3510 Clone the open and locked partitioning handler.
3511
3512 @param mem_root MEM_ROOT to use.
3513
3514 @return Pointer to the successfully created clone or NULL
3515
3516 @details
3517 This function creates a new ha_partition handler as a clone/copy. The
3518 original (this) must already be opened and locked. The clone will use
3519 the originals m_part_info.
3520 It also allocates memory for ref + ref_dup.
3521 In ha_partition::open() it will clone its original handlers partitions
3522 which will allocate then on the correct MEM_ROOT and also open them.
3523 */
3524
clone(const char * name,MEM_ROOT * mem_root)3525 handler *ha_partition::clone(const char *name, MEM_ROOT *mem_root)
3526 {
3527 ha_partition *new_handler;
3528
3529 DBUG_ENTER("ha_partition::clone");
3530 new_handler= new (mem_root) ha_partition(ht, table_share, m_part_info,
3531 this, mem_root);
3532 if (!new_handler)
3533 DBUG_RETURN(NULL);
3534
3535 /*
3536 We will not clone each partition's handler here, it will be done in
3537 ha_partition::open() for clones. Also set_ha_share_ref is not needed
3538 here, since 1) ha_share is copied in the constructor used above
3539 2) each partition's cloned handler will set it from its original.
3540 */
3541
3542 /*
3543 Allocate new_handler->ref here because otherwise ha_open will allocate it
3544 on this->table->mem_root and we will not be able to reclaim that memory
3545 when the clone handler object is destroyed.
3546 */
3547 if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
3548 ALIGN_SIZE(m_ref_length)*2)))
3549 goto err;
3550
3551 if (new_handler->ha_open(table, name,
3552 table->db_stat,
3553 HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL))
3554 goto err;
3555
3556 DBUG_RETURN((handler*) new_handler);
3557
3558 err:
3559 delete new_handler;
3560 DBUG_RETURN(NULL);
3561 }
3562
3563
3564 /*
3565 Close handler object
3566
3567 SYNOPSIS
3568 close()
3569
3570 RETURN VALUE
3571 >0 Error code
3572 0 Success
3573
3574 DESCRIPTION
3575 Called from sql_base.cc, sql_select.cc, and table.cc.
3576 In sql_select.cc it is only used to close up temporary tables or during
3577 the process where a temporary table is converted over to being a
3578 myisam table.
3579 For sql_base.cc look at close_data_tables().
3580 */
3581
close(void)3582 int ha_partition::close(void)
3583 {
3584 bool first= TRUE;
3585 handler **file;
3586 DBUG_ENTER("ha_partition::close");
3587
3588 DBUG_ASSERT(table->s == table_share);
3589 destroy_record_priority_queue();
3590 free_partition_bitmaps();
3591 DBUG_ASSERT(m_part_info);
3592 file= m_file;
3593
3594 repeat:
3595 do
3596 {
3597 (*file)->ha_close();
3598 } while (*(++file));
3599
3600 if (first && m_added_file && m_added_file[0])
3601 {
3602 file= m_added_file;
3603 first= FALSE;
3604 goto repeat;
3605 }
3606
3607 m_handler_status= handler_closed;
3608 DBUG_RETURN(0);
3609 }
3610
3611 /****************************************************************************
3612 MODULE start/end statement
3613 ****************************************************************************/
3614 /*
3615 A number of methods to define various constants for the handler. In
3616 the case of the partition handler we need to use some max and min
3617 of the underlying handlers in most cases.
3618 */
3619
3620 /*
3621 Set external locks on table
3622
3623 SYNOPSIS
3624 external_lock()
3625 thd Thread object
3626 lock_type Type of external lock
3627
3628 RETURN VALUE
3629 >0 Error code
3630 0 Success
3631
3632 DESCRIPTION
3633 First you should go read the section "locking functions for mysql" in
3634 lock.cc to understand this.
3635 This create a lock on the table. If you are implementing a storage engine
3636 that can handle transactions look at ha_berkeley.cc to see how you will
3637 want to go about doing this. Otherwise you should consider calling
3638 flock() here.
3639 Originally this method was used to set locks on file level to enable
3640 several MySQL Servers to work on the same data. For transactional
3641 engines it has been "abused" to also mean start and end of statements
3642 to enable proper rollback of statements and transactions. When LOCK
3643 TABLES has been issued the start_stmt method takes over the role of
3644 indicating start of statement but in this case there is no end of
3645 statement indicator(?).
3646
3647 Called from lock.cc by lock_external() and unlock_external(). Also called
3648 from sql_table.cc by copy_data_between_tables().
3649 */
3650
int ha_partition::external_lock(THD *thd, int lock_type)
{
  uint error;
  uint i, first_used_partition;
  MY_BITMAP *used_partitions;
  DBUG_ENTER("ha_partition::external_lock");

  DBUG_ASSERT(!auto_increment_lock && !auto_increment_safe_stmt_log_lock);

  /*
    On unlock, visit exactly the partitions that were actually locked;
    on lock, use the (possibly pruned) lock_partitions set.
  */
  if (lock_type == F_UNLCK)
    used_partitions= &m_locked_partitions;
  else
    used_partitions= &(m_part_info->lock_partitions);

  first_used_partition= bitmap_get_first_set(used_partitions);

  for (i= first_used_partition;
       i < m_tot_parts;
       i= bitmap_get_next_set(used_partitions, i))
  {
    DBUG_PRINT("info", ("external_lock(thd, %d) part %d", lock_type, i));
    if ((error= m_file[i]->ha_external_lock(thd, lock_type)))
    {
      /* Unlock errors are ignored so remaining partitions still unlock. */
      if (lock_type != F_UNLCK)
        goto err_handler;
    }
    DBUG_PRINT("info", ("external_lock part %u lock %d", i, lock_type));
    if (lock_type != F_UNLCK)
      bitmap_set_bit(&m_locked_partitions, i);
  }
  if (lock_type == F_UNLCK)
  {
    bitmap_clear_all(used_partitions);
  }
  else
  {
    /* Add touched partitions to be included in reset(). */
    bitmap_union(&m_partitions_to_reset, used_partitions);
  }

  if (m_added_file && m_added_file[0])
  {
    /* Partitions added by an ongoing ALTER are only ever unlocked here. */
    handler **file= m_added_file;
    DBUG_ASSERT(lock_type == F_UNLCK);
    do
    {
      (void) (*file)->ha_external_lock(thd, lock_type);
    } while (*(++file));
  }
  DBUG_RETURN(0);

err_handler:
  /* Roll back: unlock every partition locked before the failing one (< i). */
  uint j;
  for (j= first_used_partition;
       j < i;
       j= bitmap_get_next_set(&m_locked_partitions, j))
  {
    (void) m_file[j]->ha_external_lock(thd, F_UNLCK);
  }
  bitmap_clear_all(&m_locked_partitions);
  DBUG_RETURN(error);
}
3713
3714
3715 /*
3716 Get the lock(s) for the table and perform conversion of locks if needed
3717
3718 SYNOPSIS
3719 store_lock()
3720 thd Thread object
3721 to Lock object array
3722 lock_type Table lock type
3723
3724 RETURN VALUE
3725 >0 Error code
3726 0 Success
3727
3728 DESCRIPTION
3729 The idea with handler::store_lock() is the following:
3730
3731 The statement decided which locks we should need for the table
3732 for updates/deletes/inserts we get WRITE locks, for SELECT... we get
3733 read locks.
3734
3735 Before adding the lock into the table lock handler (see thr_lock.c)
3736 mysqld calls store lock with the requested locks. Store lock can now
3737 modify a write lock to a read lock (or some other lock), ignore the
3738 lock (if we don't want to use MySQL table locks at all) or add locks
3739 for many tables (like we do when we are using a MERGE handler).
3740
  Berkeley DB for partition changes all WRITE locks to TL_WRITE_ALLOW_WRITE
  (which signals that we are doing WRITES, but we are still allowing other
  readers and writers).
3744
3745 When releasing locks, store_lock() is also called. In this case one
3746 usually doesn't have to do anything.
3747
3748 store_lock is called when holding a global mutex to ensure that only
3749 one thread at a time changes the locking information of tables.
3750
3751 In some exceptional cases MySQL may send a request for a TL_IGNORE;
3752 This means that we are requesting the same lock as last time and this
3753 should also be ignored. (This may happen when someone does a flush
3754 table when we have opened a part of the tables, in which case mysqld
3755 closes and reopens the tables and tries to get the same locks as last
3756 time). In the future we will probably try to remove this.
3757
3758 Called from lock.cc by get_lock_data().
3759 */
3760
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)3761 THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
3762 THR_LOCK_DATA **to,
3763 enum thr_lock_type lock_type)
3764 {
3765 uint i;
3766 DBUG_ENTER("ha_partition::store_lock");
3767 DBUG_ASSERT(thd == current_thd);
3768
3769 /*
3770 This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
3771 even when thd != table->in_use. In that case don't use partition pruning,
3772 but use all partitions instead to avoid using another threads structures.
3773 */
3774 if (thd != table->in_use)
3775 {
3776 for (i= 0; i < m_tot_parts; i++)
3777 to= m_file[i]->store_lock(thd, to, lock_type);
3778 }
3779 else
3780 {
3781 for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
3782 i < m_tot_parts;
3783 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
3784 {
3785 DBUG_PRINT("info", ("store lock %d iteration", i));
3786 to= m_file[i]->store_lock(thd, to, lock_type);
3787 }
3788 }
3789 DBUG_RETURN(to);
3790 }
3791
3792 /*
3793 Start a statement when table is locked
3794
3795 SYNOPSIS
3796 start_stmt()
3797 thd Thread object
3798 lock_type Type of external lock
3799
3800 RETURN VALUE
3801 >0 Error code
3802 0 Success
3803
3804 DESCRIPTION
3805 This method is called instead of external lock when the table is locked
3806 before the statement is executed.
3807 */
3808
start_stmt(THD * thd,thr_lock_type lock_type)3809 int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
3810 {
3811 int error= 0;
3812 uint i;
3813 /* Assert that read_partitions is included in lock_partitions */
3814 DBUG_ASSERT(bitmap_is_subset(&m_part_info->read_partitions,
3815 &m_part_info->lock_partitions));
3816 /*
3817 m_locked_partitions is set in previous external_lock/LOCK TABLES.
3818 Current statement's lock requests must not include any partitions
3819 not previously locked.
3820 */
3821 DBUG_ASSERT(bitmap_is_subset(&m_part_info->lock_partitions,
3822 &m_locked_partitions));
3823 DBUG_ENTER("ha_partition::start_stmt");
3824
3825 for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
3826 i < m_tot_parts;
3827 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
3828 {
3829 if ((error= m_file[i]->start_stmt(thd, lock_type)))
3830 break;
3831 /* Add partition to be called in reset(). */
3832 bitmap_set_bit(&m_partitions_to_reset, i);
3833 }
3834 DBUG_RETURN(error);
3835 }
3836
3837
3838 /**
3839 Get number of lock objects returned in store_lock
3840
3841 @returns Number of locks returned in call to store_lock
3842
3843 @desc
3844 Returns the number of store locks needed in call to store lock.
3845 We return number of partitions we will lock multiplied with number of
3846 locks needed by each partition. Assists the above functions in allocating
3847 sufficient space for lock structures.
3848 */
3849
lock_count() const3850 uint ha_partition::lock_count() const
3851 {
3852 DBUG_ENTER("ha_partition::lock_count");
3853 /*
3854 The caller want to know the upper bound, to allocate enough memory.
3855 There is no performance lost if we simply return maximum number locks
3856 needed, only some minor over allocation of memory in get_lock_data().
3857
3858 Also notice that this may be called for another thread != table->in_use,
3859 when mysql_lock_abort_for_thread() is called. So this is more safe, then
3860 using number of partitions after pruning.
3861 */
3862 DBUG_RETURN(m_tot_parts * m_num_locks);
3863 }
3864
3865
3866 /*
3867 Unlock last accessed row
3868
3869 SYNOPSIS
3870 unlock_row()
3871
3872 RETURN VALUE
3873 NONE
3874
3875 DESCRIPTION
3876 Record currently processed was not in the result set of the statement
3877 and is thus unlocked. Used for UPDATE and DELETE queries.
3878 */
3879
void ha_partition::unlock_row()
{
  DBUG_ENTER("ha_partition::unlock_row");
  /* Forward to the partition the last-read row came from. */
  m_file[m_last_part]->unlock_row();
  DBUG_VOID_RETURN;
}
3886
3887 /**
3888 Check if semi consistent read was used
3889
3890 SYNOPSIS
3891 was_semi_consistent_read()
3892
3893 RETURN VALUE
3894 TRUE Previous read was a semi consistent read
3895 FALSE Previous read was not a semi consistent read
3896
3897 DESCRIPTION
3898 See handler.h:
3899 In an UPDATE or DELETE, if the row under the cursor was locked by another
3900 transaction, and the engine used an optimistic read of the last
3901 committed row value under the cursor, then the engine returns 1 from this
3902 function. MySQL must NOT try to update this optimistic value. If the
3903 optimistic value does not match the WHERE condition, MySQL can decide to
3904 skip over this row. Currently only works for InnoDB. This can be used to
3905 avoid unnecessary lock waits.
3906
3907 If this method returns nonzero, it will also signal the storage
3908 engine that the next read will be a locking re-read of the row.
3909 */
bool ha_partition::was_semi_consistent_read()
{
  DBUG_ENTER("ha_partition::was_semi_consistent_read");
  /* The last-read row must come from a partition included in the scan. */
  DBUG_ASSERT(m_last_part < m_tot_parts &&
              bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  /* Forward to the partition that produced the last-read row. */
  DBUG_RETURN(m_file[m_last_part]->was_semi_consistent_read());
}
3917
3918 /**
3919 Use semi consistent read if possible
3920
3921 SYNOPSIS
3922 try_semi_consistent_read()
3923 yes Turn on semi consistent read
3924
3925 RETURN VALUE
3926 NONE
3927
3928 DESCRIPTION
3929 See handler.h:
3930 Tell the engine whether it should avoid unnecessary lock waits.
3931 If yes, in an UPDATE or DELETE, if the row under the cursor was locked
3932 by another transaction, the engine may try an optimistic read of
3933 the last committed row value under the cursor.
3934 Note: prune_partitions are already called before this call, so using
3935 pruning is OK.
3936 */
try_semi_consistent_read(bool yes)3937 void ha_partition::try_semi_consistent_read(bool yes)
3938 {
3939 uint i;
3940 DBUG_ENTER("ha_partition::try_semi_consistent_read");
3941
3942 i= bitmap_get_first_set(&(m_part_info->read_partitions));
3943 DBUG_ASSERT(i != MY_BIT_NONE);
3944 for (;
3945 i < m_tot_parts;
3946 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
3947 {
3948 m_file[i]->try_semi_consistent_read(yes);
3949 }
3950 DBUG_VOID_RETURN;
3951 }
3952
3953
3954 /****************************************************************************
3955 MODULE change record
3956 ****************************************************************************/
3957
3958 /*
3959 Insert a row to the table
3960
3961 SYNOPSIS
3962 write_row()
3963 buf The row in MySQL Row Format
3964
3965 RETURN VALUE
3966 >0 Error code
3967 0 Success
3968
3969 DESCRIPTION
3970 write_row() inserts a row. buf() is a byte array of data, normally
3971 record[0].
3972
3973 You can use the field information to extract the data from the native byte
3974 array type.
3975
3976 Example of this would be:
3977 for (Field **field=table->field ; *field ; field++)
3978 {
3979 ...
3980 }
3981
3982 See ha_tina.cc for a variant of extracting all of the data as strings.
3983 ha_berkeley.cc has a variant of how to store it intact by "packing" it
3984 for ha_berkeley's own native storage type.
3985
3986 Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
3987 sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
3988
3989 */
3990
int ha_partition::write_row(uchar * buf)
{
  uint32 part_id;
  int error;
  longlong func_value;
  bool have_auto_increment= table->next_number_field && buf == table->record[0];
  my_bitmap_map *old_map;
  THD *thd= ha_thd();
  /* Saved to restore session state changed for the zero-auto-inc case. */
  sql_mode_t saved_sql_mode= thd->variables.sql_mode;
  bool saved_auto_inc_field_not_null= table->auto_increment_field_not_null;
  DBUG_ENTER("ha_partition::write_row");
  DBUG_ASSERT(buf == m_rec0);

  /*
    If we have an auto_increment column and we are writing a changed row
    or a new row, then update the auto_increment value in the record.
  */
  if (have_auto_increment)
  {
    if (!part_share->auto_inc_initialized &&
        !table_share->next_number_keypart)
    {
      /*
        If auto_increment in table_share is not initialized, start by
        initializing it.
      */
      info(HA_STATUS_AUTO);
    }
    error= update_auto_increment();

    /*
      If we have failed to set the auto-increment value for this row,
      it is highly likely that we will not be able to insert it into
      the correct partition. We must check and fail if neccessary.
    */
    if (error)
      goto exit;

    /*
      Don't allow generation of auto_increment value the partitions handler.
      If a partitions handler would change the value, then it might not
      match the partition any longer.
      This can occur if 'SET INSERT_ID = 0; INSERT (NULL)',
      So allow this by adding 'MODE_NO_AUTO_VALUE_ON_ZERO' to sql_mode.
      The partitions handler::next_insert_id must always be 0. Otherwise
      we need to forward release_auto_increment, or reset it for all
      partitions.
    */
    if (table->next_number_field->val_int() == 0)
    {
      table->auto_increment_field_not_null= TRUE;
      thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO;
    }
  }

  /* Temporarily allow reading all columns to compute the target partition. */
  old_map= dbug_tmp_use_all_columns(table, table->read_set);
  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
  dbug_tmp_restore_column_map(table->read_set, old_map);
  if (unlikely(error))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  /* The target partition must be one of the locked partitions. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
  {
    DBUG_PRINT("info", ("Write to non-locked partition %u (func_value: %ld)",
                        part_id, (long) func_value));
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }
  m_last_part= part_id;
  DBUG_PRINT("info", ("Insert in partition %d", part_id));
  start_part_bulk_insert(thd, part_id);

  tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
  error= m_file[part_id]->ha_write_row(buf);
  if (have_auto_increment && !table->s->next_number_keypart)
    set_auto_increment_if_higher(table->next_number_field);
  reenable_binlog(thd);
exit:
  /* Always restore the session state saved at function entry. */
  thd->variables.sql_mode= saved_sql_mode;
  table->auto_increment_field_not_null= saved_auto_inc_field_not_null;
  DBUG_RETURN(error);
}
4075
4076
4077 /*
4078 Update an existing row
4079
4080 SYNOPSIS
4081 update_row()
4082 old_data Old record in MySQL Row Format
4083 new_data New record in MySQL Row Format
4084
4085 RETURN VALUE
4086 >0 Error code
4087 0 Success
4088
4089 DESCRIPTION
4090 Yes, update_row() does what you expect, it updates a row. old_data will
4091 have the previous row record in it, while new_data will have the newest
4092 data in it.
4093 Keep in mind that the server can do updates based on ordering if an
4094 ORDER BY clause was used. Consecutive ordering is not guarenteed.
4095
4096 Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
4097 new_data is always record[0]
4098 old_data is always record[1]
4099 */
4100
int ha_partition::update_row(const uchar *old_data, uchar *new_data)
{
  THD *thd= ha_thd();
  uint32 new_part_id, old_part_id;
  int error= 0;
  longlong func_value;
  DBUG_ENTER("ha_partition::update_row");
  m_err_rec= NULL;

  // Need to read partition-related columns, to locate the row's partition:
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
  /* Compute source and destination partitions for the updated row. */
  if ((error= get_parts_for_update(old_data, new_data, table->record[0],
                                   m_part_info, &old_part_id, &new_part_id,
                                   &func_value)))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), old_part_id));
  if (!bitmap_is_set(&(m_part_info->lock_partitions), new_part_id))
  {
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }

  /*
    The protocol for updating a row is:
    1) position the handler (cursor) on the row to be updated,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call update_row with both old and new full records as arguments.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of correcting m_last_part, to make the user aware of the
    problem!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.
  */
  if (old_part_id != m_last_part)
  {
    m_err_rec= old_data;
    DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
  }

  m_last_part= new_part_id;
  start_part_bulk_insert(thd, new_part_id);
  if (new_part_id == old_part_id)
  {
    /* Row stays in the same partition: a plain in-place update. */
    DBUG_PRINT("info", ("Update in partition %d", new_part_id));
    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[new_part_id]->ha_update_row(old_data, new_data);
    reenable_binlog(thd);
    goto exit;
  }
  else
  {
    /* Row moves partition: write to new partition, then delete from old. */
    Field *saved_next_number_field= table->next_number_field;
    /*
      Don't allow generation of auto_increment value for update.
      table->next_number_field is never set on UPDATE.
      But is set for INSERT ... ON DUPLICATE KEY UPDATE,
      and since update_row() does not generate or update an auto_inc value,
      we cannot have next_number_field set when moving a row
      to another partition with write_row(), since that could
      generate/update the auto_inc value.
      This gives the same behavior for partitioned vs non partitioned tables.
    */
    table->next_number_field= NULL;
    DBUG_PRINT("info", ("Update from partition %d to partition %d",
                        old_part_id, new_part_id));
    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[new_part_id]->ha_write_row(new_data);
    reenable_binlog(thd);
    table->next_number_field= saved_next_number_field;
    if (error)
      goto exit;

    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[old_part_id]->ha_delete_row(old_data);
    reenable_binlog(thd);
    if (error)
    {
#ifdef IN_THE_FUTURE
      (void) m_file[new_part_id]->delete_last_inserted_row(new_data);
#endif
      goto exit;
    }
  }

exit:
  /*
    if updating an auto_increment column, update
    part_share->next_auto_inc_val if needed.
    (not to be used if auto_increment on secondary field in a multi-column
    index)
    mysql_update does not set table->next_number_field, so we use
    table->found_next_number_field instead.
    Also checking that the field is marked in the write set.
  */
  if (table->found_next_number_field &&
      new_data == table->record[0] &&
      !table->s->next_number_keypart &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index))
  {
    if (!part_share->auto_inc_initialized)
      info(HA_STATUS_AUTO);
    set_auto_increment_if_higher(table->found_next_number_field);
  }
  DBUG_RETURN(error);
}
4217
4218
4219 /*
4220 Remove an existing row
4221
4222 SYNOPSIS
4223 delete_row
4224 buf Deleted row in MySQL Row Format
4225
4226 RETURN VALUE
4227 >0 Error Code
4228 0 Success
4229
4230 DESCRIPTION
4231 This will delete a row. buf will contain a copy of the row to be deleted.
4232 The server will call this right after the current row has been read
4233 (from either a previous rnd_xxx() or index_xxx() call).
4234 If you keep a pointer to the last row or can access a primary key it will
4235 make doing the deletion quite a bit easier.
4236 Keep in mind that the server does no guarentee consecutive deletions.
4237 ORDER BY clauses can be used.
4238
4239 Called in sql_acl.cc and sql_udf.cc to manage internal table information.
4240 Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
4241 it is used for removing duplicates while in insert it is used for REPLACE
4242 calls.
4243
4244 buf is either record[0] or record[1]
4245 */
4246
delete_row(const uchar * buf)4247 int ha_partition::delete_row(const uchar *buf)
4248 {
4249 uint32 part_id;
4250 int error;
4251 THD *thd= ha_thd();
4252 DBUG_ENTER("ha_partition::delete_row");
4253 m_err_rec= NULL;
4254
4255 DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
4256 table->read_set));
4257 if ((error= get_part_for_delete(buf, m_rec0, m_part_info, &part_id)))
4258 {
4259 DBUG_RETURN(error);
4260 }
4261 /* Should never call delete_row on a partition which is not read */
4262 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
4263 DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
4264 if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
4265 DBUG_RETURN(HA_ERR_NOT_IN_LOCK_PARTITIONS);
4266
4267 /*
4268 The protocol for deleting a row is:
4269 1) position the handler (cursor) on the row to be deleted,
4270 either through the last read row (rnd or index) or by rnd_pos.
4271 2) call delete_row with the full record as argument.
4272
4273 This means that m_last_part should already be set to actual partition
4274 where the row was read from. And if that is not the same as the
4275 calculated part_id we found a misplaced row, we return an error to
4276 notify the user that something is broken in the row distribution
4277 between partitions! Since we don't check all rows on read, we return an
4278 error instead of forwarding the delete to the correct (m_last_part)
4279 partition!
4280
4281 Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
4282 so this is not supported for this engine.
4283
4284 TODO: change the assert in InnoDB into an error instead and make this one
4285 an assert instead and remove the get_part_for_delete()!
4286 */
4287 if (part_id != m_last_part)
4288 {
4289 m_err_rec= buf;
4290 DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
4291 }
4292
4293 m_last_part= part_id;
4294 tmp_disable_binlog(thd);
4295 error= m_file[part_id]->ha_delete_row(buf);
4296 reenable_binlog(thd);
4297 DBUG_RETURN(error);
4298 }
4299
4300
4301 /*
4302 Delete all rows in a table
4303
4304 SYNOPSIS
4305 delete_all_rows()
4306
4307 RETURN VALUE
4308 >0 Error Code
4309 0 Success
4310
4311 DESCRIPTION
4312 Used to delete all rows in a table. Both for cases of truncate and
4313 for cases where the optimizer realizes that all rows will be
4314 removed as a result of a SQL statement.
4315
4316 Called from item_sum.cc by Item_func_group_concat::clear(),
4317 Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
4318 Called from sql_delete.cc by mysql_delete().
4319 Called from sql_select.cc by JOIN::reset().
4320 Called from sql_union.cc by st_select_lex_unit::exec().
4321 */
4322
delete_all_rows()4323 int ha_partition::delete_all_rows()
4324 {
4325 int error;
4326 uint i;
4327 DBUG_ENTER("ha_partition::delete_all_rows");
4328
4329 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4330 i < m_tot_parts;
4331 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4332 {
4333 /* Can be pruned, like DELETE FROM t PARTITION (pX) */
4334 if ((error= m_file[i]->ha_delete_all_rows()))
4335 DBUG_RETURN(error);
4336 }
4337 DBUG_RETURN(0);
4338 }
4339
4340
4341 /**
4342 Manually truncate the table.
4343
4344 @retval 0 Success.
4345 @retval > 0 Error code.
4346 */
4347
truncate()4348 int ha_partition::truncate()
4349 {
4350 int error;
4351 handler **file;
4352 DBUG_ENTER("ha_partition::truncate");
4353
4354 /*
4355 TRUNCATE also means resetting auto_increment. Hence, reset
4356 it so that it will be initialized again at the next use.
4357 */
4358 lock_auto_increment();
4359 part_share->next_auto_inc_val= 0;
4360 part_share->auto_inc_initialized= false;
4361 unlock_auto_increment();
4362
4363 file= m_file;
4364 do
4365 {
4366 if ((error= (*file)->ha_truncate()))
4367 DBUG_RETURN(error);
4368 } while (*(++file));
4369 DBUG_RETURN(0);
4370 }
4371
4372
4373 /**
4374 Truncate a set of specific partitions.
4375
4376 @remark Auto increment value will be truncated in that partition as well!
4377
4378 ALTER TABLE t TRUNCATE PARTITION ...
4379 */
4380
int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt)
{
  int error= 0;
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  DBUG_ENTER("ha_partition::truncate_partition");

  /* Only binlog when it starts any call to the partitions handlers */
  *binlog_stmt= false;

  /*
    Mark the partitions named in alter_info with PART_ADMIN; fails when a
    named partition does not exist.
  */
  if (set_part_state(alter_info, m_part_info, PART_ADMIN))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  /*
    TRUNCATE also means resetting auto_increment. Hence, reset
    it so that it will be initialized again at the next use.
  */
  lock_auto_increment();
  part_share->next_auto_inc_val= 0;
  part_share->auto_inc_initialized= FALSE;
  unlock_auto_increment();

  /* From here on we call into partition handlers, so binlog the statement. */
  *binlog_stmt= true;

  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        /* Truncate every subpartition of this selected partition. */
        List_iterator<partition_element>
                                    subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          /* m_file layout: partition_index * num_subparts + subpart_index. */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("truncate subpartition %u (%s)",
                              part, sub_elem->partition_name));
          if ((error= m_file[part]->ha_truncate()))
            break;
          sub_elem->part_state= PART_NORMAL;
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("truncate partition %u (%s)", i,
                            part_elem->partition_name));
        error= m_file[i]->ha_truncate();
      }
      /*
        Restore the element's state even if a subpartition failed above;
        on error all states are reset below anyway.
      */
      part_elem->part_state= PART_NORMAL;
    }
  } while (!error && (++i < num_parts));
  if (error)
  {
    /* Reset to PART_NORMAL. */
    set_all_part_state(m_part_info, PART_NORMAL);
  }
  DBUG_RETURN(error);
}
4445
4446
4447 /*
4448 Start a large batch of insert rows
4449
4450 SYNOPSIS
4451 start_bulk_insert()
4452 rows Number of rows to insert
4453
4454 RETURN VALUE
4455 NONE
4456
4457 DESCRIPTION
4458 rows == 0 means we will probably insert many rows
4459 */
void ha_partition::start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("ha_partition::start_bulk_insert");

  /*
    Only reset bookkeeping here; the per-partition ha_start_bulk_insert()
    calls are made lazily by start_part_bulk_insert() as rows arrive.
    'rows' is not used directly here; the later estimates read
    estimation_rows_to_insert (presumably set from this value by the base
    handler — TODO confirm against handler::ha_start_bulk_insert).
  */
  m_bulk_inserted_rows= 0;
  bitmap_clear_all(&m_bulk_insert_started);
  /* use the last bit for marking if bulk_insert_started was called */
  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
  DBUG_VOID_RETURN;
}
4470
4471
4472 /*
4473 Check if start_bulk_insert has been called for this partition,
4474 if not, call it and mark it called
4475 */
start_part_bulk_insert(THD * thd,uint part_id)4476 void ha_partition::start_part_bulk_insert(THD *thd, uint part_id)
4477 {
4478 long old_buffer_size;
4479 if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
4480 bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
4481 {
4482 DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
4483 old_buffer_size= thd->variables.read_buff_size;
4484 /* Update read_buffer_size for this partition */
4485 thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
4486 m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
4487 bitmap_set_bit(&m_bulk_insert_started, part_id);
4488 thd->variables.read_buff_size= old_buffer_size;
4489 }
4490 m_bulk_inserted_rows++;
4491 }
4492
4493 /*
4494 Estimate the read buffer size for each partition.
4495 SYNOPSIS
4496 ha_partition::estimate_read_buffer_size()
4497 original_size read buffer size originally set for the server
4498 RETURN VALUE
4499 estimated buffer size.
4500 DESCRIPTION
4501 If the estimated number of rows to insert is less than 10 (but not 0)
4502 the new buffer size is same as original buffer size.
4503 In case of first partition of when partition function is monotonic
4504 new buffer size is same as the original buffer size.
4505 For rest of the partition total buffer of 10*original_size is divided
4506 equally if number of partition is more than 10 other wise each partition
4507 will be allowed to use original buffer size.
4508 */
estimate_read_buffer_size(long original_size)4509 long ha_partition::estimate_read_buffer_size(long original_size)
4510 {
4511 /*
4512 If number of rows to insert is less than 10, but not 0,
4513 return original buffer size.
4514 */
4515 if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
4516 return (original_size);
4517 /*
4518 If first insert/partition and monotonic partition function,
4519 allow using buffer size originally set.
4520 */
4521 if (!m_bulk_inserted_rows &&
4522 m_part_func_monotonicity_info != NON_MONOTONIC &&
4523 m_tot_parts > 1)
4524 return original_size;
4525 /*
4526 Allow total buffer used in all partition to go up to 10*read_buffer_size.
4527 11*read_buffer_size in case of monotonic partition function.
4528 */
4529
4530 if (m_tot_parts < 10)
4531 return original_size;
4532 return (original_size * 10 / m_tot_parts);
4533 }
4534
4535 /*
4536 Try to predict the number of inserts into this partition.
4537
4538 If less than 10 rows (including 0 which means Unknown)
4539 just give that as a guess
4540 If monotonic partitioning function was used
4541 guess that 50 % of the inserts goes to the first partition
4542 For all other cases, guess on equal distribution between the partitions
4543 */
guess_bulk_insert_rows()4544 ha_rows ha_partition::guess_bulk_insert_rows()
4545 {
4546 DBUG_ENTER("guess_bulk_insert_rows");
4547
4548 if (estimation_rows_to_insert < 10)
4549 DBUG_RETURN(estimation_rows_to_insert);
4550
4551 /* If first insert/partition and monotonic partition function, guess 50%. */
4552 if (!m_bulk_inserted_rows &&
4553 m_part_func_monotonicity_info != NON_MONOTONIC &&
4554 m_tot_parts > 1)
4555 DBUG_RETURN(estimation_rows_to_insert / 2);
4556
4557 /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
4558 if (m_bulk_inserted_rows < estimation_rows_to_insert)
4559 DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
4560 / m_tot_parts) + 1);
4561 /* The estimation was wrong, must say 'Unknown' */
4562 DBUG_RETURN(0);
4563 }
4564
4565
4566 /**
4567 Finish a large batch of insert rows.
4568
4569 @return Operation status.
4570 @retval 0 Success
4571 @retval != 0 Error code
4572 */
4573
end_bulk_insert()4574 int ha_partition::end_bulk_insert()
4575 {
4576 int error= 0;
4577 uint i;
4578 DBUG_ENTER("ha_partition::end_bulk_insert");
4579
4580 if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
4581 {
4582 DBUG_ASSERT(0);
4583 DBUG_RETURN(error);
4584 }
4585
4586 for (i= bitmap_get_first_set(&m_bulk_insert_started);
4587 i < m_tot_parts;
4588 i= bitmap_get_next_set(&m_bulk_insert_started, i))
4589 {
4590 int tmp;
4591 if ((tmp= m_file[i]->ha_end_bulk_insert()))
4592 error= tmp;
4593 }
4594 bitmap_clear_all(&m_bulk_insert_started);
4595 DBUG_RETURN(error);
4596 }
4597
4598
4599 /****************************************************************************
4600 MODULE full table scan
4601 ****************************************************************************/
4602 /*
4603 Initialize engine for random reads
4604
4605 SYNOPSIS
4606 ha_partition::rnd_init()
4607 scan 0 Initialize for random reads through rnd_pos()
4608 1 Initialize for random scan through rnd_next()
4609
4610 RETURN VALUE
4611 >0 Error code
4612 0 Success
4613
4614 DESCRIPTION
4615 rnd_init() is called when the server wants the storage engine to do a
4616 table scan or when the server wants to access data through rnd_pos.
4617
4618 When scan is used we will scan one handler partition at a time.
4619 When preparing for rnd_pos we will init all handler partitions.
4620 No extra cache handling is needed when scannning is not performed.
4621
4622 Before initialising we will call rnd_end to ensure that we clean up from
4623 any previous incarnation of a table scan.
4624 Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
4625 sql_table.cc, and sql_update.cc.
4626 */
4627
int ha_partition::rnd_init(bool scan)
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::rnd_init");

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
      bitmap_set_all(table->read_set);
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: 0x%lx",
                      (long) m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part %d", part_id));

  /* No partition to read at all: succeed but leave the scan uninitialized. */
  if (MY_BIT_NONE == part_id)
  {
    error= 0;
    goto err1;
  }

  /*
    We have a partition and we are scanning with rnd_next
    so we bump our cache
  */
  DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
  if (scan)
  {
    /*
      rnd_end() is needed for partitioning to reset internal data if scan
      is already in use
    */
    rnd_end();
    late_extra_cache(part_id);
    /* Table scan: only the first used partition is initialized here;
       rnd_next() moves to the following partitions one at a time. */
    if ((error= m_file[part_id]->ha_rnd_init(scan)))
      goto err;
  }
  else
  {
    /* rnd_pos mode: init every used partition, since any may be read. */
    for (i= part_id;
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      if ((error= m_file[i]->ha_rnd_init(scan)))
        goto err;
    }
  }
  m_scan_value= scan;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  DBUG_PRINT("info", ("m_scan_value=%d", m_scan_value));
  DBUG_RETURN(0);

err:
  /* Call rnd_end for all previously inited partitions. */
  for (;
       part_id < i;
       part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id))
  {
    m_file[part_id]->ha_rnd_end();
  }
err1:
  /* m_scan_value == 2 marks "no scan active" for rnd_end(). */
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
4720
4721
4722 /*
4723 End of a table scan
4724
4725 SYNOPSIS
4726 rnd_end()
4727
4728 RETURN VALUE
4729 >0 Error code
4730 0 Success
4731 */
4732
rnd_end()4733 int ha_partition::rnd_end()
4734 {
4735 DBUG_ENTER("ha_partition::rnd_end");
4736 switch (m_scan_value) {
4737 case 2: // Error
4738 break;
4739 case 1:
4740 if (NO_CURRENT_PART_ID != m_part_spec.start_part) // Table scan
4741 {
4742 late_extra_no_cache(m_part_spec.start_part);
4743 m_file[m_part_spec.start_part]->ha_rnd_end();
4744 }
4745 break;
4746 case 0:
4747 uint i;
4748 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4749 i < m_tot_parts;
4750 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4751 {
4752 m_file[i]->ha_rnd_end();
4753 }
4754 break;
4755 }
4756 m_scan_value= 2;
4757 m_part_spec.start_part= NO_CURRENT_PART_ID;
4758 DBUG_RETURN(0);
4759 }
4760
4761 /*
4762 read next row during full table scan (scan in random row order)
4763
4764 SYNOPSIS
4765 rnd_next()
4766 buf buffer that should be filled with data
4767
4768 RETURN VALUE
4769 >0 Error code
4770 0 Success
4771
4772 DESCRIPTION
4773 This is called for each row of the table scan. When you run out of records
4774 you should return HA_ERR_END_OF_FILE.
4775 The Field structure for the table is the key to getting data into buf
4776 in a manner that will allow the server to understand it.
4777
4778 Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
4779 sql_table.cc, and sql_update.cc.
4780 */
4781
int ha_partition::rnd_next(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE;
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::rnd_next");

  if (NO_CURRENT_PART_ID == part_id)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    goto end;
  }

  DBUG_ASSERT(m_scan_value == 1);
  file= m_file[part_id];

  /*
    Read from the current partition until it is exhausted, then shift to
    the next partition in the read set, until a row is found or the last
    partition reports end-of-file.
  */
  while (TRUE)
  {
    result= file->ha_rnd_next(buf);
    if (!result)
    {
      /* Row found: remember the partition it came from. */
      m_last_part= part_id;
      m_part_spec.start_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ha_rnd_next returned failure
    */
    if (result == HA_ERR_RECORD_DELETED)
      continue;                              // Probably MyISAM

    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;        // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    DBUG_PRINT("info", ("rnd_end on partition %d", part_id));
    if ((result= file->ha_rnd_end()))
      break;

    /* Shift to next partition */
    part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id);
    if (part_id >= m_tot_parts)
    {
      /* All used partitions exhausted. */
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_last_part= part_id;
    m_part_spec.start_part= part_id;
    file= m_file[part_id];
    DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
    if ((result= file->ha_rnd_init(1)))
      break;
    late_extra_cache(part_id);
  }

end:
  /* Scan finished or failed between partitions: reset the cursor. */
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  table->status= STATUS_NOT_FOUND;
  DBUG_RETURN(result);
}
4849
4850
4851 /*
4852 Save position of current row
4853
4854 SYNOPSIS
4855 position()
4856 record Current record in MySQL Row Format
4857
4858 RETURN VALUE
4859 NONE
4860
4861 DESCRIPTION
4862 position() is called after each call to rnd_next() if the data needs
4863 to be ordered. You can do something like the following to store
4864 the position:
4865 ha_store_ptr(ref, ref_length, current_position);
4866
4867 The server uses ref to store data. ref_length in the above case is
4868 the size needed to store current_position. ref is just a byte array
4869 that the server will maintain. If you are using offsets to mark rows, then
4870 current_position should be the offset. If it is a primary key like in
4871 BDB, then it needs to be a primary key.
4872
4873 Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
4874 */
4875
void ha_partition::position(const uchar *record)
{
  handler *file= m_file[m_last_part];
  uint pad_length;
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_ENTER("ha_partition::position");

  /*
    Stored position layout: 2 bytes partition id (PARTITION_BYTES_IN_POS)
    followed by the underlying handler's ref, zero-padded to m_ref_length.
  */
  int2store(ref, m_last_part);
  /*
    If m_sec_sort_by_rowid is set, then the ref is already stored in the
    priority queue (m_queue) when doing ordered scans.
  */
  if (m_sec_sort_by_rowid && m_ordered_scan_ongoing)
  {
    DBUG_ASSERT(m_queue.elements);
    DBUG_ASSERT(m_ordered_rec_buffer);
    DBUG_ASSERT(!m_curr_key_info[1]);
    /* We already have the ref. */
    memcpy(ref + PARTITION_BYTES_IN_POS,
           queue_top(&m_queue) + PARTITION_BYTES_IN_POS,
           file->ref_length);
#ifndef DBUG_OFF
    /* Verify that the position is correct! */
    file->position(record);
    DBUG_ASSERT(!memcmp(ref + PARTITION_BYTES_IN_POS, file->ref,
                        file->ref_length));
#endif
  }
  else
  {
    /* Ask the partition's handler for its own position of the record. */
    file->position(record);
    memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, file->ref_length);
  }
  /*
    Partitions may have different ref lengths; zero-fill the remainder so
    refs compare deterministically.
  */
  pad_length= m_ref_length - PARTITION_BYTES_IN_POS - file->ref_length;
  if (pad_length)
    memset((ref + PARTITION_BYTES_IN_POS + file->ref_length), 0, pad_length);

  DBUG_VOID_RETURN;
}
4915
4916
4917 /*
4918 Read row using position
4919
4920 SYNOPSIS
4921 rnd_pos()
4922 out:buf Row read in MySQL Row Format
4923 position Position of read row
4924
4925 RETURN VALUE
4926 >0 Error code
4927 0 Success
4928
4929 DESCRIPTION
4930 This is like rnd_next, but you are given a position to use
4931 to determine the row. The position will be of the type that you stored in
4932 ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
4933 or position you saved when position() was called.
4934 Called from filesort.cc records.cc sql_insert.cc sql_select.cc
4935 sql_update.cc.
4936 */
4937
rnd_pos(uchar * buf,uchar * pos)4938 int ha_partition::rnd_pos(uchar * buf, uchar *pos)
4939 {
4940 uint part_id;
4941 handler *file;
4942 DBUG_ENTER("ha_partition::rnd_pos");
4943
4944 part_id= uint2korr((const uchar *) pos);
4945 DBUG_ASSERT(part_id < m_tot_parts);
4946 file= m_file[part_id];
4947 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
4948 m_last_part= part_id;
4949 DBUG_RETURN(file->ha_rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
4950 }
4951
4952
4953 /*
4954 Read row using position using given record to find
4955
4956 SYNOPSIS
4957 rnd_pos_by_record()
4958 record Current record in MySQL Row Format
4959
4960 RETURN VALUE
4961 >0 Error code
4962 0 Success
4963
4964 DESCRIPTION
4965 this works as position()+rnd_pos() functions, but does some extra work,
4966 calculating m_last_part - the partition to where the 'record'
4967 should go.
4968
4969 called from replication (log_event.cc)
4970 */
4971
rnd_pos_by_record(uchar * record)4972 int ha_partition::rnd_pos_by_record(uchar *record)
4973 {
4974 DBUG_ENTER("ha_partition::rnd_pos_by_record");
4975 if (unlikely(get_part_for_delete(record, m_rec0, m_part_info, &m_last_part)))
4976 DBUG_RETURN(1);
4977 DBUG_RETURN(m_file[m_last_part]->rnd_pos_by_record(record));
4978 }
4979
4980 /****************************************************************************
4981 MODULE index scan
4982 ****************************************************************************/
4983 /*
4984 Positions an index cursor to the index specified in the handle. Fetches the
4985 row if available. If the key value is null, begin at the first key of the
4986 index.
4987
4988 There are loads of optimisations possible here for the partition handler.
4989 The same optimisations can also be checked for full table scan although
4990 only through conditions and not from index ranges.
4991 Phase one optimisations:
4992 Check if the fields of the partition function are bound. If so only use
4993 the single partition it becomes bound to.
4994 Phase two optimisations:
4995 If it can be deducted through range or list partitioning that only a
4996 subset of the partitions are used, then only use those partitions.
4997 */
4998
4999 /** Compare key and rowid.
5000 Helper function for sorting records in the priority queue.
5001 a/b points to table->record[0] rows which must have the
5002 key fields set. The bytes before a and b store the handler::ref.
5003 This is used for comparing/sorting rows first according to
5004 KEY and if same KEY, by handler::ref (rowid).
5005
5006 @param key_info Null terminated array of index information
5007 @param a Pointer to record+ref in first record
5008 @param b Pointer to record+ref in second record
5009
5010 @return Return value is SIGN(first_rec - second_rec)
5011 @retval 0 Keys are equal
5012 @retval -1 second_rec is greater than first_rec
5013 @retval +1 first_rec is greater than second_rec
5014 */
5015
key_and_ref_cmp(void * key_info,uchar * a,uchar * b)5016 static int key_and_ref_cmp(void* key_info, uchar *a, uchar *b)
5017 {
5018 int cmp= key_rec_cmp(key_info, a, b);
5019 if (cmp)
5020 return cmp;
5021 /*
5022 We must compare by handler::ref, which is added before the record,
5023 in the priority queue.
5024 */
5025 KEY **key = (KEY**)key_info;
5026 uint ref_length= (*key)->table->file->ref_length;
5027 return (*key)->table->file->cmp_ref(a - ref_length, b - ref_length);
5028 }
5029
5030
5031 /**
5032 Setup the ordered record buffer and the priority queue.
5033 */
5034
bool ha_partition::init_record_priority_queue()
{
  DBUG_ENTER("ha_partition::init_record_priority_queue");
  DBUG_ASSERT(!m_ordered_rec_buffer);
  /*
    Initialize the ordered record buffer.
  */
  if (!m_ordered_rec_buffer)
  {
    uint alloc_len;
    uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);
    /*
      Allocate record buffer for each used partition.
      If we need to do a secondary sort by PK, then it is already in the
      record, so we only need to allocate for part id and a full record per
      partition.
      Otherwise we do a secondary sort by rowid (handler::ref) and must
      allocate for ref (includes part id) and full record per partition.
      We don't know yet if we need to do secondary sort by rowid, so we must
      allocate space for it.
    */
    if (m_curr_key_info[1])
      m_rec_offset= PARTITION_BYTES_IN_POS;
    else
      m_rec_offset= m_ref_length;
    /* Per-partition slot: [part id | (optional ref)] + full record. */
    alloc_len= used_parts * (m_rec_offset + m_rec_length);
    /* Allocate a key for temporary use when setting up the scan. */
    alloc_len+= table_share->max_key_length;

    if (!(m_ordered_rec_buffer= (uchar*)my_malloc(alloc_len, MYF(MY_WME))))
      DBUG_RETURN(true);

    /*
      We set-up one record per partition and each record has 2 bytes in
      front where the partition id is written. This is used by ordered
      index_read.
      If we need to also sort by rowid (handler::ref), then m_curr_key_info[1]
      is NULL and we add the rowid before the record.
      We also set-up a reference to the first record for temporary use in
      setting up the scan.
    */
    char *ptr= (char*) m_ordered_rec_buffer;
    uint i;
    /* Pre-write each slot's partition id so queue entries identify source. */
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      DBUG_PRINT("info", ("init rec-buf for part %u", i));
      int2store(ptr, i);
      ptr+= m_rec_offset + m_rec_length;
    }
    /* 'ptr' now points at the trailing scratch key area. */
    m_start_key.key= (const uchar*)ptr;
    /*
      Initialize priority queue, initialized to reading forward.
      Start by only sort by KEY, HA_EXTRA_SECONDARY_SORT_ROWID
      will be given if we should sort by handler::ref too.
    */
    if (init_queue(&m_queue, used_parts, m_rec_offset,
                   0,
                   key_rec_cmp,
                   (void*)m_curr_key_info))
    {
      /* Queue init failed: release the buffer so state stays consistent. */
      my_free(m_ordered_rec_buffer);
      m_ordered_rec_buffer= NULL;
      DBUG_RETURN(true);
    }
  }
  DBUG_RETURN(false);
}
5104
5105
5106 /**
5107 Destroy the ordered record buffer and the priority queue.
5108 */
5109
destroy_record_priority_queue()5110 void ha_partition::destroy_record_priority_queue()
5111 {
5112 DBUG_ENTER("ha_partition::destroy_record_priority_queue");
5113 if (m_ordered_rec_buffer)
5114 {
5115 delete_queue(&m_queue);
5116 my_free(m_ordered_rec_buffer);
5117 m_ordered_rec_buffer= NULL;
5118 }
5119 DBUG_VOID_RETURN;
5120 }
5121
5122
5123 /*
5124 Initialize handler before start of index scan
5125
5126 SYNOPSIS
5127 index_init()
5128 inx Index number
5129 sorted Is rows to be returned in sorted order
5130
5131 RETURN VALUE
5132 >0 Error code
5133 0 Success
5134
5135 DESCRIPTION
5136 index_init is always called before starting index scans (except when
5137 starting through index_read_idx and using read_range variants).
5138 */
5139
int ha_partition::index_init(uint inx, bool sorted)
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::index_init");

  DBUG_PRINT("info", ("inx %u sorted %u", inx, sorted));
  active_index= inx;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_start_key.length= 0;
  m_ordered= sorted;
  m_sec_sort_by_rowid= false;
  m_curr_key_info[0]= table->key_info+inx;
  m_curr_key_info[1]= NULL;
  /*
    There are two cases where it is not enough to only sort on the key:
    1) For clustered indexes, the optimizer assumes that all keys
       have the rest of the PK columns appended to the KEY, so it will
       sort by PK as secondary sort key.
    2) Rowid-Order-Retrieval access methods, like index_merge_intersect
       and index_merge_union. These methods requires the index to be sorted
       on rowid (handler::ref) as secondary sort key.
  */
  if (m_pkey_is_clustered && table->s->primary_key != MAX_KEY)
  {
    /*
      if PK is clustered, then the key cmp must use the pk to
      differentiate between equal key in given index.
    */
    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
    m_curr_key_info[1]= table->key_info+table->s->primary_key;
    m_curr_key_info[2]= NULL;
  }

  /* Buffer + priority queue used for merging ordered per-partition reads. */
  if (init_record_priority_queue())
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);

  /*
    Some handlers only read fields as specified by the bitmap for the
    read set. For partitioned handlers we always require that the
    fields of the partition functions are read such that we can
    calculate the partition id to place updated and deleted records.
    But this is required for operations that may need to change data only.
  */
  if (get_lock_type() == F_WRLCK)
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  /* Initialize the index on every partition in the (pruned) read set. */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if ((error= m_file[i]->ha_index_init(inx, sorted)))
      goto err;

    DBUG_EXECUTE_IF("ha_partition_fail_index_init", {
      i++;
      error= HA_ERR_NO_PARTITION_FOUND;
      goto err;
    });
  }
err:
  if (error)
  {
    /* End the previously initialized indexes. */
    uint j;
    for (j= bitmap_get_first_set(&m_part_info->read_partitions);
         j < i;
         j= bitmap_get_next_set(&m_part_info->read_partitions, j))
    {
      (void) m_file[j]->ha_index_end();
    }
    destroy_record_priority_queue();
  }
  DBUG_RETURN(error);
}
5214
5215
5216 /*
5217 End of index scan
5218
5219 SYNOPSIS
5220 index_end()
5221
5222 RETURN VALUE
5223 >0 Error code
5224 0 Success
5225
5226 DESCRIPTION
5227 index_end is called at the end of an index scan to clean up any
5228 things needed to clean up.
5229 */
5230
index_end()5231 int ha_partition::index_end()
5232 {
5233 int error= 0;
5234 uint i;
5235 DBUG_ENTER("ha_partition::index_end");
5236
5237 active_index= MAX_KEY;
5238 m_part_spec.start_part= NO_CURRENT_PART_ID;
5239 m_sec_sort_by_rowid= false;
5240 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
5241 i < m_tot_parts;
5242 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
5243 {
5244 int tmp;
5245 if ((tmp= m_file[i]->ha_index_end()))
5246 error= tmp;
5247 }
5248 destroy_record_priority_queue();
5249 DBUG_RETURN(error);
5250 }
5251
5252
5253 /*
5254 Read one record in an index scan and start an index scan
5255
5256 SYNOPSIS
5257 index_read_map()
5258 buf Read row in MySQL Row Format
5259 key Key parts in consecutive order
5260 keypart_map Which part of key is used
5261 find_flag What type of key condition is used
5262
5263 RETURN VALUE
5264 >0 Error code
5265 0 Success
5266
5267 DESCRIPTION
5268 index_read_map starts a new index scan using a start key. The MySQL Server
5269 will check the end key on its own. Thus to function properly the
5270 partitioned handler need to ensure that it delivers records in the sort
5271 order of the MySQL Server.
5272 index_read_map can be restarted without calling index_end on the previous
5273 index scan and without calling index_init. In this case the index_read_map
5274 is on the same index as the previous index_scan. This is particularly
  used in conjunction with multi read ranges.
5276 */
5277
index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)5278 int ha_partition::index_read_map(uchar *buf, const uchar *key,
5279 key_part_map keypart_map,
5280 enum ha_rkey_function find_flag)
5281 {
5282 DBUG_ENTER("ha_partition::index_read_map");
5283 end_range= 0;
5284 m_index_scan_type= partition_index_read;
5285 m_start_key.key= key;
5286 m_start_key.keypart_map= keypart_map;
5287 m_start_key.flag= find_flag;
5288 DBUG_RETURN(common_index_read(buf, TRUE));
5289 }
5290
5291
5292 /**
5293 Common routine for a number of index_read variants
5294
5295 @param buf Buffer where the record should be returned.
5296 @param have_start_key TRUE <=> the left endpoint is available, i.e.
5297 we're in index_read call or in read_range_first
5298 call and the range has left endpoint.
5299 FALSE <=> there is no left endpoint (we're in
5300 read_range_first() call and the range has no left
5301 endpoint).
5302
5303 @return Operation status
5304 @retval 0 OK
5305 @retval HA_ERR_END_OF_FILE Whole index scanned, without finding the record.
5306 @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
5307 @retval other error code.
5308
5309 @details
5310 Start scanning the range (when invoked from read_range_first()) or doing
5311 an index lookup (when invoked from index_read_XXX):
5312 - If possible, perform partition selection
5313 - Find the set of partitions we're going to use
5314 - Depending on whether we need ordering:
5315 NO: Get the first record from first used partition (see
5316 handle_unordered_scan_next_partition)
5317 YES: Fill the priority queue and get the record that is the first in
5318 the ordering
5319 */
5320
int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
  int error;
  uint UNINIT_VAR(key_len); /* used if have_start_key==TRUE */
  bool reverse_order= FALSE;
  DBUG_ENTER("ha_partition::common_index_read");

  DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u",
                      m_ordered, m_ordered_scan_ongoing));

  if (have_start_key)
  {
    /* Compute the used key length from the keypart map set by the caller. */
    m_start_key.length= key_len= calculate_key_len(table, active_index,
                                                   m_start_key.key,
                                                   m_start_key.keypart_map);
    DBUG_PRINT("info", ("have_start_key map %lu find_flag %u len %u",
                        m_start_key.keypart_map, m_start_key.flag, key_len));
    DBUG_ASSERT(key_len);
  }
  /* Prune to the partition set and decide ordered vs unordered scan. */
  if ((error= partition_scan_set_up(buf, have_start_key)))
  {
    DBUG_RETURN(error);
  }

  /*
    Backwards-reading start conditions always need the ordered (merged)
    scan so records come out in descending key order across partitions.
  */
  if (have_start_key &&
      (m_start_key.flag == HA_READ_PREFIX_LAST ||
       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
       m_start_key.flag == HA_READ_BEFORE_KEY))
  {
    reverse_order= TRUE;
    m_ordered_scan_ongoing= TRUE;
  }
  DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
                      m_ordered, m_ordered_scan_ongoing, have_start_key));
  if (!m_ordered_scan_ongoing)
  {
    /*
      We use unordered index scan when read_range is used and flag
      is set to not use ordered.
      We also use an unordered index scan when the number of partitions to
      scan is only one.
      The unordered index scan will use the partition set created.
    */
    DBUG_PRINT("info", ("doing unordered scan"));
    error= handle_unordered_scan_next_partition(buf);
  }
  else
  {
    /*
      In all other cases we will use the ordered index scan. This will use
      the partition set created by the get_partition_set method.
    */
    error= handle_ordered_index_scan(buf, reverse_order);
  }
  DBUG_RETURN(error);
}
5377
5378
5379 /*
5380 Start an index scan from leftmost record and return first record
5381
5382 SYNOPSIS
5383 index_first()
5384 buf Read row in MySQL Row Format
5385
5386 RETURN VALUE
5387 >0 Error code
5388 0 Success
5389
5390 DESCRIPTION
5391 index_first() asks for the first key in the index.
5392 This is similar to index_read except that there is no start key since
5393 the scan starts from the leftmost entry and proceeds forward with
5394 index_next.
5395
5396 Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5397 and sql_select.cc.
5398 */
5399
index_first(uchar * buf)5400 int ha_partition::index_first(uchar * buf)
5401 {
5402 DBUG_ENTER("ha_partition::index_first");
5403
5404 end_range= 0;
5405 m_index_scan_type= partition_index_first;
5406 DBUG_RETURN(common_first_last(buf));
5407 }
5408
5409
5410 /*
5411 Start an index scan from rightmost record and return first record
5412
5413 SYNOPSIS
5414 index_last()
5415 buf Read row in MySQL Row Format
5416
5417 RETURN VALUE
5418 >0 Error code
5419 0 Success
5420
5421 DESCRIPTION
5422 index_last() asks for the last key in the index.
5423 This is similar to index_read except that there is no start key since
5424 the scan starts from the rightmost entry and proceeds forward with
5425 index_prev.
5426
5427 Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5428 and sql_select.cc.
5429 */
5430
index_last(uchar * buf)5431 int ha_partition::index_last(uchar * buf)
5432 {
5433 DBUG_ENTER("ha_partition::index_last");
5434 int error = HA_ERR_END_OF_FILE;
5435 uint part_id = bitmap_get_first_set(&(m_part_info->read_partitions));
5436 if (part_id == MY_BIT_NONE)
5437 {
5438 /* No partition to scan. */
5439 DBUG_RETURN(error);
5440 }
5441 m_index_scan_type= partition_index_last;
5442 DBUG_RETURN(common_first_last(buf));
5443
5444 }
5445
5446 /*
5447 Common routine for index_first/index_last
5448
5449 SYNOPSIS
5450 ha_partition::common_first_last()
5451
5452 see index_first for rest
5453 */
5454
common_first_last(uchar * buf)5455 int ha_partition::common_first_last(uchar *buf)
5456 {
5457 int error;
5458
5459 if ((error= partition_scan_set_up(buf, FALSE)))
5460 return error;
5461 if (!m_ordered_scan_ongoing &&
5462 m_index_scan_type != partition_index_last)
5463 return handle_unordered_scan_next_partition(buf);
5464 return handle_ordered_index_scan(buf, FALSE);
5465 }
5466
5467
5468 /*
5469 Read last using key
5470
5471 SYNOPSIS
5472 index_read_last_map()
5473 buf Read row in MySQL Row Format
5474 key Key
5475 keypart_map Which part of key is used
5476
5477 RETURN VALUE
5478 >0 Error code
5479 0 Success
5480
5481 DESCRIPTION
5482 This is used in join_read_last_key to optimise away an ORDER BY.
5483 Can only be used on indexes supporting HA_READ_ORDER
5484 */
5485
index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)5486 int ha_partition::index_read_last_map(uchar *buf, const uchar *key,
5487 key_part_map keypart_map)
5488 {
5489 DBUG_ENTER("ha_partition::index_read_last_map");
5490
5491 m_ordered= TRUE; // Safety measure
5492 end_range= 0;
5493 m_index_scan_type= partition_index_read_last;
5494 m_start_key.key= key;
5495 m_start_key.keypart_map= keypart_map;
5496 m_start_key.flag= HA_READ_PREFIX_LAST;
5497 DBUG_RETURN(common_index_read(buf, TRUE));
5498 }
5499
5500
5501 /*
5502 Optimization of the default implementation to take advantage of dynamic
5503 partition pruning.
5504 */
int ha_partition::index_read_idx_map(uchar *buf, uint index,
                                     const uchar *key,
                                     key_part_map keypart_map,
                                     enum ha_rkey_function find_flag)
{
  int error= HA_ERR_KEY_NOT_FOUND;
  DBUG_ENTER("ha_partition::index_read_idx_map");

  if (find_flag == HA_READ_KEY_EXACT)
  {
    uint part;
    m_start_key.key= key;
    m_start_key.keypart_map= keypart_map;
    m_start_key.flag= find_flag;
    m_start_key.length= calculate_key_len(table, index, m_start_key.key,
                                          m_start_key.keypart_map);

    /* Prune the partition set to those that can contain the exact key. */
    get_partition_set(table, buf, index, &m_start_key, &m_part_spec);

    /*
      We have either found exactly 1 partition
      (in which case start_part == end_part)
      or no matching partitions (start_part > end_part)
    */
    DBUG_ASSERT(m_part_spec.start_part >= m_part_spec.end_part);
    /* The start part must be marked as used. */
    DBUG_ASSERT(m_part_spec.start_part > m_part_spec.end_part ||
                bitmap_is_set(&(m_part_info->read_partitions),
                              m_part_spec.start_part));

    for (part= m_part_spec.start_part;
         part <= m_part_spec.end_part;
         part= bitmap_get_next_set(&m_part_info->read_partitions, part))
    {
      error= m_file[part]->ha_index_read_idx_map(buf, index, key,
                                                 keypart_map, find_flag);
      /* Try the next partition on "not found"; stop on a hit or hard error. */
      if (error != HA_ERR_KEY_NOT_FOUND &&
          error != HA_ERR_END_OF_FILE)
        break;
    }
    /* Remember which partition delivered (or last attempted) the row. */
    if (part <= m_part_spec.end_part)
      m_last_part= part;
  }
  else
  {
    /*
      If not only used with READ_EXACT, we should investigate if possible
      to optimize for other find_flag's as well.
    */
    DBUG_ASSERT(0);
    /* fall back on the default implementation */
    error= handler::index_read_idx_map(buf, index, key, keypart_map, find_flag);
  }
  DBUG_RETURN(error);
}
5560
5561
5562 /*
5563 Read next record in a forward index scan
5564
5565 SYNOPSIS
5566 index_next()
5567 buf Read row in MySQL Row Format
5568
5569 RETURN VALUE
5570 >0 Error code
5571 0 Success
5572
5573 DESCRIPTION
5574 Used to read forward through the index.
5575 */
5576
index_next(uchar * buf)5577 int ha_partition::index_next(uchar * buf)
5578 {
5579 DBUG_ENTER("ha_partition::index_next");
5580
5581 /*
5582 TODO(low priority):
5583 If we want partition to work with the HANDLER commands, we
5584 must be able to do index_last() -> index_prev() -> index_next()
5585 and if direction changes, we must step back those partitions in
5586 the record queue so we don't return a value from the wrong direction.
5587 */
5588 DBUG_ASSERT(m_index_scan_type != partition_index_last);
5589 if (!m_ordered_scan_ongoing)
5590 {
5591 DBUG_RETURN(handle_unordered_next(buf, FALSE));
5592 }
5593 DBUG_RETURN(handle_ordered_next(buf, FALSE));
5594 }
5595
5596
5597 /*
5598 Read next record special
5599
5600 SYNOPSIS
5601 index_next_same()
5602 buf Read row in MySQL Row Format
5603 key Key
5604 keylen Length of key
5605
5606 RETURN VALUE
5607 >0 Error code
5608 0 Success
5609
5610 DESCRIPTION
5611 This routine is used to read the next but only if the key is the same
5612 as supplied in the call.
5613 */
5614
index_next_same(uchar * buf,const uchar * key,uint keylen)5615 int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
5616 {
5617 DBUG_ENTER("ha_partition::index_next_same");
5618
5619 DBUG_ASSERT(keylen == m_start_key.length);
5620 DBUG_ASSERT(m_index_scan_type != partition_index_last);
5621 if (!m_ordered_scan_ongoing)
5622 DBUG_RETURN(handle_unordered_next(buf, TRUE));
5623 DBUG_RETURN(handle_ordered_next(buf, TRUE));
5624 }
5625
5626
5627 /*
5628 Read next record when performing index scan backwards
5629
5630 SYNOPSIS
5631 index_prev()
5632 buf Read row in MySQL Row Format
5633
5634 RETURN VALUE
5635 >0 Error code
5636 0 Success
5637
5638 DESCRIPTION
5639 Used to read backwards through the index.
5640 */
5641
index_prev(uchar * buf)5642 int ha_partition::index_prev(uchar * buf)
5643 {
5644 DBUG_ENTER("ha_partition::index_prev");
5645
5646 /* TODO: read comment in index_next */
5647 DBUG_ASSERT(m_index_scan_type != partition_index_first);
5648 DBUG_RETURN(handle_ordered_prev(buf));
5649 }
5650
5651
5652 /*
5653 Start a read of one range with start and end key
5654
5655 SYNOPSIS
5656 read_range_first()
5657 start_key Specification of start key
5658 end_key Specification of end key
5659 eq_range_arg Is it equal range
5660 sorted Should records be returned in sorted order
5661
5662 RETURN VALUE
5663 >0 Error code
5664 0 Success
5665
5666 DESCRIPTION
5667 We reimplement read_range_first since we don't want the compare_key
5668 check at the end. This is already performed in the partition handler.
5669 read_range_next is very much different due to that we need to scan
5670 all underlying handlers.
5671 */
5672
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)5673 int ha_partition::read_range_first(const key_range *start_key,
5674 const key_range *end_key,
5675 bool eq_range_arg, bool sorted)
5676 {
5677 int error;
5678 DBUG_ENTER("ha_partition::read_range_first");
5679
5680 m_ordered= sorted;
5681 eq_range= eq_range_arg;
5682 set_end_range(end_key, RANGE_SCAN_ASC);
5683
5684 range_key_part= m_curr_key_info[0]->key_part;
5685 if (start_key)
5686 m_start_key= *start_key;
5687 else
5688 m_start_key.key= NULL;
5689
5690 m_index_scan_type= partition_read_range;
5691 error= common_index_read(m_rec0, MY_TEST(start_key));
5692 DBUG_RETURN(error);
5693 }
5694
5695
5696 /*
5697 Read next record in read of a range with start and end key
5698
5699 SYNOPSIS
5700 read_range_next()
5701
5702 RETURN VALUE
5703 >0 Error code
5704 0 Success
5705 */
5706
read_range_next()5707 int ha_partition::read_range_next()
5708 {
5709 DBUG_ENTER("ha_partition::read_range_next");
5710
5711 if (m_ordered_scan_ongoing)
5712 {
5713 DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
5714 }
5715 DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
5716 }
5717
5718
5719 /*
5720 Common routine to set up index scans
5721
5722 SYNOPSIS
5723 ha_partition::partition_scan_set_up()
5724 buf Buffer to later return record in (this function
                needs it to calculate partitioning function
5726 values)
5727
5728 idx_read_flag TRUE <=> m_start_key has range start endpoint which
5729 probably can be used to determine the set of partitions
5730 to scan.
5731 FALSE <=> there is no start endpoint.
5732
5733 DESCRIPTION
5734 Find out which partitions we'll need to read when scanning the specified
5735 range.
5736
5737 If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
5738 as we will not need to do merge ordering.
5739
5740 RETURN VALUE
5741 >0 Error code
5742 0 Success
5743 */
5744
int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
{
  DBUG_ENTER("ha_partition::partition_scan_set_up");

  if (idx_read_flag)
    get_partition_set(table,buf,active_index,&m_start_key,&m_part_spec);
  else
  {
    /* No start key: potentially every partition must be scanned. */
    m_part_spec.start_part= 0;
    m_part_spec.end_part= m_tot_parts - 1;
  }
  if (m_part_spec.start_part > m_part_spec.end_part)
  {
    /*
      We discovered a partition set but the set was empty so we report
      key not found.
    */
    DBUG_PRINT("info", ("scan with no partition to scan"));
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  if (m_part_spec.start_part == m_part_spec.end_part)
  {
    /*
      We discovered a single partition to scan, this never needs to be
      performed using the ordered index scan.
    */
    DBUG_PRINT("info", ("index scan using the single partition %d",
                        m_part_spec.start_part));
    m_ordered_scan_ongoing= FALSE;
  }
  else
  {
    /*
      Set m_ordered_scan_ongoing according how the scan should be done
      Only exact partitions are discovered atm by get_partition_set.
      Verify this, also bitmap must have at least one bit set otherwise
      the result from this table is the empty set.
    */
    uint start_part= bitmap_get_first_set(&(m_part_info->read_partitions));
    if (start_part == MY_BIT_NONE)
    {
      DBUG_PRINT("info", ("scan with no partition to scan"));
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    /* Skip past partitions pruned away below the computed start. */
    if (start_part > m_part_spec.start_part)
      m_part_spec.start_part= start_part;
    DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
    m_ordered_scan_ongoing= m_ordered;
  }
  DBUG_ASSERT(m_part_spec.start_part < m_tot_parts &&
              m_part_spec.end_part < m_tot_parts);
  DBUG_RETURN(0);
}
5800
5801
5802 /****************************************************************************
5803 Unordered Index Scan Routines
5804 ****************************************************************************/
5805 /*
5806 Common routine to handle index_next with unordered results
5807
5808 SYNOPSIS
5809 handle_unordered_next()
5810 out:buf Read row in MySQL Row Format
5811 next_same Called from index_next_same
5812
5813 RETURN VALUE
5814 HA_ERR_END_OF_FILE End of scan
5815 0 Success
5816 other Error code
5817
5818 DESCRIPTION
5819 These routines are used to scan partitions without considering order.
5820 This is performed in two situations.
5821 1) In read_multi_range this is the normal case
5822 2) When performing any type of index_read, index_first, index_last where
5823 all fields in the partition function is bound. In this case the index
5824 scan is performed on only one partition and thus it isn't necessary to
5825 perform any sort.
5826 */
5827
int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
{
  handler *file;
  int error;
  DBUG_ENTER("ha_partition::handle_unordered_next");

  if (m_part_spec.start_part >= m_tot_parts)
  {
    /* Should never happen! */
    DBUG_ASSERT(0);
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  /* start_part is the partition currently being scanned. */
  file= m_file[m_part_spec.start_part];

  /*
    We should consider if this should be split into three functions as
    partition_read_range and is_next_same are always local constants
  */

  if (m_index_scan_type == partition_read_range)
  {
    if (!(error= file->read_range_next()))
    {
      m_last_part= m_part_spec.start_part;
      DBUG_RETURN(0);
    }
  }
  else if (is_next_same)
  {
    /* Only rows with the same key as the start key qualify. */
    if (!(error= file->ha_index_next_same(buf, m_start_key.key,
                                          m_start_key.length)))
    {
      m_last_part= m_part_spec.start_part;
      DBUG_RETURN(0);
    }
  }
  else
  {
    if (!(error= file->ha_index_next(buf)))
    {
      m_last_part= m_part_spec.start_part;
      DBUG_RETURN(0);                           // Row was in range
    }
  }

  if (error == HA_ERR_END_OF_FILE)
  {
    /* Current partition exhausted: move the scan to the next used one. */
    m_part_spec.start_part++;                    // Start using next part
    error= handle_unordered_scan_next_partition(buf);
  }
  DBUG_RETURN(error);
}
5880
5881
5882 /*
5883 Handle index_next when changing to new partition
5884
5885 SYNOPSIS
5886 handle_unordered_scan_next_partition()
5887 buf Read row in MySQL Row Format
5888
5889 RETURN VALUE
5890 HA_ERR_END_OF_FILE End of scan
5891 0 Success
5892 other Error code
5893
5894 DESCRIPTION
5895 This routine is used to start the index scan on the next partition.
5896 Both initial start and after completing scan on one partition.
5897 */
5898
int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
{
  uint i= m_part_spec.start_part;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");

  /* Position i on the first used partition >= start_part. */
  if (i)
    i= bitmap_get_next_set(&m_part_info->read_partitions, i - 1);
  else
    i= bitmap_get_first_set(&m_part_info->read_partitions);

  for (;
       i <= m_part_spec.end_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    int error;
    handler *file= m_file[i];
    m_part_spec.start_part= i;
    /* Start the appropriate scan type on this partition. */
    switch (m_index_scan_type) {
    case partition_read_range:
      DBUG_PRINT("info", ("read_range_first on partition %d", i));
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, FALSE);
      break;
    case partition_index_read:
      DBUG_PRINT("info", ("index_read on partition %d", i));
      error= file->ha_index_read_map(buf, m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      break;
    case partition_index_first:
      DBUG_PRINT("info", ("index_first on partition %d", i));
      error= file->ha_index_first(buf);
      break;
    case partition_index_first_unordered:
      /*
        We perform a scan without sorting and this means that we
        should not use the index_first since not all handlers
        support it and it is also unnecessary to restrict sort
        order.
      */
      DBUG_PRINT("info", ("read_range_first on partition %d", i));
      /* read_range_first writes to table->record[0]; redirect it to buf. */
      table->record[0]= buf;
      error= file->read_range_first(0, end_range, eq_range, 0);
      table->record[0]= m_rec0;
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
    }
    if (!error)
    {
      m_last_part= i;
      DBUG_RETURN(0);
    }
    /* Any error other than "no row here" is fatal for the scan. */
    if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
      DBUG_RETURN(error);

    /*
      If HA_ERR_KEY_NOT_FOUND, we must return that error instead of
      HA_ERR_END_OF_FILE, to be able to continue search.
    */
    if (saved_error != HA_ERR_KEY_NOT_FOUND)
      saved_error= error;
    DBUG_PRINT("info", ("END_OF_FILE/KEY_NOT_FOUND on partition %d", i));
  }
  if (saved_error == HA_ERR_END_OF_FILE)
    m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(saved_error);
}
5969
5970
5971 /**
5972 Common routine to start index scan with ordered results.
5973
5974 @param[out] buf Read row in MySQL Row Format
5975
5976 @return Operation status
5977 @retval HA_ERR_END_OF_FILE End of scan
  @retval HA_ERR_KEY_NOT_FOUND End of scan
5979 @retval 0 Success
5980 @retval other Error code
5981
5982 @details
5983 This part contains the logic to handle index scans that require ordered
5984 output. This includes all except those started by read_range_first with
5985 the flag ordered set to FALSE. Thus most direct index_read and all
5986 index_first and index_last.
5987
5988 We implement ordering by keeping one record plus a key buffer for each
5989 partition. Every time a new entry is requested we will fetch a new
5990 entry from the partition that is currently not filled with an entry.
5991 Then the entry is put into its proper sort position.
5992
5993 Returning a record is done by getting the top record, copying the
5994 record to the request buffer and setting the partition as empty on
5995 entries.
5996 */
5997
int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
{
  uint i;
  uint j= 0;                 /* number of entries inserted into the queue */
  bool found= FALSE;
  uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan");

  /* A new scan start invalidates any pending key-not-found bookkeeping. */
  if (m_key_not_found)
  {
    m_key_not_found= false;
    bitmap_clear_all(&m_key_not_found_partitions);
  }
  m_top_entry= NO_CURRENT_PART_ID;
  queue_remove_all(&m_queue);
  DBUG_ASSERT(bitmap_is_set(&m_part_info->read_partitions,
                            m_part_spec.start_part));

  /*
    Position part_rec_buf_ptr to point to the first used partition >=
    start_part. There may be partitions marked by used_partitions,
    but is before start_part. These partitions has allocated record buffers
    but is dynamically pruned, so those buffers must be skipped.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_part_spec.start_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    /* Each used partition owns one (header + record) slot in the buffer. */
    part_rec_buf_ptr+= m_rec_offset + m_rec_length;
  }
  DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
                      m_part_spec.start_part, i));
  /* Fetch one initial row from every used partition in the range. */
  for (/* continue from above */ ;
       i <= m_part_spec.end_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    DBUG_PRINT("info", ("reading from part %u (scan_type: %u)",
                        i, m_index_scan_type));
    /* The slot header starts with the partition id (2 bytes). */
    DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr));
    uchar *rec_buf_ptr= part_rec_buf_ptr + m_rec_offset;
    int error;
    handler *file= m_file[i];

    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->ha_index_read_map(rec_buf_ptr,
                                     m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      break;
    case partition_index_first:
      error= file->ha_index_first(rec_buf_ptr);
      reverse_order= FALSE;
      break;
    case partition_index_last:
      error= file->ha_index_last(rec_buf_ptr);
      reverse_order= TRUE;
      break;
    case partition_index_read_last:
      error= file->ha_index_read_last_map(rec_buf_ptr,
                                          m_start_key.key,
                                          m_start_key.keypart_map);
      reverse_order= TRUE;
      break;
    case partition_read_range:
    {
      /*
        This can only read record to table->record[0], as it was set when
        the table was being opened. We have to memcpy data ourselves.
      */
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, TRUE);
      memcpy(rec_buf_ptr, table->record[0], m_rec_length);
      reverse_order= FALSE;
      break;
    }
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    if (!error)
    {
      found= TRUE;
      if (m_sec_sort_by_rowid)
      {
        /* Store the rowid in the slot header as a secondary sort key. */
        file->position(rec_buf_ptr);
        memcpy(part_rec_buf_ptr + PARTITION_BYTES_IN_POS,
               file->ref, file->ref_length);
      }
      /*
        Initialize queue without order first, simply insert
      */
      queue_element(&m_queue, j++)= part_rec_buf_ptr;
    }
    else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
    {
      DBUG_RETURN(error);
    }
    else if (error == HA_ERR_KEY_NOT_FOUND)
    {
      /* Remember this partition: index_next must still include it later. */
      DBUG_PRINT("info", ("HA_ERR_KEY_NOT_FOUND from partition %u", i));
      bitmap_set_bit(&m_key_not_found_partitions, i);
      m_key_not_found= true;
      saved_error= error;
    }
    part_rec_buf_ptr+= m_rec_offset + m_rec_length;
  }
  if (found)
  {
    /*
      We found at least one partition with data, now sort all entries and
      after that read the first entry and copy it to the buffer to return in.
    */
    queue_set_max_at_top(&m_queue, reverse_order);
    queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info);
    DBUG_ASSERT(m_queue.elements == 0);
    /*
      If PK, we should not sort by rowid, since that is already done
      through the KEY setup.
    */
    DBUG_ASSERT(!m_curr_key_info[1] || !m_sec_sort_by_rowid);
    m_queue.elements= j;
    queue_fix(&m_queue);
    return_top_record(buf);
    table->status= 0;
    DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
    DBUG_RETURN(0);
  }
  DBUG_RETURN(saved_error);
}
6129
6130
6131 /*
6132 Return the top record in sort order
6133
6134 SYNOPSIS
6135 return_top_record()
6136 out:buf Row returned in MySQL Row Format
6137
6138 RETURN VALUE
6139 NONE
6140 */
6141
return_top_record(uchar * buf)6142 void ha_partition::return_top_record(uchar *buf)
6143 {
6144 uint part_id;
6145 uchar *key_buffer= queue_top(&m_queue);
6146 uchar *rec_buffer= key_buffer + m_rec_offset;
6147
6148 part_id= uint2korr(key_buffer);
6149 /* Do column copy to avoid overwriting of non read columns
6150 specific to table with innodb engine */
6151 handler *file= m_file[part_id];
6152 file->copy_cached_row(buf, rec_buffer, m_rec_length);
6153 m_last_part= part_id;
6154 m_top_entry= part_id;
6155 }
6156
6157
6158 /**
6159 Add index_next/prev from partitions without exact match.
6160
6161 If there where any partitions that returned HA_ERR_KEY_NOT_FOUND when
6162 ha_index_read_map was done, those partitions must be included in the
6163 following index_next/prev call.
6164 */
6165
int ha_partition::handle_ordered_index_scan_key_not_found()
{
  int error;
  uint i, old_elements= m_queue.elements;
  uchar *part_buf= m_ordered_rec_buffer;
  uchar *curr_rec_buf= NULL;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan_key_not_found");
  DBUG_ASSERT(m_key_not_found);
  /*
    Loop over all used partitions to get the correct offset
    into m_ordered_rec_buffer.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (bitmap_is_set(&m_key_not_found_partitions, i))
    {
      /*
        This partition is used and did return HA_ERR_KEY_NOT_FOUND
        in index_read_map.
      */
      curr_rec_buf= part_buf + m_rec_offset;
      error= m_file[i]->ha_index_next(curr_rec_buf);
      /* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
      DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
      if (!error)
      {
        if (m_sec_sort_by_rowid)
        {
          /* Refresh the rowid secondary sort key in the slot header. */
          m_file[i]->position(curr_rec_buf);
          memcpy(part_buf + PARTITION_BYTES_IN_POS,
                 m_file[i]->ref,
                 m_file[i]->ref_length);
        }
        /* Merge this partition's row into the ordered queue. */
        queue_insert(&m_queue, part_buf);
      }
      else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
        DBUG_RETURN(error);
    }
    /* Advance to the next partition's (header + record) slot. */
    part_buf+= m_rec_offset + m_rec_length;
  }
  DBUG_ASSERT(curr_rec_buf);
  /* All pending partitions have now been processed. */
  bitmap_clear_all(&m_key_not_found_partitions);
  m_key_not_found= false;

  if (m_queue.elements > old_elements)
  {
    /* Update m_top_entry, which may have changed. */
    uchar *key_buffer= queue_top(&m_queue);
    m_top_entry= uint2korr(key_buffer);
  }
  DBUG_RETURN(0);
}
6220
6221
6222 /*
6223 Common routine to handle index_next with ordered results
6224
6225 SYNOPSIS
6226 handle_ordered_next()
6227 out:buf Read row in MySQL Row Format
6228 next_same Called from index_next_same
6229
6230 RETURN VALUE
6231 HA_ERR_END_OF_FILE End of scan
6232 0 Success
6233 other Error code
6234 */
6235
int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
{
  int error;
  uint part_id= m_top_entry;
  uchar *rec_buf= queue_top(&m_queue) + m_rec_offset;
  handler *file;
  DBUG_ENTER("ha_partition::handle_ordered_next");

  if (m_key_not_found)
  {
    if (is_next_same)
    {
      /*
        Only rows which match the key. Partitions that did not find
        the key cannot contribute, so just forget about them.
      */
      m_key_not_found= false;
      bitmap_clear_all(&m_key_not_found_partitions);
    }
    else
    {
      /* There are partitions not included in the index record queue. */
      uint old_elements= m_queue.elements;
      if ((error= handle_ordered_index_scan_key_not_found()))
        DBUG_RETURN(error);
      /*
        If the queue top changed, i.e. one of the partitions that gave
        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
        return it.
        Otherwise replace the old with a call to index_next (fall through).
      */
      if (old_elements != m_queue.elements && part_id != m_top_entry)
      {
        return_top_record(buf);
        DBUG_RETURN(0);
      }
    }
  }
  /*
    NOTE(review): m_top_entry is presumably NO_CURRENT_PART_ID (>= m_tot_parts)
    when the queue has been fully drained — confirm against return_top_record.
  */
  if (part_id >= m_tot_parts)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  file= m_file[part_id];

  if (m_index_scan_type == partition_read_range)
  {
    /* read_range_next delivers into table->record[0]; copy to queue entry. */
    error= file->read_range_next();
    memcpy(rec_buf, table->record[0], m_rec_length);
  }
  else if (!is_next_same)
    error= file->ha_index_next(rec_buf);
  else
    error= file->ha_index_next_same(rec_buf, m_start_key.key,
                                    m_start_key.length);
  if (error)
  {
    if (error == HA_ERR_END_OF_FILE)
    {
      /* This partition is exhausted. Return next buffered row, if any. */
      queue_remove(&m_queue, (uint) 0);
      if (m_queue.elements)
      {
        DBUG_PRINT("info", ("Record returned from partition %u (2)",
                            m_top_entry));
        return_top_record(buf);
        table->status= 0;
        error= 0;
      }
    }
    DBUG_RETURN(error);
  }
  if (m_sec_sort_by_rowid)
  {
    /* Refresh the stored ref so equal keys stay ordered by rowid. */
    file->position(rec_buf);
    memcpy(rec_buf - m_rec_offset + PARTITION_BYTES_IN_POS,
           file->ref, file->ref_length);
  }
  /* Re-sort the queue after replacing the top entry's row. */
  queue_replaced(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
  DBUG_RETURN(0);
}
6314
6315
6316 /*
6317 Common routine to handle index_prev with ordered results
6318
6319 SYNOPSIS
6320 handle_ordered_prev()
6321 out:buf Read row in MySQL Row Format
6322
6323 RETURN VALUE
6324 HA_ERR_END_OF_FILE End of scan
6325 0 Success
6326 other Error code
6327 */
6328
handle_ordered_prev(uchar * buf)6329 int ha_partition::handle_ordered_prev(uchar *buf)
6330 {
6331 int error;
6332 uint part_id= m_top_entry;
6333 uchar *rec_buf= queue_top(&m_queue) + m_rec_offset;
6334 handler *file= m_file[part_id];
6335 DBUG_ENTER("ha_partition::handle_ordered_prev");
6336
6337 if ((error= file->ha_index_prev(rec_buf)))
6338 {
6339 if (error == HA_ERR_END_OF_FILE)
6340 {
6341 queue_remove(&m_queue, (uint) 0);
6342 if (m_queue.elements)
6343 {
6344 return_top_record(buf);
6345 DBUG_PRINT("info", ("Record returned from partition %d (2)",
6346 m_top_entry));
6347 error= 0;
6348 table->status= 0;
6349 }
6350 }
6351 DBUG_RETURN(error);
6352 }
6353 if (m_sec_sort_by_rowid)
6354 {
6355 file->position(rec_buf);
6356 memcpy(rec_buf - m_rec_offset + PARTITION_BYTES_IN_POS,
6357 file->ref, file->ref_length);
6358 }
6359 queue_replaced(&m_queue);
6360 return_top_record(buf);
6361 DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry));
6362 DBUG_RETURN(0);
6363 }
6364
6365
6366 /****************************************************************************
6367 MODULE information calls
6368 ****************************************************************************/
6369
6370 /*
6371 These are all first approximations of the extra, info, scan_time
6372 and read_time calls
6373 */
6374
6375 /**
6376 Helper function for sorting according to number of rows in descending order.
6377 */
6378
compare_number_of_records(ha_partition * me,const uint32 * a,const uint32 * b)6379 int ha_partition::compare_number_of_records(ha_partition *me,
6380 const uint32 *a,
6381 const uint32 *b)
6382 {
6383 handler **file= me->m_file;
6384 /* Note: sorting in descending order! */
6385 if (file[*a]->stats.records > file[*b]->stats.records)
6386 return -1;
6387 if (file[*a]->stats.records < file[*b]->stats.records)
6388 return 1;
6389 return 0;
6390 }
6391
6392
6393 /*
6394 General method to gather info from handler
6395
6396 SYNOPSIS
6397 info()
6398 flag Specifies what info is requested
6399
6400 RETURN VALUE
6401 NONE
6402
6403 DESCRIPTION
6404 ::info() is used to return information to the optimizer.
6405 Currently this table handler doesn't implement most of the fields
6406 really needed. SHOW also makes use of this data
    Another note, if your handler doesn't provide an exact record count,
6408 you will probably want to have the following in your code:
6409 if (records < 2)
6410 records = 2;
6411 The reason is that the server will optimize for cases of only a single
6412 record. If in a table scan you don't know the number of records
6413 it will probably be better to set records to two so you can return
6414 as many records as you need.
6415
6416 Along with records a few more variables you may wish to set are:
6417 records
6418 deleted
6419 data_file_length
6420 index_file_length
6421 delete_length
6422 check_time
6423 Take a look at the public variables in handler.h for more information.
6424
6425 Called in:
6426 filesort.cc
6427 ha_heap.cc
6428 item_sum.cc
6429 opt_sum.cc
6430 sql_delete.cc
6431 sql_delete.cc
6432 sql_derived.cc
6433 sql_select.cc
6434 sql_select.cc
6435 sql_select.cc
6436 sql_select.cc
6437 sql_select.cc
6438 sql_show.cc
6439 sql_show.cc
6440 sql_show.cc
6441 sql_show.cc
6442 sql_table.cc
6443 sql_union.cc
6444 sql_update.cc
6445
6446 Some flags that are not implemented
6447 HA_STATUS_POS:
6448 This parameter is never used from the MySQL Server. It is checked in a
6449 place in MyISAM so could potentially be used by MyISAM specific
6450 programs.
6451 HA_STATUS_NO_LOCK:
6452 This is declared and often used. It's only used by MyISAM.
6453 It means that MySQL doesn't need the absolute latest statistics
6454 information. This may save the handler from doing internal locks while
6455 retrieving statistics data.
6456 */
6457
int ha_partition::info(uint flag)
{
  uint no_lock_flag= flag & HA_STATUS_NO_LOCK;
  uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA;
  DBUG_ENTER("ha_partition::info");

#ifndef DBUG_OFF
  if (bitmap_is_set_all(&(m_part_info->read_partitions)))
    DBUG_PRINT("info", ("All partitions are used"));
#endif /* DBUG_OFF */
  if (flag & HA_STATUS_AUTO)
  {
    bool auto_inc_is_first_in_idx= (table_share->next_number_keypart == 0);
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (!table->found_next_number_field)
      stats.auto_increment_value= 0;
    else if (part_share->auto_inc_initialized)
    {
      /* Cached value is valid; read it under the auto-inc mutex. */
      lock_auto_increment();
      stats.auto_increment_value= part_share->next_auto_inc_val;
      unlock_auto_increment();
    }
    else
    {
      lock_auto_increment();
      /* to avoid two concurrent initializations, check again when locked */
      if (part_share->auto_inc_initialized)
        stats.auto_increment_value= part_share->next_auto_inc_val;
      else
      {
        /*
          The auto-inc mutex in the table_share is locked, so we do not need
          to have the handlers locked.
          HA_STATUS_NO_LOCK is not checked, since we cannot skip locking
          the mutex, because it is initialized.
        */
        handler *file, **file_array;
        ulonglong auto_increment_value= 0;
        file_array= m_file;
        DBUG_PRINT("info",
                   ("checking all partitions for auto_increment_value"));
        /* Take the maximum auto_increment over all partitions. */
        do
        {
          file= *file_array;
          file->info(HA_STATUS_AUTO | no_lock_flag);
          set_if_bigger(auto_increment_value,
                        file->stats.auto_increment_value);
        } while (*(++file_array));

        DBUG_ASSERT(auto_increment_value);
        stats.auto_increment_value= auto_increment_value;
        if (auto_inc_is_first_in_idx)
        {
          set_if_bigger(part_share->next_auto_inc_val,
                        auto_increment_value);
          part_share->auto_inc_initialized= true;
          DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu",
                              (ulong) part_share->next_auto_inc_val));
        }
      }
      unlock_auto_increment();
    }
  }
  if (flag & HA_STATUS_VARIABLE)
  {
    uint i;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    /*
      Calculates statistical variables
      records:           Estimate of number records in table
                         We report sum (always at least 2 if not empty)
      deleted:           Estimate of number holes in the table due to
                         deletes
                         We report sum
      data_file_length:  Length of data file, in principle bytes in table
                         We report sum
      index_file_length: Length of index file, in principle bytes in
                         indexes in the table
                         We report sum
      delete_length: Length of free space easily used by new records in table
                     We report sum
      mean_record_length:Mean record length in the table
                         We calculate this
      check_time:        Time of last check (only applicable to MyISAM)
                         We report last time of all underlying handlers
    */
    handler *file;
    stats.records= 0;
    stats.deleted= 0;
    stats.data_file_length= 0;
    stats.index_file_length= 0;
    stats.check_time= 0;
    stats.delete_length= 0;
    /* Sum the variable statistics over all used (read) partitions. */
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      file= m_file[i];
      file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      stats.records+= file->stats.records;
      stats.deleted+= file->stats.deleted;
      stats.data_file_length+= file->stats.data_file_length;
      stats.index_file_length+= file->stats.index_file_length;
      stats.delete_length+= file->stats.delete_length;
      if (file->stats.check_time > stats.check_time)
        stats.check_time= file->stats.check_time;
    }
    /*
      Report at least 2 rows for non-empty inexact statistics, since the
      optimizer special-cases tables with a single row.
    */
    if (stats.records && stats.records < 2 &&
        !(m_file[0]->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
      stats.records= 2;
    if (stats.records > 0)
      stats.mean_rec_length= (ulong) (stats.data_file_length / stats.records);
    else
      stats.mean_rec_length= 0;
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    /*
      Recalculate loads of constant variables. MyISAM also sets things
      directly on the table share object.

      Check whether this should be fixed since handlers should not
      change things directly on the table object.

      Monty comment: This should NOT be changed! It's the handlers
      responsibility to correct table->s->keys_xxxx information if keys
      have been disabled.

      The most important parameters set here is records per key on
      all indexes. block_size and primary key ref_length.

      For each index there is an array of rec_per_key.
      As an example if we have an index with three attributes a,b and c
      we will have an array of 3 rec_per_key.
      rec_per_key[0] is an estimate of number of records divided by
      number of unique values of the field a.
      rec_per_key[1] is an estimate of the number of records divided
      by the number of unique combinations of the fields a and b.
      rec_per_key[2] is an estimate of the number of records divided
      by the number of unique combinations of the fields a,b and c.

      Many handlers only set the value of rec_per_key when all fields
      are bound (rec_per_key[2] in the example above).

      If the handler doesn't support statistics, it should set all of the
      above to 0.

      We first scan through all partitions to get the one holding most rows.
      We will then allow the handler with the most rows to set
      the rec_per_key and use this as an estimate on the total table.

      max_data_file_length:     Maximum data file length
                                We ignore it, is only used in
                                SHOW TABLE STATUS
      max_index_file_length:    Maximum index file length
                                We ignore it since it is never used
      block_size:               Block size used
                                We set it to the value of the first handler
      ref_length:               We set this to the value calculated
                                and stored in local object
      create_time:              Creation time of table

      So we calculate these constants by using the variables from the
      handler with most rows.
    */
    handler *file, **file_array;
    ulonglong max_records= 0;
    uint32 i= 0;
    uint32 handler_instance= 0;

    file_array= m_file;
    do
    {
      file= *file_array;
      /* Get variables if not already done */
      if (!(flag & HA_STATUS_VARIABLE) ||
          !bitmap_is_set(&(m_part_info->read_partitions),
                         (file_array - m_file)))
        file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      if (file->stats.records > max_records)
      {
        max_records= file->stats.records;
        handler_instance= i;
      }
      i++;
    } while (*(++file_array));
    /*
      Sort the array of part_ids by number of records
      in descending order.
    */
    my_qsort2((void*) m_part_ids_sorted_by_num_of_records,
              m_tot_parts,
              sizeof(uint32),
              (qsort2_cmp) compare_number_of_records,
              this);

    /* Let the partition with the most rows supply the constant stats. */
    file= m_file[handler_instance];
    file->info(HA_STATUS_CONST | no_lock_flag);
    stats.block_size= file->stats.block_size;
    stats.create_time= file->stats.create_time;
    /* ref_length was precalculated for the partition handler (m_ref_length). */
    ref_length= m_ref_length;
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    handler *file= m_file[m_last_part];
    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
    /*
      This flag is used to get index number of the unique index that
      reported duplicate key
      We will report the errkey on the last handler used and ignore the rest
      Note: not all engines support HA_STATUS_ERRKEY, so set errkey first.
    */
    file->errkey= errkey;
    file->info(HA_STATUS_ERRKEY | no_lock_flag);
    errkey= file->errkey;
  }
  if (flag & HA_STATUS_TIME)
  {
    handler *file, **file_array;
    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
    /*
      This flag is used to set the latest update time of the table.
      Used by SHOW commands
      We will report the maximum of these times
    */
    stats.update_time= 0;
    file_array= m_file;
    do
    {
      file= *file_array;
      file->info(HA_STATUS_TIME | no_lock_flag);
      if (file->stats.update_time > stats.update_time)
        stats.update_time= file->stats.update_time;
    } while (*(++file_array));
  }
  DBUG_RETURN(0);
}
6696
6697
get_dynamic_partition_info(PARTITION_STATS * stat_info,uint part_id)6698 void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
6699 uint part_id)
6700 {
6701 handler *file= m_file[part_id];
6702 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
6703 file->info(HA_STATUS_TIME | HA_STATUS_VARIABLE |
6704 HA_STATUS_VARIABLE_EXTRA | HA_STATUS_NO_LOCK);
6705
6706 stat_info->records= file->stats.records;
6707 stat_info->mean_rec_length= file->stats.mean_rec_length;
6708 stat_info->data_file_length= file->stats.data_file_length;
6709 stat_info->max_data_file_length= file->stats.max_data_file_length;
6710 stat_info->index_file_length= file->stats.index_file_length;
6711 stat_info->delete_length= file->stats.delete_length;
6712 stat_info->create_time= file->stats.create_time;
6713 stat_info->update_time= file->stats.update_time;
6714 stat_info->check_time= file->stats.check_time;
6715 stat_info->check_sum= 0;
6716 if (file->ha_table_flags() & HA_HAS_CHECKSUM)
6717 stat_info->check_sum= file->checksum();
6718 return;
6719 }
6720
6721
6722 /**
6723 General function to prepare handler for certain behavior.
6724
6725 @param[in] operation operation to execute
6726
6727 @return status
6728 @retval 0 success
6729 @retval >0 error code
6730
6731 @detail
6732
6733 extra() is called whenever the server wishes to send a hint to
6734 the storage engine. The MyISAM engine implements the most hints.
6735
6736 We divide the parameters into the following categories:
6737 1) Operations used by most handlers
6738 2) Operations used by some non-MyISAM handlers
6739 3) Operations used only by MyISAM
6740 4) Operations only used by temporary tables for query processing
6741 5) Operations only used by MyISAM internally
6742 6) Operations not used at all
6743 7) Operations only used by federated tables for query processing
6744 8) Operations only used by NDB
6745 9) Operations only used by MERGE
6746 10) Operations only used by InnoDB
6747 11) Operations only used by partitioning
6748
6749 The partition handler need to handle category 1), 2), 3), 10) and 11).
6750
6751 1) Operations used by most handlers
6752 -----------------------------------
6753 HA_EXTRA_RESET:
6754 This option is used by most handlers and it resets the handler state
6755 to the same state as after an open call. This includes releasing
6756 any READ CACHE or WRITE CACHE or other internal buffer used.
6757
6758 It is called from the reset method in the handler interface. There are
6759 three instances where this is called.
6760 1) After completing a INSERT ... SELECT ... query the handler for the
6761 table inserted into is reset
6762 2) It is called from close_thread_table which in turn is called from
6763 close_thread_tables except in the case where the tables are locked
6764 in which case ha_commit_stmt is called instead.
6765 It is only called from here if refresh_version hasn't changed and the
6766 table is not an old table when calling close_thread_table.
6767 close_thread_tables is called from many places as a general clean up
6768 function after completing a query.
6769 3) It is called when deleting the QUICK_RANGE_SELECT object if the
6770 QUICK_RANGE_SELECT object had its own handler object. It is called
     immediately before close of this local handler object.
6772 HA_EXTRA_KEYREAD:
6773 HA_EXTRA_NO_KEYREAD:
6774 These parameters are used to provide an optimisation hint to the handler.
6775 If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
6776 many handlers this means that the index-only scans can be used and it
6777 is not necessary to use the real records to satisfy this part of the
6778 query. Index-only scans is a very important optimisation for disk-based
6779 indexes. For main-memory indexes most indexes contain a reference to the
6780 record and thus KEYREAD only says that it is enough to read key fields.
6781 HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET
6782 will disable this option.
6783 The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
6784 feature is supported.
6785 HA_EXTRA_FLUSH:
6786 Indication to flush tables to disk, is supposed to be used to
6787 ensure disk based tables are flushed at end of query execution.
6788 Currently is never used.
6789 HA_EXTRA_PREPARE_FOR_RENAME:
6790 Informs the handler we are about to attempt a rename of the table.
6791 For handlers that have share open files (MyISAM key-file and
6792 Archive writer) they must close the files before rename is possible
6793 on Windows.
6794 HA_EXTRA_FORCE_REOPEN:
6795 Only used by MyISAM and Archive, called when altering table,
6796 closing tables to enforce a reopen of the table files.
6797
6798 2) Operations used by some non-MyISAM handlers
6799 ----------------------------------------------
6800 HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
6801 This is a strictly InnoDB feature that is more or less undocumented.
6802 When it is activated InnoDB copies field by field from its fetch
6803 cache instead of all fields in one memcpy. Have no idea what the
6804 purpose of this is.
6805 Cut from include/my_base.h:
6806 When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
6807 other fields intact. When this is off (by default) InnoDB will use memcpy
6808 to overwrite entire row.
6809 HA_EXTRA_IGNORE_DUP_KEY:
6810 HA_EXTRA_NO_IGNORE_DUP_KEY:
    Informs the handler that we will not stop the transaction if we get
    duplicate key errors during insert/update.
6813 Always called in pair, triggered by INSERT IGNORE and other similar
6814 SQL constructs.
6815 Not used by MyISAM.
6816
6817 3) Operations used only by MyISAM
6818 ---------------------------------
6819 HA_EXTRA_NORMAL:
6820 Only used in MyISAM to reset quick mode, not implemented by any other
6821 handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
6822
6823 It is called after completing a successful DELETE query if the QUICK
6824 option is set.
6825
6826 HA_EXTRA_QUICK:
6827 When the user does DELETE QUICK FROM table where-clause; this extra
6828 option is called before the delete query is performed and
6829 HA_EXTRA_NORMAL is called after the delete query is completed.
6830 Temporary tables used internally in MySQL always set this option
6831
6832 The meaning of quick mode is that when deleting in a B-tree no merging
6833 of leafs is performed. This is a common method and many large DBMS's
6834 actually only support this quick mode since it is very difficult to
6835 merge leaves in a tree used by many threads concurrently.
6836
6837 HA_EXTRA_CACHE:
6838 This flag is usually set with extra_opt along with a cache size.
6839 The size of this buffer is set by the user variable
6840 record_buffer_size. The value of this cache size is the amount of
6841 data read from disk in each fetch when performing a table scan.
6842 This means that before scanning a table it is normal to call
6843 extra with HA_EXTRA_CACHE and when the scan is completed to call
6844 HA_EXTRA_NO_CACHE to release the cache memory.
6845
6846 Some special care is taken when using this extra parameter since there
6847 could be a write ongoing on the table in the same statement. In this
6848 one has to take special care since there might be a WRITE CACHE as
6849 well. HA_EXTRA_CACHE specifies using a READ CACHE and using
6850 READ CACHE and WRITE CACHE at the same time is not possible.
6851
6852 Only MyISAM currently use this option.
6853
6854 It is set when doing full table scans using rr_sequential and
6855 reset when completing such a scan with end_read_record
6856 (resetting means calling extra with HA_EXTRA_NO_CACHE).
6857
6858 It is set in filesort.cc for MyISAM internal tables and it is set in
6859 a multi-update where HA_EXTRA_CACHE is called on a temporary result
6860 table and after that ha_rnd_init(0) on table to be updated
6861 and immediately after that HA_EXTRA_NO_CACHE on table to be updated.
6862
6863 Apart from that it is always used from init_read_record but not when
6864 used from UPDATE statements. It is not used from DELETE statements
6865 with ORDER BY and LIMIT but it is used in normal scan loop in DELETE
6866 statements. The reason here is that DELETE's in MyISAM doesn't move
    existing data rows.
6868
6869 It is also set in copy_data_between_tables when scanning the old table
6870 to copy over to the new table.
6871 And it is set in join_init_read_record where quick objects are used
6872 to perform a scan on the table. In this case the full table scan can
6873 even be performed multiple times as part of the nested loop join.
6874
6875 For purposes of the partition handler it is obviously necessary to have
6876 special treatment of this extra call. If we would simply pass this
6877 extra call down to each handler we would allocate
6878 cache size * no of partitions amount of memory and this is not
6879 necessary since we will only scan one partition at a time when doing
6880 full table scans.
6881
6882 Thus we treat it by first checking whether we have MyISAM handlers in
6883 the table, if not we simply ignore the call and if we have we will
6884 record the call but will not call any underlying handler yet. Then
6885 when performing the sequential scan we will check this recorded value
6886 and call extra_opt whenever we start scanning a new partition.
6887
6888 HA_EXTRA_NO_CACHE:
6889 When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
6890 flush method in the select_union class.
6891 It is used to some extent when insert delayed inserts.
6892 See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
6893
6894 It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
6895 if they are MyISAM handlers. Other handlers we can ignore the call
6896 for. If no cache is in use they will quickly return after finding
6897 this out. And we also ensure that all caches are disabled and no one
6898 is left by mistake.
6899 In the future this call will probably be deleted and we will instead call
6900 ::reset();
6901
6902 HA_EXTRA_WRITE_CACHE:
6903 See above, called from various places. It is mostly used when we
6904 do INSERT ... SELECT
6905 No special handling to save cache space is developed currently.
6906
6907 HA_EXTRA_PREPARE_FOR_UPDATE:
6908 This is called as part of a multi-table update. When the table to be
6909 updated is also scanned then this informs MyISAM handler to drop any
6910 caches if dynamic records are used (fixed size records do not care
6911 about this call). We pass this along to the first partition to scan, and
6912 flag that it is to be called after HA_EXTRA_CACHE when moving to the next
6913 partition to scan.
6914
6915 HA_EXTRA_PREPARE_FOR_DROP:
6916 Only used by MyISAM, called in preparation for a DROP TABLE.
6917 It's used mostly by Windows that cannot handle dropping an open file.
6918 On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
6919
6920 HA_EXTRA_READCHECK:
6921 HA_EXTRA_NO_READCHECK:
6922 Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
6923 this is not needed in SQL. The reason for this call is that MyISAM sets
6924 the READ_CHECK_USED in the open call so the call is needed for MyISAM
6925 to reset this feature.
6926 The idea with this parameter was to inform of doing/not doing a read
6927 check before applying an update. Since SQL always performs a read before
6928 applying the update No Read Check is needed in MyISAM as well.
6929
6930 This is a cut from Docs/myisam.txt
6931 Sometimes you might want to force an update without checking whether
6932 another user has changed the record since you last read it. This is
6933 somewhat dangerous, so it should ideally not be used. That can be
6934 accomplished by wrapping the mi_update() call in two calls to mi_extra(),
6935 using these functions:
6936 HA_EXTRA_NO_READCHECK=5 No readcheck on update
6937 HA_EXTRA_READCHECK=6 Use readcheck (def)
6938
6939
6940 4) Operations only used by temporary tables for query processing
6941 ----------------------------------------------------------------
6942 HA_EXTRA_RESET_STATE:
6943 Same as reset() except that buffers are not released. If there is
6944 a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
6945 or to change type of cache between READ CACHE and WRITE CACHE.
6946
6947 This extra function is always called immediately before calling
6948 delete_all_rows on the handler for temporary tables.
6949 There are cases however when HA_EXTRA_RESET_STATE isn't called in
6950 a similar case for a temporary table in sql_union.cc and in two other
6951 cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
6952 called afterwards.
6953 The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
6954 disable caching, delete all rows and enable WRITE CACHE. This is
6955 used for temporary tables containing distinct sums and a
6956 functional group.
6957
6958 The only case that delete_all_rows is called on non-temporary tables
6959 is in sql_delete.cc when DELETE FROM table; is called by a user.
6960 In this case no special extra calls are performed before or after this
6961 call.
6962
6963 The partition handler should not need to bother about this one. It
6964 should never be called.
6965
6966 HA_EXTRA_NO_ROWS:
6967 Don't insert rows indication to HEAP and MyISAM, only used by temporary
6968 tables used in query processing.
6969 Not handled by partition handler.
6970
6971 5) Operations only used by MyISAM internally
6972 --------------------------------------------
6973 HA_EXTRA_REINIT_CACHE:
6974 This call reinitializes the READ CACHE described above if there is one
6975 and otherwise the call is ignored.
6976
6977 We can thus safely call it on all underlying handlers if they are
6978 MyISAM handlers. It is however never called so we don't handle it at all.
6979 HA_EXTRA_FLUSH_CACHE:
6980 Flush WRITE CACHE in MyISAM. It is only from one place in the code.
6981 This is in sql_insert.cc where it is called if the table_flags doesn't
6982 contain HA_DUPLICATE_POS. The only handler having the HA_DUPLICATE_POS
6983 set is the MyISAM handler and so the only handler not receiving this
6984 call is MyISAM.
6985 Thus in effect this call is called but never used. Could be removed
6986 from sql_insert.cc
6987 HA_EXTRA_NO_USER_CHANGE:
6988 Only used by MyISAM, never called.
6989 Simulates lock_type as locked.
6990 HA_EXTRA_WAIT_LOCK:
6991 HA_EXTRA_WAIT_NOLOCK:
6992 Only used by MyISAM, called from MyISAM handler but never from server
6993 code on top of the handler.
6994 Sets lock_wait on/off
6995 HA_EXTRA_NO_KEYS:
6996 Only used MyISAM, only used internally in MyISAM handler, never called
6997 from server level.
6998 HA_EXTRA_KEYREAD_CHANGE_POS:
6999 HA_EXTRA_REMEMBER_POS:
7000 HA_EXTRA_RESTORE_POS:
7001 HA_EXTRA_PRELOAD_BUFFER_SIZE:
7002 HA_EXTRA_CHANGE_KEY_TO_DUP:
7003 HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
7004 Only used by MyISAM, never called.
7005
7006 6) Operations not used at all
7007 -----------------------------
7008 HA_EXTRA_KEY_CACHE:
7009 HA_EXTRA_NO_KEY_CACHE:
    These parameters are no longer used and could be removed.
7011
7012 7) Operations only used by federated tables for query processing
7013 ----------------------------------------------------------------
7014 HA_EXTRA_INSERT_WITH_UPDATE:
7015 Inform handler that an "INSERT...ON DUPLICATE KEY UPDATE" will be
7016 executed. This condition is unset by HA_EXTRA_NO_IGNORE_DUP_KEY.
7017
7018 8) Operations only used by NDB
7019 ------------------------------
7020 HA_EXTRA_DELETE_CANNOT_BATCH:
7021 HA_EXTRA_UPDATE_CANNOT_BATCH:
7022 Inform handler that delete_row()/update_row() cannot batch deletes/updates
7023 and should perform them immediately. This may be needed when table has
7024 AFTER DELETE/UPDATE triggers which access to subject table.
7025 These flags are reset by the handler::extra(HA_EXTRA_RESET) call.
7026
7027 9) Operations only used by MERGE
7028 ------------------------------
7029 HA_EXTRA_ADD_CHILDREN_LIST:
7030 HA_EXTRA_ATTACH_CHILDREN:
7031 HA_EXTRA_IS_ATTACHED_CHILDREN:
7032 HA_EXTRA_DETACH_CHILDREN:
7033 Special actions for MERGE tables. Ignore.
7034
7035 10) Operations only used by InnoDB
7036 ----------------------------------
7037 HA_EXTRA_EXPORT:
7038 Prepare table for export
7039 (e.g. quiesce the table and write table metadata).
7040
7041 11) Operations only used by partitioning
7042 ------------------------------
7043 HA_EXTRA_SECONDARY_SORT_ROWID:
7044 INDEX_MERGE type of execution, needs to do secondary sort by
7045 ROWID (handler::ref).
7046 */
7047
/**
  General signalling interface from the server to the handler.

  @param operation  The HA_EXTRA_* operation requested; see the categorized
                    description in the comment block preceding this function.

  @return Operation status.
    @retval >0  Error code
    @retval 0   Success

  Each operation is either forwarded to all locked partitions (loop_extra),
  additionally forwarded to partitions created/removed by an ongoing ALTER
  (loop_extra_alter), handled locally (record-cache bookkeeping), or ignored
  because it only concerns storage engines that handle it internally.
*/
int ha_partition::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("ha_partition:extra");
  DBUG_PRINT("info", ("operation: %d", (int) operation));

  switch (operation) {
    /* Category 1), used by most handlers */
  case HA_EXTRA_KEYREAD:
  case HA_EXTRA_NO_KEYREAD:
  case HA_EXTRA_FLUSH:
    DBUG_RETURN(loop_extra(operation));
  case HA_EXTRA_PREPARE_FOR_RENAME:
  case HA_EXTRA_FORCE_REOPEN:
    /* Must also reach new/reorganized partitions of an ongoing ALTER. */
    DBUG_RETURN(loop_extra_alter(operation));
    break;

    /* Category 2), used by non-MyISAM handlers */
  case HA_EXTRA_IGNORE_DUP_KEY:
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
  case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
  {
    if (!m_myisam)
      DBUG_RETURN(loop_extra(operation));
    break;
  }

  /* Category 3), used by MyISAM handlers */
  case HA_EXTRA_PREPARE_FOR_UPDATE:
    /*
      Needs to be run on the first partition in the range now, and
      later in late_extra_cache, when switching to a new partition to scan.
    */
    m_extra_prepare_for_update= TRUE;
    if (m_part_spec.start_part != NO_CURRENT_PART_ID)
    {
      if (!m_extra_cache)
        m_extra_cache_part_id= m_part_spec.start_part;
      DBUG_ASSERT(m_extra_cache_part_id == m_part_spec.start_part);
      (void) m_file[m_part_spec.start_part]->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
    }
    break;
  case HA_EXTRA_NORMAL:
  case HA_EXTRA_QUICK:
  case HA_EXTRA_PREPARE_FOR_DROP:
  case HA_EXTRA_FLUSH_CACHE:
  {
    if (m_myisam)
      DBUG_RETURN(loop_extra(operation));
    break;
  }
  case HA_EXTRA_NO_READCHECK:
  {
    /*
      This is only done as a part of ha_open, which is also used in
      ha_partition::open, so no need to do anything.
    */
    break;
  }
  case HA_EXTRA_CACHE:
  {
    /* Enable the record cache with the default cache size (0). */
    prepare_extra_cache(0);
    break;
  }
  case HA_EXTRA_NO_CACHE:
  {
    /* Disable the record cache and clear all local cache bookkeeping. */
    int ret= 0;
    if (m_extra_cache_part_id != NO_CURRENT_PART_ID)
      ret= m_file[m_extra_cache_part_id]->extra(HA_EXTRA_NO_CACHE);
    m_extra_cache= FALSE;
    m_extra_cache_size= 0;
    m_extra_prepare_for_update= FALSE;
    m_extra_cache_part_id= NO_CURRENT_PART_ID;
    DBUG_RETURN(ret);
  }
  case HA_EXTRA_WRITE_CACHE:
  {
    /* A write cache supersedes any pending read-cache state. */
    m_extra_cache= FALSE;
    m_extra_cache_size= 0;
    m_extra_prepare_for_update= FALSE;
    m_extra_cache_part_id= NO_CURRENT_PART_ID;
    DBUG_RETURN(loop_extra(operation));
  }
  case HA_EXTRA_IGNORE_NO_KEY:
  case HA_EXTRA_NO_IGNORE_NO_KEY:
  {
    /*
      Ignore as these are specific to NDB for handling
      idempotency
    */
    break;
  }
  case HA_EXTRA_WRITE_CAN_REPLACE:
  case HA_EXTRA_WRITE_CANNOT_REPLACE:
  {
    /*
      Informs handler that write_row() can replace rows which conflict
      with row being inserted by PK/unique key without reporting error
      to the SQL-layer.

      This optimization is not safe for partitioned table in general case
      since we may have to put new version of row into partition which is
      different from partition in which old version resides (for example
      when we partition by non-PK column or by some column which is not
      part of unique key which were violated).
      And since NDB which is the only engine at the moment that supports
      this optimization handles partitioning on its own we simply disable
      it here. (BTW for NDB this optimization is safe since it supports
      only KEY partitioning and won't use this optimization for tables
      which have additional unique constraints).
    */
    break;
  }
  /* Category 7), used by federated handlers */
  case HA_EXTRA_INSERT_WITH_UPDATE:
    DBUG_RETURN(loop_extra(operation));
  /* Category 8) Operations only used by NDB */
  case HA_EXTRA_DELETE_CANNOT_BATCH:
  case HA_EXTRA_UPDATE_CANNOT_BATCH:
  {
    /* Currently only NDB use the *_CANNOT_BATCH */
    break;
  }
  /* Category 9) Operations only used by MERGE */
  case HA_EXTRA_ADD_CHILDREN_LIST:
  case HA_EXTRA_ATTACH_CHILDREN:
  case HA_EXTRA_IS_ATTACHED_CHILDREN:
  case HA_EXTRA_DETACH_CHILDREN:
  {
    /* Special actions for MERGE tables. Ignore. */
    break;
  }
  /*
    http://dev.mysql.com/doc/refman/5.1/en/partitioning-limitations.html
    says we no longer support logging to partitioned tables, so we fail
    here.
  */
  case HA_EXTRA_MARK_AS_LOG_TABLE:
    DBUG_RETURN(ER_UNSUPORTED_LOG_ENGINE);
  /* Category 10), used by InnoDB handlers */
  case HA_EXTRA_EXPORT:
    DBUG_RETURN(loop_extra(operation));
  /* Category 11) Operations only used by partitioning. */
  case HA_EXTRA_SECONDARY_SORT_ROWID:
  {
    /* index_init(sorted=true) must have been called! */
    DBUG_ASSERT(m_ordered);
    DBUG_ASSERT(m_ordered_rec_buffer);
    /* No index_read call must have been done! */
    DBUG_ASSERT(m_queue.elements == 0);
    /* If no PK is set as secondary sort, do secondary sort by rowid/ref. */
    if (!m_curr_key_info[1])
    {
      m_sec_sort_by_rowid= true;
      queue_set_compare(&m_queue, key_and_ref_cmp);
    }
    break;
  }
  default:
  {
    /* Temporary crash to discover what is wrong */
    DBUG_ASSERT(0);
    break;
  }
  }
  DBUG_RETURN(0);
}
7214
7215
7216 /**
7217 Special extra call to reset extra parameters
7218
7219 @return Operation status.
7220 @retval >0 Error code
7221 @retval 0 Success
7222
7223 @note Called at end of each statement to reset buffers.
7224 To avoid excessive calls, the m_partitions_to_reset bitmap keep records
7225 of which partitions that have been used in extra(), external_lock() or
7226 start_stmt() and is needed to be called.
7227 */
7228
reset(void)7229 int ha_partition::reset(void)
7230 {
7231 int result= 0;
7232 int tmp;
7233 uint i;
7234 DBUG_ENTER("ha_partition::reset");
7235
7236 for (i= bitmap_get_first_set(&m_partitions_to_reset);
7237 i < m_tot_parts;
7238 i= bitmap_get_next_set(&m_partitions_to_reset, i))
7239 {
7240 if ((tmp= m_file[i]->ha_reset()))
7241 result= tmp;
7242 }
7243 bitmap_clear_all(&m_partitions_to_reset);
7244 DBUG_RETURN(result);
7245 }
7246
7247 /*
7248 Special extra method for HA_EXTRA_CACHE with cachesize as extra parameter
7249
7250 SYNOPSIS
7251 extra_opt()
7252 operation Must be HA_EXTRA_CACHE
7253 cachesize Size of cache in full table scan
7254
7255 RETURN VALUE
7256 >0 Error code
7257 0 Success
7258 */
7259
extra_opt(enum ha_extra_function operation,ulong cachesize)7260 int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize)
7261 {
7262 DBUG_ENTER("ha_partition::extra_opt()");
7263
7264 DBUG_ASSERT(HA_EXTRA_CACHE == operation);
7265 prepare_extra_cache(cachesize);
7266 DBUG_RETURN(0);
7267 }
7268
7269
7270 /*
7271 Call extra on handler with HA_EXTRA_CACHE and cachesize
7272
7273 SYNOPSIS
7274 prepare_extra_cache()
7275 cachesize Size of cache for full table scan
7276
7277 RETURN VALUE
7278 NONE
7279 */
7280
prepare_extra_cache(uint cachesize)7281 void ha_partition::prepare_extra_cache(uint cachesize)
7282 {
7283 DBUG_ENTER("ha_partition::prepare_extra_cache()");
7284 DBUG_PRINT("info", ("cachesize %u", cachesize));
7285
7286 m_extra_cache= TRUE;
7287 m_extra_cache_size= cachesize;
7288 if (m_part_spec.start_part != NO_CURRENT_PART_ID)
7289 {
7290 DBUG_ASSERT(bitmap_is_set(&m_partitions_to_reset,
7291 m_part_spec.start_part));
7292 bitmap_set_bit(&m_partitions_to_reset, m_part_spec.start_part);
7293 late_extra_cache(m_part_spec.start_part);
7294 }
7295 DBUG_VOID_RETURN;
7296 }
7297
7298
7299 /**
7300 Prepares our new and reorged handlers for rename or delete.
7301
7302 @param operation Operation to forward
7303
7304 @return Operation status
7305 @retval 0 Success
7306 @retval !0 Error
7307 */
7308
loop_extra_alter(enum ha_extra_function operation)7309 int ha_partition::loop_extra_alter(enum ha_extra_function operation)
7310 {
7311 int result= 0, tmp;
7312 handler **file;
7313 DBUG_ENTER("ha_partition::loop_extra_alter()");
7314 DBUG_ASSERT(operation == HA_EXTRA_PREPARE_FOR_RENAME ||
7315 operation == HA_EXTRA_FORCE_REOPEN);
7316
7317 if (m_new_file != NULL)
7318 {
7319 for (file= m_new_file; *file; file++)
7320 if ((tmp= (*file)->extra(operation)))
7321 result= tmp;
7322 }
7323 if (m_reorged_file != NULL)
7324 {
7325 for (file= m_reorged_file; *file; file++)
7326 if ((tmp= (*file)->extra(operation)))
7327 result= tmp;
7328 }
7329 if ((tmp= loop_extra(operation)))
7330 result= tmp;
7331 DBUG_RETURN(result);
7332 }
7333
7334 /*
7335 Call extra on all partitions
7336
7337 SYNOPSIS
7338 loop_extra()
7339 operation extra operation type
7340
7341 RETURN VALUE
7342 >0 Error code
7343 0 Success
7344 */
7345
loop_extra(enum ha_extra_function operation)7346 int ha_partition::loop_extra(enum ha_extra_function operation)
7347 {
7348 int result= 0, tmp;
7349 uint i;
7350 DBUG_ENTER("ha_partition::loop_extra()");
7351
7352 for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
7353 i < m_tot_parts;
7354 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
7355 {
7356 if ((tmp= m_file[i]->extra(operation)))
7357 result= tmp;
7358 }
7359 /* Add all used partitions to be called in reset(). */
7360 bitmap_union(&m_partitions_to_reset, &m_part_info->lock_partitions);
7361 DBUG_RETURN(result);
7362 }
7363
7364
7365 /*
7366 Call extra(HA_EXTRA_CACHE) on next partition_id
7367
7368 SYNOPSIS
7369 late_extra_cache()
7370 partition_id Partition id to call extra on
7371
7372 RETURN VALUE
7373 NONE
7374 */
7375
late_extra_cache(uint partition_id)7376 void ha_partition::late_extra_cache(uint partition_id)
7377 {
7378 handler *file;
7379 DBUG_ENTER("ha_partition::late_extra_cache");
7380 DBUG_PRINT("info", ("extra_cache %u prepare %u partid %u size %u",
7381 m_extra_cache, m_extra_prepare_for_update,
7382 partition_id, m_extra_cache_size));
7383
7384 if (!m_extra_cache && !m_extra_prepare_for_update)
7385 DBUG_VOID_RETURN;
7386 file= m_file[partition_id];
7387 if (m_extra_cache)
7388 {
7389 if (m_extra_cache_size == 0)
7390 (void) file->extra(HA_EXTRA_CACHE);
7391 else
7392 (void) file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size);
7393 }
7394 if (m_extra_prepare_for_update)
7395 {
7396 (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
7397 }
7398 m_extra_cache_part_id= partition_id;
7399 DBUG_VOID_RETURN;
7400 }
7401
7402
7403 /*
7404 Call extra(HA_EXTRA_NO_CACHE) on next partition_id
7405
7406 SYNOPSIS
7407 late_extra_no_cache()
7408 partition_id Partition id to call extra on
7409
7410 RETURN VALUE
7411 NONE
7412 */
7413
late_extra_no_cache(uint partition_id)7414 void ha_partition::late_extra_no_cache(uint partition_id)
7415 {
7416 handler *file;
7417 DBUG_ENTER("ha_partition::late_extra_no_cache");
7418
7419 if (!m_extra_cache && !m_extra_prepare_for_update)
7420 DBUG_VOID_RETURN;
7421 file= m_file[partition_id];
7422 (void) file->extra(HA_EXTRA_NO_CACHE);
7423 DBUG_ASSERT(partition_id == m_extra_cache_part_id);
7424 m_extra_cache_part_id= NO_CURRENT_PART_ID;
7425 DBUG_VOID_RETURN;
7426 }
7427
7428
7429 /****************************************************************************
7430 MODULE optimiser support
7431 ****************************************************************************/
7432
7433 /**
7434 Get keys to use for scanning.
7435
7436 @return key_map of keys usable for scanning
7437
7438 @note No need to use read_partitions here, since it does not depend on
7439 which partitions is used, only which storage engine used.
7440 */
7441
keys_to_use_for_scanning()7442 const key_map *ha_partition::keys_to_use_for_scanning()
7443 {
7444 DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
7445 DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
7446 }
7447
7448
7449 /**
7450 Minimum number of rows to base optimizer estimate on.
7451 */
7452
min_rows_for_estimate()7453 ha_rows ha_partition::min_rows_for_estimate()
7454 {
7455 uint i, max_used_partitions, tot_used_partitions;
7456 DBUG_ENTER("ha_partition::min_rows_for_estimate");
7457
7458 tot_used_partitions= bitmap_bits_set(&m_part_info->read_partitions);
7459
7460 /*
7461 All partitions might have been left as unused during partition pruning
7462 due to, for example, an impossible WHERE condition. Nonetheless, the
7463 optimizer might still attempt to perform (e.g. range) analysis where an
7464 estimate of the the number of rows is calculated using records_in_range.
7465 Hence, to handle this and other possible cases, use zero as the minimum
7466 number of rows to base the estimate on if no partition is being used.
7467 */
7468 if (!tot_used_partitions)
7469 DBUG_RETURN(0);
7470
7471 /*
7472 Allow O(log2(tot_partitions)) increase in number of used partitions.
7473 This gives O(tot_rows/log2(tot_partitions)) rows to base the estimate on.
7474 I.e when the total number of partitions doubles, allow one more
7475 partition to be checked.
7476 */
7477 i= 2;
7478 max_used_partitions= 1;
7479 while (i < m_tot_parts)
7480 {
7481 max_used_partitions++;
7482 i= i << 1;
7483 }
7484 if (max_used_partitions > tot_used_partitions)
7485 max_used_partitions= tot_used_partitions;
7486
7487 /* stats.records is already updated by the info(HA_STATUS_VARIABLE) call. */
7488 DBUG_PRINT("info", ("max_used_partitions: %u tot_rows: %lu",
7489 max_used_partitions,
7490 (ulong) stats.records));
7491 DBUG_PRINT("info", ("tot_used_partitions: %u min_rows_to_check: %lu",
7492 tot_used_partitions,
7493 (ulong) stats.records * max_used_partitions
7494 / tot_used_partitions));
7495 DBUG_RETURN(stats.records * max_used_partitions / tot_used_partitions);
7496 }
7497
7498
7499 /**
7500 Get the biggest used partition.
7501
7502 Starting at the N:th biggest partition and skips all non used
7503 partitions, returning the biggest used partition found
7504
7505 @param[in,out] part_index Skip the *part_index biggest partitions
7506
7507 @return The biggest used partition with index not lower than *part_index.
7508 @retval NO_CURRENT_PART_ID No more partition used.
7509 @retval != NO_CURRENT_PART_ID partition id of biggest used partition with
7510 index >= *part_index supplied. Note that
7511 *part_index will be updated to the next
7512 partition index to use.
7513 */
7514
get_biggest_used_partition(uint * part_index)7515 uint ha_partition::get_biggest_used_partition(uint *part_index)
7516 {
7517 uint part_id;
7518 while ((*part_index) < m_tot_parts)
7519 {
7520 part_id= m_part_ids_sorted_by_num_of_records[(*part_index)++];
7521 if (bitmap_is_set(&m_part_info->read_partitions, part_id))
7522 return part_id;
7523 }
7524 return NO_CURRENT_PART_ID;
7525 }
7526
7527
7528 /*
7529 Return time for a scan of the table
7530
7531 SYNOPSIS
7532 scan_time()
7533
7534 RETURN VALUE
7535 time for scan
7536 */
7537
scan_time()7538 double ha_partition::scan_time()
7539 {
7540 double scan_time= 0;
7541 uint i;
7542 DBUG_ENTER("ha_partition::scan_time");
7543
7544 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
7545 i < m_tot_parts;
7546 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7547 scan_time+= m_file[i]->scan_time();
7548 DBUG_RETURN(scan_time);
7549 }
7550
7551
7552 /**
7553 Find number of records in a range.
7554 @param inx Index number
7555 @param min_key Start of range
7556 @param max_key End of range
7557
7558 @return Number of rows in range.
7559
7560 Given a starting key, and an ending key estimate the number of rows that
7561 will exist between the two. max_key may be empty which in case determine
7562 if start_key matches any rows.
7563 */
7564
records_in_range(uint inx,key_range * min_key,key_range * max_key)7565 ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
7566 key_range *max_key)
7567 {
7568 ha_rows min_rows_to_check, rows, estimated_rows=0, checked_rows= 0;
7569 uint partition_index= 0, part_id;
7570 DBUG_ENTER("ha_partition::records_in_range");
7571
7572 min_rows_to_check= min_rows_for_estimate();
7573
7574 while ((part_id= get_biggest_used_partition(&partition_index))
7575 != NO_CURRENT_PART_ID)
7576 {
7577 rows= m_file[part_id]->records_in_range(inx, min_key, max_key);
7578
7579 DBUG_PRINT("info", ("part %u match %lu rows of %lu", part_id, (ulong) rows,
7580 (ulong) m_file[part_id]->stats.records));
7581
7582 if (rows == HA_POS_ERROR)
7583 DBUG_RETURN(HA_POS_ERROR);
7584 estimated_rows+= rows;
7585 checked_rows+= m_file[part_id]->stats.records;
7586 /*
7587 Returning 0 means no rows can be found, so we must continue
7588 this loop as long as we have estimated_rows == 0.
7589 Also many engines return 1 to indicate that there may exist
7590 a matching row, we do not normalize this by dividing by number of
7591 used partitions, but leave it to be returned as a sum, which will
7592 reflect that we will need to scan each partition's index.
7593
7594 Note that this statistics may not always be correct, so we must
7595 continue even if the current partition has 0 rows, since we might have
7596 deleted rows from the current partition, or inserted to the next
7597 partition.
7598 */
7599 if (estimated_rows && checked_rows &&
7600 checked_rows >= min_rows_to_check)
7601 {
7602 DBUG_PRINT("info",
7603 ("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
7604 inx,
7605 (ulong) (estimated_rows * stats.records / checked_rows),
7606 (ulong) estimated_rows,
7607 (ulong) stats.records,
7608 (ulong) checked_rows));
7609 DBUG_RETURN(estimated_rows * stats.records / checked_rows);
7610 }
7611 }
7612 DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
7613 inx,
7614 (ulong) estimated_rows));
7615 DBUG_RETURN(estimated_rows);
7616 }
7617
7618
7619 /**
7620 Estimate upper bound of number of rows.
7621
7622 @return Number of rows.
7623 */
7624
estimate_rows_upper_bound()7625 ha_rows ha_partition::estimate_rows_upper_bound()
7626 {
7627 ha_rows rows, tot_rows= 0;
7628 handler **file= m_file;
7629 DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
7630
7631 do
7632 {
7633 if (bitmap_is_set(&(m_part_info->read_partitions), (file - m_file)))
7634 {
7635 rows= (*file)->estimate_rows_upper_bound();
7636 if (rows == HA_POS_ERROR)
7637 DBUG_RETURN(HA_POS_ERROR);
7638 tot_rows+= rows;
7639 }
7640 } while (*(++file));
7641 DBUG_RETURN(tot_rows);
7642 }
7643
7644
7645 /*
7646 Get time to read
7647
7648 SYNOPSIS
7649 read_time()
7650 index Index number used
7651 ranges Number of ranges
7652 rows Number of rows
7653
7654 RETURN VALUE
7655 time for read
7656
7657 DESCRIPTION
7658 This will be optimised later to include whether or not the index can
7659 be used with partitioning. To achieve we need to add another parameter
7660 that specifies how many of the index fields that are bound in the ranges.
7661 Possibly added as a new call to handlers.
7662 */
7663
read_time(uint index,uint ranges,ha_rows rows)7664 double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
7665 {
7666 DBUG_ENTER("ha_partition::read_time");
7667
7668 DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
7669 }
7670
7671
7672 /**
7673 Number of rows in table. see handler.h
7674
7675 @return Number of records in the table (after pruning!)
7676 */
7677
records()7678 ha_rows ha_partition::records()
7679 {
7680 ha_rows rows, tot_rows= 0;
7681 uint i;
7682 DBUG_ENTER("ha_partition::records");
7683
7684 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
7685 i < m_tot_parts;
7686 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7687 {
7688 rows= m_file[i]->records();
7689 if (rows == HA_POS_ERROR)
7690 DBUG_RETURN(HA_POS_ERROR);
7691 tot_rows+= rows;
7692 }
7693 DBUG_RETURN(tot_rows);
7694 }
7695
7696
7697 /*
7698 Is it ok to switch to a new engine for this table
7699
7700 SYNOPSIS
7701 can_switch_engine()
7702
7703 RETURN VALUE
7704 TRUE Ok
7705 FALSE Not ok
7706
7707 DESCRIPTION
7708 Used to ensure that tables with foreign key constraints are not moved
7709 to engines without foreign key support.
7710 */
7711
can_switch_engines()7712 bool ha_partition::can_switch_engines()
7713 {
7714 handler **file;
7715 DBUG_ENTER("ha_partition::can_switch_engines");
7716
7717 file= m_file;
7718 do
7719 {
7720 if (!(*file)->can_switch_engines())
7721 DBUG_RETURN(FALSE);
7722 } while (*(++file));
7723 DBUG_RETURN(TRUE);
7724 }
7725
7726
7727 /*
7728 Is table cache supported
7729
7730 SYNOPSIS
7731 table_cache_type()
7732
7733 */
7734
table_cache_type()7735 uint8 ha_partition::table_cache_type()
7736 {
7737 DBUG_ENTER("ha_partition::table_cache_type");
7738
7739 DBUG_RETURN(m_file[0]->table_cache_type());
7740 }
7741
7742
7743 /**
7744 Calculate hash value for KEY partitioning using an array of fields.
7745
7746 @param field_array An array of the fields in KEY partitioning
7747
7748 @return hash_value calculated
7749
7750 @note Uses the hash function on the character set of the field.
7751 Integer and floating point fields use the binary character set by default.
7752 */
7753
uint32 ha_partition::calculate_key_hash_value(Field **field_array)
{
  ulong nr1= 1;
  ulong nr2= 4;
  bool use_51_hash;
  /*
    Use the 5.1-compatible hashing when the table was created with
    ALGORITHM = 1 (KEY_ALGORITHM_51), so rows keep mapping to the same
    partitions as they did in 5.1.
  */
  use_51_hash= MY_TEST((*field_array)->table->part_info->key_algorithm ==
                       partition_info::KEY_ALGORITHM_51);

  do
  {
    Field *field= *field_array;
    if (use_51_hash)
    {
      switch (field->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
        {
          if (field->is_null())
          {
            /* Same NULL marker as the 5.1 hash used. */
            nr1^= (nr1 << 1) | 1;
            continue;
          }
          /* Force this to my_hash_sort_bin, which was used in 5.1! */
          uint len= field->pack_length();
          my_charset_bin.coll->hash_sort(&my_charset_bin, field->ptr, len,
                                         &nr1, &nr2);
          /* Done with this field, continue with next one. */
          continue;
        }
      case MYSQL_TYPE_STRING:
      case MYSQL_TYPE_VARCHAR:
      case MYSQL_TYPE_BIT:
        /* Not affected, same in 5.1 and 5.5 */
        break;
      /*
        ENUM/SET uses my_hash_sort_simple in 5.1 (i.e. my_charset_latin1)
        and my_hash_sort_bin in 5.5!
      */
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          if (field->is_null())
          {
            nr1^= (nr1 << 1) | 1;
            continue;
          }
          /*
            Force this to my_hash_sort_simple via my_charset_latin1,
            which was used in 5.1 (NOT my_hash_sort_bin; see the case
            comment above).
          */
          uint len= field->pack_length();
          my_charset_latin1.coll->hash_sort(&my_charset_latin1, field->ptr,
                                            len, &nr1, &nr2);
          continue;
        }
      /* New types in mysql-5.6. */
      case MYSQL_TYPE_DATETIME2:
      case MYSQL_TYPE_TIME2:
      case MYSQL_TYPE_TIMESTAMP2:
        /* Not affected, 5.6+ only! */
        break;

      /* These types should not be allowed for partitioning! */
      case MYSQL_TYPE_NULL:
      case MYSQL_TYPE_DECIMAL:
      case MYSQL_TYPE_DATE:
      case MYSQL_TYPE_TINY_BLOB:
      case MYSQL_TYPE_MEDIUM_BLOB:
      case MYSQL_TYPE_LONG_BLOB:
      case MYSQL_TYPE_BLOB:
      case MYSQL_TYPE_VAR_STRING:
      case MYSQL_TYPE_GEOMETRY:
        /* fall through. */
      default:
        DBUG_ASSERT(0);                    // New type?
        /* Fall through for default hashing (5.5). */
      }
      /* fall through, use collation based hashing. */
    }
    field->hash(&nr1, &nr2);
  } while (*(++field_array));
  return (uint32) nr1;
}
7845
7846
7847 /****************************************************************************
7848 MODULE print messages
7849 ****************************************************************************/
7850
index_type(uint inx)7851 const char *ha_partition::index_type(uint inx)
7852 {
7853 uint first_used_partition;
7854 DBUG_ENTER("ha_partition::index_type");
7855
7856 first_used_partition= bitmap_get_first_set(&(m_part_info->read_partitions));
7857
7858 if (first_used_partition == MY_BIT_NONE)
7859 {
7860 DBUG_ASSERT(0); // How can this happen?
7861 DBUG_RETURN(handler::index_type(inx));
7862 }
7863
7864 DBUG_RETURN(m_file[first_used_partition]->index_type(inx));
7865 }
7866
7867
get_row_type() const7868 enum row_type ha_partition::get_row_type() const
7869 {
7870 uint i;
7871 enum row_type type;
7872 DBUG_ENTER("ha_partition::get_row_type");
7873
7874 i= bitmap_get_first_set(&m_part_info->read_partitions);
7875 DBUG_ASSERT(i < m_tot_parts);
7876 if (i >= m_tot_parts)
7877 DBUG_RETURN(ROW_TYPE_NOT_USED);
7878
7879 type= m_file[i]->get_row_type();
7880 DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
7881
7882 for (i= bitmap_get_next_set(&m_part_info->lock_partitions, i);
7883 i < m_tot_parts;
7884 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
7885 {
7886 enum row_type part_type= m_file[i]->get_row_type();
7887 DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
7888 if (part_type != type)
7889 DBUG_RETURN(ROW_TYPE_NOT_USED);
7890 }
7891
7892 DBUG_RETURN(type);
7893 }
7894
7895
append_row_to_str(String & str)7896 void ha_partition::append_row_to_str(String &str)
7897 {
7898 const uchar *rec;
7899 bool is_rec0= !m_err_rec || m_err_rec == table->record[0];
7900 if (is_rec0)
7901 rec= table->record[0];
7902 else
7903 rec= m_err_rec;
7904 // If PK, use full PK instead of full part field array!
7905 if (table->s->primary_key != MAX_KEY)
7906 {
7907 KEY *key= table->key_info + table->s->primary_key;
7908 KEY_PART_INFO *key_part= key->key_part;
7909 KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
7910 if (!is_rec0)
7911 set_key_field_ptr(key, rec, table->record[0]);
7912 for (; key_part != key_part_end; key_part++)
7913 {
7914 Field *field= key_part->field;
7915 str.append(" ");
7916 str.append(field->field_name);
7917 str.append(":");
7918 field_unpack(&str, field, rec, 0, false);
7919 }
7920 if (!is_rec0)
7921 set_key_field_ptr(key, table->record[0], rec);
7922 }
7923 else
7924 {
7925 Field **field_ptr;
7926 if (!is_rec0)
7927 set_field_ptr(m_part_info->full_part_field_array, rec,
7928 table->record[0]);
7929 /* No primary key, use full partition field array. */
7930 for (field_ptr= m_part_info->full_part_field_array;
7931 *field_ptr;
7932 field_ptr++)
7933 {
7934 Field *field= *field_ptr;
7935 str.append(" ");
7936 str.append(field->field_name);
7937 str.append(":");
7938 field_unpack(&str, field, rec, 0, false);
7939 }
7940 if (!is_rec0)
7941 set_field_ptr(m_part_info->full_part_field_array, table->record[0],
7942 rec);
7943 }
7944 }
7945
7946
/**
  Print a handler error message.

  @param error    Error code received from a handler call.
  @param errflag  Error reporting flags (myf).

  Handles the partitioning-specific errors itself (no matching partition;
  row found in the wrong partition, which is logged and reported with the
  offending row's key/partition values) and delegates everything else to
  the handler of the partition where the error occurred.
*/
void ha_partition::print_error(int error, myf errflag)
{
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::print_error");

  /* Should probably look for my own errors first */
  DBUG_PRINT("enter", ("error: %d", error));

  if ((error == HA_ERR_NO_PARTITION_FOUND) &&
      ! (thd->lex->alter_info.flags & Alter_info::ALTER_TRUNCATE_PARTITION))
    m_part_info->print_no_partition_found(table);
  else if (error == HA_ERR_ROW_IN_WRONG_PARTITION)
  {
    /* Should only happen on DELETE or UPDATE! */
    DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_DELETE ||
                thd_sql_command(thd) == SQLCOM_DELETE_MULTI ||
                thd_sql_command(thd) == SQLCOM_UPDATE ||
                thd_sql_command(thd) == SQLCOM_UPDATE_MULTI);
    DBUG_ASSERT(m_err_rec);
    if (m_err_rec)
    {
      uint max_length;
      char buf[MAX_KEY_LENGTH];
      String str(buf,sizeof(buf),system_charset_info);
      uint32 part_id;
      /* Build "(found_part != expected_part) key:value ..." for the row. */
      str.length(0);
      str.append("(");
      str.append_ulonglong(m_last_part);
      str.append(" != ");
      if (get_part_for_delete(m_err_rec, m_rec0, m_part_info, &part_id))
        str.append("?");
      else
        str.append_ulonglong(part_id);
      str.append(")");
      append_row_to_str(str);

      /* Log this error, so the DBA can notice it and fix it! */
      sql_print_error("Table '%-192s' corrupted: row in wrong partition: %s\n"
                      "Please REPAIR the table!",
                      table->s->table_name.str,
                      str.c_ptr_safe());

      /* Truncate the row description so the full message fits errmsg buf. */
      max_length= (MYSQL_ERRMSG_SIZE - (uint) strlen(ER(ER_ROW_IN_WRONG_PARTITION)));
      if (str.length() >= max_length)
      {
        str.length(max_length-4);
        str.append(STRING_WITH_LEN("..."));
      }
      my_error(ER_ROW_IN_WRONG_PARTITION, MYF(0), str.c_ptr_safe());
      m_err_rec= NULL;
      DBUG_VOID_RETURN;
    }
    /* fall through to generic error handling. */
  }

  /* In case m_file has not been initialized, like in bug#42438 */
  if (m_file)
  {
    if (m_last_part >= m_tot_parts)
    {
      /* Out-of-range partition id; clamp to 0 to avoid bad access. */
      DBUG_ASSERT(0);
      m_last_part= 0;
    }
    m_file[m_last_part]->print_error(error, errflag);
  }
  else
    handler::print_error(error, errflag);
  DBUG_VOID_RETURN;
}
8016
8017
get_error_message(int error,String * buf)8018 bool ha_partition::get_error_message(int error, String *buf)
8019 {
8020 DBUG_ENTER("ha_partition::get_error_message");
8021
8022 /* Should probably look for my own errors first */
8023
8024 /* In case m_file has not been initialized, like in bug#42438 */
8025 if (m_file)
8026 DBUG_RETURN(m_file[m_last_part]->get_error_message(error, buf));
8027 DBUG_RETURN(handler::get_error_message(error, buf));
8028
8029 }
8030
8031
8032 /****************************************************************************
8033 MODULE in-place ALTER
8034 ****************************************************************************/
8035 /**
8036 Get table flags.
8037 */
8038
table_flags() const8039 handler::Table_flags ha_partition::table_flags() const
8040 {
8041 uint first_used_partition= 0;
8042 DBUG_ENTER("ha_partition::table_flags");
8043 if (m_handler_status < handler_initialized ||
8044 m_handler_status >= handler_closed)
8045 DBUG_RETURN(PARTITION_ENABLED_TABLE_FLAGS);
8046
8047 if (get_lock_type() != F_UNLCK)
8048 {
8049 /*
8050 The flags are cached after external_lock, and may depend on isolation
8051 level. So we should use a locked partition to get the correct flags.
8052 */
8053 first_used_partition= bitmap_get_first_set(&m_part_info->lock_partitions);
8054 if (first_used_partition == MY_BIT_NONE)
8055 first_used_partition= 0;
8056 }
8057 DBUG_RETURN((m_file[first_used_partition]->ha_table_flags() &
8058 ~(PARTITION_DISABLED_TABLE_FLAGS)) |
8059 (PARTITION_ENABLED_TABLE_FLAGS));
8060 }
8061
8062
8063 /**
8064 alter_table_flags must be on handler/table level, not on hton level
8065 due to the ha_partition hton does not know what the underlying hton is.
8066 */
alter_table_flags(uint flags)8067 uint ha_partition::alter_table_flags(uint flags)
8068 {
8069 uint flags_to_return;
8070 DBUG_ENTER("ha_partition::alter_table_flags");
8071
8072 flags_to_return= ht->alter_table_flags(flags);
8073 flags_to_return|= m_file[0]->alter_table_flags(flags);
8074
8075 DBUG_RETURN(flags_to_return);
8076 }
8077
8078
8079 /**
8080 check if copy of data is needed in alter table.
8081 */
check_if_incompatible_data(HA_CREATE_INFO * create_info,uint table_changes)8082 bool ha_partition::check_if_incompatible_data(HA_CREATE_INFO *create_info,
8083 uint table_changes)
8084 {
8085 handler **file;
8086 bool ret= COMPATIBLE_DATA_YES;
8087
8088 /*
8089 The check for any partitioning related changes have already been done
8090 in mysql_alter_table (by fix_partition_func), so it is only up to
8091 the underlying handlers.
8092 */
8093 for (file= m_file; *file; file++)
8094 if ((ret= (*file)->check_if_incompatible_data(create_info,
8095 table_changes)) !=
8096 COMPATIBLE_DATA_YES)
8097 break;
8098 return ret;
8099 }
8100
8101
8102 /**
8103 Support of in-place alter table.
8104 */
8105
8106 /**
8107 Helper class for in-place alter, see handler.h
8108 */
8109
8110 class ha_partition_inplace_ctx : public inplace_alter_handler_ctx
8111 {
8112 public:
8113 inplace_alter_handler_ctx **handler_ctx_array;
8114 private:
8115 uint m_tot_parts;
8116
8117 public:
ha_partition_inplace_ctx(THD * thd,uint tot_parts)8118 ha_partition_inplace_ctx(THD *thd, uint tot_parts)
8119 : inplace_alter_handler_ctx(),
8120 handler_ctx_array(NULL),
8121 m_tot_parts(tot_parts)
8122 {}
8123
~ha_partition_inplace_ctx()8124 ~ha_partition_inplace_ctx()
8125 {
8126 if (handler_ctx_array)
8127 {
8128 for (uint index= 0; index < m_tot_parts; index++)
8129 delete handler_ctx_array[index];
8130 }
8131 }
8132 };
8133
8134
/**
  Check whether the requested ALTER can be done in place.

  Asks every partition handler, collects the per-partition contexts into a
  ha_partition_inplace_ctx, and returns the weakest (numerically smallest)
  capability reported by any partition.
*/
enum_alter_inplace_result
ha_partition::check_if_supported_inplace_alter(TABLE *altered_table,
                                               Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  enum_alter_inplace_result result= HA_ALTER_INPLACE_NO_LOCK;
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool first_is_set= false;
  THD *thd= ha_thd();

  DBUG_ENTER("ha_partition::check_if_supported_inplace_alter");
  /*
    Support inplace change of KEY () -> KEY ALGORITHM = N ().
    Any other change would set partition_changed in
    prep_alter_part_table() in mysql_alter_table().
  */
  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
    DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);

  /* We cannot allow INPLACE to change order of KEY partitioning fields! */
  if (ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_ORDER)
  {
    /* If column partitioning is used then no need to check partition order */
    if (m_part_info->list_of_part_fields && !m_part_info->column_list)
    {
      if(!check_partition_column_order(&ha_alter_info->alter_info->create_list,
                                       table->part_info->part_field_array))
        DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }

    /* Check subpartition ordering */
    if (m_part_info->list_of_subpart_fields)
    {
      if(!check_partition_column_order(&ha_alter_info->alter_info->create_list,
                                       table->part_info->subpart_field_array))
        DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }
  }

  /* Allocate the shared context on the statement mem_root. */
  part_inplace_ctx=
    new (thd->mem_root) ha_partition_inplace_ctx(thd, m_tot_parts);
  if (!part_inplace_ctx)
    DBUG_RETURN(HA_ALTER_ERROR);

  part_inplace_ctx->handler_ctx_array= (inplace_alter_handler_ctx **)
    thd->alloc(sizeof(inplace_alter_handler_ctx *) * (m_tot_parts + 1));
  if (!part_inplace_ctx->handler_ctx_array)
    DBUG_RETURN(HA_ALTER_ERROR);

  /* Set all to NULL, including the terminating one. */
  for (index= 0; index <= m_tot_parts; index++)
    part_inplace_ctx->handler_ctx_array[index]= NULL;

  for (index= 0; index < m_tot_parts; index++)
  {
    /* Each call may set ha_alter_info->handler_ctx; capture it per part. */
    enum_alter_inplace_result p_result=
      m_file[index]->check_if_supported_inplace_alter(altered_table,
                                                      ha_alter_info);
    part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;

    if (index == 0)
    {
      first_is_set= (ha_alter_info->handler_ctx != NULL);
    }
    else if (first_is_set != (ha_alter_info->handler_ctx != NULL))
    {
      /* Either none or all partitions must set handler_ctx! */
      DBUG_ASSERT(0);
      DBUG_RETURN(HA_ALTER_ERROR);
    }
    /* Keep the least capable answer as the table-level answer. */
    if (p_result < result)
      result= p_result;
    if (result == HA_ALTER_ERROR)
      break;
  }

  ha_alter_info->handler_ctx= part_inplace_ctx;
  /*
    To indicate for future inplace calls that there are several
    partitions/handlers that need to be committed together,
    we set group_commit_ctx to the NULL terminated array of
    the partitions handlers.
  */
  ha_alter_info->group_commit_ctx= part_inplace_ctx->handler_ctx_array;

  DBUG_RETURN(result);
}
8222
8223
prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)8224 bool ha_partition::prepare_inplace_alter_table(TABLE *altered_table,
8225 Alter_inplace_info *ha_alter_info)
8226 {
8227 uint index= 0;
8228 bool error= false;
8229 ha_partition_inplace_ctx *part_inplace_ctx;
8230
8231 DBUG_ENTER("ha_partition::prepare_inplace_alter_table");
8232
8233 /*
8234 Changing to similar partitioning, only update metadata.
8235 Non allowed changes would be catched in prep_alter_part_table().
8236 */
8237 if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
8238 DBUG_RETURN(false);
8239
8240 part_inplace_ctx=
8241 static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
8242
8243 for (index= 0; index < m_tot_parts && !error; index++)
8244 {
8245 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
8246 m_file[index]->update_create_info(ha_alter_info->create_info);
8247 if (m_file[index]->ha_prepare_inplace_alter_table(altered_table,
8248 ha_alter_info))
8249 error= true;
8250 part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
8251 }
8252 ha_alter_info->handler_ctx= part_inplace_ctx;
8253
8254 DBUG_RETURN(error);
8255 }
8256
8257
inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)8258 bool ha_partition::inplace_alter_table(TABLE *altered_table,
8259 Alter_inplace_info *ha_alter_info)
8260 {
8261 uint index= 0;
8262 bool error= false;
8263 ha_partition_inplace_ctx *part_inplace_ctx;
8264
8265 DBUG_ENTER("ha_partition::inplace_alter_table");
8266
8267 /*
8268 Changing to similar partitioning, only update metadata.
8269 Non allowed changes would be catched in prep_alter_part_table().
8270 */
8271 if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
8272 DBUG_RETURN(false);
8273
8274 part_inplace_ctx=
8275 static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
8276
8277 for (index= 0; index < m_tot_parts && !error; index++)
8278 {
8279 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
8280
8281 if (index != 0 && ha_alter_info->handler_ctx != NULL)
8282 ha_alter_info->handler_ctx->set_shared_data(
8283 part_inplace_ctx->handler_ctx_array[index - 1]);
8284
8285 if (m_file[index]->ha_inplace_alter_table(altered_table,
8286 ha_alter_info))
8287 error= true;
8288 part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
8289 }
8290 ha_alter_info->handler_ctx= part_inplace_ctx;
8291
8292 DBUG_RETURN(error);
8293 }
8294
8295
/*
  Note that this function will try rollback failed ADD INDEX by
  executing DROP INDEX for the indexes that were committed (if any)
  before the error occured. This means that the underlying storage
  engine must be able to drop index in-place with X-lock held.
  (As X-lock will be held here if new indexes are to be committed)
*/
bool ha_partition::commit_inplace_alter_table(TABLE *altered_table,
                                              Alter_inplace_info *ha_alter_info,
                                              bool commit)
{
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool error= false;

  DBUG_ENTER("ha_partition::commit_inplace_alter_table");

  /*
    Changing to similar partitioning, only update metadata.
    Non allowed changes would be catched in prep_alter_part_table().
  */
  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION)
    DBUG_RETURN(false);

  part_inplace_ctx=
    static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);

  if (commit)
  {
    DBUG_ASSERT(ha_alter_info->group_commit_ctx ==
                part_inplace_ctx->handler_ctx_array);
    /*
      Commit via the first partition only; an engine that understands
      group_commit_ctx commits all partitions at once and resets it to NULL.
    */
    ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[0];
    error= m_file[0]->ha_commit_inplace_alter_table(altered_table,
                                                    ha_alter_info, commit);
    if (error)
      goto end;
    if (ha_alter_info->group_commit_ctx)
    {
      /*
        If ha_alter_info->group_commit_ctx is not set to NULL,
        then the engine did only commit the first partition!
        The engine is probably new, since both innodb and the default
        implementation of handler::commit_inplace_alter_table sets it to NULL
        and simply return false, since it allows metadata changes only.
        Loop over all other partitions as to follow the protocol!
      */
      uint i;
      DBUG_ASSERT(0);
      for (i= 1; i < m_tot_parts; i++)
      {
        ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
        error|= m_file[i]->ha_commit_inplace_alter_table(altered_table,
                                                         ha_alter_info,
                                                         true);
      }
    }
  }
  else
  {
    uint i;
    for (i= 0; i < m_tot_parts; i++)
    {
      /* Rollback, commit == false, is done for each partition! */
      ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
      if (m_file[i]->ha_commit_inplace_alter_table(altered_table,
                                                   ha_alter_info, false))
        error= true;
    }
  }
end:
  /* Restore the shared context before returning to the server layer. */
  ha_alter_info->handler_ctx= part_inplace_ctx;

  DBUG_RETURN(error);
}
8369
8370
notify_table_changed()8371 void ha_partition::notify_table_changed()
8372 {
8373 handler **file;
8374
8375 DBUG_ENTER("ha_partition::notify_table_changed");
8376
8377 for (file= m_file; *file; file++)
8378 (*file)->ha_notify_table_changed();
8379
8380 DBUG_VOID_RETURN;
8381 }
8382
8383
/*
  If frm_error() is called then we will use this to find out what file
  extensions exist for the storage engine. This is also used by the default
  rename_table and delete_table method in handler.cc.
*/

/*
  NULL-terminated list of file extensions owned by the partition handler
  itself (ha_par_ext; presumably the ".par" metadata file — defined
  elsewhere in this file).
*/
static const char *ha_partition_ext[]=
{
  ha_par_ext, NullS
};

/** Return the list of file extensions used by this handler. */
const char **ha_partition::bas_ext() const
{ return ha_partition_ext; }
8397
8398
min_of_the_max_uint(uint (handler::* operator_func)(void)const) const8399 uint ha_partition::min_of_the_max_uint(
8400 uint (handler::*operator_func)(void) const) const
8401 {
8402 handler **file;
8403 uint min_of_the_max= ((*m_file)->*operator_func)();
8404
8405 for (file= m_file+1; *file; file++)
8406 {
8407 uint tmp= ((*file)->*operator_func)();
8408 set_if_smaller(min_of_the_max, tmp);
8409 }
8410 return min_of_the_max;
8411 }
8412
8413
/** Smallest max_supported_key_parts() among all partitions. */
uint ha_partition::max_supported_key_parts() const
{
  return min_of_the_max_uint(&handler::max_supported_key_parts);
}
8418
8419
/** Smallest max_supported_key_length() among all partitions. */
uint ha_partition::max_supported_key_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_length);
}
8424
8425
/** Smallest max_supported_key_part_length() among all partitions. */
uint ha_partition::max_supported_key_part_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_part_length);
}
8430
8431
/** Smallest max_supported_record_length() among all partitions. */
uint ha_partition::max_supported_record_length() const
{
  return min_of_the_max_uint(&handler::max_supported_record_length);
}
8436
8437
/** Smallest max_supported_keys() among all partitions. */
uint ha_partition::max_supported_keys() const
{
  return min_of_the_max_uint(&handler::max_supported_keys);
}
8442
8443
extra_rec_buf_length() const8444 uint ha_partition::extra_rec_buf_length() const
8445 {
8446 handler **file;
8447 uint max= (*m_file)->extra_rec_buf_length();
8448
8449 for (file= m_file, file++; *file; file++)
8450 if (max < (*file)->extra_rec_buf_length())
8451 max= (*file)->extra_rec_buf_length();
8452 return max;
8453 }
8454
8455
min_record_length(uint options) const8456 uint ha_partition::min_record_length(uint options) const
8457 {
8458 handler **file;
8459 uint max= (*m_file)->min_record_length(options);
8460
8461 for (file= m_file, file++; *file; file++)
8462 if (max < (*file)->min_record_length(options))
8463 max= (*file)->min_record_length(options);
8464 return max;
8465 }
8466
8467
8468 /****************************************************************************
8469 MODULE compare records
8470 ****************************************************************************/
8471 /*
8472 Compare two positions
8473
8474 SYNOPSIS
8475 cmp_ref()
8476 ref1 First position
8477 ref2 Second position
8478
8479 RETURN VALUE
8480 <0 ref1 < ref2
8481 0 Equal
8482 >0 ref1 > ref2
8483
8484 DESCRIPTION
8485 We get two references and need to check if those records are the same.
8486 If they belong to different partitions we decide that they are not
8487 the same record. Otherwise we use the particular handler to decide if
8488 they are the same. Sort in partition id order if not equal.
8489 */
8490
int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
{
  int cmp;
  my_ptrdiff_t diff1, diff2;
  DBUG_ENTER("ha_partition::cmp_ref");

  /*
    Compare the engine-level row positions first; they are stored after
    the 2-byte partition id at the start of each reference
    (PARTITION_BYTES_IN_POS).
  */
  cmp = m_file[0]->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
                           (ref2 + PARTITION_BYTES_IN_POS));
  if (cmp)
    DBUG_RETURN(cmp);

  if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1]))
  {
    /* This means that the references are same and are in same partition.*/
    DBUG_RETURN(0);
  }

  /*
    In Innodb we compare with either primary key value or global DB_ROW_ID so
    it is not possible that the two references are equal and are in different
    partitions, but in myisam it is possible since we are comparing offsets.
    Remove this assert if DB_ROW_ID is changed to be per partition.
  */
  DBUG_ASSERT(!m_innodb);

  /*
    Same engine position in different partitions: order by partition id,
    comparing byte [1] before byte [0].
    NOTE(review): this treats ref[1] as the more significant byte of the
    partition id — confirm against how position() packs the id.
  */
  diff1= ref2[1] - ref1[1];
  diff2= ref2[0] - ref1[0];
  if (diff1 > 0)
  {
    DBUG_RETURN(-1);
  }
  if (diff1 < 0)
  {
    DBUG_RETURN(+1);
  }
  if (diff2 > 0)
  {
    DBUG_RETURN(-1);
  }
  DBUG_RETURN(+1);
}
8532
8533
8534 /****************************************************************************
8535 MODULE auto increment
8536 ****************************************************************************/
8537
8538
reset_auto_increment(ulonglong value)8539 int ha_partition::reset_auto_increment(ulonglong value)
8540 {
8541 handler **file= m_file;
8542 int res;
8543 DBUG_ENTER("ha_partition::reset_auto_increment");
8544 lock_auto_increment();
8545 part_share->auto_inc_initialized= false;
8546 part_share->next_auto_inc_val= 0;
8547 do
8548 {
8549 if ((res= (*file)->ha_reset_auto_increment(value)) != 0)
8550 break;
8551 } while (*(++file));
8552 unlock_auto_increment();
8553 DBUG_RETURN(res);
8554 }
8555
8556
8557 /**
8558 This method is called by update_auto_increment which in turn is called
8559 by the individual handlers as part of write_row. We use the
8560 part_share->next_auto_inc_val, or search all
8561 partitions for the highest auto_increment_value if not initialized or
8562 if auto_increment field is a secondary part of a key, we must search
8563 every partition when holding a mutex to be sure of correctness.
8564 */
8565
void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment,
                                      ulonglong nb_desired_values,
                                      ulonglong *first_value,
                                      ulonglong *nb_reserved_values)
{
  DBUG_ENTER("ha_partition::get_auto_increment");
  DBUG_PRINT("info", ("offset: %lu inc: %lu desired_values: %lu "
                      "first_value: %lu", (ulong) offset, (ulong) increment,
                      (ulong) nb_desired_values, (ulong) *first_value));
  DBUG_ASSERT(increment && nb_desired_values);
  *first_value= 0;
  if (table->s->next_number_keypart)
  {
    /*
      next_number_keypart is != 0 if the auto_increment column is a secondary
      column in the index (it is allowed in MyISAM)
    */
    DBUG_PRINT("info", ("next_number_keypart != 0"));
    ulonglong nb_reserved_values_part;
    ulonglong first_value_part, max_first_value;
    handler **file= m_file;
    first_value_part= max_first_value= *first_value;
    /* Must lock and find highest value among all partitions. */
    lock_auto_increment();
    do
    {
      /* Only nb_desired_values = 1 makes sense */
      (*file)->get_auto_increment(offset, increment, 1,
                                  &first_value_part, &nb_reserved_values_part);
      if (first_value_part == ULONGLONG_MAX) // error in one partition
      {
        *first_value= first_value_part;
        /* log that the error was between table/partition handler */
        sql_print_error("Partition failed to reserve auto_increment value");
        unlock_auto_increment();
        DBUG_VOID_RETURN;
      }
      DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part));
      /* Keep the highest value seen so far across partitions. */
      set_if_bigger(max_first_value, first_value_part);
    } while (*(++file));
    *first_value= max_first_value;
    *nb_reserved_values= 1;
    unlock_auto_increment();
  }
  else
  {
    THD *thd= ha_thd();
    /*
      This is initialized in the beginning of the first write_row call.
    */
    DBUG_ASSERT(part_share->auto_inc_initialized);
    /*
      Get a lock for handling the auto_increment in part_share
      for avoiding two concurrent statements getting the same number.
    */

    lock_auto_increment();

    /*
      In a multi-row insert statement like INSERT SELECT and LOAD DATA
      where the number of candidate rows to insert is not known in advance
      we must hold a lock/mutex for the whole statement if we have statement
      based replication. Because the statement-based binary log contains
      only the first generated value used by the statement, and slaves assumes
      all other generated values used by this statement were consecutive to
      this first one, we must exclusively lock the generator until the statement
      is done.
    */
    if (!auto_increment_safe_stmt_log_lock &&
        thd->lex->sql_command != SQLCOM_INSERT &&
        mysql_bin_log.is_open() &&
        !thd->is_current_stmt_binlog_format_row() &&
        (thd->variables.option_bits & OPTION_BIN_LOG))
    {
      DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock"));
      /* Released in release_auto_increment() at end of statement. */
      auto_increment_safe_stmt_log_lock= TRUE;
    }

    /* this gets corrected (for offset/increment) in update_auto_increment */
    *first_value= part_share->next_auto_inc_val;
    part_share->next_auto_inc_val+= nb_desired_values * increment;

    unlock_auto_increment();
    DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value));
    *nb_reserved_values= nb_desired_values;
  }
  DBUG_VOID_RETURN;
}
8654
/**
  Release reserved auto_increment values that were not used.

  With a secondary-column auto_increment key, forward the release to each
  locked partition. Otherwise, if this handler reserved an interval, try
  to roll the shared next_auto_inc_val back to next_insert_id, provided
  the reservation was made by this thread and was not forced via
  SET INSERT_ID.
*/
void ha_partition::release_auto_increment()
{
  DBUG_ENTER("ha_partition::release_auto_increment");

  if (table->s->next_number_keypart)
  {
    /* Release in every partition we hold a lock on. */
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
    {
      m_file[i]->ha_release_auto_increment();
    }
  }
  else if (next_insert_id)
  {
    ulonglong next_auto_inc_val;
    lock_auto_increment();
    next_auto_inc_val= part_share->next_auto_inc_val;
    /*
      If the current auto_increment values is lower than the reserved
      value, and the reserved value was reserved by this thread,
      we can lower the reserved value.
    */
    if (next_insert_id < next_auto_inc_val &&
        auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val)
    {
      THD *thd= ha_thd();
      /*
        Check that we do not lower the value because of a failed insert
        with SET INSERT_ID, i.e. forced/non generated values.
      */
      if (thd->auto_inc_intervals_forced.maximum() < next_insert_id)
        part_share->next_auto_inc_val= next_insert_id;
    }
    DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu",
                        (ulong) part_share->next_auto_inc_val));

    /* Unlock the multi row statement lock taken in get_auto_increment */
    if (auto_increment_safe_stmt_log_lock)
    {
      auto_increment_safe_stmt_log_lock= FALSE;
      DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock"));
    }

    unlock_auto_increment();
  }
  DBUG_VOID_RETURN;
}
8704
8705 /****************************************************************************
8706 MODULE initialize handler for HANDLER call
8707 ****************************************************************************/
8708
init_table_handle_for_HANDLER()8709 void ha_partition::init_table_handle_for_HANDLER()
8710 {
8711 return;
8712 }
8713
8714
8715 /**
8716 Return the checksum of the table (all partitions)
8717 */
8718
checksum() const8719 uint ha_partition::checksum() const
8720 {
8721 ha_checksum sum= 0;
8722
8723 DBUG_ENTER("ha_partition::checksum");
8724 if ((table_flags() & HA_HAS_CHECKSUM))
8725 {
8726 handler **file= m_file;
8727 do
8728 {
8729 sum+= (*file)->checksum();
8730 } while (*(++file));
8731 }
8732 DBUG_RETURN(sum);
8733 }
8734
8735
8736 /****************************************************************************
8737 MODULE enable/disable indexes
8738 ****************************************************************************/
8739
8740 /*
8741 Disable indexes for a while
8742 SYNOPSIS
8743 disable_indexes()
8744 mode Mode
8745 RETURN VALUES
8746 0 Success
8747 != 0 Error
8748 */
8749
disable_indexes(uint mode)8750 int ha_partition::disable_indexes(uint mode)
8751 {
8752 handler **file;
8753 int error= 0;
8754
8755 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
8756 for (file= m_file; *file; file++)
8757 {
8758 if ((error= (*file)->ha_disable_indexes(mode)))
8759 break;
8760 }
8761 return error;
8762 }
8763
8764
8765 /*
8766 Enable indexes again
8767 SYNOPSIS
8768 enable_indexes()
8769 mode Mode
8770 RETURN VALUES
8771 0 Success
8772 != 0 Error
8773 */
8774
enable_indexes(uint mode)8775 int ha_partition::enable_indexes(uint mode)
8776 {
8777 handler **file;
8778 int error= 0;
8779
8780 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
8781 for (file= m_file; *file; file++)
8782 {
8783 if ((error= (*file)->ha_enable_indexes(mode)))
8784 break;
8785 }
8786 return error;
8787 }
8788
8789
8790 /*
8791 Check if indexes are disabled
8792 SYNOPSIS
8793 indexes_are_disabled()
8794
8795 RETURN VALUES
8796 0 Indexes are enabled
8797 != 0 Indexes are disabled
8798 */
8799
indexes_are_disabled(void)8800 int ha_partition::indexes_are_disabled(void)
8801 {
8802 handler **file;
8803 int error= 0;
8804
8805 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
8806 for (file= m_file; *file; file++)
8807 {
8808 if ((error= (*file)->indexes_are_disabled()))
8809 break;
8810 }
8811 return error;
8812 }
8813
8814
8815 /**
8816 Check/fix misplaced rows.
8817
8818 @param read_part_id Partition to check/fix.
8819 @param repair If true, move misplaced rows to correct partition.
8820
8821 @return Operation status.
8822 @retval 0 Success
8823 @retval != 0 Error
8824 */
8825
int ha_partition::check_misplaced_rows(uint read_part_id, bool repair)
{
  int result= 0;
  uint32 correct_part_id;
  longlong func_value;
  longlong num_misplaced_rows= 0;

  DBUG_ENTER("ha_partition::check_misplaced_rows");

  DBUG_ASSERT(m_file);

  if (repair)
  {
    /* We must read the full row, if we need to move it! */
    bitmap_set_all(table->read_set);
    bitmap_set_all(table->write_set);
  }
  else
  {
    /* Only need to read the partitioning fields. */
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }

  if ((result= m_file[read_part_id]->ha_rnd_init(1)))
    DBUG_RETURN(result);

  /* Full scan of the partition; each row is re-mapped to its partition. */
  while (true)
  {
    if ((result= m_file[read_part_id]->ha_rnd_next(m_rec0)))
    {
      if (result == HA_ERR_RECORD_DELETED)
        continue;
      if (result != HA_ERR_END_OF_FILE)
        break;

      if (num_misplaced_rows > 0)
      {
        print_admin_msg(ha_thd(), MI_MAX_MSG_BUF, "warning",
                        table_share->db.str, table->alias,
                        opt_op_name[REPAIR_PARTS],
                        "Moved %lld misplaced rows",
                        num_misplaced_rows);
      }
      /* End-of-file reached, all rows are now OK, reset result and break. */
      result= 0;
      break;
    }

    /* Recompute which partition this row belongs to. */
    result= m_part_info->get_partition_id(m_part_info, &correct_part_id,
                                          &func_value);
    if (result)
      break;

    if (correct_part_id != read_part_id)
    {
      num_misplaced_rows++;
      if (!repair)
      {
        /* Check. */
        print_admin_msg(ha_thd(), MI_MAX_MSG_BUF, "error",
                        table_share->db.str, table->alias,
                        opt_op_name[CHECK_PARTS],
                        "Found a misplaced row");
        /* Break on first misplaced row! */
        result= HA_ADMIN_NEEDS_UPGRADE;
        break;
      }
      else
      {
        DBUG_PRINT("info", ("Moving row from partition %d to %d",
                            read_part_id, correct_part_id));

        /*
          Insert row into correct partition. Notice that there are no commit
          for every N row, so the repair will be one large transaction!
        */
        if ((result= m_file[correct_part_id]->ha_write_row(m_rec0)))
        {
          /*
            We have failed to insert a row, it might have been a duplicate!
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          if (result == HA_ERR_FOUND_DUPP_KEY)
          {
            str.append("Duplicate key found, "
                       "please update or delete the record:\n");
            result= HA_ADMIN_CORRUPT;
          }
          m_err_rec= NULL;
          append_row_to_str(str);

          /*
            If the engine supports transactions, the failure will be
            rollbacked.
          */
          if (!m_file[correct_part_id]->has_transactions())
          {
            /* Log this error, so the DBA can notice it and fix it! */
            sql_print_error("Table '%-192s' failed to move/insert a row"
                            " from part %d into part %d:\n%s",
                            table->s->table_name.str,
                            read_part_id,
                            correct_part_id,
                            str.c_ptr_safe());
          }
          print_admin_msg(ha_thd(), MI_MAX_MSG_BUF, "error",
                          table_share->db.str, table->alias,
                          opt_op_name[REPAIR_PARTS],
                          "Failed to move/insert a row"
                          " from part %d into part %d:\n%s",
                          read_part_id,
                          correct_part_id,
                          str.c_ptr_safe());
          break;
        }

        /* Delete row from wrong partition. */
        if ((result= m_file[read_part_id]->ha_delete_row(m_rec0)))
        {
          if (m_file[correct_part_id]->has_transactions())
            break;
          /*
            We have introduced a duplicate, since we failed to remove it
            from the wrong partition.
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          m_err_rec= NULL;
          append_row_to_str(str);

          /* Log this error, so the DBA can notice it and fix it! */
          sql_print_error("Table '%-192s': Delete from part %d failed with"
                          " error %d. But it was already inserted into"
                          " part %d, when moving the misplaced row!"
                          "\nPlease manually fix the duplicate row:\n%s",
                          table->s->table_name.str,
                          read_part_id,
                          result,
                          correct_part_id,
                          str.c_ptr_safe());
          break;
        }
      }
    }
  }

  /* Always end the scan; preserve the first error if there was one. */
  int tmp_result= m_file[read_part_id]->ha_rnd_end();
  DBUG_RETURN(result ? result : tmp_result);
}
8978
8979
/* Admin message template telling the user how to upgrade KEY partitioning. */
#define KEY_PARTITIONING_CHANGED_STR \
  "KEY () partitioning changed, please run:\n" \
  "ALTER TABLE %s.%s ALGORITHM = INPLACE %s"

/**
  Decide whether CHECK ... FOR UPGRADE must flag this table.

  Pre-5.5.3 tables using KEY (sub)partitioning over certain field types
  hash differently (bug#14521864); for those, emit an admin message with
  the exact ALTER statement needed and fail the check.
*/
int ha_partition::check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  int error= HA_ADMIN_NEEDS_CHECK;
  DBUG_ENTER("ha_partition::check_for_upgrade");

  /*
    This is called even without FOR UPGRADE,
    if the .frm version is lower than the current version.
    In that case return that it needs checking!
  */
  if (!(check_opt->sql_flags & TT_FOR_UPGRADE))
    DBUG_RETURN(error);

  /*
    Partitions will be checked for during their ha_check!

    Check if KEY (sub)partitioning was used and any field's hash calculation
    differs from 5.1, see bug#14521864.
  */
  if (table->s->mysql_version < 50503 &&              // 5.1 table (<5.5.3)
      ((m_part_info->part_type == HASH_PARTITION &&   // KEY partitioned
        m_part_info->list_of_part_fields) ||
       (m_is_sub_partitioned &&                       // KEY subpartitioned
        m_part_info->list_of_subpart_fields)))
  {
    Field **field;
    if (m_is_sub_partitioned)
    {
      field= m_part_info->subpart_field_array;
    }
    else
    {
      field= m_part_info->part_field_array;
    }
    /* Scan the partitioning fields for types whose hashing changed. */
    for (; *field; field++)
    {
      switch ((*field)->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          THD *thd= ha_thd();
          char *part_buf;
          String db_name, table_name;
          uint part_buf_len;
          bool skip_generation= false;
          partition_info::enum_key_algorithm old_algorithm;
          old_algorithm= m_part_info->key_algorithm;
          error= HA_ADMIN_FAILED;
          append_identifier(ha_thd(), &db_name, table_share->db.str,
                            table_share->db.length);
          append_identifier(ha_thd(), &table_name, table_share->table_name.str,
                            table_share->table_name.length);
          if (m_part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE)
          {
            /*
              Only possible when someone tampered with .frm files,
              like during tests :)
            */
            skip_generation= true;
          }
          /* Temporarily pretend 5.1 hashing so the generated syntax
             carries ALGORITHM = 1; restored below. */
          m_part_info->key_algorithm= partition_info::KEY_ALGORITHM_51;
          if (skip_generation ||
              !(part_buf= generate_partition_syntax(m_part_info,
                                                    &part_buf_len,
                                                    true,
                                                    true,
                                                    NULL,
                                                    NULL,
                                                    NULL)) ||
              print_admin_msg(thd, SQL_ADMIN_MSG_TEXT_SIZE + 1, "error",
                              table_share->db.str,
                              table->alias,
                              opt_op_name[CHECK_PARTS],
                              KEY_PARTITIONING_CHANGED_STR,
                              db_name.c_ptr_safe(),
                              table_name.c_ptr_safe(),
                              part_buf))
          {
            /* Error creating admin message (too long string?). */
            print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[CHECK_PARTS],
                            KEY_PARTITIONING_CHANGED_STR,
                            db_name.c_ptr_safe(), table_name.c_ptr_safe(),
                            "<old partition clause>, but add ALGORITHM = 1"
                            " between 'KEY' and '(' to change the metadata"
                            " without the need of a full table rebuild.");
          }
          m_part_info->key_algorithm= old_algorithm;
          DBUG_RETURN(error);
        }
      default:
        /* Not affected! */
        ;
      }
    }
  }

  DBUG_RETURN(error);
}
9097
9098
/* Storage-engine plugin descriptor for the partition helper engine. */
struct st_mysql_storage_engine partition_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };

mysql_declare_plugin(partition)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &partition_storage_engine,
  "partition",
  "Mikael Ronstrom, MySQL AB",
  "Partition Storage Engine Helper",
  PLUGIN_LICENSE_GPL,
  partition_initialize, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100, /* 1.0 */
  NULL, /* status variables */
  NULL, /* system variables */
  NULL, /* config options */
  0, /* flags */
}
mysql_declare_plugin_end;
9119
9120 #endif
9121