1 /*
2 Copyright (c) 2005, 2019, Oracle and/or its affiliates.
3 Copyright (c) 2009, 2021, MariaDB
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
17 */
18
19 /*
20 This handler was developed by Mikael Ronstrom for version 5.1 of MySQL.
21 It is an abstraction layer on top of other handlers such as MyISAM,
22 InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
23 be handled by a storage engine. The current example of this is NDB
24 Cluster that has internally handled partitioning. This have benefits in
25 that many loops needed in the partition handler can be avoided.
26
27 Partitioning has an inherent feature which in some cases is positive and
28 in some cases is negative. It splits the data into chunks. This makes
29 the data more manageable, queries can easily be parallelised towards the
30 parts and indexes are split such that there are less levels in the
31 index trees. The inherent disadvantage is that to use a split index
32 one has to scan all index parts which is ok for large queries but for
33 small queries it can be a disadvantage.
34
35 Partitioning lays the foundation for more manageable databases that are
36 extremely large. It does also lay the foundation for more parallelism
37 in the execution of queries. This functionality will grow with later
38 versions of MySQL/MariaDB.
39
  The partition handler is set up to use table locks. It implements a
  partition "SHARE" that is inserted into a hash by table name. You can use
  this to store information of state that any partition handler object will
  be able to see if it is using the same table.

  Please read the object definition in ha_partition.h before reading the rest
  of this file.
47 */
48
49 #include "mariadb.h"
50 #include "sql_priv.h"
51 #include "sql_parse.h" // append_file_to_dir
52 #include "create_options.h"
53
54 #ifdef WITH_PARTITION_STORAGE_ENGINE
55 #include "ha_partition.h"
56 #include "sql_table.h" // tablename_to_filename
57 #include "key.h"
58 #include "sql_plugin.h"
59 #include "sql_show.h" // append_identifier
60 #include "sql_admin.h" // SQL_ADMIN_MSG_TEXT_SIZE
61 #include "sql_select.h"
62
63 #include "debug_sync.h"
64
65 /* First 4 bytes in the .par file is the number of 32-bit words in the file */
66 #define PAR_WORD_SIZE 4
67 /* offset to the .par file checksum */
68 #define PAR_CHECKSUM_OFFSET 4
69 /* offset to the total number of partitions */
70 #define PAR_NUM_PARTS_OFFSET 8
71 /* offset to the engines array */
72 #define PAR_ENGINES_OFFSET 12
73 #define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | \
74 HA_REC_NOT_IN_SEQ | \
75 HA_CAN_REPAIR | \
76 HA_REUSES_FILE_NAMES)
77 #define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
78 HA_DUPLICATE_POS | \
79 HA_CAN_INSERT_DELAYED | \
80 HA_READ_BEFORE_WRITE_REMOVAL |\
81 HA_CAN_TABLES_WITHOUT_ROLLBACK)
82
83 static const char *ha_par_ext= PAR_EXT;
84
85 /****************************************************************************
86 MODULE create/delete handler object
87 ****************************************************************************/
88
89 static handler *partition_create_handler(handlerton *hton,
90 TABLE_SHARE *share,
91 MEM_ROOT *mem_root);
92 static uint partition_flags();
93 static alter_table_operations alter_table_flags(alter_table_operations flags);
94
95
notify_tabledef_changed(LEX_CSTRING * db,LEX_CSTRING * org_table_name,LEX_CUSTRING * frm,LEX_CUSTRING * version)96 int ha_partition::notify_tabledef_changed(LEX_CSTRING *db,
97 LEX_CSTRING *org_table_name,
98 LEX_CUSTRING *frm,
99 LEX_CUSTRING *version)
100 {
101 char from_buff[FN_REFLEN + 1], from_lc_buff[FN_REFLEN + 1];
102 const char *from_path, *name_buffer_ptr, *from;
103 int res= 0;
104 handler **file= m_file;
105 DBUG_ENTER("ha_partition::notify_tabledef_changed");
106
107 from= table->s->normalized_path.str;
108
109 /* setup m_name_buffer_ptr */
110 if (read_par_file(table->s->normalized_path.str))
111 DBUG_RETURN(1);
112
113 from_path= get_canonical_filename(*file, from, from_lc_buff);
114 name_buffer_ptr= m_name_buffer_ptr;
115 do
116 {
117 LEX_CSTRING table_name;
118 const char *table_name_ptr;
119 if (create_partition_name(from_buff, sizeof(from_buff),
120 from_path, name_buffer_ptr,
121 NORMAL_PART_NAME, FALSE))
122 res=1;
123 table_name_ptr= from_buff + dirname_length(from_buff);
124
125 lex_string_set3(&table_name, table_name_ptr, strlen(table_name_ptr));
126
127 if (((*file)->ht)->notify_tabledef_changed((*file)->ht, db, &table_name,
128 frm, version, *file))
129 res=1;
130 name_buffer_ptr= strend(name_buffer_ptr) + 1;
131 } while (*(++file));
132 DBUG_RETURN(res);
133 }
134
135
136 static int
partition_notify_tabledef_changed(handlerton *,LEX_CSTRING * db,LEX_CSTRING * table,LEX_CUSTRING * frm,LEX_CUSTRING * version,handler * file)137 partition_notify_tabledef_changed(handlerton *,
138 LEX_CSTRING *db,
139 LEX_CSTRING *table,
140 LEX_CUSTRING *frm,
141 LEX_CUSTRING *version,
142 handler *file)
143 {
144 DBUG_ENTER("partition_notify_tabledef_changed");
145 DBUG_RETURN(static_cast<ha_partition*>
146 (file)->notify_tabledef_changed(db, table, frm, version));
147 }
148
149
150 /*
151 If frm_error() is called then we will use this to to find out what file
152 extensions exist for the storage engine. This is also used by the default
153 rename_table and delete_table method in handler.cc.
154 */
155 static const char *ha_partition_ext[]=
156 {
157 ha_par_ext, NullS
158 };
159
160 static PSI_memory_key key_memory_Partition_share;
161 static PSI_memory_key key_memory_partition_sort_buffer;
162 static PSI_memory_key key_memory_Partition_admin;
163
164 static PSI_memory_key key_memory_ha_partition_file;
165 //static PSI_memory_key key_memory_ha_partition_engine_array;
166 static PSI_memory_key key_memory_ha_partition_part_ids;
167
168 #ifdef HAVE_PSI_INTERFACE
169 PSI_mutex_key key_partition_auto_inc_mutex;
170 PSI_file_key key_file_ha_partition_par;
171
172 static PSI_mutex_info all_partition_mutexes[]=
173 {
174 { &key_partition_auto_inc_mutex, "Partition_share::auto_inc_mutex", 0}
175 };
176 static PSI_memory_info all_partitioning_memory[]=
177 { { &key_memory_Partition_share, "Partition_share", 0},
178 { &key_memory_partition_sort_buffer, "partition_sort_buffer", 0},
179 { &key_memory_Partition_admin, "Partition_admin", 0},
180 { &key_memory_ha_partition_file, "ha_partition::file", 0},
181 // { &key_memory_ha_partition_engine_array, "ha_partition::engine_array", 0},
182 { &key_memory_ha_partition_part_ids, "ha_partition::part_ids", 0} };
183 static PSI_file_info all_partition_file[]=
184 { { &key_file_ha_partition_par, "ha_partition::parfile", 0} };
185
init_partition_psi_keys(void)186 static void init_partition_psi_keys(void)
187 {
188 const char* category= "partition";
189 int count;
190
191 count= array_elements(all_partitioning_memory);
192 mysql_memory_register(category, all_partitioning_memory, count);
193 count= array_elements(all_partition_mutexes);
194 mysql_mutex_register(category, all_partition_mutexes, count);
195 count= array_elements(all_partition_file);
196 mysql_file_register(category, all_partition_file, count);
197 }
198 #endif /* HAVE_PSI_INTERFACE */
199
partition_initialize(void * p)200 static int partition_initialize(void *p)
201 {
202 handlerton *partition_hton;
203 partition_hton= (handlerton *)p;
204
205 partition_hton->db_type= DB_TYPE_PARTITION_DB;
206 partition_hton->create= partition_create_handler;
207
208 partition_hton->partition_flags= partition_flags;
209 partition_hton->notify_tabledef_changed= partition_notify_tabledef_changed;
210 partition_hton->alter_table_flags= alter_table_flags;
211 partition_hton->flags= HTON_NOT_USER_SELECTABLE |
212 HTON_HIDDEN |
213 HTON_TEMPORARY_NOT_SUPPORTED;
214 partition_hton->tablefile_extensions= ha_partition_ext;
215
216 #ifdef HAVE_PSI_INTERFACE
217 init_partition_psi_keys();
218 #endif
219 return 0;
220 }
221
222
223 /**
224 Initialize and allocate space for partitions shares.
225
226 @param num_parts Number of partitions to allocate storage for.
227
228 @return Operation status.
229 @retval true Failure (out of memory).
230 @retval false Success.
231 */
232
init(uint num_parts)233 bool Partition_share::init(uint num_parts)
234 {
235 DBUG_ENTER("Partition_share::init");
236 auto_inc_initialized= false;
237 partition_name_hash_initialized= false;
238 next_auto_inc_val= 0;
239 if (partitions_share_refs.init(num_parts))
240 {
241 DBUG_RETURN(true);
242 }
243 DBUG_RETURN(false);
244 }
245
246
247 /*
248 Create new partition handler
249
250 SYNOPSIS
251 partition_create_handler()
252 table Table object
253
254 RETURN VALUE
255 New partition object
256 */
257
partition_create_handler(handlerton * hton,TABLE_SHARE * share,MEM_ROOT * mem_root)258 static handler *partition_create_handler(handlerton *hton,
259 TABLE_SHARE *share,
260 MEM_ROOT *mem_root)
261 {
262 ha_partition *file= new (mem_root) ha_partition(hton, share);
263 if (file && file->initialize_partition(mem_root))
264 {
265 delete file;
266 file= 0;
267 }
268 return file;
269 }
270
/**
  Partition capability flags of this handlerton.

  @return HA_CAN_PARTITION: the engine can handle partitioned tables.
*/
static uint partition_flags()
{
  return HA_CAN_PARTITION;
}
275
/**
  ALTER TABLE capabilities of the partition engine.

  @param flags  Requested alter table operations (unused)

  @return Flags stating that partition functions are supported and that
          partitions can be changed without copying the whole table.
*/
static alter_table_operations alter_table_flags(alter_table_operations flags __attribute__((unused)))
{
  return (HA_PARTITION_FUNCTION_SUPPORTED |
          HA_FAST_CHANGE_PARTITION);
}
281
282 /*
283 Constructor method
284
285 SYNOPSIS
286 ha_partition()
287 table Table object
288
289 RETURN VALUE
290 NONE
291 */
292
/* Normal constructor: handler created for an existing table share */
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
  :handler(hton, share)
{
  DBUG_ENTER("ha_partition::ha_partition(table)");
  ha_partition_init();
  DBUG_VOID_RETURN;
}
300
301
/* Initialize all partition variables */

void ha_partition::ha_partition_init()
{
  /* Memory root for handler-local allocations; freed in the destructor */
  init_alloc_root(PSI_INSTRUMENT_ME, &m_mem_root, 512, 512, MYF(0));
  init_handler_variables();
}
309
310 /*
311 Constructor method
312
313 SYNOPSIS
314 ha_partition()
315 part_info Partition info
316
317 RETURN VALUE
318 NONE
319 */
320
ha_partition::ha_partition(handlerton *hton, partition_info *part_info)
  :handler(hton, NULL)
{
  DBUG_ENTER("ha_partition::ha_partition(part_info)");
  DBUG_ASSERT(part_info);
  ha_partition_init();
  m_part_info= part_info;
  /* Handlers are created from partition info, not from a .par file */
  m_create_handler= TRUE;
  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
  DBUG_VOID_RETURN;
}
332
333 /**
334 ha_partition constructor method used by ha_partition::clone()
335
336 @param hton Handlerton (partition_hton)
337 @param share Table share object
338 @param part_info_arg partition_info to use
339 @param clone_arg ha_partition to clone
340 @param clme_mem_root_arg MEM_ROOT to use
341
342 @return New partition handler
343 */
344
ha_partition(handlerton * hton,TABLE_SHARE * share,partition_info * part_info_arg,ha_partition * clone_arg,MEM_ROOT * clone_mem_root_arg)345 ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share,
346 partition_info *part_info_arg,
347 ha_partition *clone_arg,
348 MEM_ROOT *clone_mem_root_arg)
349 :handler(hton, share)
350 {
351 DBUG_ENTER("ha_partition::ha_partition(clone)");
352 ha_partition_init();
353 m_part_info= part_info_arg;
354 m_create_handler= TRUE;
355 m_is_sub_partitioned= m_part_info->is_sub_partitioned();
356 m_is_clone_of= clone_arg;
357 m_clone_mem_root= clone_mem_root_arg;
358 part_share= clone_arg->part_share;
359 m_tot_parts= clone_arg->m_tot_parts;
360 DBUG_VOID_RETURN;
361 }
362
363 /*
364 Initialize handler object
365
366 SYNOPSIS
367 init_handler_variables()
368
369 RETURN VALUE
370 NONE
371 */
372
void ha_partition::init_handler_variables()
{
  /* Index/scan state */
  active_index= MAX_KEY;
  m_mode= 0;
  m_open_test_lock= 0;
  /* .par file buffer and derived arrays */
  m_file_buffer= NULL;
  m_name_buffer_ptr= NULL;
  m_engine_array= NULL;
  m_connect_string= NULL;
  /* Underlying per-partition handler arrays */
  m_file= NULL;
  m_file_tot_parts= 0;
  m_reorged_file= NULL;
  m_new_file= NULL;
  m_reorged_parts= 0;
  m_added_file= NULL;
  m_tot_parts= 0;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_scan_value= 2;
  m_ref_length= 0;
  m_part_spec.end_part= NO_CURRENT_PART_ID;
  m_index_scan_type= partition_no_index_scan;
  m_start_key.key= NULL;
  m_start_key.length= 0;
  m_myisam= FALSE;
  m_innodb= FALSE;
  /* Extra cache state forwarded to the underlying handlers */
  m_extra_cache= FALSE;
  m_extra_cache_size= 0;
  m_extra_prepare_for_update= FALSE;
  m_extra_cache_part_id= NO_CURRENT_PART_ID;
  m_handler_status= handler_not_initialized;
  m_part_field_array= NULL;
  m_ordered_rec_buffer= NULL;
  m_top_entry= NO_CURRENT_PART_ID;
  m_rec_length= 0;
  m_last_part= 0;
  m_rec0= 0;
  m_err_rec= NULL;
  m_curr_key_info[0]= NULL;
  m_curr_key_info[1]= NULL;
  m_part_func_monotonicity_info= NON_MONOTONIC;
  m_key_not_found= FALSE;
  auto_increment_lock= FALSE;
  auto_increment_safe_stmt_log_lock= FALSE;
  /*
    this allows blackhole to work properly
  */
  m_num_locks= 0;
  m_part_info= NULL;
  m_create_handler= FALSE;
  m_is_sub_partitioned= 0;
  m_is_clone_of= NULL;
  m_clone_mem_root= NULL;
  part_share= NULL;
  m_new_partitions_share_refs.empty();
  m_part_ids_sorted_by_num_of_records= NULL;
  m_partitions_to_open= NULL;

  /* Multi-range-read state */
  m_range_info= NULL;
  m_mrr_full_buffer_size= 0;
  m_mrr_new_full_buffer_size= 0;
  m_mrr_full_buffer= NULL;
  m_mrr_range_first= NULL;

  m_pre_calling= FALSE;
  m_pre_call_use_parallel= FALSE;

  ft_first= ft_current= NULL;
  bulk_access_executing= FALSE; // For future

  /*
    Clear bitmaps to allow anyone to call my_bitmap_free() on them at any time
  */
  my_bitmap_clear(&m_bulk_insert_started);
  my_bitmap_clear(&m_locked_partitions);
  my_bitmap_clear(&m_partitions_to_reset);
  my_bitmap_clear(&m_key_not_found_partitions);
  my_bitmap_clear(&m_mrr_used_partitions);
  my_bitmap_clear(&m_opened_partitions);
  m_file_sample= NULL;

#ifdef DONT_HAVE_TO_BE_INITALIZED
  m_start_key.flag= 0;
  m_ordered= TRUE;
#endif
}
458
459
/* Engine name reported for this table, taken from the first partition */
const char *ha_partition::table_type() const
{
  // we can do this since we only support a single engine type
  return m_file[0]->table_type();
}
465
466
467 /*
468 Destructor method
469
470 SYNOPSIS
471 ~ha_partition()
472
473 RETURN VALUE
474 NONE
475 */
476
~ha_partition()477 ha_partition::~ha_partition()
478 {
479 DBUG_ENTER("ha_partition::~ha_partition");
480 if (m_new_partitions_share_refs.elements)
481 m_new_partitions_share_refs.delete_elements();
482 if (m_file != NULL)
483 {
484 uint i;
485 for (i= 0; i < m_tot_parts; i++)
486 delete m_file[i];
487 }
488 destroy_record_priority_queue();
489 my_free(m_part_ids_sorted_by_num_of_records);
490
491 if (m_added_file)
492 {
493 for (handler **ph= m_added_file; *ph; ph++)
494 delete (*ph);
495 }
496 clear_handler_file();
497 free_root(&m_mem_root, MYF(0));
498
499 DBUG_VOID_RETURN;
500 }
501
502
503 /*
504 Initialize partition handler object
505
506 SYNOPSIS
507 initialize_partition()
508 mem_root Allocate memory through this
509
510 RETURN VALUE
511 1 Error
512 0 Success
513
514 DESCRIPTION
515
516 The partition handler is only a layer on top of other engines. Thus it
517 can't really perform anything without the underlying handlers. Thus we
518 add this method as part of the allocation of a handler object.
519
520 1) Allocation of underlying handlers
521 If we have access to the partition info we will allocate one handler
522 instance for each partition.
523 2) Allocation without partition info
524 The cases where we don't have access to this information is when called
525 in preparation for delete_table and rename_table and in that case we
526 only need to set HA_FILE_BASED. In that case we will use the .par file
527 that contains information about the partitions and their engines and
528 the names of each partition.
529 3) Table flags initialisation
530 We need also to set table flags for the partition handler. This is not
531 static since it depends on what storage engines are used as underlying
532 handlers.
533 The table flags is set in this routine to simulate the behaviour of a
534 normal storage engine
535 The flag HA_FILE_BASED will be set independent of the underlying handlers
536 4) Index flags initialisation
537 When knowledge exists on the indexes it is also possible to initialize the
538 index flags. Again the index flags must be initialized by using the under-
539 lying handlers since this is storage engine dependent.
540 The flag HA_READ_ORDER will be reset for the time being to indicate no
541 ordered output is available from partition handler indexes. Later a merge
542 sort will be performed using the underlying handlers.
543 5) has_transactions are calculated here.
544
545 */
546
initialize_partition(MEM_ROOT * mem_root)547 bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
548 {
549 handler **file_array, *file;
550 ulonglong check_table_flags;
551 DBUG_ENTER("ha_partition::initialize_partition");
552
553 if (m_create_handler)
554 {
555 m_tot_parts= m_part_info->get_tot_partitions();
556 DBUG_ASSERT(m_tot_parts > 0);
557 if (new_handlers_from_part_info(mem_root))
558 DBUG_RETURN(1);
559 }
560 else if (!table_share || !table_share->normalized_path.str)
561 {
562 /*
563 Called with dummy table share (delete, rename and alter table).
564 Don't need to set-up anything.
565 */
566 DBUG_RETURN(0);
567 }
568 else if (get_from_handler_file(table_share->normalized_path.str,
569 mem_root, false))
570 {
571 my_error(ER_FAILED_READ_FROM_PAR_FILE, MYF(0));
572 DBUG_RETURN(1);
573 }
574 /*
575 We create all underlying table handlers here. We do it in this special
576 method to be able to report allocation errors.
577
578 Set up has_transactions since they are called often in all kinds of places,
579 other parameters are calculated on demand.
580 Verify that all partitions have the same table_flags.
581 */
582 check_table_flags= m_file[0]->ha_table_flags();
583 file_array= m_file;
584 do
585 {
586 file= *file_array;
587 if (check_table_flags != file->ha_table_flags())
588 {
589 my_error(ER_MIX_HANDLER_ERROR, MYF(0));
590 DBUG_RETURN(1);
591 }
592 } while (*(++file_array));
593 m_handler_status= handler_initialized;
594 DBUG_RETURN(0);
595 }
596
597 /****************************************************************************
598 MODULE meta data changes
599 ****************************************************************************/
600 /*
601 Delete a table
602
603 SYNOPSIS
604 delete_table()
605 name Full path of table name
606
607 RETURN VALUE
608 >0 Error
609 0 Success
610
611 DESCRIPTION
612 Used to delete a table. By the time delete_table() has been called all
613 opened references to this table will have been closed (and your globally
614 shared references released. The variable name will just be the name of
615 the table. You will need to remove any files you have created at this
616 point.
617
618 If you do not implement this, the default delete_table() is called from
619 handler.cc and it will delete all files with the file extensions returned
620 by bas_ext().
621
622 Called from handler.cc by delete_table and ha_create_table(). Only used
623 during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
624 the storage engine.
625 */
626
delete_table(const char * name)627 int ha_partition::delete_table(const char *name)
628 {
629 DBUG_ENTER("ha_partition::delete_table");
630
631 DBUG_RETURN(del_ren_table(name, NULL));
632 }
633
634
635 /*
636 Rename a table
637
638 SYNOPSIS
639 rename_table()
640 from Full path of old table name
641 to Full path of new table name
642
643 RETURN VALUE
644 >0 Error
645 0 Success
646
647 DESCRIPTION
648 Renames a table from one name to another from alter table call.
649
650 If you do not implement this, the default rename_table() is called from
651 handler.cc and it will rename all files with the file extensions returned
652 by bas_ext().
653
654 Called from sql_table.cc by mysql_rename_table().
655 */
656
rename_table(const char * from,const char * to)657 int ha_partition::rename_table(const char *from, const char *to)
658 {
659 DBUG_ENTER("ha_partition::rename_table");
660
661 DBUG_RETURN(del_ren_table(from, to));
662 }
663
664
665 /*
666 Create the handler file (.par-file)
667
668 SYNOPSIS
669 create_partitioning_metadata()
670 path Path to the new frm file (without ext)
671 old_p Path to the old frm file (without ext)
672 create_info Create info generated for CREATE TABLE
673
674 RETURN VALUE
675 >0 Error
676 0 Success
677
678 DESCRIPTION
679 create_partitioning_metadata is called to create any handler specific files
680 before opening the file with openfrm to later call ::create on the
681 file object.
682 In the partition handler this is used to store the names of partitions
683 and types of engines in the partitions.
684 */
685
create_partitioning_metadata(const char * path,const char * old_path,chf_create_flags action_flag)686 int ha_partition::create_partitioning_metadata(const char *path,
687 const char *old_path,
688 chf_create_flags action_flag)
689 {
690 partition_element *part;
691 DBUG_ENTER("ha_partition::create_partitioning_metadata");
692
693 /*
694 We need to update total number of parts since we might write the handler
695 file as part of a partition management command
696 */
697 if (action_flag == CHF_DELETE_FLAG ||
698 action_flag == CHF_RENAME_FLAG)
699 {
700 char name[FN_REFLEN];
701 char old_name[FN_REFLEN];
702
703 strxmov(name, path, ha_par_ext, NullS);
704 strxmov(old_name, old_path, ha_par_ext, NullS);
705 if ((action_flag == CHF_DELETE_FLAG &&
706 mysql_file_delete(key_file_ha_partition_par, name, MYF(MY_WME))) ||
707 (action_flag == CHF_RENAME_FLAG &&
708 mysql_file_rename(key_file_ha_partition_par, old_name, name,
709 MYF(MY_WME))))
710 {
711 DBUG_RETURN(TRUE);
712 }
713 }
714 else if (action_flag == CHF_CREATE_FLAG)
715 {
716 if (create_handler_file(path))
717 {
718 my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
719 DBUG_RETURN(1);
720 }
721 }
722
723 /* m_part_info is only NULL when we failed to create a partition table */
724 if (m_part_info)
725 {
726 part= m_part_info->partitions.head();
727 /* part->engine_type may be 0 when we failed to create the partition */
728 if (part->engine_type &&
729 (part->engine_type)->create_partitioning_metadata &&
730 ((part->engine_type)->create_partitioning_metadata)(path, old_path,
731 action_flag))
732 {
733 my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
734 DBUG_RETURN(1);
735 }
736 }
737 DBUG_RETURN(0);
738 }
739
740
741 /*
742 Create a partitioned table
743
744 SYNOPSIS
745 create()
746 name Full path of table name
747 table_arg Table object
748 create_info Create info generated for CREATE TABLE
749
750 RETURN VALUE
751 >0 Error
752 0 Success
753
754 DESCRIPTION
755 create() is called to create a table. The variable name will have the name
756 of the table. When create() is called you do not need to worry about
757 opening the table. Also, the FRM file will have already been created so
758 adjusting create_info will not do you any good. You can overwrite the frm
759 file at this point if you wish to change the table definition, but there
760 are no methods currently provided for doing that.
761
762 Called from handler.cc by ha_create_table().
763 */
764
int ha_partition::create(const char *name, TABLE *table_arg,
                         HA_CREATE_INFO *create_info)
{
  int error;
  char name_buff[FN_REFLEN + 1], name_lc_buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *path;
  uint i;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem;
  handler **file, **abort_file;
  DBUG_ENTER("ha_partition::create");
  DBUG_PRINT("enter", ("name: '%s'", name));

  DBUG_ASSERT(!fn_frm_ext(name));

  /* Not allowed to create temporary partitioned tables */
  if (create_info && create_info->tmp_table())
  {
    my_error(ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING, MYF(0), "CREATE TEMPORARY TABLE");
    DBUG_RETURN(TRUE);
  }

  /* Read the .par file: sets up m_name_buffer_ptr with partition names */
  if (get_from_handler_file(name, ha_thd()->mem_root, false))
    DBUG_RETURN(TRUE);
  DBUG_ASSERT(m_file_buffer);
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  path= get_canonical_filename(*file, name, name_lc_buff);
  for (i= 0; i < m_part_info->num_parts; i++)
  {
    part_elem= part_it++;
    if (m_is_sub_partitioned)
    {
      uint j;
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        part_elem= sub_it++;
        /* Build <path>#P#<part>#SP#<subpart> and create that subpartition */
        if (unlikely((error= create_partition_name(name_buff,
                                                   sizeof(name_buff), path,
                                                   name_buffer_ptr,
                                                   NORMAL_PART_NAME, FALSE))))
          goto create_error;
        if (unlikely((error= set_up_table_before_create(table_arg, name_buff,
                                                        create_info,
                                                        part_elem)) ||
                     ((error= (*file)->ha_create(name_buff, table_arg,
                                                 create_info)))))
          goto create_error;

        /* Names are stored back to back, NUL separated; advance in step
           with the handler array */
        name_buffer_ptr= strend(name_buffer_ptr) + 1;
        file++;
      }
    }
    else
    {
      if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff),
                                                 path, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto create_error;
      if (unlikely((error= set_up_table_before_create(table_arg, name_buff,
                                                      create_info,
                                                      part_elem)) ||
                   ((error= (*file)->ha_create(name_buff, table_arg,
                                               create_info)))))
        goto create_error;

      name_buffer_ptr= strend(name_buffer_ptr) + 1;
      file++;
    }
  }
  DBUG_RETURN(0);

create_error:
  /* Roll back: drop every partition created so far, then the .par file */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    if (!create_partition_name(name_buff, sizeof(name_buff), path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE))
      (void) (*file)->delete_table((const char*) name_buff);
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  handler::delete_table(name);
  DBUG_RETURN(error);
}
858
859
860 /*
861 Drop partitions as part of ALTER TABLE of partitions
862
863 SYNOPSIS
864 drop_partitions()
865 path Complete path of db and table name
866
867 RETURN VALUE
868 >0 Failure
869 0 Success
870
871 DESCRIPTION
872 Use part_info object on handler object to deduce which partitions to
873 drop (each partition has a state attached to it)
874 */
875
int ha_partition::drop_partitions(const char *path)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  char part_name_buff[FN_REFLEN + 1];
  uint num_parts= m_part_info->partitions.elements;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint name_variant;
  int ret_error;
  int error= 0;
  DBUG_ENTER("ha_partition::drop_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   part_name_buff)));
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_DROPPED)
    {
      handler *file;
      /*
        This part is to be dropped, meaning the part or all its subparts.
      */
      name_variant= NORMAL_PART_NAME;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint j= 0, part;
        do
        {
          partition_element *sub_elem= sub_it++;
          /* Subpartition handlers are laid out part-major in m_file */
          part= i * num_subparts + j;
          if (unlikely((ret_error=
                        create_subpartition_name(part_name_buff,
                                                 sizeof(part_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 name_variant))))
            error= ret_error;
          file= m_file[part];
          DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff));
          if (unlikely((ret_error= file->delete_table(part_name_buff))))
            error= ret_error;
          /* Record in the ddl log that this drop has been executed */
          if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                entry_pos)))
            error= 1;
        } while (++j < num_subparts);
      }
      else
      {
        if ((ret_error= create_partition_name(part_name_buff,
                          sizeof(part_name_buff), path,
                          part_elem->partition_name, name_variant, TRUE)))
          error= ret_error;
        else
        {
          file= m_file[i];
          DBUG_PRINT("info", ("Drop partition %s", part_name_buff));
          if (unlikely((ret_error= file->delete_table(part_name_buff))))
            error= ret_error;
          /* Record in the ddl log that this drop has been executed */
          if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                entry_pos)))
            error= 1;
        }
      }
      if (part_elem->part_state == PART_IS_CHANGED)
        part_elem->part_state= PART_NORMAL;
      else
        part_elem->part_state= PART_IS_DROPPED;
    }
  } while (++i < num_parts);
  /* Make sure the ddl log updates reach disk */
  (void) sync_ddl_log();
  DBUG_RETURN(error);
}
954
955
956 /*
957 Rename partitions as part of ALTER TABLE of partitions
958
959 SYNOPSIS
960 rename_partitions()
961 path Complete path of db and table name
962
963 RETURN VALUE
964 TRUE Failure
965 FALSE Success
966
967 DESCRIPTION
968 When reorganising partitions, adding hash partitions and coalescing
969 partitions it can be necessary to rename partitions while holding
970 an exclusive lock on the table.
971 Which partitions to rename is given by state of partitions found by the
972 partition info struct referenced from the handler object
973 */
974
int ha_partition::rename_partitions(const char *path)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  List_iterator<partition_element> temp_it(m_part_info->temp_partitions);
  char part_name_buff[FN_REFLEN + 1];       // holds the TEMP (source) name
  char norm_name_buff[FN_REFLEN + 1];       // holds the normal (target) name
  uint num_parts= m_part_info->partitions.elements;
  uint part_count= 0;                       // index into m_reorged_file
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint j= 0;
  int error= 0;
  int ret_error;
  uint temp_partitions= m_part_info->temp_partitions.elements;
  handler *file;
  partition_element *part_elem, *sub_elem;
  DBUG_ENTER("ha_partition::rename_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   norm_name_buff)));

  DEBUG_SYNC(ha_thd(), "before_rename_partitions");
  if (temp_partitions)
  {
    /*
      These are the reorganised partitions that have already been copied.
      We delete the partitions and log the delete by inactivating the
      delete log entry in the table log. We only need to synchronise
      these writes before moving to the next loop since there is no
      interaction among reorganised partitions, they cannot have the
      same name.
    */
    do
    {
      part_elem= temp_it++;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        j= 0;
        do
        {
          sub_elem= sub_it++;
          file= m_reorged_file[part_count++];
          if (unlikely((ret_error=
                        create_subpartition_name(norm_name_buff,
                                                 sizeof(norm_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 NORMAL_PART_NAME))))
            error= ret_error;
          DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
          /*
            On failure we record the error but continue with the remaining
            (sub)partitions; a non-NULL log_entry marks the step as not done.
          */
          if (unlikely((ret_error= file->delete_table(norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            sub_elem->log_entry= NULL; /* Indicate success */
        } while (++j < num_subparts);
      }
      else
      {
        file= m_reorged_file[part_count++];
        if (unlikely((ret_error=
                      create_partition_name(norm_name_buff,
                                            sizeof(norm_name_buff), path,
                                            part_elem->partition_name,
                                            NORMAL_PART_NAME, TRUE))))
          error= ret_error;
        else
        {
          DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
          if (unlikely((ret_error= file->delete_table(norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            part_elem->log_entry= NULL; /* Indicate success */
        }
      }
    } while (++i < temp_partitions);
    /* Persist the deactivated delete entries before starting the renames. */
    (void) sync_ddl_log();
  }
  i= 0;
  do
  {
    /*
      When state is PART_IS_CHANGED it means that we have created a new
      TEMP partition that is to be renamed to normal partition name and
      we are to delete the old partition with currently the normal name.

      We perform this operation by
      1) Delete old partition with normal partition name
      2) Signal this in table log entry
      3) Synch table log to ensure we have consistency in crashes
      4) Rename temporary partition name to normal partition name
      5) Signal this to table log entry
      It is not necessary to synch the last state since a new rename
      should not corrupt things if there was no temporary partition.

      The only other parts we need to cater for are new parts that
      replace reorganised parts. The reorganised parts were deleted
      by the code above that goes through the temp_partitions list.
      Thus the synch above makes it safe to simply perform step 4 and 5
      for those entries.
    */
    part_elem= part_it++;
    if (part_elem->part_state == PART_IS_CHANGED ||
        part_elem->part_state == PART_TO_BE_DROPPED ||
        (part_elem->part_state == PART_IS_ADDED && temp_partitions))
    {
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint part;                /* flat index into m_new_file */

        j= 0;
        do
        {
          sub_elem= sub_it++;
          part= i * num_subparts + j;
          if (unlikely((ret_error=
                        create_subpartition_name(norm_name_buff,
                                                 sizeof(norm_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 NORMAL_PART_NAME))))
            error= ret_error;
          if (part_elem->part_state == PART_IS_CHANGED)
          {
            /* Steps 1-3: delete old partition, log it, sync the ddl log. */
            file= m_reorged_file[part_count++];
            DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
            if (unlikely((ret_error= file->delete_table(norm_name_buff))))
              error= ret_error;
            else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                       entry_pos)))
              error= 1;
            (void) sync_ddl_log();
          }
          /* Steps 4-5: rename TEMP name to normal name and log it. */
          file= m_new_file[part];
          if (unlikely((ret_error=
                        create_subpartition_name(part_name_buff,
                                                 sizeof(part_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 TEMP_PART_NAME))))
            error= ret_error;
          DBUG_PRINT("info", ("Rename subpartition from %s to %s",
                              part_name_buff, norm_name_buff));
          if (unlikely((ret_error= file->ha_rename_table(part_name_buff,
                                                         norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            sub_elem->log_entry= NULL;
        } while (++j < num_subparts);
      }
      else
      {
        if (unlikely((ret_error=
                      create_partition_name(norm_name_buff,
                                            sizeof(norm_name_buff), path,
                                            part_elem->partition_name,
                                            NORMAL_PART_NAME, TRUE)) ||
                     (ret_error= create_partition_name(part_name_buff,
                                                       sizeof(part_name_buff),
                                                       path,
                                                       part_elem->
                                                       partition_name,
                                                       TEMP_PART_NAME, TRUE))))
          error= ret_error;
        else
        {
          if (part_elem->part_state == PART_IS_CHANGED)
          {
            /* Steps 1-3: delete old partition, log it, sync the ddl log. */
            file= m_reorged_file[part_count++];
            DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
            if (unlikely((ret_error= file->delete_table(norm_name_buff))))
              error= ret_error;
            else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                       entry_pos)))
              error= 1;
            (void) sync_ddl_log();
          }
          /* Steps 4-5: rename TEMP name to normal name and log it. */
          file= m_new_file[i];
          DBUG_PRINT("info", ("Rename partition from %s to %s",
                              part_name_buff, norm_name_buff));
          if (unlikely((ret_error= file->ha_rename_table(part_name_buff,
                                                         norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            part_elem->log_entry= NULL;
        }
      }
    }
  } while (++i < num_parts);
  (void) sync_ddl_log();
  DBUG_RETURN(error);
}
1184
1185
/*
  Operation codes passed as the 'flag' argument to
  handle_opt_partitions()/handle_opt_part().
*/
#define OPTIMIZE_PARTS 1
#define ANALYZE_PARTS 2
#define CHECK_PARTS   3
#define REPAIR_PARTS 4
#define ASSIGN_KEYCACHE_PARTS 5
#define PRELOAD_KEYS_PARTS 6

/*
  Human-readable operation names, indexed by the *_PARTS codes above
  (slot 0 is unused); used when printing admin result messages.
*/
static const char *opt_op_name[]= {NULL,
                                   "optimize", "analyze", "check", "repair",
                                   "assign_to_keycache", "preload_keys"};
1196
1197 /*
1198 Optimize table
1199
1200 SYNOPSIS
1201 optimize()
1202 thd Thread object
1203 check_opt Check/analyze/repair/optimize options
1204
1205 RETURN VALUES
1206 >0 Error
1207 0 Success
1208 */
1209
optimize(THD * thd,HA_CHECK_OPT * check_opt)1210 int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt)
1211 {
1212 DBUG_ENTER("ha_partition::optimize");
1213
1214 DBUG_RETURN(handle_opt_partitions(thd, check_opt, OPTIMIZE_PARTS));
1215 }
1216
1217
1218 /*
1219 Analyze table
1220
1221 SYNOPSIS
1222 analyze()
1223 thd Thread object
1224 check_opt Check/analyze/repair/optimize options
1225
1226 RETURN VALUES
1227 >0 Error
1228 0 Success
1229 */
1230
analyze(THD * thd,HA_CHECK_OPT * check_opt)1231 int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt)
1232 {
1233 DBUG_ENTER("ha_partition::analyze");
1234
1235 int result= handle_opt_partitions(thd, check_opt, ANALYZE_PARTS);
1236
1237 if ((result == 0) && m_file[0]
1238 && (m_file[0]->ha_table_flags() & HA_ONLINE_ANALYZE))
1239 {
1240 /* If this is ANALYZE TABLE that will not force table definition cache
1241 eviction, update statistics for the partition handler. */
1242 this->info(HA_STATUS_CONST | HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
1243 }
1244
1245 DBUG_RETURN(result);
1246 }
1247
1248
1249 /*
1250 Check table
1251
1252 SYNOPSIS
1253 check()
1254 thd Thread object
1255 check_opt Check/analyze/repair/optimize options
1256
1257 RETURN VALUES
1258 >0 Error
1259 0 Success
1260 */
1261
check(THD * thd,HA_CHECK_OPT * check_opt)1262 int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt)
1263 {
1264 DBUG_ENTER("ha_partition::check");
1265
1266 DBUG_RETURN(handle_opt_partitions(thd, check_opt, CHECK_PARTS));
1267 }
1268
1269
1270 /*
1271 Repair table
1272
1273 SYNOPSIS
1274 repair()
1275 thd Thread object
1276 check_opt Check/analyze/repair/optimize options
1277
1278 RETURN VALUES
1279 >0 Error
1280 0 Success
1281 */
1282
repair(THD * thd,HA_CHECK_OPT * check_opt)1283 int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt)
1284 {
1285 DBUG_ENTER("ha_partition::repair");
1286
1287 int res= handle_opt_partitions(thd, check_opt, REPAIR_PARTS);
1288 DBUG_RETURN(res);
1289 }
1290
1291 /**
1292 Assign to keycache
1293
1294 @param thd Thread object
1295 @param check_opt Check/analyze/repair/optimize options
1296
1297 @return
1298 @retval >0 Error
1299 @retval 0 Success
1300 */
1301
assign_to_keycache(THD * thd,HA_CHECK_OPT * check_opt)1302 int ha_partition::assign_to_keycache(THD *thd, HA_CHECK_OPT *check_opt)
1303 {
1304 DBUG_ENTER("ha_partition::assign_to_keycache");
1305
1306 DBUG_RETURN(handle_opt_partitions(thd, check_opt, ASSIGN_KEYCACHE_PARTS));
1307 }
1308
1309
1310 /**
1311 Preload to keycache
1312
1313 @param thd Thread object
1314 @param check_opt Check/analyze/repair/optimize options
1315
1316 @return
1317 @retval >0 Error
1318 @retval 0 Success
1319 */
1320
preload_keys(THD * thd,HA_CHECK_OPT * check_opt)1321 int ha_partition::preload_keys(THD *thd, HA_CHECK_OPT *check_opt)
1322 {
1323 DBUG_ENTER("ha_partition::preload_keys");
1324
1325 DBUG_RETURN(handle_opt_partitions(thd, check_opt, PRELOAD_KEYS_PARTS));
1326 }
1327
1328
1329 /*
1330 Handle optimize/analyze/check/repair of one partition
1331
1332 SYNOPSIS
1333 handle_opt_part()
1334 thd Thread object
1335 check_opt Options
1336 file Handler object of partition
1337 flag Optimize/Analyze/Check/Repair flag
1338
1339 RETURN VALUE
1340 >0 Failure
1341 0 Success
1342 */
1343
handle_opt_part(THD * thd,HA_CHECK_OPT * check_opt,uint part_id,uint flag)1344 int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt,
1345 uint part_id, uint flag)
1346 {
1347 int error;
1348 handler *file= m_file[part_id];
1349 DBUG_ENTER("handle_opt_part");
1350 DBUG_PRINT("enter", ("flag: %u", flag));
1351
1352 if (flag == OPTIMIZE_PARTS)
1353 error= file->ha_optimize(thd, check_opt);
1354 else if (flag == ANALYZE_PARTS)
1355 error= file->ha_analyze(thd, check_opt);
1356 else if (flag == CHECK_PARTS)
1357 {
1358 error= file->ha_check(thd, check_opt);
1359 if (!error ||
1360 error == HA_ADMIN_ALREADY_DONE ||
1361 error == HA_ADMIN_NOT_IMPLEMENTED)
1362 {
1363 if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1364 error= check_misplaced_rows(part_id, false);
1365 }
1366 }
1367 else if (flag == REPAIR_PARTS)
1368 {
1369 error= file->ha_repair(thd, check_opt);
1370 if (!error ||
1371 error == HA_ADMIN_ALREADY_DONE ||
1372 error == HA_ADMIN_NOT_IMPLEMENTED)
1373 {
1374 if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1375 error= check_misplaced_rows(part_id, true);
1376 }
1377 }
1378 else if (flag == ASSIGN_KEYCACHE_PARTS)
1379 error= file->assign_to_keycache(thd, check_opt);
1380 else if (flag == PRELOAD_KEYS_PARTS)
1381 error= file->preload_keys(thd, check_opt);
1382 else
1383 {
1384 DBUG_ASSERT(FALSE);
1385 error= 1;
1386 }
1387 if (error == HA_ADMIN_ALREADY_DONE)
1388 error= 0;
1389 DBUG_RETURN(error);
1390 }
1391
1392
1393 /*
1394 print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE
1395 (modelled after mi_check_print_msg)
1396 TODO: move this into the handler, or rewrite mysql_admin_table.
1397 */
1398 bool print_admin_msg(THD* thd, uint len,
1399 const char* msg_type,
1400 const char* db_name, String &table_name,
1401 const char* op_name, const char *fmt, ...)
1402 ATTRIBUTE_FORMAT(printf, 7, 8);
print_admin_msg(THD * thd,uint len,const char * msg_type,const char * db_name,String & table_name,const char * op_name,const char * fmt,...)1403 bool print_admin_msg(THD* thd, uint len,
1404 const char* msg_type,
1405 const char* db_name, String &table_name,
1406 const char* op_name, const char *fmt, ...)
1407 {
1408 va_list args;
1409 Protocol *protocol= thd->protocol;
1410 size_t length;
1411 size_t msg_length;
1412 char name[NAME_LEN*2+2];
1413 char *msgbuf;
1414 bool error= true;
1415
1416 if (!(msgbuf= (char*) my_malloc(key_memory_Partition_admin, len, MYF(0))))
1417 return true;
1418 va_start(args, fmt);
1419 msg_length= my_vsnprintf(msgbuf, len, fmt, args);
1420 va_end(args);
1421 if (msg_length >= (len - 1))
1422 goto err;
1423 msgbuf[len - 1]= 0; // healthy paranoia
1424
1425
1426 if (!thd->vio_ok())
1427 {
1428 sql_print_error("%s", msgbuf);
1429 goto err;
1430 }
1431
1432 length=(size_t)(strxmov(name, db_name, ".", table_name.c_ptr_safe(), NullS) - name);
1433 /*
1434 TODO: switch from protocol to push_warning here. The main reason we didn't
1435 it yet is parallel repair, which threads have no THD object accessible via
1436 current_thd.
1437
1438 Also we likely need to lock mutex here (in both cases with protocol and
1439 push_warning).
1440 */
1441 DBUG_PRINT("info",("print_admin_msg: %s, %s, %s, %s", name, op_name,
1442 msg_type, msgbuf));
1443 protocol->prepare_for_resend();
1444 protocol->store(name, length, system_charset_info);
1445 protocol->store(op_name, system_charset_info);
1446 protocol->store(msg_type, system_charset_info);
1447 protocol->store(msgbuf, msg_length, system_charset_info);
1448 if (protocol->write())
1449 {
1450 sql_print_error("Failed on my_net_write, writing to stderr instead: %s",
1451 msgbuf);
1452 goto err;
1453 }
1454 error= false;
1455 err:
1456 my_free(msgbuf);
1457 return error;
1458 }
1459
1460
1461 /*
1462 Handle optimize/analyze/check/repair of partitions
1463
1464 SYNOPSIS
1465 handle_opt_partitions()
1466 thd Thread object
1467 check_opt Options
1468 flag Optimize/Analyze/Check/Repair flag
1469
1470 RETURN VALUE
1471 >0 Failure
1472 0 Success
1473 */
1474
int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
                                        uint flag)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  int error;
  DBUG_ENTER("ha_partition::handle_opt_partitions");
  DBUG_PRINT("enter", ("flag= %u", flag));

  do
  {
    partition_element *part_elem= part_it++;
    /*
      when ALTER TABLE <CMD> PARTITION ...
      it should only do named partitions, otherwise all partitions
    */
    if (!(thd->lex->alter_info.partition_flags & ALTER_PARTITION_ADMIN) ||
        part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          /* Flat handler index of subpartition j within partition i. */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("Optimize subpartition %u (%s)",
                     part, sub_elem->partition_name));
          if (unlikely((error= handle_opt_part(thd, check_opt, part, flag))))
          {
            /* print a line which partition the error belongs to */
            if (error != HA_ADMIN_NOT_IMPLEMENTED &&
                error != HA_ADMIN_ALREADY_DONE &&
                error != HA_ADMIN_TRY_ALTER &&
                error != HA_ERR_TABLE_READONLY)
            {
              print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
                              table_share->db.str, table->alias,
                              opt_op_name[flag],
                              "Subpartition %s returned error",
                              sub_elem->partition_name);
            }
            /* reset part_state for the remaining partitions */
            /* (continues walking part_it from the current position) */
            do
            {
              if (part_elem->part_state == PART_ADMIN)
                part_elem->part_state= PART_NORMAL;
            } while ((part_elem= part_it++));
            DBUG_RETURN(error);
          }
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("Optimize partition %u (%s)", i,
                            part_elem->partition_name));
        if (unlikely((error= handle_opt_part(thd, check_opt, i, flag))))
        {
          /* print a line which partition the error belongs to */
          if (error != HA_ADMIN_NOT_IMPLEMENTED &&
              error != HA_ADMIN_ALREADY_DONE &&
              error != HA_ADMIN_TRY_ALTER)
          {
            print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[flag], "Partition %s returned error",
                            part_elem->partition_name);
          }
          /* reset part_state for the remaining partitions */
          do
          {
            if (part_elem->part_state == PART_ADMIN)
              part_elem->part_state= PART_NORMAL;
          } while ((part_elem= part_it++));
          DBUG_RETURN(error);
        }
      }
      /* Successfully processed: clear any PART_ADMIN marking. */
      part_elem->part_state= PART_NORMAL;
    }
  } while (++i < num_parts);
  DBUG_RETURN(FALSE);
}
1561
1562
1563 /**
1564 @brief Check and repair the table if necessary
1565
1566 @param thd Thread object
1567
1568 @retval TRUE Error/Not supported
1569 @retval FALSE Success
1570
1571 @note Called if open_table_from_share fails and ::is_crashed().
1572 */
1573
check_and_repair(THD * thd)1574 bool ha_partition::check_and_repair(THD *thd)
1575 {
1576 handler **file= m_file;
1577 DBUG_ENTER("ha_partition::check_and_repair");
1578
1579 do
1580 {
1581 if ((*file)->ha_check_and_repair(thd))
1582 DBUG_RETURN(TRUE);
1583 } while (*(++file));
1584 DBUG_RETURN(FALSE);
1585 }
1586
1587
1588 /**
  @brief Check if the table can be automatically repaired
1590
1591 @retval TRUE Can be auto repaired
1592 @retval FALSE Cannot be auto repaired
1593 */
1594
auto_repair(int error) const1595 bool ha_partition::auto_repair(int error) const
1596 {
1597 DBUG_ENTER("ha_partition::auto_repair");
1598
1599 /*
1600 As long as we only support one storage engine per table,
1601 we can use the first partition for this function.
1602 */
1603 DBUG_RETURN(m_file[0]->auto_repair(error));
1604 }
1605
1606
1607 /**
  @brief Check if the table is crashed
1609
1610 @retval TRUE Crashed
1611 @retval FALSE Not crashed
1612 */
1613
is_crashed() const1614 bool ha_partition::is_crashed() const
1615 {
1616 handler **file= m_file;
1617 DBUG_ENTER("ha_partition::is_crashed");
1618
1619 do
1620 {
1621 if ((*file)->is_crashed())
1622 DBUG_RETURN(TRUE);
1623 } while (*(++file));
1624 DBUG_RETURN(FALSE);
1625 }
1626
1627
1628 /*
1629 Prepare by creating a new partition
1630
1631 SYNOPSIS
1632 prepare_new_partition()
1633 table Table object
1634 create_info Create info from CREATE TABLE
1635 file Handler object of new partition
1636 part_name partition name
1637
1638 RETURN VALUE
1639 >0 Error
1640 0 Success
1641 */
1642
int ha_partition::prepare_new_partition(TABLE *tbl,
                                        HA_CREATE_INFO *create_info,
                                        handler *file, const char *part_name,
                                        partition_element *p_elem,
                                        uint disable_non_uniq_indexes)
{
  int error;
  DBUG_ENTER("prepare_new_partition");

  /*
    This call to set_up_table_before_create() is done for an alter table.
    So this may be the second time around for this partition_element,
    depending on how many partitions and subpartitions there were before,
    and how many there are now.
    The first time, on the CREATE, data_file_name and index_file_name
    came from the parser. They did not have the file name attached to
    the end. But if this partition is less than the total number of
    previous partitions, it's data_file_name has the filename attached.
    So we need to take the partition filename off if it exists.
    That file name may be different from part_name, which will be
    attached in append_file_to_dir().
  */
  truncate_partition_filename((char*) p_elem->data_file_name);
  truncate_partition_filename((char*) p_elem->index_file_name);

  if (unlikely((error= set_up_table_before_create(tbl, part_name, create_info,
                                                  p_elem))))
    goto error_create;

  if (!(file->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
    tbl->s->connect_string= p_elem->connect_string;
  /* Mark as part of an ALTER so the engine treats it as a temporary copy. */
  create_info->options|= HA_CREATE_TMP_ALTER;
  if ((error= file->ha_create(part_name, tbl, create_info)))
  {
    /*
      Added for safety, InnoDB reports HA_ERR_FOUND_DUPP_KEY
      if the table/partition already exists.
      If we return that error code, then print_error would try to
      get_dup_key on a non-existing partition.
      So return a more reasonable error code.
    */
    if (error == HA_ERR_FOUND_DUPP_KEY)
      error= HA_ERR_TABLE_EXIST;
    goto error_create;
  }
  DBUG_PRINT("info", ("partition %s created", part_name));
  if (unlikely((error= file->ha_open(tbl, part_name, m_mode,
                                     m_open_test_lock | HA_OPEN_NO_PSI_CALL |
                                     HA_OPEN_FOR_CREATE))))
    goto error_open;
  DBUG_PRINT("info", ("partition %s opened", part_name));

  /*
    Note: if you plan to add another call that may return failure,
    better to do it before external_lock() as cleanup_new_partition()
    assumes that external_lock() is last call that may fail here.
    Otherwise see description for cleanup_new_partition().
  */
  if (unlikely((error= file->ha_external_lock(ha_thd(), F_WRLCK))))
    goto error_external_lock;
  DBUG_PRINT("info", ("partition %s external locked", part_name));

  if (disable_non_uniq_indexes)
    file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);

  DBUG_RETURN(0);
  /* Error cascade: undo exactly the steps that succeeded, in reverse. */
error_external_lock:
  (void) file->ha_close();
error_open:
  (void) file->delete_table(part_name);
error_create:
  DBUG_RETURN(error);
}
1716
1717
1718 /*
1719 Cleanup by removing all created partitions after error
1720
1721 SYNOPSIS
1722 cleanup_new_partition()
1723 part_count Number of partitions to remove
1724
1725 RETURN VALUE
1726 NONE
1727
1728 DESCRIPTION
1729 This function is called immediately after prepare_new_partition() in
1730 case the latter fails.
1731
1732 In prepare_new_partition() last call that may return failure is
1733 external_lock(). That means if prepare_new_partition() fails,
1734 partition does not have external lock. Thus no need to call
1735 external_lock(F_UNLCK) here.
1736
1737 TODO:
1738 We must ensure that in the case that we get an error during the process
1739 that we call external_lock with F_UNLCK, close the table and delete the
1740 table in the case where we have been successful with prepare_handler.
1741 We solve this by keeping an array of successful calls to prepare_handler
1742 which can then be used to undo the call.
1743 */
1744
cleanup_new_partition(uint part_count)1745 void ha_partition::cleanup_new_partition(uint part_count)
1746 {
1747 DBUG_ENTER("ha_partition::cleanup_new_partition");
1748
1749 if (m_added_file)
1750 {
1751 THD *thd= ha_thd();
1752 handler **file= m_added_file;
1753 while ((part_count > 0) && (*file))
1754 {
1755 (*file)->ha_external_unlock(thd);
1756 (*file)->ha_close();
1757
1758 /* Leave the (*file)->delete_table(part_name) to the ddl-log */
1759
1760 file++;
1761 part_count--;
1762 }
1763 m_added_file= NULL;
1764 }
1765 DBUG_VOID_RETURN;
1766 }
1767
1768 /*
1769 Implement the partition changes defined by ALTER TABLE of partitions
1770
1771 SYNOPSIS
1772 change_partitions()
1773 create_info HA_CREATE_INFO object describing all
1774 fields and indexes in table
1775 path Complete path of db and table name
1776 out: copied Output parameter where number of copied
1777 records are added
1778 out: deleted Output parameter where number of deleted
1779 records are added
1780 pack_frm_data Reference to packed frm file
1781 pack_frm_len Length of packed frm file
1782
1783 RETURN VALUE
1784 >0 Failure
1785 0 Success
1786
1787 DESCRIPTION
1788 Add and copy if needed a number of partitions, during this operation
1789 no other operation is ongoing in the server. This is used by
1790 ADD PARTITION all types as well as by REORGANIZE PARTITION. For
1791 one-phased implementations it is used also by DROP and COALESCE
1792 PARTITIONs.
1793 One-phased implementation needs the new frm file, other handlers will
1794 get zero length and a NULL reference here.
1795 */
1796
int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
                                    const char *path,
                                    ulonglong * const copied,
                                    ulonglong * const deleted,
                                    const uchar *pack_frm_data
                                    __attribute__((unused)),
                                    size_t pack_frm_len
                                    __attribute__((unused)))
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  List_iterator <partition_element> t_it(m_part_info->temp_partitions);
  char part_name_buff[FN_REFLEN + 1];
  uint num_parts= m_part_info->partitions.elements;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint num_remain_partitions, part_count, orig_count;
  handler **new_file_array;
  int error= 1;
  bool first;
  uint temp_partitions= m_part_info->temp_partitions.elements;
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::change_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   part_name_buff)));
  m_reorged_parts= 0;
  /* Treat a non-subpartitioned table as having one subpartition per part. */
  if (!m_part_info->is_sub_partitioned())
    num_subparts= 1;

  /*
    Step 1:
      Calculate number of reorganised partitions and allocate space for
      their handler references.
  */
  if (temp_partitions)
  {
    m_reorged_parts= temp_partitions * num_subparts;
  }
  else
  {
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_CHANGED ||
          part_elem->part_state == PART_REORGED_DROPPED)
      {
        m_reorged_parts+= num_subparts;
      }
    } while (++i < num_parts);
  }
  /* +1 slot for the NULL terminator of the handler array. */
  if (m_reorged_parts &&
      !(m_reorged_file= (handler**) thd->calloc(sizeof(handler*)*
                                                (m_reorged_parts + 1))))
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

  /*
    Step 2:
      Calculate number of partitions after change and allocate space for
      their handler references.
  */
  num_remain_partitions= 0;
  if (temp_partitions)
  {
    num_remain_partitions= num_parts * num_subparts;
  }
  else
  {
    part_it.rewind();
    i= 0;
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_NORMAL ||
          part_elem->part_state == PART_TO_BE_ADDED ||
          part_elem->part_state == PART_CHANGED)
      {
        num_remain_partitions+= num_subparts;
      }
    } while (++i < num_parts);
  }
  /*
    Double-sized allocation: the first half becomes new_file_array,
    the second half (after the NULL terminator) becomes m_added_file.
  */
  if (!(new_file_array= ((handler**)
                         thd->calloc(sizeof(handler*)*
                                     (2*(num_remain_partitions + 1))))))
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
  m_added_file= &new_file_array[num_remain_partitions + 1];

  /*
    Step 3:
      Fill m_reorged_file with handler references and NULL at the end
  */
  if (m_reorged_parts)
  {
    i= 0;
    part_count= 0;
    first= TRUE;
    part_it.rewind();
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_CHANGED ||
          part_elem->part_state == PART_REORGED_DROPPED)
      {
        memcpy((void*)&m_reorged_file[part_count],
               (void*)&m_file[i*num_subparts],
               sizeof(handler*)*num_subparts);
        part_count+= num_subparts;
      }
      else if (first && temp_partitions &&
               part_elem->part_state == PART_TO_BE_ADDED)
      {
        /*
          When doing an ALTER TABLE REORGANIZE PARTITION a number of
          partitions is to be reorganised into a set of new partitions.
          The reorganised partitions are in this case in the temp_partitions
          list. We copy all of them in one batch and thus we only do this
          until we find the first partition with state PART_TO_BE_ADDED
          since this is where the new partitions go in and where the old
          ones used to be.
        */
        first= FALSE;
        DBUG_ASSERT(((i*num_subparts) + m_reorged_parts) <= m_file_tot_parts);
        memcpy((void*)m_reorged_file, &m_file[i*num_subparts],
               sizeof(handler*)*m_reorged_parts);
      }
    } while (++i < num_parts);
  }

  /*
    Step 4:
      Fill new_array_file with handler references. Create the handlers if
      needed.
  */
  i= 0;
  part_count= 0;       /* index into new_file_array */
  orig_count= 0;       /* index into the existing m_file array */
  first= TRUE;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_NORMAL)
    {
      /* Unchanged partition: reuse the existing handler objects. */
      DBUG_ASSERT(orig_count + num_subparts <= m_file_tot_parts);
      memcpy((void*)&new_file_array[part_count], (void*)&m_file[orig_count],
             sizeof(handler*)*num_subparts);
      part_count+= num_subparts;
      orig_count+= num_subparts;
    }
    else if (part_elem->part_state == PART_CHANGED ||
             part_elem->part_state == PART_TO_BE_ADDED)
    {
      uint j= 0;
      Parts_share_refs *p_share_refs;
      /*
        The Handler_shares for each partition's handler can be allocated
        within this handler, since there will not be any more instances of the
        new partitions, until the table is reopened after the ALTER succeeded.
      */
      p_share_refs= new Parts_share_refs;
      if (!p_share_refs)
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      if (p_share_refs->init(num_subparts))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      if (m_new_partitions_share_refs.push_back(p_share_refs, thd->mem_root))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      do
      {
        handler **new_file= &new_file_array[part_count++];
        if (!(*new_file=
              get_new_handler(table->s,
                              thd->mem_root,
                              part_elem->engine_type)))
        {
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        }
        if ((*new_file)->set_ha_share_ref(&p_share_refs->ha_shares[j]))
        {
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        }
      } while (++j < num_subparts);
      if (part_elem->part_state == PART_CHANGED)
        orig_count+= num_subparts;
      else if (temp_partitions && first)
      {
        /* Skip over the old handlers of all reorganised partitions once. */
        orig_count+= (num_subparts * temp_partitions);
        first= FALSE;
      }
    }
  } while (++i < num_parts);
  first= FALSE;
  /*
    Step 5:
      Create the new partitions and also open, lock and call external_lock
      on them to prepare them for copy phase and also for later close
      calls
  */

  /*
    Before creating new partitions check whether indexes are disabled
    in  the  partitions.
  */

  uint disable_non_uniq_indexes= indexes_are_disabled();

  i= 0;
  part_count= 0;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_ADDED ||
        part_elem->part_state == PART_CHANGED)
    {
      /*
        A new partition needs to be created PART_TO_BE_ADDED means an
        entirely new partition and PART_CHANGED means a changed partition
        that will still exist with either more or less data in it.
      */
      uint name_variant= NORMAL_PART_NAME;
      if (part_elem->part_state == PART_CHANGED ||
          (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions))
        name_variant= TEMP_PART_NAME;
      if (m_part_info->is_sub_partitioned())
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint j= 0, part;
        do
        {
          partition_element *sub_elem= sub_it++;
          if (unlikely((error=
                        create_subpartition_name(part_name_buff,
                                                 sizeof(part_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 name_variant))))
          {
            cleanup_new_partition(part_count);
            DBUG_RETURN(error);
          }
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("Add subpartition %s", part_name_buff));
          if (unlikely((error=
                        prepare_new_partition(table, create_info,
                                              new_file_array[part],
                                              (const char *)part_name_buff,
                                              sub_elem,
                                              disable_non_uniq_indexes))))
          {
            cleanup_new_partition(part_count);
            DBUG_RETURN(error);
          }

          m_added_file[part_count++]= new_file_array[part];
        } while (++j < num_subparts);
      }
      else
      {
        if (unlikely((error=
                      create_partition_name(part_name_buff,
                                            sizeof(part_name_buff), path,
                                            part_elem->partition_name,
                                            name_variant, TRUE))))
        {
          cleanup_new_partition(part_count);
          DBUG_RETURN(error);
        }

        DBUG_PRINT("info", ("Add partition %s", part_name_buff));
        if (unlikely((error=
                      prepare_new_partition(table, create_info,
                                            new_file_array[i],
                                            (const char *)part_name_buff,
                                            part_elem,
                                            disable_non_uniq_indexes))))
        {
          cleanup_new_partition(part_count);
          DBUG_RETURN(error);
        }

        m_added_file[part_count++]= new_file_array[i];
      }
    }
  } while (++i < num_parts);

  /*
    Step 6:
      State update to prepare for next write of the frm file.
  */
  i= 0;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_ADDED)
      part_elem->part_state= PART_IS_ADDED;
    else if (part_elem->part_state == PART_CHANGED)
      part_elem->part_state= PART_IS_CHANGED;
    else if (part_elem->part_state == PART_REORGED_DROPPED)
      part_elem->part_state= PART_TO_BE_DROPPED;
  } while (++i < num_parts);
  for (i= 0; i < temp_partitions; i++)
  {
    partition_element *part_elem= t_it++;
    DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED);
    part_elem->part_state= PART_TO_BE_DROPPED;
  }
  DBUG_ASSERT(m_new_file == 0);
  m_new_file= new_file_array;
  if (unlikely((error= copy_partitions(copied, deleted))))
  {
    /*
      Close and unlock the new temporary partitions.
      They will later be deleted through the ddl-log.
    */
    cleanup_new_partition(part_count);
    m_new_file= 0;
  }
  DBUG_RETURN(error);
}
2124
2125
2126 /*
2127 Copy partitions as part of ALTER TABLE of partitions
2128
2129 SYNOPSIS
2130 copy_partitions()
2131 out:copied Number of records copied
2132 out:deleted Number of records deleted
2133
2134 RETURN VALUE
2135 >0 Error code
2136 0 Success
2137
2138 DESCRIPTION
2139 change_partitions has done all the preparations, now it is time to
2140 actually copy the data from the reorganised partitions to the new
2141 partitions.
2142 */
2143
int ha_partition::copy_partitions(ulonglong * const copied,
                                  ulonglong * const deleted)
{
  uint reorg_part= 0;
  int result= 0;
  longlong func_value;               /* out-param of get_partition_id(); not read here */
  DBUG_ENTER("ha_partition::copy_partitions");

  if (m_part_info->linear_hash_ind)
  {
    /*
      Linear hash/key partitioning: refresh the hash mask for the new
      number of (sub)partitions before rows are re-mapped below.
    */
    if (m_part_info->part_type == HASH_PARTITION)
      set_linear_hash_mask(m_part_info, m_part_info->num_parts);
    else
      set_linear_hash_mask(m_part_info, m_part_info->num_subparts);
  }
  else if (m_part_info->part_type == VERSIONING_PARTITION)
  {
    /* System-versioned partitioning: re-validate cached constants first. */
    if (m_part_info->check_constants(ha_thd(), m_part_info))
      goto init_error;
  }

  /* Full scan of each reorganised partition, redistributing its rows. */
  while (reorg_part < m_reorged_parts)
  {
    handler *file= m_reorged_file[reorg_part];
    uint32 new_part;

    late_extra_cache(reorg_part);
    if (unlikely((result= file->ha_rnd_init_with_error(1))))
      goto init_error;
    while (TRUE)
    {
      if ((result= file->ha_rnd_next(m_rec0)))
      {
        if (result != HA_ERR_END_OF_FILE)
          goto error;
        /*
          End-of-file reached, break out to continue with next partition or
          end the copy process.
        */
        break;
      }
      /* Found record to insert into new handler */
      if (m_part_info->get_partition_id(m_part_info, &new_part,
                                        &func_value))
      {
        /*
          This record is in the original table but will not be in the new
          table since it doesn't fit into any partition any longer due to
          changed partitioning ranges or list values.
        */
        (*deleted)++;
      }
      else
      {
        /* Copy record to new handler */
        (*copied)++;
        DBUG_ASSERT(!m_new_file[new_part]->row_logging);
        result= m_new_file[new_part]->ha_write_row(m_rec0);
        if (result)
          goto error;
      }
    }
    late_extra_no_cache(reorg_part);
    file->ha_rnd_end();
    reorg_part++;
  }
  DBUG_EXECUTE_IF("debug_abort_copy_partitions",
                  DBUG_RETURN(HA_ERR_UNSUPPORTED); );
  DBUG_RETURN(FALSE);
error:
  /* End the scan on the partition where the failure happened. */
  m_reorged_file[reorg_part]->ha_rnd_end();
init_error:
  DBUG_RETURN(result);
}
2218
2219 /*
2220 Update create info as part of ALTER TABLE
2221
2222 SYNOPSIS
2223 update_create_info()
2224 create_info Create info from ALTER TABLE
2225
2226 RETURN VALUE
2227 NONE
2228
2229 DESCRIPTION
    Forward this handler call to the storage engine for each
2231 partition handler. The data_file_name for each partition may
2232 need to be reset if the tablespace was moved. Use a dummy
2233 HA_CREATE_INFO structure and transfer necessary data.
2234 */
2235
void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_partition::update_create_info");

  /*
    Fix for bug#38751, some engines needs info-calls in ALTER.
    Archive need this since it flushes in ::info.
    HA_STATUS_AUTO is optimized so it will not always be forwarded
    to all partitions, but HA_STATUS_VARIABLE will.
  */
  info(HA_STATUS_VARIABLE | HA_STATUS_OPEN);

  info(HA_STATUS_AUTO);

  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
    create_info->auto_increment_value= stats.auto_increment_value;

  /*
    DATA DIRECTORY and INDEX DIRECTORY are never applied to the whole
    partitioned table, only its parts.
    NOTE(review): (const char*) -1 appears to be a sentinel marking a call
    from the ALTER code path -- confirm against the caller.
  */
  my_bool from_alter= (create_info->data_file_name == (const char*) -1);
  create_info->data_file_name= create_info->index_file_name= NULL;

  if (!(m_file[0]->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
    create_info->connect_string= null_clex_str;

  /*
    We do not need to update the individual partition DATA DIRECTORY settings
    since they can be changed by ALTER TABLE ... REORGANIZE PARTITIONS.
  */
  if (from_alter)
    DBUG_VOID_RETURN;

  /*
    send Handler::update_create_info() to the storage engine for each
    partition that currently has a handler object. Using a dummy
    HA_CREATE_INFO structure to collect DATA and INDEX DIRECTORYs.
  */

  List_iterator<partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem, *sub_elem;
  uint num_subparts= m_part_info->num_subparts;
  uint num_parts= (num_subparts ? m_file_tot_parts / num_subparts :
                   m_file_tot_parts);
  HA_CREATE_INFO dummy_info;
  dummy_info.init();

  /*
    Since update_create_info() can be called from mysql_prepare_alter_table()
    when not all handlers are set up, we look for that condition first.
    If all handlers are not available, do not call update_create_info for any.
  */
  uint i, j, part;
  /* First pass: verify every (sub)partition element and handler exists. */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (!part_elem)
      DBUG_VOID_RETURN;
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        if (!sub_elem)
          DBUG_VOID_RETURN;
        part= i * num_subparts + j;
        if (part >= m_file_tot_parts || !m_file[part])
          DBUG_VOID_RETURN;
      }
    }
    else
    {
      if (!m_file[i])
        DBUG_VOID_RETURN;
    }
  }
  part_it.rewind();

  /* Second pass: collect per-(sub)partition DATA/INDEX DIRECTORY names. */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    DBUG_ASSERT(part_elem);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        DBUG_ASSERT(sub_elem);
        part= i * num_subparts + j;
        DBUG_ASSERT(part < m_file_tot_parts);
        DBUG_ASSERT(m_file[part]);
        /* Reset before each call so stale names cannot carry over. */
        dummy_info.data_file_name= dummy_info.index_file_name = NULL;
        m_file[part]->update_create_info(&dummy_info);
        sub_elem->data_file_name = (char*) dummy_info.data_file_name;
        sub_elem->index_file_name = (char*) dummy_info.index_file_name;
      }
    }
    else
    {
      DBUG_ASSERT(m_file[i]);
      dummy_info.data_file_name= dummy_info.index_file_name= NULL;
      m_file[i]->update_create_info(&dummy_info);
      part_elem->data_file_name = (char*) dummy_info.data_file_name;
      part_elem->index_file_name = (char*) dummy_info.index_file_name;
    }
  }
  DBUG_VOID_RETURN;
}
2347
2348
2349 /**
2350 Change the internal TABLE_SHARE pointer
2351
2352 @param table_arg TABLE object
2353 @param share New share to use
2354
2355 @note Is used in error handling in delete_table.
2356 All handlers should exist (lock_partitions should not be used)
2357 */
2358
change_table_ptr(TABLE * table_arg,TABLE_SHARE * share)2359 void ha_partition::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2360 {
2361 handler **file_array;
2362 table= table_arg;
2363 table_share= share;
2364 /*
2365 m_file can be NULL when using an old cached table in DROP TABLE, when the
2366 table just has REMOVED PARTITIONING, see Bug#42438
2367 */
2368 if (m_file)
2369 {
2370 file_array= m_file;
2371 DBUG_ASSERT(*file_array);
2372 do
2373 {
2374 (*file_array)->change_table_ptr(table_arg, share);
2375 } while (*(++file_array));
2376 }
2377
2378 if (m_added_file && m_added_file[0])
2379 {
2380 /* if in middle of a drop/rename etc */
2381 file_array= m_added_file;
2382 do
2383 {
2384 (*file_array)->change_table_ptr(table_arg, share);
2385 } while (*(++file_array));
2386 }
2387 }
2388
2389
2390 /**
2391 Handle delete and rename table
2392
2393 @param from Full path of old table
2394 @param to Full path of new table. May be NULL in case of delete
2395
2396 @return Operation status
2397 @retval >0 Error
2398 @retval 0 Success
2399
2400 @note Common routine to handle delete_table and rename_table.
2401 The routine uses the partition handler file to get the
2402 names of the partition instances. Both these routines
2403 are called after creating the handler without table
2404 object and thus the file is needed to discover the
2405 names of the partitions and the underlying storage engines.
2406 */
2407
uint ha_partition::del_ren_table(const char *from, const char *to)
{
  int save_error= 0;
  int error;
  char from_buff[FN_REFLEN + 1], to_buff[FN_REFLEN + 1],
       from_lc_buff[FN_REFLEN], to_lc_buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *from_path;
  const char *to_path= NULL;
  uint i;
  handler **file, **abort_file;
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::del_ren_table");

  /* Load partition names/engines from the .par file into m_file_buffer. */
  if (get_from_handler_file(from, thd->mem_root, false))
    DBUG_RETURN(my_errno ? my_errno : ENOENT);
  DBUG_ASSERT(m_file_buffer);
  DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)"));
  name_buffer_ptr= m_name_buffer_ptr;

  file= m_file;
  /* The command should be logged with IF EXISTS if using a shared table */
  if (m_file[0]->ht->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
    thd->replication_flags|= OPTION_IF_EXISTS;

  if (to == NULL)
  {
    /*
      Delete table, start by delete the .par file. If error, break, otherwise
      delete as much as possible.
    */
    if (unlikely((error= handler::delete_table(from))))
      DBUG_RETURN(error);
  }

  if (ha_check_if_updates_are_ignored(thd, partition_ht(),
                                      to ? "RENAME" : "DROP"))
    DBUG_RETURN(0);

  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  from_path= get_canonical_filename(*file, from, from_lc_buff);
  if (to != NULL)
    to_path= get_canonical_filename(*file, to, to_lc_buff);
  i= 0;
  /* Rename or delete every underlying partition file in turn. */
  do
  {
    if (unlikely((error= create_partition_name(from_buff, sizeof(from_buff),
                                               from_path, name_buffer_ptr,
                                               NORMAL_PART_NAME, FALSE))))
      goto rename_error;

    if (to != NULL)
    {                                           // Rename branch
      if (unlikely((error= create_partition_name(to_buff, sizeof(to_buff),
                                                 to_path, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto rename_error;
      error= (*file)->ha_rename_table(from_buff, to_buff);
      if (unlikely(error))
        goto rename_error;
    }
    else                                        // delete branch
    {
      /* Best effort: record the error below but keep deleting the rest. */
      error= (*file)->delete_table(from_buff);
    }
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
    if (unlikely(error))
      save_error= error;
    i++;
  } while (*(++file));
  if (to != NULL)
  {
    /* Finally rename the .par file itself. */
    if (unlikely((error= handler::rename_table(from, to))))
    {
      /* Try to revert everything, ignore errors */
      (void) handler::rename_table(to, from);
      goto rename_error;
    }
  }

  /* Update .par file in the handlers that supports it */
  if ((*m_file)->ht->create_partitioning_metadata)
  {
    error= (*m_file)->ht->create_partitioning_metadata(to, from,
                                                       to == NULL ?
                                                       CHF_DELETE_FLAG :
                                                       CHF_RENAME_FLAG);
    DBUG_EXECUTE_IF("failed_create_partitioning_metadata",
                    { my_message_sql(ER_OUT_OF_RESOURCES,"Simulated crash",MYF(0));
                      error= 1;
                    });
    if (error)
    {
      if (to)
      {
        /* Roll back the top-level rename and the metadata change. */
        (void) handler::rename_table(to, from);
        (void) (*m_file)->ht->create_partitioning_metadata(from, to,
                                                           CHF_RENAME_FLAG);
        goto rename_error;
      }
      else
        save_error=error;
    }
  }
  DBUG_RETURN(save_error);

rename_error:
  /*
    Undo the per-partition renames done so far; 'file' points one past the
    partition that failed, so every handler before it is reverted.
    NOTE(review): when reached from the delete branch, to_path is still
    NULL here -- presumably create_partition_name tolerates a NULL path;
    confirm.
  */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    /* Revert the rename, back from 'to' to the original 'from' */
    if (!create_partition_name(from_buff, sizeof(from_buff), from_path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE) &&
        !create_partition_name(to_buff, sizeof(to_buff), to_path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE))
    {
      /* Ignore error here */
      (void) (*file)->ha_rename_table(to_buff, from_buff);
    }
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  DBUG_RETURN(error);
}
2537
count_query_cache_dependant_tables(uint8 * tables_type)2538 uint ha_partition::count_query_cache_dependant_tables(uint8 *tables_type)
2539 {
2540 DBUG_ENTER("ha_partition::count_query_cache_dependant_tables");
2541 /* Here we rely on the fact that all tables are of the same type */
2542 uint8 type= m_file[0]->table_cache_type();
2543 (*tables_type)|= type;
2544 DBUG_PRINT("enter", ("cnt: %u", (uint) m_tot_parts));
2545 /*
2546 We need save underlying tables only for HA_CACHE_TBL_ASKTRANSACT:
2547 HA_CACHE_TBL_NONTRANSACT - because all changes goes through partition table
2548 HA_CACHE_TBL_NOCACHE - because will not be cached
2549 HA_CACHE_TBL_TRANSACT - QC need to know that such type present
2550 */
2551 DBUG_RETURN(type == HA_CACHE_TBL_ASKTRANSACT ? m_tot_parts : 0);
2552 }
2553
/**
  Register one underlying partition table as a query cache dependency.

  @param thd             Thread handle
  @param engine_key      Engine-side key (normalized path plus the
                         #P#<part>[#SP#<subpart>] suffix built by the caller)
  @param engine_key_len  Length of engine_key, including the terminating zero
  @param cache_key       Query-cache key (table cache key plus same suffix)
  @param cache_key_len   Length of cache_key
  @param type            Table cache type of the underlying partition
  @param cache           Query cache to register into
  @param block_table     [in,out] Current query cache block table entry
  @param file            Handler object of the underlying partition
  @param n               [in,out] Count of tables registered so far

  @retval TRUE   Failure (the query is also marked as not cacheable)
  @retval FALSE  Success
*/
my_bool ha_partition::
reg_query_cache_dependant_table(THD *thd,
                                char *engine_key, uint engine_key_len,
                                char *cache_key, uint cache_key_len,
                                uint8 type,
                                Query_cache *cache,
                                Query_cache_block_table **block_table,
                                handler *file,
                                uint *n)
{
  DBUG_ENTER("ha_partition::reg_query_cache_dependant_table");
  qc_engine_callback engine_callback;
  ulonglong engine_data;
  /* ask the underlying engine whether this table may be cached */
  if (!file->register_query_cache_table(thd, engine_key,
                                        engine_key_len,
                                        &engine_callback,
                                        &engine_data))
  {
    DBUG_PRINT("qcache", ("Handler does not allow caching for %.*s",
                          engine_key_len, engine_key));
    /*
      As this can change from call to call, don't reset
      thd->lex->safe_to_cache_query here
    */
    thd->query_cache_is_applicable= 0;          // Query can't be cached
    DBUG_RETURN(TRUE);
  }
  /* Advance to the next block table slot and assign its sequence number. */
  (++(*block_table))->n= ++(*n);
  if (!cache->insert_table(thd, cache_key_len,
                           cache_key, (*block_table),
                           (uint32) table_share->db.length,
                           (uint8) (cache_key_len -
                                    table_share->table_cache_key.length),
                           type,
                           engine_callback, engine_data,
                           FALSE))
    DBUG_RETURN(TRUE);
  DBUG_RETURN(FALSE);
}
2594
2595
/**
  Register all underlying partitions as query cache dependent tables.

  Builds, for each (sub)partition, a pair of keys that share the same
  per-partition suffix:
    engine key: <normalized_path>#P#<part>[#SP#<subpart>]
    cache key:  <table_cache_key>#P#<part>[#SP#<subpart>]
  and registers the corresponding underlying handler with the query cache.

  @retval TRUE   Failure
  @retval FALSE  Success
*/
my_bool ha_partition::
register_query_cache_dependant_tables(THD *thd,
                                      Query_cache *cache,
                                      Query_cache_block_table **block_table,
                                      uint *n)
{
  char *engine_key_end, *query_cache_key_end;
  uint i;
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  int diff_length;
  List_iterator<partition_element> part_it(m_part_info->partitions);
  char engine_key[FN_REFLEN], query_cache_key[FN_REFLEN];
  DBUG_ENTER("ha_partition::register_query_cache_dependant_tables");

  /* see ha_partition::count_query_cache_dependant_tables */
  if (m_file[0]->table_cache_type() != HA_CACHE_TBL_ASKTRANSACT)
    DBUG_RETURN(FALSE);                         // nothing to register

  /* prepare static part of the key */
  memcpy(engine_key, table_share->normalized_path.str,
         table_share->normalized_path.length);
  memcpy(query_cache_key, table_share->table_cache_key.str,
         table_share->table_cache_key.length);

  /* Length difference between the two key prefixes (see suffix reuse below) */
  diff_length= ((int) table_share->table_cache_key.length -
                (int) table_share->normalized_path.length -1);

  engine_key_end= engine_key + table_share->normalized_path.length;
  query_cache_key_end= query_cache_key + table_share->table_cache_key.length -1;

  /* Append the "#P#" partition marker to both keys. */
  engine_key_end[0]= engine_key_end[2]= query_cache_key_end[0]=
    query_cache_key_end[2]= '#';
  query_cache_key_end[1]= engine_key_end[1]= 'P';
  engine_key_end+= 3;
  query_cache_key_end+= 3;

  i= 0;
  do
  {
    partition_element *part_elem= part_it++;
    /* Write the partition name after "#P#"; engine_pos points past it. */
    char *engine_pos= strmov(engine_key_end, part_elem->partition_name);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      partition_element *sub_elem;
      uint j= 0, part;
      /* Append the "#SP#" subpartition marker. */
      engine_pos[0]= engine_pos[3]= '#';
      engine_pos[1]= 'S';
      engine_pos[2]= 'P';
      engine_pos += 4;
      do
      {
        char *end;
        uint length;
        sub_elem= subpart_it++;
        part= i * num_subparts + j;
        /* we store the end \0 as part of the key */
        end= strmov(engine_pos, sub_elem->partition_name) + 1;
        length= (uint)(end - engine_key);
        /* Copy the suffix and end 0 to query cache key */
        memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end));
        if (reg_query_cache_dependant_table(thd, engine_key, length,
                                            query_cache_key,
                                            length + diff_length,
                                            m_file[part]->table_cache_type(),
                                            cache,
                                            block_table, m_file[part],
                                            n))
          DBUG_RETURN(TRUE);
      } while (++j < num_subparts);
    }
    else
    {
      char *end= engine_pos+1;                  // copy end \0
      uint length= (uint)(end - engine_key);
      /* Copy the suffix and end 0 to query cache key */
      memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end));
      if (reg_query_cache_dependant_table(thd, engine_key, length,
                                          query_cache_key,
                                          length + diff_length,
                                          m_file[i]->table_cache_type(),
                                          cache,
                                          block_table, m_file[i],
                                          n))
        DBUG_RETURN(TRUE);
    }
  } while (++i < num_parts);
  DBUG_PRINT("info", ("cnt: %u", (uint)m_tot_parts));
  DBUG_RETURN(FALSE);
}
2687
2688
2689 /**
2690 Set up table share object before calling create on underlying handler
2691
2692 @param table Table object
2693 @param info Create info
2694 @param part_elem[in,out] Pointer to used partition_element, searched if NULL
2695
2696 @return status
2697 @retval TRUE Error
2698 @retval FALSE Success
2699
2700 @details
2701 Set up
2702 1) Comment on partition
2703 2) MAX_ROWS, MIN_ROWS on partition
2704 3) Index file name on partition
2705 4) Data file name on partition
2706 */
2707
int ha_partition::set_up_table_before_create(TABLE *tbl,
                    const char *partition_name_with_path,
                    HA_CREATE_INFO *info,
                    partition_element *part_elem)
{
  int error= 0;
  LEX_CSTRING part_name;
  THD *thd= ha_thd();
  DBUG_ENTER("set_up_table_before_create");

  DBUG_ASSERT(part_elem);

  /* Defensive check for release builds (assert above covers debug). */
  if (!part_elem)
    DBUG_RETURN(1);
  /* Propagate per-partition MAX_ROWS / MIN_ROWS to the table share. */
  tbl->s->max_rows= part_elem->part_max_rows;
  tbl->s->min_rows= part_elem->part_min_rows;
  /* Partition file name = everything after the last directory separator. */
  part_name.str= strrchr(partition_name_with_path, FN_LIBCHAR)+1;
  part_name.length= strlen(part_name.str);
  /* Append the partition name to INDEX/DATA DIRECTORY when either is set. */
  if ((part_elem->index_file_name &&
       (error= append_file_to_dir(thd,
                                  (const char**)&part_elem->index_file_name,
                                  &part_name))) ||
      (part_elem->data_file_name &&
       (error= append_file_to_dir(thd,
                                  (const char**)&part_elem->data_file_name,
                                  &part_name))))
  {
    DBUG_RETURN(error);
  }
  info->index_file_name= part_elem->index_file_name;
  info->data_file_name= part_elem->data_file_name;
  /* Per-partition CONNECTION string (used e.g. by federated-style engines) */
  info->connect_string= part_elem->connect_string;
  if (info->connect_string.length)
    info->used_fields|= HA_CREATE_USED_CONNECTION;
  tbl->s->connect_string= part_elem->connect_string;
  DBUG_RETURN(0);
}
2745
2746
2747 /*
2748 Add two names together
2749
2750 SYNOPSIS
2751 name_add()
2752 out:dest Destination string
2753 first_name First name
2754 sec_name Second name
2755
2756 RETURN VALUE
2757 >0 Error
2758 0 Success
2759
2760 DESCRIPTION
2761 Routine used to add two names with '_' in between then. Service routine
2762 to create_handler_file
2763 Include the NULL in the count of characters since it is needed as separator
2764 between the partition names.
2765 */
2766
name_add(char * dest,const char * first_name,const char * sec_name)2767 static uint name_add(char *dest, const char *first_name, const char *sec_name)
2768 {
2769 return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1;
2770 }
2771
2772
2773 /**
2774 Create the special .par file
2775
2776 @param name Full path of table name
2777
2778 @return Operation status
  @retval FALSE  Success
  @retval TRUE   Error
2781
2782 @note
2783 Method used to create handler file with names of partitions, their
2784 engine types and the number of partitions.
2785 */
2786
bool ha_partition::create_handler_file(const char *name)
{
  partition_element *part_elem, *subpart_elem;
  size_t i, j, part_name_len, subpart_name_len;
  size_t tot_partition_words, tot_name_len, num_parts;
  size_t tot_parts= 0;
  size_t tot_len_words, tot_len_byte, chksum, tot_name_words;
  char *name_buffer_ptr;
  uchar *file_buffer, *engine_array;
  bool result= TRUE;                 /* TRUE = error; cleared on success */
  char file_name[FN_REFLEN];
  char part_name[FN_REFLEN];
  char subpart_name[FN_REFLEN];
  File file;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("create_handler_file");

  num_parts= m_part_info->partitions.elements;
  DBUG_PRINT("enter", ("table name: %s num_parts: %zu", name, num_parts));
  tot_name_len= 0;
  /*
    First pass: count partitions to be recorded and the total length of
    their (file-system encoded) names, skipping partitions in states that
    will not exist after the current operation completes.
  */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    tablename_to_filename(part_elem->partition_name, part_name,
                          FN_REFLEN);
    part_name_len= strlen(part_name);
    if (!m_is_sub_partitioned)
    {
      tot_name_len+= part_name_len + 1;
      tot_parts++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(subpart_elem->partition_name,
                              subpart_name,
                              FN_REFLEN);
        subpart_name_len= strlen(subpart_name);
        /* +5 = "#SP#" separator (4) plus terminating NUL (1) */
        tot_name_len+= part_name_len + subpart_name_len + 5;
        tot_parts++;
      }
    }
  }
  /*
     File format:
     Length in words              4 byte
     Checksum                     4 byte
     Total number of partitions  4 byte
     Array of engine types       n * 4 bytes where
     n = (m_tot_parts + 3)/4
     Length of name part in bytes 4 bytes
     (Names in filename format)
     Name part                    m * 4 bytes where
     m = ((length_name_part + 3)/4)*4

     All padding bytes are zeroed
  */
  tot_partition_words= (tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  tot_name_words= (tot_name_len + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  /* 4 static words (tot words, checksum, tot partitions, name length) */
  tot_len_words= 4 + tot_partition_words + tot_name_words;
  tot_len_byte= PAR_WORD_SIZE * tot_len_words;
  if (!(file_buffer= (uchar *) my_malloc(key_memory_ha_partition_file,
                                         tot_len_byte, MYF(MY_ZEROFILL))))
    DBUG_RETURN(TRUE);
  engine_array= (file_buffer + PAR_ENGINES_OFFSET);
  name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE
                            + PAR_WORD_SIZE);
  /*
    Second pass: fill the engine-type byte array and the name section with
    the same partitions selected above.
  */
  part_it.rewind();
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    if (!m_is_sub_partitioned)
    {
      tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN);
      name_buffer_ptr= strmov(name_buffer_ptr, part_name)+1;
      *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
      DBUG_PRINT("info", ("engine: %u", *engine_array));
      engine_array++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(part_elem->partition_name, part_name,
                              FN_REFLEN);
        tablename_to_filename(subpart_elem->partition_name, subpart_name,
                              FN_REFLEN);
        name_buffer_ptr+= name_add(name_buffer_ptr,
                                   part_name,
                                   subpart_name);
        *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type);
        DBUG_PRINT("info", ("engine: %u", *engine_array));
        engine_array++;
      }
    }
  }
  /*
    Compute the header checksum. The checksum word itself is still zero
    (MY_ZEROFILL) while XOR-ing, so verification later can XOR all words
    and expect zero.
  */
  chksum= 0;
  int4store(file_buffer, tot_len_words);
  int4store(file_buffer + PAR_NUM_PARTS_OFFSET, tot_parts);
  int4store(file_buffer + PAR_ENGINES_OFFSET +
            (tot_partition_words * PAR_WORD_SIZE),
            tot_name_len);
  for (i= 0; i < tot_len_words; i++)
    chksum^= uint4korr(file_buffer + PAR_WORD_SIZE * i);
  int4store(file_buffer + PAR_CHECKSUM_OFFSET, chksum);
  /*
    Add .par extension to the file name.
    Create and write and close file
    to be used at open, delete_table and rename_table
  */
  fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT);
  if ((file= mysql_file_create(key_file_ha_partition_par,
                               file_name, CREATE_MODE, O_RDWR | O_TRUNC,
                               MYF(MY_WME))) >= 0)
  {
    result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte,
                             MYF(MY_WME | MY_NABP)) != 0;

    /* Write connection information (for federatedx engine) */
    part_it.rewind();
    for (i= 0; i < num_parts && !result; i++)
    {
      uchar buffer[4];
      part_elem= part_it++;
      size_t length= part_elem->connect_string.length;
      int4store(buffer, length);
      if (my_write(file, buffer, 4, MYF(MY_WME | MY_NABP)) ||
          my_write(file, (uchar *) part_elem->connect_string.str, length,
                   MYF(MY_WME | MY_NABP)))
      {
        result= TRUE;
        break;
      }
    }
    (void) mysql_file_close(file, MYF(0));
    /* Do not leave a half-written .par file behind. */
    if (result)
      mysql_file_delete(key_file_ha_partition_par, file_name, MYF(MY_WME));
  }
  else
    result= TRUE;
  my_free(file_buffer);
  DBUG_RETURN(result);
}
2944
2945
2946 /**
2947 Clear handler variables and free some memory
2948 */
2949
clear_handler_file()2950 void ha_partition::clear_handler_file()
2951 {
2952 if (m_engine_array)
2953 plugin_unlock_list(NULL, m_engine_array, m_tot_parts);
2954 free_root(&m_mem_root, MYF(MY_KEEP_PREALLOC));
2955 m_file_buffer= NULL;
2956 m_engine_array= NULL;
2957 m_connect_string= NULL;
2958 }
2959
2960
2961 /**
2962 Create underlying handler objects
2963
2964 @param mem_root Allocate memory through this
2965
2966 @return Operation status
2967 @retval TRUE Error
2968 @retval FALSE Success
2969 */
2970
create_handlers(MEM_ROOT * mem_root)2971 bool ha_partition::create_handlers(MEM_ROOT *mem_root)
2972 {
2973 uint i;
2974 uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
2975 handlerton *hton0;
2976 DBUG_ENTER("create_handlers");
2977
2978 if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
2979 DBUG_RETURN(TRUE);
2980 m_file_tot_parts= m_tot_parts;
2981 bzero((char*) m_file, alloc_len);
2982 for (i= 0; i < m_tot_parts; i++)
2983 {
2984 handlerton *hton= plugin_data(m_engine_array[i], handlerton*);
2985 if (!(m_file[i]= get_new_handler(table_share, mem_root, hton)))
2986 DBUG_RETURN(TRUE);
2987 DBUG_PRINT("info", ("engine_type: %u", hton->db_type));
2988 }
2989 /* For the moment we only support partition over the same table engine */
2990 hton0= plugin_data(m_engine_array[0], handlerton*);
2991 if (hton0 == myisam_hton)
2992 {
2993 DBUG_PRINT("info", ("MyISAM"));
2994 m_myisam= TRUE;
2995 }
2996 /* INNODB may not be compiled in... */
2997 else if (ha_legacy_type(hton0) == DB_TYPE_INNODB)
2998 {
2999 DBUG_PRINT("info", ("InnoDB"));
3000 m_innodb= TRUE;
3001 }
3002 DBUG_RETURN(FALSE);
3003 }
3004
3005
3006 /*
3007 Create underlying handler objects from partition info
3008
3009 SYNOPSIS
3010 new_handlers_from_part_info()
3011 mem_root Allocate memory through this
3012
3013 RETURN VALUE
3014 TRUE Error
3015 FALSE Success
3016 */
3017
bool ha_partition::new_handlers_from_part_info(MEM_ROOT *mem_root)
{
  uint i, j, part_count;
  partition_element *part_elem;
  uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("ha_partition::new_handlers_from_part_info");

  if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
    goto error;

  m_file_tot_parts= m_tot_parts;
  bzero((char*) m_file, alloc_len);
  DBUG_ASSERT(m_part_info->num_parts > 0);

  i= 0;
  part_count= 0;
  /*
    Create one handler per (sub)partition using the engine type recorded in
    the partition element (subpartitions inherit the parent's engine_type).
  */
  do
  {
    part_elem= part_it++;
    if (m_is_sub_partitioned)
    {
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
                                                    part_elem->engine_type)))
          goto error;
        DBUG_PRINT("info", ("engine_type: %u",
                   (uint) ha_legacy_type(part_elem->engine_type)));
      }
    }
    else
    {
      if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
                                                  part_elem->engine_type)))
        goto error;
      DBUG_PRINT("info", ("engine_type: %u",
                 (uint) ha_legacy_type(part_elem->engine_type)));
    }
  } while (++i < m_part_info->num_parts);
  /*
    part_elem is the last partition after the loop; only one engine is
    allowed across all partitions, so it is representative.
    NOTE(review): unlike create_handlers(), this never sets m_innodb for
    InnoDB tables -- confirm whether that asymmetry is intentional.
  */
  if (part_elem->engine_type == myisam_hton)
  {
    DBUG_PRINT("info", ("MyISAM"));
    m_myisam= TRUE;
  }
  DBUG_RETURN(FALSE);
error:
  DBUG_RETURN(TRUE);
}
3071
3072
3073 /**
3074 Read the .par file to get the partitions engines and names
3075
3076 @param name Name of table file (without extension)
3077
3078 @return Operation status
3079 @retval true Failure
3080 @retval false Success
3081
3082 @note On success, m_file_buffer is allocated and must be
3083 freed by the caller. m_name_buffer_ptr and m_tot_parts is also set.
3084 */
3085
read_par_file(const char * name)3086 bool ha_partition::read_par_file(const char *name)
3087 {
3088 char buff[FN_REFLEN];
3089 uchar *tot_name_len_offset;
3090 File file;
3091 uchar *file_buffer;
3092 uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
3093 DBUG_ENTER("ha_partition::read_par_file");
3094 DBUG_PRINT("enter", ("table name: '%s'", name));
3095
3096 if (m_file_buffer)
3097 DBUG_RETURN(false);
3098 fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT);
3099
3100 /* Following could be done with mysql_file_stat to read in whole file */
3101 if ((file= mysql_file_open(key_file_ha_partition_par,
3102 buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
3103 DBUG_RETURN(TRUE);
3104 if (mysql_file_read(file, (uchar *) &buff[0], PAR_WORD_SIZE, MYF(MY_NABP)))
3105 goto err1;
3106 len_words= uint4korr(buff);
3107 len_bytes= PAR_WORD_SIZE * len_words;
3108 if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
3109 goto err1;
3110 if (!(file_buffer= (uchar*) alloc_root(&m_mem_root, len_bytes)))
3111 goto err1;
3112 if (mysql_file_read(file, file_buffer, len_bytes, MYF(MY_NABP)))
3113 goto err2;
3114
3115 chksum= 0;
3116 for (i= 0; i < len_words; i++)
3117 chksum ^= uint4korr((file_buffer) + PAR_WORD_SIZE * i);
3118 if (chksum)
3119 goto err2;
3120 m_tot_parts= uint4korr((file_buffer) + PAR_NUM_PARTS_OFFSET);
3121 DBUG_PRINT("info", ("No of parts: %u", m_tot_parts));
3122 tot_partition_words= (m_tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
3123
3124 tot_name_len_offset= file_buffer + PAR_ENGINES_OFFSET +
3125 PAR_WORD_SIZE * tot_partition_words;
3126 tot_name_words= (uint4korr(tot_name_len_offset) + PAR_WORD_SIZE - 1) /
3127 PAR_WORD_SIZE;
3128 /*
3129 Verify the total length = tot size word, checksum word, num parts word +
3130 engines array + name length word + name array.
3131 */
3132 if (len_words != (tot_partition_words + tot_name_words + 4))
3133 goto err2;
3134 m_file_buffer= file_buffer; // Will be freed in clear_handler_file()
3135 m_name_buffer_ptr= (char*) (tot_name_len_offset + PAR_WORD_SIZE);
3136
3137 if (!(m_connect_string= (LEX_CSTRING*)
3138 alloc_root(&m_mem_root, m_tot_parts * sizeof(LEX_CSTRING))))
3139 goto err2;
3140 bzero(m_connect_string, m_tot_parts * sizeof(LEX_CSTRING));
3141
3142 /* Read connection arguments (for federated X engine) */
3143 for (i= 0; i < m_tot_parts; i++)
3144 {
3145 LEX_CSTRING connect_string;
3146 uchar buffer[4];
3147 char *tmp;
3148 if (my_read(file, buffer, 4, MYF(MY_NABP)))
3149 {
3150 /* No extra options; Probably not a federatedx engine */
3151 break;
3152 }
3153 connect_string.length= uint4korr(buffer);
3154 connect_string.str= tmp= (char*) alloc_root(&m_mem_root,
3155 connect_string.length+1);
3156 if (my_read(file, (uchar*) connect_string.str, connect_string.length,
3157 MYF(MY_NABP)))
3158 break;
3159 tmp[connect_string.length]= 0;
3160 m_connect_string[i]= connect_string;
3161 }
3162
3163 (void) mysql_file_close(file, MYF(0));
3164 DBUG_RETURN(false);
3165
3166 err2:
3167 err1:
3168 (void) mysql_file_close(file, MYF(0));
3169 DBUG_RETURN(true);
3170 }
3171
3172
3173 /**
3174 Setup m_engine_array
3175
3176 @param mem_root MEM_ROOT to use for allocating new handlers
3177
3178 @return Operation status
3179 @retval false Success
3180 @retval true Failure
3181 */
3182
setup_engine_array(MEM_ROOT * mem_root)3183 bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
3184 {
3185 uint i;
3186 uchar *buff;
3187 handlerton **engine_array, *first_engine;
3188 enum legacy_db_type db_type, first_db_type;
3189
3190 DBUG_ASSERT(!m_file);
3191 DBUG_ENTER("ha_partition::setup_engine_array");
3192 engine_array= (handlerton **) my_alloca(m_tot_parts * sizeof(handlerton*));
3193 if (!engine_array)
3194 DBUG_RETURN(true);
3195
3196 buff= (uchar *) (m_file_buffer + PAR_ENGINES_OFFSET);
3197 first_db_type= (enum legacy_db_type) buff[0];
3198 first_engine= ha_resolve_by_legacy_type(ha_thd(), first_db_type);
3199 if (!first_engine)
3200 goto err;
3201
3202 if (!(m_engine_array= (plugin_ref*)
3203 alloc_root(&m_mem_root, m_tot_parts * sizeof(plugin_ref))))
3204 goto err;
3205
3206 for (i= 0; i < m_tot_parts; i++)
3207 {
3208 db_type= (enum legacy_db_type) buff[i];
3209 if (db_type != first_db_type)
3210 {
3211 DBUG_PRINT("error", ("partition %u engine %d is not same as "
3212 "first partition %d", i, db_type,
3213 (int) first_db_type));
3214 DBUG_ASSERT(0);
3215 clear_handler_file();
3216 goto err;
3217 }
3218 m_engine_array[i]= ha_lock_engine(NULL, first_engine);
3219 if (!m_engine_array[i])
3220 {
3221 clear_handler_file();
3222 goto err;
3223 }
3224 }
3225
3226 my_afree(engine_array);
3227
3228 if (create_handlers(mem_root))
3229 {
3230 clear_handler_file();
3231 DBUG_RETURN(true);
3232 }
3233
3234 DBUG_RETURN(false);
3235
3236 err:
3237 my_afree(engine_array);
3238 DBUG_RETURN(true);
3239 }
3240
3241
3242 /**
3243 Get info about partition engines and their names from the .par file
3244
3245 @param name Full path of table name
3246 @param mem_root Allocate memory through this
3247 @param is_clone If it is a clone, don't create new handlers
3248
3249 @return Operation status
3250 @retval true Error
3251 @retval false Success
3252
3253 @note Open handler file to get partition names, engine types and number of
3254 partitions.
3255 */
3256
get_from_handler_file(const char * name,MEM_ROOT * mem_root,bool is_clone)3257 bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root,
3258 bool is_clone)
3259 {
3260 DBUG_ENTER("ha_partition::get_from_handler_file");
3261 DBUG_PRINT("enter", ("table name: '%s'", name));
3262
3263 if (m_file_buffer)
3264 DBUG_RETURN(false);
3265
3266 if (read_par_file(name))
3267 DBUG_RETURN(true);
3268
3269 if (!is_clone && setup_engine_array(mem_root))
3270 DBUG_RETURN(true);
3271
3272 DBUG_RETURN(false);
3273 }
3274
3275
3276 /****************************************************************************
3277 MODULE open/close object
3278 ****************************************************************************/
3279
3280 /**
3281 Get the partition name.
3282
3283 @param part Struct containing name and length
3284 @param[out] length Length of the name
3285
3286 @return Partition name
3287 */
3288
get_part_name(PART_NAME_DEF * part,size_t * length,my_bool not_used)3289 static uchar *get_part_name(PART_NAME_DEF *part, size_t *length,
3290 my_bool not_used __attribute__((unused)))
3291 {
3292 *length= part->length;
3293 return part->partition_name;
3294 }
3295
3296
3297 /**
3298 Insert a partition name in the partition_name_hash.
3299
3300 @param name Name of partition
3301 @param part_id Partition id (number)
3302 @param is_subpart Set if the name belongs to a subpartition
3303
3304 @return Operation status
3305 @retval true Failure
3306 @retval false Success
3307 */
3308
bool ha_partition::insert_partition_name_in_hash(const char *name, uint part_id,
                                                 bool is_subpart)
{
  PART_NAME_DEF *part_def;
  uchar *part_name;
  size_t part_name_length;
  DBUG_ENTER("ha_partition::insert_partition_name_in_hash");
  /*
    Calculate and store the length here, to avoid doing it when
    searching the hash.
  */
  part_name_length= strlen(name);
  /*
    Must use memory that lives as long as table_share.
    Freed in the Partition_share destructor.
    Since we use my_multi_malloc, then my_free(part_def) will also free
    part_name, as a part of my_hash_free.
  */
  if (!my_multi_malloc(key_memory_Partition_share, MY_WME,
                       &part_def, sizeof(PART_NAME_DEF),
                       &part_name, part_name_length + 1,
                       NULL))
    DBUG_RETURN(true);
  /* +1 copies the terminating NUL as well. */
  memcpy(part_name, name, part_name_length + 1);
  part_def->partition_name= part_name;
  part_def->length= (uint)part_name_length;
  part_def->part_id= part_id;
  part_def->is_subpart= is_subpart;
  if (my_hash_insert(&part_share->partition_name_hash, (uchar *) part_def))
  {
    /*
      Insert failed; part_def still owned by us, so release it here
      (this also releases part_name — single my_multi_malloc block).
    */
    my_free(part_def);
    DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}
3344
3345
3346 /**
3347 Populate the partition_name_hash in part_share.
3348 */
3349
bool ha_partition::populate_partition_name_hash()
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_is_sub_partitioned ? m_part_info->num_subparts : 1;
  uint tot_names;
  uint i= 0;
  DBUG_ASSERT(part_share);

  DBUG_ENTER("ha_partition::populate_partition_name_hash");

  /*
    partition_name_hash is only set once and never changed
    -> OK to check without locking.
  */

  if (part_share->partition_name_hash_initialized)
    DBUG_RETURN(false);
  lock_shared_ha_data();
  /* Re-check under the lock: another handler may have won the race. */
  if (part_share->partition_name_hash_initialized)
  {
    unlock_shared_ha_data();
    DBUG_RETURN(false);
  }
  /*
    For subpartitioned tables both the partition names and every
    subpartition name go into the hash; otherwise just the partitions.
  */
  tot_names= m_is_sub_partitioned ? m_tot_parts + num_parts : num_parts;
  if (my_hash_init(key_memory_Partition_share,
                   &part_share->partition_name_hash, system_charset_info,
                   tot_names, 0, 0, (my_hash_get_key) get_part_name, my_free,
                   HASH_UNIQUE))
  {
    unlock_shared_ha_data();
    DBUG_RETURN(TRUE);
  }

  do
  {
    partition_element *part_elem= part_it++;
    DBUG_ASSERT(part_elem->part_state == PART_NORMAL);
    if (part_elem->part_state == PART_NORMAL)
    {
      /* Partition's id is the id of its first subpartition slot. */
      if (insert_partition_name_in_hash(part_elem->partition_name,
                                        i * num_subparts, false))
        goto err;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element>
                                    subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0;
        do
        {
          sub_elem= subpart_it++;
          /* Subpartition ids are consecutive within the partition. */
          if (insert_partition_name_in_hash(sub_elem->partition_name,
                                            i * num_subparts + j, true))
            goto err;

        } while (++j < num_subparts);
      }
    }
  } while (++i < num_parts);

  /* Publish only after the hash is fully built. */
  part_share->partition_name_hash_initialized= true;
  unlock_shared_ha_data();

  DBUG_RETURN(FALSE);
err:
  /* Partial hash is useless; free it so a later attempt can retry. */
  my_hash_free(&part_share->partition_name_hash);
  unlock_shared_ha_data();

  DBUG_RETURN(TRUE);
}
3421
3422
3423 /**
3424 Set Handler_share pointer and allocate Handler_share pointers
3425 for each partition and set those.
3426
3427 @param ha_share_arg Where to store/retrieve the Partitioning_share pointer
3428 to be shared by all instances of the same table.
3429
3430 @return Operation status
3431 @retval true Failure
3432 @retval false Success
3433 */
3434
set_ha_share_ref(Handler_share ** ha_share_arg)3435 bool ha_partition::set_ha_share_ref(Handler_share **ha_share_arg)
3436 {
3437 Handler_share **ha_shares;
3438 uint i;
3439 DBUG_ENTER("ha_partition::set_ha_share_ref");
3440
3441 DBUG_ASSERT(!part_share);
3442 DBUG_ASSERT(table_share);
3443 DBUG_ASSERT(!m_is_clone_of);
3444 DBUG_ASSERT(m_tot_parts);
3445 if (handler::set_ha_share_ref(ha_share_arg))
3446 DBUG_RETURN(true);
3447 if (!(part_share= get_share()))
3448 DBUG_RETURN(true);
3449 DBUG_ASSERT(part_share->partitions_share_refs.num_parts >= m_tot_parts);
3450 ha_shares= part_share->partitions_share_refs.ha_shares;
3451 for (i= 0; i < m_tot_parts; i++)
3452 {
3453 if (m_file[i]->set_ha_share_ref(&ha_shares[i]))
3454 DBUG_RETURN(true);
3455 }
3456 DBUG_RETURN(false);
3457 }
3458
3459
3460 /**
3461 Get the PARTITION_SHARE for the table.
3462
3463 @return Operation status
3464 @retval true Error
3465 @retval false Success
3466
3467 @note Gets or initializes the Partition_share object used by partitioning.
3468 The Partition_share is used for handling the auto_increment etc.
3469 */
3470
Partition_share *ha_partition::get_share()
{
  Partition_share *tmp_share;
  DBUG_ENTER("ha_partition::get_share");
  DBUG_ASSERT(table_share);

  lock_shared_ha_data();
  /* Create and publish the share only if no other handler did it first. */
  if (!(tmp_share= static_cast<Partition_share*>(get_ha_share_ptr())))
  {
    tmp_share= new Partition_share;
    /*
      NULL check after new: assumes a non-throwing operator new is in
      effect here — TODO(review) confirm against the server's allocator.
    */
    if (!tmp_share)
      goto err;
    if (tmp_share->init(m_tot_parts))
    {
      delete tmp_share;
      tmp_share= NULL;   /* NULL is the error return value */
      goto err;
    }
    set_ha_share_ptr(static_cast<Handler_share*>(tmp_share));
  }
err:
  unlock_shared_ha_data();
  DBUG_RETURN(tmp_share);
}
3495
3496
3497
3498 /**
3499 Helper function for freeing all internal bitmaps.
3500 */
3501
void ha_partition::free_partition_bitmaps()
{
  /*
    Free all internal bitmaps allocated by init_partition_bitmaps().
    my_bitmap_free() is safe on partially-initialized/never-initialized
    maps, so this can be used on any allocation failure path.
    (Previous comment here was a copy-paste from init_partition_bitmaps.)
  */
  my_bitmap_free(&m_bulk_insert_started);
  my_bitmap_free(&m_locked_partitions);
  my_bitmap_free(&m_partitions_to_reset);
  my_bitmap_free(&m_key_not_found_partitions);
  my_bitmap_free(&m_opened_partitions);
  my_bitmap_free(&m_mrr_used_partitions);
}
3512
3513
3514 /**
3515 Helper function for initializing all internal bitmaps.
3516
3517 Note:
3518 All bitmaps, including partially allocated, are freed in
3519 free_partion_bitmaps()
3520 */
3521
init_partition_bitmaps()3522 bool ha_partition::init_partition_bitmaps()
3523 {
3524 DBUG_ENTER("ha_partition::init_partition_bitmaps");
3525
3526 /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
3527 if (my_bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
3528 DBUG_RETURN(true);
3529
3530 /* Initialize the bitmap we use to keep track of locked partitions */
3531 if (my_bitmap_init(&m_locked_partitions, NULL, m_tot_parts, FALSE))
3532 DBUG_RETURN(true);
3533
3534 /*
3535 Initialize the bitmap we use to keep track of partitions which may have
3536 something to reset in ha_reset().
3537 */
3538 if (my_bitmap_init(&m_partitions_to_reset, NULL, m_tot_parts, FALSE))
3539 DBUG_RETURN(true);
3540
3541 /*
3542 Initialize the bitmap we use to keep track of partitions which returned
3543 HA_ERR_KEY_NOT_FOUND from index_read_map.
3544 */
3545 if (my_bitmap_init(&m_key_not_found_partitions, NULL, m_tot_parts, FALSE))
3546 DBUG_RETURN(true);
3547
3548 if (bitmap_init(&m_mrr_used_partitions, NULL, m_tot_parts, TRUE))
3549 DBUG_RETURN(true);
3550
3551 if (my_bitmap_init(&m_opened_partitions, NULL, m_tot_parts, FALSE))
3552 DBUG_RETURN(true);
3553
3554 m_file_sample= NULL;
3555
3556 /* Initialize the bitmap for read/lock_partitions */
3557 if (!m_is_clone_of)
3558 {
3559 DBUG_ASSERT(!m_clone_mem_root);
3560 if (m_part_info->set_partition_bitmaps(NULL))
3561 DBUG_RETURN(true);
3562 }
3563 DBUG_RETURN(false);
3564 }
3565
3566
3567 /*
3568 Open handler object
3569 SYNOPSIS
3570 open()
3571 name Full path of table name
3572 mode Open mode flags
3573 test_if_locked ?
3574
3575 RETURN VALUE
3576 >0 Error
3577 0 Success
3578
3579 DESCRIPTION
3580 Used for opening tables. The name will be the name of the file.
3581 A table is opened when it needs to be opened. For instance
3582 when a request comes in for a select on the table (tables are not
3583 open and closed for each request, they are cached).
3584
3585 Called from handler.cc by handler::ha_open(). The server opens all tables
3586 by calling ha_open() which then calls the handler specific open().
3587 */
3588
int ha_partition::open(const char *name, int mode, uint test_if_locked)
{
  int error= HA_ERR_INITIALIZATION;
  handler **file;
  char name_buff[FN_REFLEN + 1];
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::open");

  DBUG_ASSERT(table->s == table_share);
  ref_length= 0;
  m_mode= mode;
  m_open_test_lock= test_if_locked;
  m_part_field_array= m_part_info->full_part_field_array;
  /* Read .par info; clones reuse their original's handlers (no new ones). */
  if (get_from_handler_file(name, &table->mem_root, MY_TEST(m_is_clone_of)))
    DBUG_RETURN(error);
  if (populate_partition_name_hash())
  {
    DBUG_RETURN(HA_ERR_INITIALIZATION);
  }
  m_start_key.length= 0;
  m_rec0= table->record[0];
  m_rec_length= table_share->reclength;
  /* Lazily allocated; reused across close/open of the same handler object. */
  if (!m_part_ids_sorted_by_num_of_records)
  {
    if (!(m_part_ids_sorted_by_num_of_records=
            (uint32*) my_malloc(key_memory_ha_partition_part_ids,
                                m_tot_parts * sizeof(uint32), MYF(MY_WME))))
      DBUG_RETURN(error);
    uint32 i;
    /* Initialize it with all partition ids. */
    for (i= 0; i < m_tot_parts; i++)
      m_part_ids_sorted_by_num_of_records[i]= i;
  }

  if (init_partition_bitmaps())
    goto err_alloc;

  if (!MY_TEST(m_is_clone_of) &&
      unlikely((error=
                m_part_info->set_partition_bitmaps(m_partitions_to_open))))
    goto err_alloc;

  /* Allocate memory used with MMR */
  /* Single my_multi_malloc: freeing m_range_info releases all arrays. */
  if (!(m_range_info= (void **)
        my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
                        &m_range_info, sizeof(range_id_t) * m_tot_parts,
                        &m_stock_range_seq, sizeof(uint) * m_tot_parts,
                        &m_mrr_buffer, sizeof(HANDLER_BUFFER) * m_tot_parts,
                        &m_mrr_buffer_size, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_length, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_first,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_part_mrr_range_current,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_partition_part_key_multi_range_hld,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE_HLD) * m_tot_parts,
                        NullS)))
    goto err_alloc;

  bzero(m_mrr_buffer, m_tot_parts * sizeof(HANDLER_BUFFER));
  bzero(m_part_mrr_range_first,
        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts);

  if (m_is_clone_of)
  {
    uint i, alloc_len;
    char *name_buffer_ptr;
    DBUG_ASSERT(m_clone_mem_root);
    /* Allocate an array of handler pointers for the partitions handlers. */
    alloc_len= (m_tot_parts + 1) * sizeof(handler*);
    if (!(m_file= (handler **) alloc_root(m_clone_mem_root, alloc_len)))
    {
      error= HA_ERR_INITIALIZATION;
      goto err_alloc;
    }
    memset(m_file, 0, alloc_len);
    name_buffer_ptr= m_name_buffer_ptr;
    /*
      Populate them by cloning the original partitions. This also opens them.
      Note that file->ref is allocated too.
    */
    file= m_is_clone_of->m_file;
    for (i= 0; i < m_tot_parts; i++)
    {
      /* Only clone partitions the original actually has open. */
      if (!bitmap_is_set(&m_is_clone_of->m_opened_partitions, i))
        continue;

      if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff),
                                                 name, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto err_handler;
      /* ::clone() will also set ha_share from the original. */
      if (!(m_file[i]= file[i]->clone(name_buff, m_clone_mem_root)))
      {
        error= HA_ERR_INITIALIZATION;
        file= &m_file[i];
        goto err_handler;
      }
      if (!m_file_sample)
        m_file_sample= m_file[i];
      /* Partition names are stored back-to-back, NUL-separated. */
      name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
      bitmap_set_bit(&m_opened_partitions, i);
    }
  }
  else
  {
    check_insert_autoincrement();
    if (unlikely((error= open_read_partitions(name_buff, sizeof(name_buff)))))
      goto err_handler;
    m_num_locks= m_file_sample->lock_count();
  }
  /*
    We want to know the upper bound for locks, to allocate enough memory.
    There is no performance lost if we simply return in lock_count() the
    maximum number locks needed, only some minor over allocation of memory
    in get_lock_data().
  */
  m_num_locks*= m_tot_parts;

  file= m_file;
  ref_length= get_open_file_sample()->ref_length;
  /* Reference flag set from the sample; all partitions must match it below. */
  check_table_flags= ((get_open_file_sample()->ha_table_flags() &
                       ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                      (PARTITION_ENABLED_TABLE_FLAGS));
  while (*(++file))
  {
    if (!bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      continue;
    /* MyISAM can have smaller ref_length for partitions with MAX_ROWS set */
    set_if_bigger(ref_length, ((*file)->ref_length));
    /*
      Verify that all partitions have the same set of table flags.
      Mask all flags that partitioning enables/disables.
    */
    if (check_table_flags != (((*file)->ha_table_flags() &
                               ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                              (PARTITION_ENABLED_TABLE_FLAGS)))
    {
      error= HA_ERR_INITIALIZATION;
      /* set file to last handler, so all of them are closed */
      file= &m_file[m_tot_parts - 1];
      goto err_handler;
    }
  }
  key_used_on_scan= get_open_file_sample()->key_used_on_scan;
  implicit_emptied= get_open_file_sample()->implicit_emptied;
  /*
    Add 2 bytes for partition id in position ref length.
    ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
  */
  ref_length+= PARTITION_BYTES_IN_POS;
  m_ref_length= ref_length;

  /*
    Release buffer read from .par file. It will not be reused again after
    being opened once.
  */
  clear_handler_file();

  /*
    Some handlers update statistics as part of the open call. This will in
    some cases corrupt the statistics of the partition handler and thus
    to ensure we have correct statistics we call info from open after
    calling open on all individual handlers.
  */
  m_handler_status= handler_opened;
  if (m_part_info->part_expr)
    m_part_func_monotonicity_info=
                            m_part_info->part_expr->get_monotonicity_info();
  else if (m_part_info->list_of_part_fields)
    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
  info(HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_OPEN);
  DBUG_RETURN(0);

err_handler:
  DEBUG_SYNC(ha_thd(), "partition_open_error");
  DBUG_ASSERT(m_tot_parts > 0);
  /* Close every partition that was opened, iterating down to 0 inclusive. */
  for (uint i= m_tot_parts - 1; ; --i)
  {
    if (bitmap_is_set(&m_opened_partitions, i))
      m_file[i]->ha_close();
    if (!i)
      break;
  }
err_alloc:
  free_partition_bitmaps();
  my_free(m_range_info);
  m_range_info= 0;

  DBUG_RETURN(error);
}
3780
3781
3782 /*
3783 Disabled since it is not possible to prune yet.
3784 without pruning, it need to rebind/unbind every partition in every
3785 statement which uses a table from the table cache. Will also use
3786 as many PSI_tables as there are partitions.
3787 */
3788
3789 #ifdef HAVE_M_PSI_PER_PARTITION
unbind_psi()3790 void ha_partition::unbind_psi()
3791 {
3792 uint i;
3793
3794 DBUG_ENTER("ha_partition::unbind_psi");
3795 handler::unbind_psi();
3796 for (i= 0; i < m_tot_parts; i++)
3797 {
3798 DBUG_ASSERT(m_file[i] != NULL);
3799 m_file[i]->unbind_psi();
3800 }
3801 DBUG_VOID_RETURN;
3802 }
3803
rebind()3804 int ha_partition::rebind()
3805 {
3806 uint i;
3807
3808 DBUG_ENTER("ha_partition::rebind");
3809 if (int error= handler::rebind())
3810 DBUG_RETURN(error);
3811 for (i= 0; i < m_tot_parts; i++)
3812 {
3813 DBUG_ASSERT(m_file[i] != NULL);
3814 if (int error= m_file[i]->rebind())
3815 {
3816 while (i)
3817 m_file[--i]->unbind_psi();
3818 handler::unbind_psi();
3819 DBUG_RETURN(error);
3820 }
3821 }
3822 DBUG_RETURN(0);
3823 }
3824 #endif /* HAVE_M_PSI_PER_PARTITION */
3825
3826
3827 /*
3828 Check if the table definition has changed for the part tables
3829 We use the first partition for the check.
3830 */
3831
discover_check_version()3832 int ha_partition::discover_check_version()
3833 {
3834 return m_file[0]->discover_check_version();
3835 }
3836
3837 /**
3838 Clone the open and locked partitioning handler.
3839
3840 @param mem_root MEM_ROOT to use.
3841
3842 @return Pointer to the successfully created clone or NULL
3843
3844 @details
3845 This function creates a new ha_partition handler as a clone/copy. The
3846 original (this) must already be opened and locked. The clone will use
3847 the originals m_part_info.
3848 It also allocates memory for ref + ref_dup.
3849 In ha_partition::open() it will clone its original handlers partitions
3850 which will allocate then on the correct MEM_ROOT and also open them.
3851 */
3852
handler *ha_partition::clone(const char *name, MEM_ROOT *mem_root)
{
  ha_partition *new_handler;

  DBUG_ENTER("ha_partition::clone");
  /* Clone constructor: copies m_part_info and marks us as the original. */
  new_handler= new (mem_root) ha_partition(ht, table_share, m_part_info,
                                           this, mem_root);
  if (!new_handler)
    DBUG_RETURN(NULL);

  /*
    We will not clone each partition's handler here, it will be done in
    ha_partition::open() for clones. Also set_ha_share_ref is not needed
    here, since 1) ha_share is copied in the constructor used above
    2) each partition's cloned handler will set it from its original.
  */

  /*
    Allocate new_handler->ref here because otherwise ha_open will allocate it
    on this->table->mem_root and we will not be able to reclaim that memory
    when the clone handler object is destroyed.
  */
  if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
                                              ALIGN_SIZE(m_ref_length)*2)))
    goto err;

  /* Opening the clone triggers per-partition cloning (see open()). */
  if (new_handler->ha_open(table, name,
                           table->db_stat,
                           HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL))
    goto err;

  DBUG_RETURN((handler*) new_handler);

err:
  delete new_handler;
  DBUG_RETURN(NULL);
}
3890
3891
3892 /*
3893 Close handler object
3894
3895 SYNOPSIS
3896 close()
3897
3898 RETURN VALUE
3899 >0 Error code
3900 0 Success
3901
3902 DESCRIPTION
3903 Called from sql_base.cc, sql_select.cc, and table.cc.
3904 In sql_select.cc it is only used to close up temporary tables or during
3905 the process where a temporary table is converted over to being a
3906 myisam table.
3907 For sql_base.cc look at close_data_tables().
3908 */
3909
int ha_partition::close(void)
{
  bool first= TRUE;
  handler **file;
  uint i;
  st_partition_ft_info *tmp_ft_info;
  DBUG_ENTER("ha_partition::close");
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_part_info);

  destroy_record_priority_queue();

  /* Free the linked list of fulltext info blocks. */
  for (; ft_first ; ft_first= tmp_ft_info)
  {
    tmp_ft_info= ft_first->next;
    my_free(ft_first);
  }

  /* Free active mrr_ranges */
  for (i= 0; i < m_tot_parts; i++)
  {
    if (m_part_mrr_range_first[i])
    {
      PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_first=
        m_part_mrr_range_first[i];
      /* Walk and free the per-partition range list. */
      do
      {
        PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_current;
        tmp_mrr_range_current= tmp_mrr_range_first;
        tmp_mrr_range_first= tmp_mrr_range_first->next;
        my_free(tmp_mrr_range_current);
      } while (tmp_mrr_range_first);
    }
  }
  if (m_mrr_range_first)
  {
    /* Global range list: free both key buffers before each node. */
    do
    {
      m_mrr_range_current= m_mrr_range_first;
      m_mrr_range_first= m_mrr_range_first->next;
      if (m_mrr_range_current->key[0])
        my_free(m_mrr_range_current->key[0]);
      if (m_mrr_range_current->key[1])
        my_free(m_mrr_range_current->key[1]);
      my_free(m_mrr_range_current);
    } while (m_mrr_range_first);
  }
  /* One my_multi_malloc block; this frees all MRR arrays (see open()). */
  my_free(m_range_info);
  m_range_info= NULL;                           // Safety

  if (m_mrr_full_buffer)
  {
    my_free(m_mrr_full_buffer);
    m_mrr_full_buffer= NULL;
    m_mrr_full_buffer_size= 0;
  }
  file= m_file;

repeat:
  /*
    First pass closes m_file (only partitions marked open); if an ALTER
    left m_added_file handlers around, a second pass closes those too.
  */
  do
  {
    if (!first || bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      (*file)->ha_close();
  } while (*(++file));

  free_partition_bitmaps();

  if (first && m_added_file && m_added_file[0])
  {
    file= m_added_file;
    first= FALSE;
    goto repeat;
  }

  m_handler_status= handler_closed;
  DBUG_RETURN(0);
}
3987
3988 /****************************************************************************
3989 MODULE start/end statement
3990 ****************************************************************************/
3991 /*
3992 A number of methods to define various constants for the handler. In
3993 the case of the partition handler we need to use some max and min
3994 of the underlying handlers in most cases.
3995 */
3996
3997 /*
3998 Set external locks on table
3999
4000 SYNOPSIS
4001 external_lock()
4002 thd Thread object
4003 lock_type Type of external lock
4004
4005 RETURN VALUE
4006 >0 Error code
4007 0 Success
4008
4009 DESCRIPTION
4010 First you should go read the section "locking functions for mysql" in
4011 lock.cc to understand this.
4012 This create a lock on the table. If you are implementing a storage engine
4013 that can handle transactions look at ha_berkeley.cc to see how you will
4014 want to go about doing this. Otherwise you should consider calling
4015 flock() here.
4016 Originally this method was used to set locks on file level to enable
4017 several MySQL Servers to work on the same data. For transactional
4018 engines it has been "abused" to also mean start and end of statements
4019 to enable proper rollback of statements and transactions. When LOCK
4020 TABLES has been issued the start_stmt method takes over the role of
4021 indicating start of statement but in this case there is no end of
4022 statement indicator(?).
4023
4024 Called from lock.cc by lock_external() and unlock_external(). Also called
4025 from sql_table.cc by copy_data_between_tables().
4026 */
4027
int ha_partition::external_lock(THD *thd, int lock_type)
{
  int error;
  uint i, first_used_partition;
  MY_BITMAP *used_partitions;
  DBUG_ENTER("ha_partition::external_lock");

  DBUG_ASSERT(!auto_increment_lock);
  DBUG_ASSERT(!auto_increment_safe_stmt_log_lock);

  /*
    Unlock exactly the partitions we locked earlier; lock the set the
    optimizer decided on (lock_partitions).
  */
  if (lock_type == F_UNLCK)
    used_partitions= &m_locked_partitions;
  else
    used_partitions= &(m_part_info->lock_partitions);

  first_used_partition= bitmap_get_first_set(used_partitions);

  for (i= first_used_partition;
       i < m_tot_parts;
       i= bitmap_get_next_set(used_partitions, i))
  {
    DBUG_PRINT("info", ("external_lock(thd, %d) part %u", lock_type, i));
    if (unlikely((error= m_file[i]->ha_external_lock(thd, lock_type))))
    {
      /* Unlock errors are ignored: keep unlocking the remaining parts. */
      if (lock_type != F_UNLCK)
        goto err_handler;
    }
    DBUG_PRINT("info", ("external_lock part %u lock %d", i, lock_type));
    /* Remember which partitions we locked, for the matching F_UNLCK. */
    if (lock_type != F_UNLCK)
      bitmap_set_bit(&m_locked_partitions, i);
  }
  if (lock_type == F_UNLCK)
  {
    bitmap_clear_all(used_partitions);
  }
  else
  {
    /* Add touched partitions to be included in reset(). */
    bitmap_union(&m_partitions_to_reset, used_partitions);
  }

  /* Handlers added by an in-progress ALTER are only ever unlocked here. */
  if (m_added_file && m_added_file[0])
  {
    handler **file= m_added_file;
    DBUG_ASSERT(lock_type == F_UNLCK);
    do
    {
      (void) (*file)->ha_external_lock(thd, lock_type);
    } while (*(++file));
  }
  if (lock_type == F_WRLCK)
  {
    if (m_part_info->part_expr)
      m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);
    if (m_part_info->part_type == VERSIONING_PARTITION &&
        /* TODO: MDEV-20345 exclude more inapproriate commands like INSERT
           These commands may be excluded because working history partition is needed
           only for versioned DML. */
        thd->lex->sql_command != SQLCOM_SELECT &&
        thd->lex->sql_command != SQLCOM_INSERT_SELECT &&
        (error= m_part_info->vers_set_hist_part(thd)))
      goto err_handler;
  }
  DBUG_RETURN(0);

err_handler:
  /* Roll back: unlock every partition locked before the failure. */
  uint j;
  for (j= first_used_partition;
       j < i;
       j= bitmap_get_next_set(&m_locked_partitions, j))
  {
    (void) m_file[j]->ha_external_unlock(thd);
  }
  bitmap_clear_all(&m_locked_partitions);
  DBUG_RETURN(error);
}
4104
4105
4106 /*
4107 Get the lock(s) for the table and perform conversion of locks if needed
4108
4109 SYNOPSIS
4110 store_lock()
4111 thd Thread object
4112 to Lock object array
4113 lock_type Table lock type
4114
4115 RETURN VALUE
4116 >0 Error code
4117 0 Success
4118
4119 DESCRIPTION
4120 The idea with handler::store_lock() is the following:
4121
4122 The statement decided which locks we should need for the table
4123 for updates/deletes/inserts we get WRITE locks, for SELECT... we get
4124 read locks.
4125
4126 Before adding the lock into the table lock handler (see thr_lock.c)
4127 mysqld calls store lock with the requested locks. Store lock can now
4128 modify a write lock to a read lock (or some other lock), ignore the
4129 lock (if we don't want to use MySQL table locks at all) or add locks
4130 for many tables (like we do when we are using a MERGE handler).
4131
    Berkeley DB for partition changes all WRITE locks to TL_WRITE_ALLOW_WRITE
    (which signals that we are doing WRITES, but we are still allowing other
    readers and writers).
4135
4136 When releasing locks, store_lock() is also called. In this case one
4137 usually doesn't have to do anything.
4138
4139 store_lock is called when holding a global mutex to ensure that only
4140 one thread at a time changes the locking information of tables.
4141
4142 In some exceptional cases MySQL may send a request for a TL_IGNORE;
4143 This means that we are requesting the same lock as last time and this
4144 should also be ignored. (This may happen when someone does a flush
4145 table when we have opened a part of the tables, in which case mysqld
4146 closes and reopens the tables and tries to get the same locks as last
4147 time). In the future we will probably try to remove this.
4148
4149 Called from lock.cc by get_lock_data().
4150 */
4151
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)4152 THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
4153 THR_LOCK_DATA **to,
4154 enum thr_lock_type lock_type)
4155 {
4156 uint i;
4157 DBUG_ENTER("ha_partition::store_lock");
4158 DBUG_ASSERT(thd == current_thd);
4159
4160 /*
4161 This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
4162 even when thd != table->in_use. In that case don't use partition pruning,
4163 but use all partitions instead to avoid using another threads structures.
4164 */
4165 if (thd != table->in_use)
4166 {
4167 for (i= 0; i < m_tot_parts; i++)
4168 to= m_file[i]->store_lock(thd, to, lock_type);
4169 }
4170 else
4171 {
4172 MY_BITMAP *used_partitions= lock_type == TL_UNLOCK ||
4173 lock_type == TL_IGNORE ?
4174 &m_locked_partitions :
4175 &m_part_info->lock_partitions;
4176
4177 for (i= bitmap_get_first_set(used_partitions);
4178 i < m_tot_parts;
4179 i= bitmap_get_next_set(used_partitions, i))
4180 {
4181 DBUG_PRINT("info", ("store lock %u iteration", i));
4182 to= m_file[i]->store_lock(thd, to, lock_type);
4183 }
4184 }
4185 DBUG_RETURN(to);
4186 }
4187
4188 /*
4189 Start a statement when table is locked
4190
4191 SYNOPSIS
4192 start_stmt()
4193 thd Thread object
4194 lock_type Type of external lock
4195
4196 RETURN VALUE
4197 >0 Error code
4198 0 Success
4199
4200 DESCRIPTION
4201 This method is called instead of external lock when the table is locked
4202 before the statement is executed.
4203 */
4204
int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
{
  int error= 0;
  uint i;
  /* Assert that read_partitions is included in lock_partitions */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->read_partitions,
                               &m_part_info->lock_partitions));
  /*
    m_locked_partitions is set in previous external_lock/LOCK TABLES.
    Current statement's lock requests must not include any partitions
    not previously locked.
  */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->lock_partitions,
                               &m_locked_partitions));
  DBUG_ENTER("ha_partition::start_stmt");

  /* Forward start_stmt to every partition this statement may lock. */
  for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
  {
    if (unlikely((error= m_file[i]->start_stmt(thd, lock_type))))
      DBUG_RETURN(error);
    /* Add partition to be called in reset(). */
    bitmap_set_bit(&m_partitions_to_reset, i);
  }
  /*
    For any write lock: make the fields used by the partitioning expression
    readable (the row may have to be relocated), and for system-versioned
    tables make sure the working history partition is prepared.
  */
  switch (lock_type)
  {
  case TL_WRITE_ALLOW_WRITE:
  case TL_WRITE_CONCURRENT_INSERT:
  case TL_WRITE_DELAYED:
  case TL_WRITE_DEFAULT:
  case TL_WRITE_LOW_PRIORITY:
  case TL_WRITE:
  case TL_WRITE_ONLY:
    if (m_part_info->part_expr)
      m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);
    if (m_part_info->part_type == VERSIONING_PARTITION &&
        // TODO: MDEV-20345 (see above)
        thd->lex->sql_command != SQLCOM_SELECT &&
        thd->lex->sql_command != SQLCOM_INSERT_SELECT)
      error= m_part_info->vers_set_hist_part(thd);
  default:;
  }
  DBUG_RETURN(error);
}
4250
4251
4252 /**
4253 Get number of lock objects returned in store_lock
4254
4255 @returns Number of locks returned in call to store_lock
4256
4257 @desc
    Returns the maximum possible number of store locks needed in a call to
    store_lock.
4260 */
4261
uint ha_partition::lock_count() const
{
  DBUG_ENTER("ha_partition::lock_count");
  /*
    m_num_locks is presumably pre-computed when the partitions are opened
    (not visible here) -- TODO confirm it covers all partitions' locks.
  */
  DBUG_RETURN(m_num_locks);
}
4267
4268
4269 /*
4270 Unlock last accessed row
4271
4272 SYNOPSIS
4273 unlock_row()
4274
4275 RETURN VALUE
4276 NONE
4277
4278 DESCRIPTION
4279 Record currently processed was not in the result set of the statement
4280 and is thus unlocked. Used for UPDATE and DELETE queries.
4281 */
4282
void ha_partition::unlock_row()
{
  DBUG_ENTER("ha_partition::unlock_row");
  /* Forward to the partition the last row was read from. */
  m_file[m_last_part]->unlock_row();
  DBUG_VOID_RETURN;
}
4289
4290 /**
4291 Check if semi consistent read was used
4292
4293 SYNOPSIS
4294 was_semi_consistent_read()
4295
4296 RETURN VALUE
4297 TRUE Previous read was a semi consistent read
4298 FALSE Previous read was not a semi consistent read
4299
4300 DESCRIPTION
4301 See handler.h:
4302 In an UPDATE or DELETE, if the row under the cursor was locked by another
4303 transaction, and the engine used an optimistic read of the last
4304 committed row value under the cursor, then the engine returns 1 from this
4305 function. MySQL must NOT try to update this optimistic value. If the
4306 optimistic value does not match the WHERE condition, MySQL can decide to
4307 skip over this row. Currently only works for InnoDB. This can be used to
4308 avoid unnecessary lock waits.
4309
4310 If this method returns nonzero, it will also signal the storage
4311 engine that the next read will be a locking re-read of the row.
4312 */
bool ha_partition::was_semi_consistent_read()
{
  DBUG_ENTER("ha_partition::was_semi_consistent_read");
  /* Only the partition of the last-read row can answer this question. */
  DBUG_ASSERT(m_last_part < m_tot_parts);
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_RETURN(m_file[m_last_part]->was_semi_consistent_read());
}
4320
4321 /**
4322 Use semi consistent read if possible
4323
4324 SYNOPSIS
4325 try_semi_consistent_read()
4326 yes Turn on semi consistent read
4327
4328 RETURN VALUE
4329 NONE
4330
4331 DESCRIPTION
4332 See handler.h:
4333 Tell the engine whether it should avoid unnecessary lock waits.
4334 If yes, in an UPDATE or DELETE, if the row under the cursor was locked
4335 by another transaction, the engine may try an optimistic read of
4336 the last committed row value under the cursor.
4337 Note: prune_partitions are already called before this call, so using
4338 pruning is OK.
4339 */
try_semi_consistent_read(bool yes)4340 void ha_partition::try_semi_consistent_read(bool yes)
4341 {
4342 uint i;
4343 DBUG_ENTER("ha_partition::try_semi_consistent_read");
4344
4345 i= bitmap_get_first_set(&(m_part_info->read_partitions));
4346 DBUG_ASSERT(i != MY_BIT_NONE);
4347 for (;
4348 i < m_tot_parts;
4349 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4350 {
4351 m_file[i]->try_semi_consistent_read(yes);
4352 }
4353 DBUG_VOID_RETURN;
4354 }
4355
4356
4357 /****************************************************************************
4358 MODULE change record
4359 ****************************************************************************/
4360
4361 /*
4362 Insert a row to the table
4363
4364 SYNOPSIS
4365 write_row()
4366 buf The row in MySQL Row Format
4367
4368 RETURN VALUE
4369 >0 Error code
4370 0 Success
4371
4372 DESCRIPTION
4373 write_row() inserts a row. buf() is a byte array of data, normally
4374 record[0].
4375
4376 You can use the field information to extract the data from the native byte
4377 array type.
4378
4379 Example of this would be:
4380 for (Field **field=table->field ; *field ; field++)
4381 {
4382 ...
4383 }
4384
4385 See ha_tina.cc for a variant of extracting all of the data as strings.
4386 ha_berkeley.cc has a variant of how to store it intact by "packing" it
4387 for ha_berkeley's own native storage type.
4388
4389 Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
4390 sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
4391
4392 ADDITIONAL INFO:
4393
4394 We have to set auto_increment fields, because those may be used in
4395 determining which partition the row should be written to.
4396 */
4397
int ha_partition::write_row(const uchar * buf)
{
  uint32 part_id;
  int error;
  longlong func_value;
  bool have_auto_increment= table->next_number_field && buf == table->record[0];
  MY_BITMAP *old_map;
  THD *thd= ha_thd();
  /*
    Presumably restores thd->variables.sql_mode on destruction -- matches
    the MODE_NO_AUTO_VALUE_ON_ZERO flag added below.  TODO confirm.
  */
  Sql_mode_save sms(thd);
  bool saved_auto_inc_field_not_null= table->auto_increment_field_not_null;
  DBUG_ENTER("ha_partition::write_row");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /*
    If we have an auto_increment column and we are writing a changed row
    or a new row, then update the auto_increment value in the record.
    This must happen before computing the target partition, since the
    auto_increment column may be part of the partitioning expression.
  */
  if (have_auto_increment)
  {
    if (!table_share->next_number_keypart)
      update_next_auto_inc_val();
    error= update_auto_increment();

    /*
      If we have failed to set the auto-increment value for this row,
      it is highly likely that we will not be able to insert it into
      the correct partition. We must check and fail if necessary.
    */
    if (unlikely(error))
      goto exit;

    /*
      Don't allow generation of an auto_increment value by the partition's
      handler.  If a partition's handler would change the value, then it
      might not match the partition any longer.
      This can occur with 'SET INSERT_ID = 0; INSERT (NULL)',
      so allow this by adding 'MODE_NO_AUTO_VALUE_ON_ZERO' to sql_mode.
      The partition handler::next_insert_id must always be 0. Otherwise
      we need to forward release_auto_increment, or reset it for all
      partitions.
    */
    if (table->next_number_field->val_int() == 0)
    {
      table->auto_increment_field_not_null= TRUE;
      thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO;
    }
  }
  /* Temporarily mark all columns readable while computing the partition id. */
  old_map= dbug_tmp_use_all_columns(table, &table->read_set);
  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
  dbug_tmp_restore_column_map(&table->read_set, old_map);
  if (unlikely(error))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  /* The row must land in a partition this statement actually locked. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
  {
    DBUG_PRINT("info", ("Write to non-locked partition %u (func_value: %ld)",
                        part_id, (long) func_value));
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }
  m_last_part= part_id;
  DBUG_PRINT("info", ("Insert in partition %u", part_id));

  /* Lazily begin bulk insert on this partition only (see below). */
  start_part_bulk_insert(thd, part_id);

  DBUG_ASSERT(!m_file[part_id]->row_logging);
  error= m_file[part_id]->ha_write_row(buf);
  if (!error && have_auto_increment && !table->s->next_number_keypart)
    set_auto_increment_if_higher(table->next_number_field);

exit:
  table->auto_increment_field_not_null= saved_auto_inc_field_not_null;
  DBUG_RETURN(error);
}
4474
4475
4476 /*
4477 Update an existing row
4478
4479 SYNOPSIS
4480 update_row()
4481 old_data Old record in MySQL Row Format
4482 new_data New record in MySQL Row Format
4483
4484 RETURN VALUE
4485 >0 Error code
4486 0 Success
4487
4488 DESCRIPTION
4489 Yes, update_row() does what you expect, it updates a row. old_data will
4490 have the previous row record in it, while new_data will have the newest
4491 data in it.
4492 Keep in mind that the server can do updates based on ordering if an
4493 ORDER BY clause was used. Consecutive ordering is not guaranteed.
4494
4495 Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
4496 new_data is always record[0]
4497 old_data is always record[1]
4498 */
4499
int ha_partition::update_row(const uchar *old_data, const uchar *new_data)
{
  THD *thd= ha_thd();
  uint32 new_part_id, old_part_id= m_last_part;
  int error= 0;
  DBUG_ENTER("ha_partition::update_row");
  m_err_rec= NULL;

  // Need to read partition-related columns, to locate the row's partition:
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  /*
    The protocol for updating a row is:
    1) position the handler (cursor) on the row to be updated,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call update_row with both old and new full records as arguments.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of correcting m_last_part, to make the user aware of the
    problem!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.
  */
  error= get_part_for_buf(old_data, m_rec0, m_part_info, &old_part_id);
  DBUG_ASSERT(!error);
  DBUG_ASSERT(old_part_id == m_last_part);
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), old_part_id));
#endif

  /* Compute which partition the updated row belongs to. */
  if (unlikely((error= get_part_for_buf(new_data, m_rec0, m_part_info,
                                        &new_part_id))))
    goto exit;
  if (unlikely(!bitmap_is_set(&(m_part_info->lock_partitions), new_part_id)))
  {
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }


  m_last_part= new_part_id;
  start_part_bulk_insert(thd, new_part_id);
  DBUG_ASSERT(!m_file[new_part_id]->row_logging);
  if (new_part_id == old_part_id)
  {
    /* Row stays in its partition: a plain in-place update. */
    DBUG_PRINT("info", ("Update in partition %u", (uint) new_part_id));
    error= m_file[new_part_id]->ha_update_row(old_data, new_data);
    goto exit;
  }
  else
  {
    /*
      Row moves between partitions: implemented as write into the new
      partition followed by delete from the old one.
    */
    Field *saved_next_number_field= table->next_number_field;
    /*
      Don't allow generation of auto_increment value for update.
      table->next_number_field is never set on UPDATE.
      But is set for INSERT ... ON DUPLICATE KEY UPDATE,
      and since update_row() does not generate or update an auto_inc value,
      we cannot have next_number_field set when moving a row
      to another partition with write_row(), since that could
      generate/update the auto_inc value.
      This gives the same behavior for partitioned vs non partitioned tables.
    */
    table->next_number_field= NULL;
    DBUG_PRINT("info", ("Update from partition %u to partition %u",
                        (uint) old_part_id, (uint) new_part_id));
    error= m_file[new_part_id]->ha_write_row((uchar*) new_data);
    table->next_number_field= saved_next_number_field;
    if (unlikely(error))
      goto exit;

    error= m_file[old_part_id]->ha_delete_row(old_data);
    if (unlikely(error))
      goto exit;
  }

exit:
  /*
    if updating an auto_increment column, update
    part_share->next_auto_inc_val if needed.
    (not to be used if auto_increment on secondary field in a multi-column
    index)
    mysql_update does not set table->next_number_field, so we use
    table->found_next_number_field instead.
    Also checking that the field is marked in the write set.
  */
  if (table->found_next_number_field &&
      new_data == table->record[0] &&
      !table->s->next_number_keypart &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index))
  {
    update_next_auto_inc_val();
    if (part_share->auto_inc_initialized)
      set_auto_increment_if_higher(table->found_next_number_field);
  }
  DBUG_RETURN(error);
}
4602
4603
4604 /*
4605 Remove an existing row
4606
4607 SYNOPSIS
4608 delete_row
4609 buf Deleted row in MySQL Row Format
4610
4611 RETURN VALUE
4612 >0 Error Code
4613 0 Success
4614
4615 DESCRIPTION
4616 This will delete a row. buf will contain a copy of the row to be deleted.
4617 The server will call this right after the current row has been read
4618 (from either a previous rnd_xxx() or index_xxx() call).
4619 If you keep a pointer to the last row or can access a primary key it will
4620 make doing the deletion quite a bit easier.
4621 Keep in mind that the server does no guarantee consecutive deletions.
4622 ORDER BY clauses can be used.
4623
4624 Called in sql_acl.cc and sql_udf.cc to manage internal table information.
4625 Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
4626 it is used for removing duplicates while in insert it is used for REPLACE
4627 calls.
4628
4629 buf is either record[0] or record[1]
4630 */
4631
int ha_partition::delete_row(const uchar *buf)
{
  int error;
  DBUG_ENTER("ha_partition::delete_row");
  m_err_rec= NULL;

  /* Partitioning columns must be readable to locate the row's partition. */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  THD* thd = ha_thd();
  /*
    The protocol for deleting a row is:
    1) position the handler (cursor) on the row to be deleted,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call delete_row with the full record as argument.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of forwarding the delete to the correct (m_last_part)
    partition!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.

    For partitions by system_time, get_part_for_buf() is always either current
    or last historical partition, but DELETE HISTORY can delete from any
    historical partition. So, skip the check in this case.
  */
  if (!thd->lex->vers_conditions.delete_history)
  {
    uint32 part_id;
    error= get_part_for_buf(buf, m_rec0, m_part_info, &part_id);
    DBUG_ASSERT(!error);
    DBUG_ASSERT(part_id == m_last_part);
  }
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), m_last_part));
#endif

  /* Release builds: still refuse to delete from a non-locked partition. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), m_last_part))
    DBUG_RETURN(HA_ERR_NOT_IN_LOCK_PARTITIONS);

  DBUG_ASSERT(!m_file[m_last_part]->row_logging);
  error= m_file[m_last_part]->ha_delete_row(buf);
  DBUG_RETURN(error);
}
4681
4682
4683 /*
4684 Delete all rows in a table
4685
4686 SYNOPSIS
4687 delete_all_rows()
4688
4689 RETURN VALUE
4690 >0 Error Code
4691 0 Success
4692
4693 DESCRIPTION
4694 Used to delete all rows in a table. Both for cases of truncate and
4695 for cases where the optimizer realizes that all rows will be
4696 removed as a result of a SQL statement.
4697
4698 Called from item_sum.cc by Item_func_group_concat::clear(),
4699 Item_sum_count::clear(), and Item_func_group_concat::clear().
4700 Called from sql_delete.cc by mysql_delete().
4701 Called from sql_select.cc by JOIN::reset().
4702 Called from sql_union.cc by st_select_lex_unit::exec().
4703 */
4704
delete_all_rows()4705 int ha_partition::delete_all_rows()
4706 {
4707 int error;
4708 uint i;
4709 DBUG_ENTER("ha_partition::delete_all_rows");
4710
4711 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4712 i < m_tot_parts;
4713 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4714 {
4715 /* Can be pruned, like DELETE FROM t PARTITION (pX) */
4716 if (unlikely((error= m_file[i]->ha_delete_all_rows())))
4717 DBUG_RETURN(error);
4718 }
4719 DBUG_RETURN(0);
4720 }
4721
4722
4723 /**
4724 Manually truncate the table.
4725
4726 @retval 0 Success.
4727 @retval > 0 Error code.
4728 */
4729
truncate()4730 int ha_partition::truncate()
4731 {
4732 int error;
4733 handler **file;
4734 DBUG_ENTER("ha_partition::truncate");
4735
4736 /*
4737 TRUNCATE also means resetting auto_increment. Hence, reset
4738 it so that it will be initialized again at the next use.
4739 */
4740 lock_auto_increment();
4741 part_share->next_auto_inc_val= 0;
4742 part_share->auto_inc_initialized= false;
4743 unlock_auto_increment();
4744
4745 file= m_file;
4746 do
4747 {
4748 if (unlikely((error= (*file)->ha_truncate())))
4749 DBUG_RETURN(error);
4750 } while (*(++file));
4751 DBUG_RETURN(0);
4752 }
4753
4754
4755 /**
4756 Truncate a set of specific partitions.
4757
4758 @remark Auto increment value will be truncated in that partition as well!
4759
4760 ALTER TABLE t TRUNCATE PARTITION ...
4761 */
4762
int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt)
{
  int error= 0;
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  DBUG_ENTER("ha_partition::truncate_partition");

  /* Only binlog when it starts any call to the partitions handlers */
  *binlog_stmt= false;

  /* Mark the named partitions PART_ADMIN; fail if none matched. */
  if (set_part_state(alter_info, m_part_info, PART_ADMIN))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  /*
    TRUNCATE also means resetting auto_increment. Hence, reset
    it so that it will be initialized again at the next use.
  */
  lock_auto_increment();
  part_share->next_auto_inc_val= 0;
  part_share->auto_inc_initialized= FALSE;
  unlock_auto_increment();

  *binlog_stmt= true;

  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element>
          subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          /* m_file index of subpartition j within partition i. */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("truncate subpartition %u (%s)",
                              part, sub_elem->partition_name));
          if (unlikely((error= m_file[part]->ha_truncate())))
            break;
          sub_elem->part_state= PART_NORMAL;
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("truncate partition %u (%s)", i,
                            part_elem->partition_name));
        error= m_file[i]->ha_truncate();
      }
      /*
        NOTE(review): part_state is reset to PART_NORMAL even when a
        (sub)partition truncate above failed -- confirm this is intended.
      */
      part_elem->part_state= PART_NORMAL;
    }
  } while (!error && (++i < num_parts));
  DBUG_RETURN(error);
}
4822
4823
4824 /*
4825 Start a large batch of insert rows
4826
4827 SYNOPSIS
4828 start_bulk_insert()
4829 rows Number of rows to insert
4830 flags Flags to control index creation
4831
4832 RETURN VALUE
4833 NONE
4834
4835 DESCRIPTION
4836 rows == 0 means we will probably insert many rows
4837 */
void ha_partition::start_bulk_insert(ha_rows rows, uint flags)
{
  DBUG_ENTER("ha_partition::start_bulk_insert");

  /*
    Do not start bulk insert on any partition yet; it is started lazily
    per partition in start_part_bulk_insert() when a row actually
    arrives there.
  */
  m_bulk_inserted_rows= 0;
  bitmap_clear_all(&m_bulk_insert_started);
  /* use the last bit for marking if bulk_insert_started was called */
  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
  DBUG_VOID_RETURN;
}
4848
4849
4850 /*
4851 Check if start_bulk_insert has been called for this partition,
4852 if not, call it and mark it called
4853 */
void ha_partition::start_part_bulk_insert(THD *thd, uint part_id)
{
  long old_buffer_size;
  /*
    Bit m_tot_parts set   => start_bulk_insert() was called for this stmt;
    bit part_id not set   => this partition has not started its bulk insert.
  */
  if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
      bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
  {
    DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
    old_buffer_size= thd->variables.read_buff_size;
    /* Update read_buffer_size for this partition */
    thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
    m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
    bitmap_set_bit(&m_bulk_insert_started, part_id);
    /* Restore the session value once the partition has picked it up. */
    thd->variables.read_buff_size= old_buffer_size;
  }
  m_bulk_inserted_rows++;
}
4870
4871 /*
4872 Estimate the read buffer size for each partition.
4873 SYNOPSIS
4874 ha_partition::estimate_read_buffer_size()
4875 original_size read buffer size originally set for the server
4876 RETURN VALUE
4877 estimated buffer size.
4878 DESCRIPTION
4879 If the estimated number of rows to insert is less than 10 (but not 0)
4880 the new buffer size is same as original buffer size.
4881 In case of first partition of when partition function is monotonic
4882 new buffer size is same as the original buffer size.
    For the rest of the partitions a total buffer of 10*original_size is
    divided equally if the number of partitions is more than 10; otherwise
    each partition will be allowed to use the original buffer size.
4886 */
long ha_partition::estimate_read_buffer_size(long original_size)
{
  /*
    If number of rows to insert is less than 10, but not 0,
    return original buffer size.
  */
  if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
    return (original_size);
  /*
    If first insert/partition and monotonic partition function,
    allow using buffer size originally set.
  */
  if (!m_bulk_inserted_rows &&
      m_part_func_monotonicity_info != NON_MONOTONIC &&
      m_tot_parts > 1)
    return original_size;
  /*
    Allow total buffer used in all partition to go up to 10*read_buffer_size.
    11*read_buffer_size in case of monotonic partition function.
  */

  /* Fewer than 10 partitions: each may use the full original size. */
  if (m_tot_parts < 10)
    return original_size;
  return (original_size * 10 / m_tot_parts);
}
4912
4913 /*
4914 Try to predict the number of inserts into this partition.
4915
4916 If less than 10 rows (including 0 which means Unknown)
4917 just give that as a guess
4918 If monotonic partitioning function was used
4919 guess that 50 % of the inserts goes to the first partition
4920 For all other cases, guess on equal distribution between the partitions
4921 */
guess_bulk_insert_rows()4922 ha_rows ha_partition::guess_bulk_insert_rows()
4923 {
4924 DBUG_ENTER("guess_bulk_insert_rows");
4925
4926 if (estimation_rows_to_insert < 10)
4927 DBUG_RETURN(estimation_rows_to_insert);
4928
4929 /* If first insert/partition and monotonic partition function, guess 50%. */
4930 if (!m_bulk_inserted_rows &&
4931 m_part_func_monotonicity_info != NON_MONOTONIC &&
4932 m_tot_parts > 1)
4933 DBUG_RETURN(estimation_rows_to_insert / 2);
4934
4935 /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
4936 if (m_bulk_inserted_rows < estimation_rows_to_insert)
4937 DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
4938 / m_tot_parts) + 1);
4939 /* The estimation was wrong, must say 'Unknown' */
4940 DBUG_RETURN(0);
4941 }
4942
4943
/* Accumulate one partition handler's copy statistics into our totals. */
void ha_partition::sum_copy_info(handler *file)
{
  copy_info.records+= file->copy_info.records;
  copy_info.touched+= file->copy_info.touched;
  copy_info.copied+= file->copy_info.copied;
  copy_info.deleted+= file->copy_info.deleted;
  copy_info.updated+= file->copy_info.updated;
}
4952
4953
sum_copy_infos()4954 void ha_partition::sum_copy_infos()
4955 {
4956 handler **file_array;
4957 bzero(©_info, sizeof(copy_info));
4958 file_array= m_file;
4959 do
4960 {
4961 if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
4962 sum_copy_info(*file_array);
4963 } while (*(++file_array));
4964 }
4965
reset_copy_info()4966 void ha_partition::reset_copy_info()
4967 {
4968 handler **file_array;
4969 bzero(©_info, sizeof(copy_info));
4970 file_array= m_file;
4971 do
4972 {
4973 if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
4974 bzero(&(*file_array)->copy_info, sizeof(copy_info));
4975 } while (*(++file_array));
4976 }
4977
4978
4979
4980 /*
4981 Finish a large batch of insert rows
4982
4983 SYNOPSIS
4984 end_bulk_insert()
4985
4986 RETURN VALUE
4987 >0 Error code
4988 0 Success
4989
4990 Note: end_bulk_insert can be called without start_bulk_insert
4991 being called, see bug#44108.
4992
4993 */
4994
int ha_partition::end_bulk_insert()
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::end_bulk_insert");

  /*
    Bit m_tot_parts marks that start_bulk_insert() was called; without it
    there is nothing to finish (can happen, see bug#44108).
  */
  if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
    DBUG_RETURN(error);

  for (i= bitmap_get_first_set(&m_bulk_insert_started);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_bulk_insert_started, i))
  {
    int tmp;
    /* Finish all partitions even on failure; remember the latest error. */
    if ((tmp= m_file[i]->ha_end_bulk_insert()))
      error= tmp;
    sum_copy_info(m_file[i]);
  }
  bitmap_clear_all(&m_bulk_insert_started);
  DBUG_RETURN(error);
}
5016
5017
5018 /****************************************************************************
5019 MODULE full table scan
5020 ****************************************************************************/
5021 /*
5022 Initialize engine for random reads
5023
5024 SYNOPSIS
5025 ha_partition::rnd_init()
5026 scan 0 Initialize for random reads through rnd_pos()
5027 1 Initialize for random scan through rnd_next()
5028
5029 RETURN VALUE
5030 >0 Error code
5031 0 Success
5032
5033 DESCRIPTION
5034 rnd_init() is called when the server wants the storage engine to do a
5035 table scan or when the server wants to access data through rnd_pos.
5036
5037 When scan is used we will scan one handler partition at a time.
5038 When preparing for rnd_pos we will init all handler partitions.
5039 No extra cache handling is needed when scanning is not performed.
5040
5041 Before initialising we will call rnd_end to ensure that we clean up from
5042 any previous incarnation of a table scan.
5043 Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
5044 sql_table.cc, and sql_update.cc.
5045 */
5046
int ha_partition::rnd_init(bool scan)
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::rnd_init");

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
    {
      DBUG_PRINT("info", ("partition set full bitmap"));
      bitmap_set_all(table->read_set);
    }
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      DBUG_PRINT("info", ("partition set part_field bitmap"));
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: %p",
                      m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part: %u", (uint) part_id));

  /* All partitions pruned away: nothing to init, succeed trivially. */
  if (part_id == MY_BIT_NONE)
  {
    error= 0;
    goto err1;
  }

  /*
    We have a partition and we are scanning with rnd_next
    so we bump our cache
  */
  DBUG_PRINT("info", ("rnd_init on partition: %u", (uint) part_id));
  if (scan)
  {
    /*
      rnd_end() is needed for partitioning to reset internal data if scan
      is already in use
    */
    rnd_end();
    late_extra_cache(part_id);

    m_index_scan_type= partition_no_index_scan;
  }

  /* Init every read partition; on failure undo the ones already inited. */
  for (i= part_id;
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_rnd_init(scan))))
      goto err;
  }

  /* m_scan_value: 1 = table scan, 0 = rnd_pos access, 2 = error state. */
  m_scan_value= scan;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  m_rnd_init_and_first= TRUE;
  DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value));
  DBUG_RETURN(0);

err:
  if (scan)
    late_extra_no_cache(part_id);

  /* Call rnd_end for all previously inited partitions. */
  for (;
       part_id < i;
       part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id))
  {
    m_file[part_id]->ha_rnd_end();
  }
err1:
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
5146
5147
5148 /*
5149 End of a table scan
5150
5151 SYNOPSIS
5152 rnd_end()
5153
5154 RETURN VALUE
5155 >0 Error code
5156 0 Success
5157 */
5158
int ha_partition::rnd_end()
{
  DBUG_ENTER("ha_partition::rnd_end");
  /*
    m_scan_value is set by rnd_init(): 1 = table scan (rnd_next),
    0 = initialized for rnd_pos, 2 = no scan active / previous init
    failed.
  */
  switch (m_scan_value) {
  case 2:                                       // Error
    break;
  case 1:                                       // Table scan
    if (m_part_spec.start_part != NO_CURRENT_PART_ID)
      late_extra_no_cache(m_part_spec.start_part);
    /* fall through */
  case 0:
    uint i;
    /* End the scan in every partition that rnd_init() initialized. */
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      m_file[i]->ha_rnd_end();
    }
    break;
  }
  /* Mark scan as inactive so a repeated rnd_end() is a no-op. */
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(0);
}
5183
5184
5185 /*
5186 read next row during full table scan (scan in random row order)
5187
5188 SYNOPSIS
5189 rnd_next()
5190 buf buffer that should be filled with data
5191
5192 RETURN VALUE
5193 >0 Error code
5194 0 Success
5195
5196 DESCRIPTION
5197 This is called for each row of the table scan. When you run out of records
5198 you should return HA_ERR_END_OF_FILE.
5199 The Field structure for the table is the key to getting data into buf
5200 in a manner that will allow the server to understand it.
5201
5202 Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
5203 sql_table.cc, and sql_update.cc.
5204 */
5205
int ha_partition::rnd_next(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE, error;
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::rnd_next");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /* upper level will increment this once again at end of call */
  decrement_statistics(&SSV::ha_read_rnd_next_count);

  if (part_id == NO_CURRENT_PART_ID)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    goto end;
  }

  DBUG_ASSERT(m_scan_value == 1);

  if (m_rnd_init_and_first)
  {
    /* First rnd_next() after rnd_init(): run the pre-scan hook once. */
    m_rnd_init_and_first= FALSE;
    error= handle_pre_scan(FALSE, check_parallel_search());
    if (m_pre_calling || error)
      DBUG_RETURN(error);
  }

  file= m_file[part_id];

  while (TRUE)
  {
    result= file->ha_rnd_next(buf);
    if (!result)
    {
      /* Row found in the current partition. */
      m_last_part= part_id;
      DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
      m_part_spec.start_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ha_rnd_next returned failure
    */
    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;         // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    /* Shift to next partition */
    part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id);
    if (part_id >= m_tot_parts)
    {
      /* All used partitions exhausted. */
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_last_part= part_id;
    DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
    m_part_spec.start_part= part_id;
    file= m_file[part_id];
    late_extra_cache(part_id);
  }

end:
  DBUG_PRINT("exit", ("reset start_part"));
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  DBUG_RETURN(result);
}
5278
5279
5280 /*
5281 Save position of current row
5282
5283 SYNOPSIS
5284 position()
5285 record Current record in MySQL Row Format
5286
5287 RETURN VALUE
5288 NONE
5289
5290 DESCRIPTION
5291 position() is called after each call to rnd_next() if the data needs
5292 to be ordered. You can do something like the following to store
5293 the position:
5294 ha_store_ptr(ref, ref_length, current_position);
5295
5296 The server uses ref to store data. ref_length in the above case is
5297 the size needed to store current_position. ref is just a byte array
5298 that the server will maintain. If you are using offsets to mark rows, then
5299 current_position should be the offset. If it is a primary key like in
5300 BDB, then it needs to be a primary key.
5301
5302 Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
5303 */
5304
position(const uchar * record)5305 void ha_partition::position(const uchar *record)
5306 {
5307 handler *file= m_file[m_last_part];
5308 size_t pad_length;
5309 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
5310 DBUG_ENTER("ha_partition::position");
5311
5312 file->position(record);
5313 int2store(ref, m_last_part);
5314 memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, file->ref_length);
5315 pad_length= m_ref_length - PARTITION_BYTES_IN_POS - file->ref_length;
5316 if (pad_length)
5317 memset((ref + PARTITION_BYTES_IN_POS + file->ref_length), 0, pad_length);
5318
5319 DBUG_VOID_RETURN;
5320 }
5321
5322
5323 /*
5324 Read row using position
5325
5326 SYNOPSIS
5327 rnd_pos()
5328 out:buf Row read in MySQL Row Format
5329 position Position of read row
5330
5331 RETURN VALUE
5332 >0 Error code
5333 0 Success
5334
5335 DESCRIPTION
5336 This is like rnd_next, but you are given a position to use
5337 to determine the row. The position will be of the type that you stored in
5338 ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
5339 or position you saved when position() was called.
5340 Called from filesort.cc records.cc sql_insert.cc sql_select.cc
5341 sql_update.cc.
5342 */
5343
rnd_pos(uchar * buf,uchar * pos)5344 int ha_partition::rnd_pos(uchar * buf, uchar *pos)
5345 {
5346 uint part_id;
5347 handler *file;
5348 DBUG_ENTER("ha_partition::rnd_pos");
5349 decrement_statistics(&SSV::ha_read_rnd_count);
5350
5351 part_id= uint2korr((const uchar *) pos);
5352 DBUG_ASSERT(part_id < m_tot_parts);
5353 file= m_file[part_id];
5354 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
5355 m_last_part= part_id;
5356 DBUG_RETURN(file->ha_rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
5357 }
5358
5359
5360 /*
5361 Read row using position using given record to find
5362
5363 SYNOPSIS
5364 rnd_pos_by_record()
5365 record Current record in MySQL Row Format
5366
5367 RETURN VALUE
5368 >0 Error code
5369 0 Success
5370
5371 DESCRIPTION
5372 this works as position()+rnd_pos() functions, but does some extra work,
5373 calculating m_last_part - the partition to where the 'record'
5374 should go.
5375
5376 called from replication (log_event.cc)
5377 */
5378
rnd_pos_by_record(uchar * record)5379 int ha_partition::rnd_pos_by_record(uchar *record)
5380 {
5381 DBUG_ENTER("ha_partition::rnd_pos_by_record");
5382
5383 if (unlikely(get_part_for_buf(record, m_rec0, m_part_info, &m_last_part)))
5384 DBUG_RETURN(1);
5385
5386 int err= m_file[m_last_part]->rnd_pos_by_record(record);
5387 DBUG_RETURN(err);
5388 }
5389
5390
5391 /****************************************************************************
5392 MODULE index scan
5393 ****************************************************************************/
5394 /*
5395 Positions an index cursor to the index specified in the handle. Fetches the
5396 row if available. If the key value is null, begin at the first key of the
5397 index.
5398
5399 There are loads of optimisations possible here for the partition handler.
5400 The same optimisations can also be checked for full table scan although
5401 only through conditions and not from index ranges.
5402 Phase one optimisations:
5403 Check if the fields of the partition function are bound. If so only use
5404 the single partition it becomes bound to.
5405 Phase two optimisations:
5406 If it can be deducted through range or list partitioning that only a
5407 subset of the partitions are used, then only use those partitions.
5408 */
5409
5410
5411 /**
5412 Setup the ordered record buffer and the priority queue.
5413 */
5414
bool ha_partition::init_record_priority_queue()
{
  DBUG_ENTER("ha_partition::init_record_priority_queue");
  DBUG_ASSERT(!m_ordered_rec_buffer);
  /*
    Initialize the ordered record buffer.
  */
  size_t alloc_len;
  uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);

  if (used_parts == 0) /* Do nothing since no records expected. */
    DBUG_RETURN(false);

  /* Allocate record buffer for each used partition. */
  m_priority_queue_rec_len= m_rec_length + ORDERED_REC_OFFSET;
  if (!m_using_extended_keys)
    m_priority_queue_rec_len+= get_open_file_sample()->ref_length;
  alloc_len= used_parts * m_priority_queue_rec_len;
  /* Allocate a key for temporary use when setting up the scan. */
  alloc_len+= table_share->max_key_length;
  /* One Ordered_blob_storage object per blob field per used partition. */
  Ordered_blob_storage **blob_storage;
  Ordered_blob_storage *objs;
  const size_t n_all= used_parts * table->s->blob_fields;

  /* Record slots, blob-storage pointer array and the objects themselves
     all live in one allocation, freed via m_ordered_rec_buffer. */
  if (!my_multi_malloc(key_memory_partition_sort_buffer, MYF(MY_WME),
                       &m_ordered_rec_buffer, alloc_len,
                       &blob_storage, n_all * sizeof *blob_storage,
                       &objs, n_all * sizeof *objs, NULL))
    DBUG_RETURN(true);

  /*
    We set-up one record per partition and each record has 2 bytes in
    front where the partition id is written. This is used by ordered
    index_read.
    We also set-up a reference to the first record for temporary use in
    setting up the scan.
  */
  char *ptr= (char*) m_ordered_rec_buffer;
  uint i;
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    DBUG_PRINT("info", ("init rec-buf for part %u", i));
    if (table->s->blob_fields)
    {
      /* Slot begins with a pointer to this partition's blob storages,
         placement-new'ed into the shared allocation. */
      for (uint j= 0; j < table->s->blob_fields; ++j, ++objs)
        blob_storage[j]= new (objs) Ordered_blob_storage;
      *((Ordered_blob_storage ***) ptr)= blob_storage;
      blob_storage+= table->s->blob_fields;
    }
    /* Partition id is stored right after the pointer-sized slot head. */
    int2store(ptr + sizeof(String **), i);
    ptr+= m_priority_queue_rec_len;
  }
  /* The temporary key buffer follows the last per-partition slot. */
  m_start_key.key= (const uchar*)ptr;

  /* Initialize priority queue, initialized to reading forward. */
  int (*cmp_func)(void *, uchar *, uchar *);
  void *cmp_arg= (void*) this;
  if (!m_using_extended_keys && !(table_flags() & HA_SLOW_CMP_REF))
    cmp_func= cmp_key_rowid_part_id;
  else
    cmp_func= cmp_key_part_id;
  DBUG_PRINT("info", ("partition queue_init(1) used_parts: %u", used_parts));
  if (init_queue(&m_queue, used_parts, ORDERED_PART_NUM_OFFSET,
                 0, cmp_func, cmp_arg, 0, 0))
  {
    /* Queue init failed: release the buffer allocated above. */
    my_free(m_ordered_rec_buffer);
    m_ordered_rec_buffer= NULL;
    DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}
5488
5489
5490 /**
5491 Destroy the ordered record buffer and the priority queue.
5492 */
5493
void ha_partition::destroy_record_priority_queue()
{
  DBUG_ENTER("ha_partition::destroy_record_priority_queue");
  if (m_ordered_rec_buffer)
  {
    if (table->s->blob_fields)
    {
      /*
        Free the String buffers held by each partition's blob storage.
        The storage objects themselves live inside m_ordered_rec_buffer
        (see init_record_priority_queue) and are freed with it below.
      */
      char *ptr= (char *) m_ordered_rec_buffer;
      for (uint i= bitmap_get_first_set(&m_part_info->read_partitions);
           i < m_tot_parts;
           i= bitmap_get_next_set(&m_part_info->read_partitions, i))
      {
        Ordered_blob_storage **blob_storage= *((Ordered_blob_storage ***) ptr);
        for (uint b= 0; b < table->s->blob_fields; ++b)
          blob_storage[b]->blob.free();
        ptr+= m_priority_queue_rec_len;
      }
    }

    delete_queue(&m_queue);
    my_free(m_ordered_rec_buffer);
    m_ordered_rec_buffer= NULL;
  }
  DBUG_VOID_RETURN;
}
5519
5520
5521 /*
5522 Initialize handler before start of index scan
5523
5524 SYNOPSIS
5525 index_init()
5526 inx Index number
5527 sorted Is rows to be returned in sorted order
5528
5529 RETURN VALUE
5530 >0 Error code
5531 0 Success
5532
5533 DESCRIPTION
5534 index_init is always called before starting index scans (except when
5535 starting through index_read_idx and using read_range variants).
5536 */
5537
int ha_partition::index_init(uint inx, bool sorted)
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::index_init");
  DBUG_PRINT("enter", ("partition this: %p inx: %u sorted: %u", this, inx, sorted));

  active_index= inx;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_start_key.length= 0;
  m_ordered= sorted;
  m_ordered_scan_ongoing= FALSE;
  m_curr_key_info[0]= table->key_info+inx;
  if (pk_is_clustering_key(table->s->primary_key))
  {
    /*
      if PK is clustered, then the key cmp must use the pk to
      differentiate between equal key in given index.
    */
    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
    m_curr_key_info[1]= table->key_info+table->s->primary_key;
    m_curr_key_info[2]= NULL;
    m_using_extended_keys= TRUE;
  }
  else
  {
    m_curr_key_info[1]= NULL;
    m_using_extended_keys= FALSE;
  }

  if (init_record_priority_queue())
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);

  /*
    Some handlers only read fields as specified by the bitmap for the
    read set. For partitioned handlers we always require that the
    fields of the partition functions are read such that we can
    calculate the partition id to place updated and deleted records.
    But this is required for operations that may need to change data only.
  */
  if (get_lock_type() == F_WRLCK)
  {
    DBUG_PRINT("info", ("partition set part_field bitmap"));
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }
  if (sorted)
  {
    /*
      An ordered scan is requested. We must make sure all fields of the
      used index are in the read set, as partitioning requires them for
      sorting (see ha_partition::handle_ordered_index_scan).

      The SQL layer may request an ordered index scan without having index
      fields in the read set when
      - it needs to do an ordered scan over an index prefix.
      - it evaluates ORDER BY with SELECT COUNT(*) FROM t1.

      TODO: handle COUNT(*) queries via unordered scan.
    */
    KEY **key_info= m_curr_key_info;
    do
    {
      for (i= 0; i < (*key_info)->user_defined_key_parts; i++)
        (*key_info)->key_part[i].field->register_field_in_read_map();
    } while (*(++key_info));
  }
  /* Initialize the index in every partition marked for reading. */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_index_init(inx, sorted))))
      goto err;

    DBUG_EXECUTE_IF("ha_partition_fail_index_init", {
      i++;
      error= HA_ERR_NO_PARTITION_FOUND;
      goto err;
    });
  }
err:
  /* Note: the success path also falls through here with error == 0. */
  if (unlikely(error))
  {
    /* End the previously initialized indexes. */
    uint j;
    for (j= bitmap_get_first_set(&m_part_info->read_partitions);
         j < i;
         j= bitmap_get_next_set(&m_part_info->read_partitions, j))
    {
      (void) m_file[j]->ha_index_end();
    }
    destroy_record_priority_queue();
  }
  DBUG_RETURN(error);
}
5632
5633
5634 /*
5635 End of index scan
5636
5637 SYNOPSIS
5638 index_end()
5639
5640 RETURN VALUE
5641 >0 Error code
5642 0 Success
5643
5644 DESCRIPTION
5645 index_end is called at the end of an index scan to clean up any
5646 things needed to clean up.
5647 */
5648
index_end()5649 int ha_partition::index_end()
5650 {
5651 int error= 0;
5652 handler **file;
5653 DBUG_ENTER("ha_partition::index_end");
5654
5655 active_index= MAX_KEY;
5656 m_part_spec.start_part= NO_CURRENT_PART_ID;
5657 file= m_file;
5658 do
5659 {
5660 if ((*file)->inited == INDEX)
5661 {
5662 int tmp;
5663 if ((tmp= (*file)->ha_index_end()))
5664 error= tmp;
5665 }
5666 else if ((*file)->inited == RND)
5667 {
5668 // Possible due to MRR
5669 int tmp;
5670 if ((tmp= (*file)->ha_rnd_end()))
5671 error= tmp;
5672 }
5673 } while (*(++file));
5674 destroy_record_priority_queue();
5675 DBUG_RETURN(error);
5676 }
5677
5678
5679 /*
5680 Read one record in an index scan and start an index scan
5681
5682 SYNOPSIS
5683 index_read_map()
5684 buf Read row in MySQL Row Format
5685 key Key parts in consecutive order
5686 keypart_map Which part of key is used
5687 find_flag What type of key condition is used
5688
5689 RETURN VALUE
5690 >0 Error code
5691 0 Success
5692
5693 DESCRIPTION
5694 index_read_map starts a new index scan using a start key. The MySQL Server
5695 will check the end key on its own. Thus to function properly the
5696 partitioned handler need to ensure that it delivers records in the sort
5697 order of the MySQL Server.
5698 index_read_map can be restarted without calling index_end on the previous
5699 index scan and without calling index_init. In this case the index_read_map
5700 is on the same index as the previous index_scan. This is particularly
    used in conjunction with multi read ranges.
5702 */
5703
index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)5704 int ha_partition::index_read_map(uchar *buf, const uchar *key,
5705 key_part_map keypart_map,
5706 enum ha_rkey_function find_flag)
5707 {
5708 DBUG_ENTER("ha_partition::index_read_map");
5709 decrement_statistics(&SSV::ha_read_key_count);
5710 end_range= 0;
5711 m_index_scan_type= partition_index_read;
5712 m_start_key.key= key;
5713 m_start_key.keypart_map= keypart_map;
5714 m_start_key.flag= find_flag;
5715 DBUG_RETURN(common_index_read(buf, TRUE));
5716 }
5717
5718
5719 /* Compare two part_no partition numbers */
cmp_part_ids(uchar * ref1,uchar * ref2)5720 static int cmp_part_ids(uchar *ref1, uchar *ref2)
5721 {
5722 uint32 diff2= uint2korr(ref2);
5723 uint32 diff1= uint2korr(ref1);
5724 if (diff2 > diff1)
5725 return -1;
5726 if (diff2 < diff1)
5727 return 1;
5728 return 0;
5729 }
5730
5731
5732 /*
5733 @brief
5734 Provide ordering by (key_value, part_no).
5735 */
5736
cmp_key_part_id(void * ptr,uchar * ref1,uchar * ref2)5737 extern "C" int cmp_key_part_id(void *ptr, uchar *ref1, uchar *ref2)
5738 {
5739 ha_partition *file= (ha_partition*)ptr;
5740 if (int res= key_rec_cmp(file->m_curr_key_info,
5741 ref1 + PARTITION_BYTES_IN_POS,
5742 ref2 + PARTITION_BYTES_IN_POS))
5743 return res;
5744 return cmp_part_ids(ref1, ref2);
5745 }
5746
5747 /*
5748 @brief
    Provide ordering by (key_value, underlying_table_rowid, part_no).
5750 */
cmp_key_rowid_part_id(void * ptr,uchar * ref1,uchar * ref2)5751 extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2)
5752 {
5753 ha_partition *file= (ha_partition*)ptr;
5754 int res;
5755
5756 if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS,
5757 ref2 + PARTITION_BYTES_IN_POS)))
5758 {
5759 return res;
5760 }
5761 if ((res= file->get_open_file_sample()->cmp_ref(ref1 +
5762 PARTITION_BYTES_IN_POS + file->m_rec_length,
5763 ref2 + PARTITION_BYTES_IN_POS + file->m_rec_length)))
5764 {
5765 return res;
5766 }
5767 return cmp_part_ids(ref1, ref2);
5768 }
5769
5770
5771 /**
5772 Common routine for a number of index_read variants
5773
5774 @param buf Buffer where the record should be returned.
5775 @param have_start_key TRUE <=> the left endpoint is available, i.e.
5776 we're in index_read call or in read_range_first
5777 call and the range has left endpoint.
5778 FALSE <=> there is no left endpoint (we're in
5779 read_range_first() call and the range has no left
5780 endpoint).
5781
5782 @return Operation status
5783 @retval 0 OK
5784 @retval HA_ERR_END_OF_FILE Whole index scanned, without finding the record.
5785 @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
5786 @retval other error code.
5787
5788 @details
5789 Start scanning the range (when invoked from read_range_first()) or doing
5790 an index lookup (when invoked from index_read_XXX):
5791 - If possible, perform partition selection
5792 - Find the set of partitions we're going to use
5793 - Depending on whether we need ordering:
5794 NO: Get the first record from first used partition (see
5795 handle_unordered_scan_next_partition)
5796 YES: Fill the priority queue and get the record that is the first in
5797 the ordering
5798 */
5799
int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
  int error;
  uint UNINIT_VAR(key_len); /* used if have_start_key==TRUE */
  bool reverse_order= FALSE;
  DBUG_ENTER("ha_partition::common_index_read");

  DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u",
                      m_ordered, m_ordered_scan_ongoing));

  if (have_start_key)
  {
    m_start_key.length= key_len= calculate_key_len(table, active_index,
                                                   m_start_key.key,
                                                   m_start_key.keypart_map);
    DBUG_PRINT("info", ("have_start_key map %lu find_flag %u len %u",
                        m_start_key.keypart_map, m_start_key.flag, key_len));
    DBUG_ASSERT(key_len);
  }
  /* Set up the set of partitions to scan (may prune to a subset). */
  if (unlikely((error= partition_scan_set_up(buf, have_start_key))))
  {
    DBUG_RETURN(error);
  }

  /* Backwards-reading start flags require an ordered (merged) scan. */
  if (have_start_key &&
      (m_start_key.flag == HA_READ_PREFIX_LAST ||
       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
       m_start_key.flag == HA_READ_BEFORE_KEY))
  {
    reverse_order= TRUE;
    m_ordered_scan_ongoing= TRUE;
  }
  DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
                      m_ordered, m_ordered_scan_ongoing, have_start_key));
  if (!m_ordered_scan_ongoing)
  {
    /*
      We use unordered index scan when read_range is used and flag
      is set to not use ordered.
      We also use an unordered index scan when the number of partitions to
      scan is only one.
      The unordered index scan will use the partition set created.
    */
    DBUG_PRINT("info", ("doing unordered scan"));
    error= handle_pre_scan(FALSE, FALSE);
    if (likely(!error))
      error= handle_unordered_scan_next_partition(buf);
  }
  else
  {
    /*
      In all other cases we will use the ordered index scan. This will use
      the partition set created by the get_partition_set method.
    */
    error= handle_ordered_index_scan(buf, reverse_order);
  }
  DBUG_RETURN(error);
}
5858
5859
5860 /*
5861 Start an index scan from leftmost record and return first record
5862
5863 SYNOPSIS
5864 index_first()
5865 buf Read row in MySQL Row Format
5866
5867 RETURN VALUE
5868 >0 Error code
5869 0 Success
5870
5871 DESCRIPTION
5872 index_first() asks for the first key in the index.
5873 This is similar to index_read except that there is no start key since
5874 the scan starts from the leftmost entry and proceeds forward with
5875 index_next.
5876
5877 Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5878 and sql_select.cc.
5879 */
5880
index_first(uchar * buf)5881 int ha_partition::index_first(uchar * buf)
5882 {
5883 DBUG_ENTER("ha_partition::index_first");
5884 decrement_statistics(&SSV::ha_read_first_count);
5885
5886 end_range= 0;
5887 m_index_scan_type= partition_index_first;
5888 DBUG_RETURN(common_first_last(buf));
5889 }
5890
5891
5892 /*
5893 Start an index scan from rightmost record and return first record
5894
5895 SYNOPSIS
5896 index_last()
5897 buf Read row in MySQL Row Format
5898
5899 RETURN VALUE
5900 >0 Error code
5901 0 Success
5902
5903 DESCRIPTION
5904 index_last() asks for the last key in the index.
5905 This is similar to index_read except that there is no start key since
5906 the scan starts from the rightmost entry and proceeds forward with
5907 index_prev.
5908
5909 Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5910 and sql_select.cc.
5911 */
5912
index_last(uchar * buf)5913 int ha_partition::index_last(uchar * buf)
5914 {
5915 DBUG_ENTER("ha_partition::index_last");
5916 decrement_statistics(&SSV::ha_read_last_count);
5917
5918 m_index_scan_type= partition_index_last;
5919 DBUG_RETURN(common_first_last(buf));
5920 }
5921
5922 /*
5923 Common routine for index_first/index_last
5924
5925 SYNOPSIS
5926 ha_partition::common_first_last()
5927
5928 see index_first for rest
5929 */
5930
common_first_last(uchar * buf)5931 int ha_partition::common_first_last(uchar *buf)
5932 {
5933 int error;
5934
5935 if (unlikely((error= partition_scan_set_up(buf, FALSE))))
5936 return error;
5937 if (!m_ordered_scan_ongoing &&
5938 m_index_scan_type != partition_index_last)
5939 {
5940 if (unlikely((error= handle_pre_scan(FALSE, check_parallel_search()))))
5941 return error;
5942 return handle_unordered_scan_next_partition(buf);
5943 }
5944 return handle_ordered_index_scan(buf, FALSE);
5945 }
5946
5947
5948 /*
5949 Optimization of the default implementation to take advantage of dynamic
5950 partition pruning.
5951 */
int ha_partition::index_read_idx_map(uchar *buf, uint index,
                                     const uchar *key,
                                     key_part_map keypart_map,
                                     enum ha_rkey_function find_flag)
{
  int error= HA_ERR_KEY_NOT_FOUND;
  DBUG_ENTER("ha_partition::index_read_idx_map");
  decrement_statistics(&SSV::ha_read_key_count);

  if (find_flag == HA_READ_KEY_EXACT)
  {
    uint part;
    m_start_key.key= key;
    m_start_key.keypart_map= keypart_map;
    m_start_key.flag= find_flag;
    m_start_key.length= calculate_key_len(table, index, m_start_key.key,
                                          m_start_key.keypart_map);

    /* Prune to the set of partitions that can contain this exact key. */
    get_partition_set(table, buf, index, &m_start_key, &m_part_spec);

    /* The start part must be marked as used. */
    DBUG_ASSERT(m_part_spec.start_part > m_part_spec.end_part ||
                bitmap_is_set(&(m_part_info->read_partitions),
                              m_part_spec.start_part));

    /* Probe each candidate partition until a definite result is found. */
    for (part= m_part_spec.start_part;
         part <= m_part_spec.end_part;
         part= bitmap_get_next_set(&m_part_info->read_partitions, part))
    {
      error= m_file[part]->ha_index_read_idx_map(buf, index, key,
                                                 keypart_map, find_flag);
      /* Stop on a found row or a real error; "not found" tries the next. */
      if (likely(error != HA_ERR_KEY_NOT_FOUND &&
                 error != HA_ERR_END_OF_FILE))
        break;
    }
    if (part <= m_part_spec.end_part)
      m_last_part= part;
  }
  else
  {
    /*
      If not only used with READ_EXACT, we should investigate if possible
      to optimize for other find_flag's as well.
    */
    DBUG_ASSERT(0);
    /* fall back on the default implementation */
    error= handler::index_read_idx_map(buf, index, key, keypart_map, find_flag);
  }
  DBUG_RETURN(error);
}
6002
6003
6004 /*
6005 Read next record in a forward index scan
6006
6007 SYNOPSIS
6008 index_next()
6009 buf Read row in MySQL Row Format
6010
6011 RETURN VALUE
6012 >0 Error code
6013 0 Success
6014
6015 DESCRIPTION
6016 Used to read forward through the index.
6017 */
6018
index_next(uchar * buf)6019 int ha_partition::index_next(uchar * buf)
6020 {
6021 DBUG_ENTER("ha_partition::index_next");
6022 decrement_statistics(&SSV::ha_read_next_count);
6023
6024 /*
6025 TODO(low priority):
6026 If we want partition to work with the HANDLER commands, we
6027 must be able to do index_last() -> index_prev() -> index_next()
6028 and if direction changes, we must step back those partitions in
6029 the record queue so we don't return a value from the wrong direction.
6030 */
6031 if (m_index_scan_type == partition_index_last)
6032 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
6033 if (!m_ordered_scan_ongoing)
6034 {
6035 DBUG_RETURN(handle_unordered_next(buf, FALSE));
6036 }
6037 DBUG_RETURN(handle_ordered_next(buf, FALSE));
6038 }
6039
6040
6041 /*
6042 Read next record special
6043
6044 SYNOPSIS
6045 index_next_same()
6046 buf Read row in MySQL Row Format
6047 key Key
6048 keylen Length of key
6049
6050 RETURN VALUE
6051 >0 Error code
6052 0 Success
6053
6054 DESCRIPTION
6055 This routine is used to read the next but only if the key is the same
6056 as supplied in the call.
6057 */
6058
index_next_same(uchar * buf,const uchar * key,uint keylen)6059 int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
6060 {
6061 DBUG_ENTER("ha_partition::index_next_same");
6062 decrement_statistics(&SSV::ha_read_next_count);
6063
6064 DBUG_ASSERT(keylen == m_start_key.length);
6065 if (m_index_scan_type == partition_index_last)
6066 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
6067 if (!m_ordered_scan_ongoing)
6068 DBUG_RETURN(handle_unordered_next(buf, TRUE));
6069 DBUG_RETURN(handle_ordered_next(buf, TRUE));
6070 }
6071
6072
index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)6073 int ha_partition::index_read_last_map(uchar *buf,
6074 const uchar *key,
6075 key_part_map keypart_map)
6076 {
6077 DBUG_ENTER("ha_partition::index_read_last_map");
6078
6079 m_ordered= true; // Safety measure
6080 end_range= NULL;
6081 m_index_scan_type= partition_index_read_last;
6082 m_start_key.key= key;
6083 m_start_key.keypart_map= keypart_map;
6084 m_start_key.flag= HA_READ_PREFIX_LAST;
6085 DBUG_RETURN(common_index_read(buf, true));
6086 }
6087
6088
6089 /*
6090 Read next record when performing index scan backwards
6091
6092 SYNOPSIS
6093 index_prev()
6094 buf Read row in MySQL Row Format
6095
6096 RETURN VALUE
6097 >0 Error code
6098 0 Success
6099
6100 DESCRIPTION
6101 Used to read backwards through the index.
6102 */
6103
index_prev(uchar * buf)6104 int ha_partition::index_prev(uchar * buf)
6105 {
6106 DBUG_ENTER("ha_partition::index_prev");
6107 decrement_statistics(&SSV::ha_read_prev_count);
6108
6109 /* TODO: read comment in index_next */
6110 if (m_index_scan_type == partition_index_first)
6111 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
6112 DBUG_RETURN(handle_ordered_prev(buf));
6113 }
6114
6115
6116 /*
6117 Start a read of one range with start and end key
6118
6119 SYNOPSIS
6120 read_range_first()
6121 start_key Specification of start key
6122 end_key Specification of end key
6123 eq_range_arg Is it equal range
6124 sorted Should records be returned in sorted order
6125
6126 RETURN VALUE
6127 >0 Error code
6128 0 Success
6129
6130 DESCRIPTION
6131 We reimplement read_range_first since we don't want the compare_key
6132 check at the end. This is already performed in the partition handler.
6133 read_range_next is very much different due to that we need to scan
6134 all underlying handlers.
6135 */
6136
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)6137 int ha_partition::read_range_first(const key_range *start_key,
6138 const key_range *end_key,
6139 bool eq_range_arg, bool sorted)
6140 {
6141 int error;
6142 DBUG_ENTER("ha_partition::read_range_first");
6143
6144 m_ordered= sorted;
6145 eq_range= eq_range_arg;
6146 set_end_range(end_key);
6147
6148 range_key_part= m_curr_key_info[0]->key_part;
6149 if (start_key)
6150 m_start_key= *start_key;
6151 else
6152 m_start_key.key= NULL;
6153
6154 m_index_scan_type= partition_read_range;
6155 error= common_index_read(m_rec0, MY_TEST(start_key));
6156 DBUG_RETURN(error);
6157 }
6158
6159
6160 /*
6161 Read next record in read of a range with start and end key
6162
6163 SYNOPSIS
6164 read_range_next()
6165
6166 RETURN VALUE
6167 >0 Error code
6168 0 Success
6169 */
6170
read_range_next()6171 int ha_partition::read_range_next()
6172 {
6173 DBUG_ENTER("ha_partition::read_range_next");
6174
6175 if (m_ordered_scan_ongoing)
6176 {
6177 DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
6178 }
6179 DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
6180 }
6181
6182 /**
6183 Create a copy of all keys used by multi_range_read()
6184
6185 @retval 0 ok
6186 @retval HA_ERR_END_OF_FILE no keys in range
6187 @retval other value: error
6188
6189 TODO to save memory:
6190 - If (mrr_mode & HA_MRR_MATERIALIZED_KEYS) is set then the keys data is
6191 stable and we don't have to copy the keys, only store a pointer to the
6192 key.
6193 - When allocating key data, store things in a MEM_ROOT buffer instead of
6194 a malloc() per key. This will simplify and speed up the current code
6195 and use less memory.
6196 */
6197
int ha_partition::multi_range_key_create_key(RANGE_SEQ_IF *seq,
                                             range_seq_t seq_it)
{
  uint i, length;
  key_range *start_key, *end_key;
  KEY_MULTI_RANGE *range;
  DBUG_ENTER("ha_partition::multi_range_key_create_key");

  /* Reset per-call state: no partition has any ranges assigned yet. */
  bitmap_clear_all(&m_mrr_used_partitions);
  m_mrr_range_length= 0;
  bzero(m_part_mrr_range_length,
        sizeof(*m_part_mrr_range_length) * m_tot_parts);
  if (!m_mrr_range_first)
  {
    /* First call on this handler: allocate the global range-list head. */
    if (!(m_mrr_range_first= (PARTITION_KEY_MULTI_RANGE *)
          my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
                          &m_mrr_range_current, sizeof(PARTITION_KEY_MULTI_RANGE),
                          NullS)))
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);

    m_mrr_range_first->id= 1;
    m_mrr_range_first->key[0]= NULL;
    m_mrr_range_first->key[1]= NULL;
    m_mrr_range_first->next= NULL;
  }
  else
    /* Subsequent calls reuse the list allocated last time. */
    m_mrr_range_current= m_mrr_range_first;

  /* Same for the per-partition lists: allocate heads or rewind cursors. */
  for (i= 0; i < m_tot_parts; i++)
  {
    if (!m_part_mrr_range_first[i])
    {
      if (!(m_part_mrr_range_first[i]= (PARTITION_PART_KEY_MULTI_RANGE *)
            my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_ZEROFILL),
                            &m_part_mrr_range_current[i], sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                            NullS)))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    else
    {
      /* Rewind and clear the first slot so stale links are not reused. */
      m_part_mrr_range_current[i]= m_part_mrr_range_first[i];
      m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
    }
  }
  m_mrr_range_current->key_multi_range.start_key.key= NULL;
  m_mrr_range_current->key_multi_range.end_key.key= NULL;

  /*
    Pull all ranges from the caller's sequence, deep-copy their keys into
    handler-owned buffers and distribute each range to the partitions that
    may contain matching rows.
  */
  while (!seq->next(seq_it, &m_mrr_range_current->key_multi_range))
  {
    m_mrr_range_length++;
    range= &m_mrr_range_current->key_multi_range;

    /* Copy start key */
    start_key= &range->start_key;
    DBUG_PRINT("info",("partition range->range_flag: %u", range->range_flag));
    DBUG_PRINT("info",("partition start_key->key: %p", start_key->key));
    DBUG_PRINT("info",("partition start_key->length: %u", start_key->length));
    DBUG_PRINT("info",("partition start_key->keypart_map: %lu",
                       start_key->keypart_map));
    DBUG_PRINT("info",("partition start_key->flag: %u", start_key->flag));

    if (start_key->key)
    {
      length= start_key->length;
      /* Grow the cached buffer only when the new key does not fit. */
      if (!m_mrr_range_current->key[0] ||
          m_mrr_range_current->length[0] < length)
      {
        if (m_mrr_range_current->key[0])
          my_free(m_mrr_range_current->key[0]);
        if (!(m_mrr_range_current->key[0]=
              (uchar *) my_malloc(PSI_INSTRUMENT_ME, length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[0]= length;
      }
      memcpy(m_mrr_range_current->key[0], start_key->key, length);
      /* Point the range at our stable copy, not the caller's buffer. */
      start_key->key= m_mrr_range_current->key[0];
    }

    /* Copy end key */
    end_key= &range->end_key;
    DBUG_PRINT("info",("partition end_key->key: %p", end_key->key));
    DBUG_PRINT("info",("partition end_key->length: %u", end_key->length));
    DBUG_PRINT("info",("partition end_key->keypart_map: %lu",
                       end_key->keypart_map));
    DBUG_PRINT("info",("partition end_key->flag: %u", end_key->flag));
    if (end_key->key)
    {
      length= end_key->length;
      if (!m_mrr_range_current->key[1] ||
          m_mrr_range_current->length[1] < length)
      {
        if (m_mrr_range_current->key[1])
          my_free(m_mrr_range_current->key[1]);
        if (!(m_mrr_range_current->key[1]=
              (uchar *) my_malloc(PSI_INSTRUMENT_ME, length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[1]= length;
      }
      memcpy(m_mrr_range_current->key[1], end_key->key, length);
      end_key->key= m_mrr_range_current->key[1];
    }

    /* Save the caller's range id and substitute our wrapper as the id. */
    m_mrr_range_current->ptr= m_mrr_range_current->key_multi_range.ptr;
    m_mrr_range_current->key_multi_range.ptr= m_mrr_range_current;

    /* Prune to the matching partitions; exact keys allow pruning. */
    if (start_key->key && (start_key->flag & HA_READ_KEY_EXACT))
      get_partition_set(table, table->record[0], active_index,
                        start_key, &m_part_spec);
    else
    {
      m_part_spec.start_part= 0;
      m_part_spec.end_part= m_tot_parts - 1;
    }

    /* Copy key to those partitions that needs it */
    for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
    {
      if (bitmap_is_set(&(m_part_info->read_partitions), i))
      {
        bitmap_set_bit(&m_mrr_used_partitions, i);
        m_part_mrr_range_length[i]++;
        m_part_mrr_range_current[i]->partition_key_multi_range=
          m_mrr_range_current;

        /* Extend this partition's list if we are at its tail. */
        if (!m_part_mrr_range_current[i]->next)
        {
          PARTITION_PART_KEY_MULTI_RANGE *tmp_part_mrr_range;
          if (!(tmp_part_mrr_range= (PARTITION_PART_KEY_MULTI_RANGE *)
                my_malloc(PSI_INSTRUMENT_ME, sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                          MYF(MY_WME | MY_ZEROFILL))))
            DBUG_RETURN(HA_ERR_OUT_OF_MEM);

          m_part_mrr_range_current[i]->next= tmp_part_mrr_range;
          m_part_mrr_range_current[i]= tmp_part_mrr_range;
        }
        else
        {
          m_part_mrr_range_current[i]= m_part_mrr_range_current[i]->next;
          m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
        }
      }
    }

    if (!m_mrr_range_current->next)
    {
      /* Add end of range sentinel */
      PARTITION_KEY_MULTI_RANGE *tmp_mrr_range;
      if (!(tmp_mrr_range= (PARTITION_KEY_MULTI_RANGE *)
            my_malloc(PSI_INSTRUMENT_ME, sizeof(PARTITION_KEY_MULTI_RANGE), MYF(MY_WME))))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);

      tmp_mrr_range->id= m_mrr_range_current->id + 1;
      tmp_mrr_range->key[0]= NULL;
      tmp_mrr_range->key[1]= NULL;
      tmp_mrr_range->next= NULL;
      m_mrr_range_current->next= tmp_mrr_range;
    }
    m_mrr_range_current= m_mrr_range_current->next;
  }

  if (!m_mrr_range_length)
  {
    DBUG_PRINT("Warning",("No keys to use for mrr"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  /* set start and end part */
  m_part_spec.start_part= bitmap_get_first_set(&m_mrr_used_partitions);

  /* end_part is the highest partition that got at least one range. */
  for (i= m_tot_parts; i-- > 0;)
  {
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      m_part_spec.end_part= i;
      break;
    }
  }
  /* Prepare the per-partition holders handed to child MRR calls. */
  for (i= 0; i < m_tot_parts; i++)
  {
    m_partition_part_key_multi_range_hld[i].partition= this;
    m_partition_part_key_multi_range_hld[i].part_id= i;
    m_partition_part_key_multi_range_hld[i].partition_part_key_multi_range=
      m_part_mrr_range_first[i];
  }
  DBUG_PRINT("return",("OK"));
  DBUG_RETURN(0);
}
6385
6386
partition_multi_range_key_get_key_info(void * init_params,uint * length,key_part_map * map)6387 static void partition_multi_range_key_get_key_info(void *init_params,
6388 uint *length,
6389 key_part_map *map)
6390 {
6391 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6392 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params;
6393 ha_partition *partition= hld->partition;
6394 key_range *start_key= (&partition->m_mrr_range_first->
6395 key_multi_range.start_key);
6396 DBUG_ENTER("partition_multi_range_key_get_key_info");
6397 *length= start_key->length;
6398 *map= start_key->keypart_map;
6399 DBUG_VOID_RETURN;
6400 }
6401
6402
partition_multi_range_key_init(void * init_params,uint n_ranges,uint flags)6403 static range_seq_t partition_multi_range_key_init(void *init_params,
6404 uint n_ranges,
6405 uint flags)
6406 {
6407 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6408 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params;
6409 ha_partition *partition= hld->partition;
6410 uint i= hld->part_id;
6411 DBUG_ENTER("partition_multi_range_key_init");
6412 // not used: partition->m_mrr_range_init_flags= flags;
6413 hld->partition_part_key_multi_range= partition->m_part_mrr_range_first[i];
6414 DBUG_RETURN(init_params);
6415 }
6416
6417
partition_multi_range_key_next(range_seq_t seq,KEY_MULTI_RANGE * range)6418 static bool partition_multi_range_key_next(range_seq_t seq,
6419 KEY_MULTI_RANGE *range)
6420 {
6421 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6422 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6423 PARTITION_KEY_MULTI_RANGE *partition_key_multi_range=
6424 hld->partition_part_key_multi_range->partition_key_multi_range;
6425 DBUG_ENTER("partition_multi_range_key_next");
6426 if (!partition_key_multi_range)
6427 DBUG_RETURN(TRUE);
6428 *range= partition_key_multi_range->key_multi_range;
6429 hld->partition_part_key_multi_range=
6430 hld->partition_part_key_multi_range->next;
6431 DBUG_RETURN(FALSE);
6432 }
6433
6434
partition_multi_range_key_skip_record(range_seq_t seq,range_id_t range_info,uchar * rowid)6435 static bool partition_multi_range_key_skip_record(range_seq_t seq,
6436 range_id_t range_info,
6437 uchar *rowid)
6438 {
6439 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6440 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6441 PARTITION_KEY_MULTI_RANGE *pkmr= (PARTITION_KEY_MULTI_RANGE *)range_info;
6442 DBUG_ENTER("partition_multi_range_key_skip_record");
6443 DBUG_RETURN(hld->partition->m_seq_if->skip_record(hld->partition->m_seq,
6444 pkmr->ptr, rowid));
6445 }
6446
6447
partition_multi_range_key_skip_index_tuple(range_seq_t seq,range_id_t range_info)6448 static bool partition_multi_range_key_skip_index_tuple(range_seq_t seq,
6449 range_id_t range_info)
6450 {
6451 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6452 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6453 PARTITION_KEY_MULTI_RANGE *pkmr= (PARTITION_KEY_MULTI_RANGE *)range_info;
6454 DBUG_ENTER("partition_multi_range_key_skip_index_tuple");
6455 DBUG_RETURN(hld->partition->m_seq_if->skip_index_tuple(hld->partition->m_seq,
6456 pkmr->ptr));
6457 }
6458
/**
  Estimate rows/cost for an MRR scan with known ranges.

  Distributes the caller's ranges to the affected partitions via
  multi_range_key_create_key(), then sums rows, cost and buffer sizes
  from each used partition's own multi_range_read_info_const().

  @retval number of rows expected, 0 if no keys in range
  @retval HA_POS_ERROR if MRR is not possible (fall back to normal reads)
*/
ha_rows ha_partition::multi_range_read_info_const(uint keyno,
                                                  RANGE_SEQ_IF *seq,
                                                  void *seq_init_param,
                                                  uint n_ranges, uint *bufsz,
                                                  uint *mrr_mode,
                                                  Cost_estimate *cost)
{
  int error;
  uint i;
  handler **file;
  ha_rows rows= 0;
  uint ret_mrr_mode= 0;
  range_seq_t seq_it;
  part_id_range save_part_spec;
  Cost_estimate part_cost;
  DBUG_ENTER("ha_partition::multi_range_read_info_const");
  DBUG_PRINT("enter", ("partition this: %p", this));

  m_mrr_new_full_buffer_size= 0;
  /* multi_range_key_create_key() clobbers m_part_spec; restore on exit. */
  save_part_spec= m_part_spec;

  cost->reset();

  seq_it= seq->init(seq_init_param, n_ranges, *mrr_mode);
  if (unlikely((error= multi_range_key_create_key(seq, seq_it))))
  {
    if (likely(error == HA_ERR_END_OF_FILE))    // No keys in range
    {
      rows= 0;
      goto end;
    }
    /*
      This error means that we can't do multi_range_read for the moment
      (probably running out of memory) and we need to fallback to
      normal reads
    */
    m_part_spec= save_part_spec;
    DBUG_RETURN(HA_POS_ERROR);
  }
  /* Wrap the caller's sequence callbacks with our per-partition versions. */
  m_part_seq_if.get_key_info=
    seq->get_key_info ? partition_multi_range_key_get_key_info : NULL;
  m_part_seq_if.init= partition_multi_range_key_init;
  m_part_seq_if.next= partition_multi_range_key_next;
  m_part_seq_if.skip_record= (seq->skip_record ?
                              partition_multi_range_key_skip_record : NULL);
  m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ?
                                   partition_multi_range_key_skip_index_tuple :
                                   NULL);
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    DBUG_PRINT("info",("partition part_id: %u", i));
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      ha_rows tmp_rows;
      uint tmp_mrr_mode;
      m_mrr_buffer_size[i]= 0;
      part_cost.reset();
      tmp_mrr_mode= *mrr_mode;
      tmp_rows= (*file)->
        multi_range_read_info_const(keyno, &m_part_seq_if,
                                    &m_partition_part_key_multi_range_hld[i],
                                    m_part_mrr_range_length[i],
                                    &m_mrr_buffer_size[i],
                                    &tmp_mrr_mode, &part_cost);
      if (tmp_rows == HA_POS_ERROR)
      {
        /* Any partition failing makes the whole MRR impossible. */
        m_part_spec= save_part_spec;
        DBUG_RETURN(HA_POS_ERROR);
      }
      cost->add(&part_cost);
      rows+= tmp_rows;
      /* Modes and buffer needs are aggregated across partitions. */
      ret_mrr_mode|= tmp_mrr_mode;
      m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i];
    }
  } while (*(++file));
  *mrr_mode= ret_mrr_mode;

end:
  m_part_spec= save_part_spec;
  DBUG_RETURN(rows);
}
6542
6543
/**
  Estimate rows/cost for an MRR scan when only range counts are known.

  Sums costs and buffer needs over all read partitions.

  @return 0 when MRR is possible on all partitions; otherwise the child's
          verdict aborts the loop (see note below).
*/
ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges,
                                            uint keys,
                                            uint key_parts, uint *bufsz,
                                            uint *mrr_mode,
                                            Cost_estimate *cost)
{
  uint i;
  handler **file;
  ha_rows rows= 0;
  Cost_estimate part_cost;
  DBUG_ENTER("ha_partition::multi_range_read_info");
  DBUG_PRINT("enter", ("partition this: %p", this));

  cost->reset();

  m_mrr_new_full_buffer_size= 0;
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    if (bitmap_is_set(&(m_part_info->read_partitions), (i)))
    {
      ha_rows tmp_rows;
      m_mrr_buffer_size[i]= 0;
      part_cost.reset();
      /*
        NOTE(review): a non-zero child return ends the scan early and
        returns the rows accumulated so far (0 if the first partition
        fails).  Presumably this follows the MRR convention that non-zero
        means "MRR not possible" — confirm against the
        handler::multi_range_read_info contract.
      */
      if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys,
                                                    key_parts,
                                                    &m_mrr_buffer_size[i],
                                                    mrr_mode, &part_cost)))
        DBUG_RETURN(rows);
      cost->add(&part_cost);
      rows+= tmp_rows;
      m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i];
    }
  } while (*(++file));

  /* 0 signals that MRR is possible for all read partitions. */
  DBUG_RETURN(0);
}
6582
6583
/**
  Initialize an MRR scan over all used partitions.

  Copies the caller's ranges, allocates (or reuses) one large buffer and
  carves per-partition slices out of it, then initializes MRR on each
  child handler with the wrapped sequence interface.

  @retval 0     Success (also when there are no keys, see note below)
  @retval other Error code from buffer allocation or a child handler
*/
int ha_partition::multi_range_read_init(RANGE_SEQ_IF *seq,
                                        void *seq_init_param,
                                        uint n_ranges, uint mrr_mode,
                                        HANDLER_BUFFER *buf)
{
  int error;
  uint i;
  handler **file;
  uchar *tmp_buffer;
  DBUG_ENTER("ha_partition::multi_range_read_init");
  DBUG_PRINT("enter", ("partition this: %p", this));

  eq_range= 0;
  m_seq_if= seq;
  m_seq= seq->init(seq_init_param, n_ranges, mrr_mode);
  /*
    NOTE(review): errors from multi_range_key_create_key() (e.g. no keys,
    out of memory) are reported as success here; presumably the caller
    then sees EOF on the first multi_range_read_next() — confirm.
  */
  if (unlikely((error= multi_range_key_create_key(seq, m_seq))))
    DBUG_RETURN(0);

  /* Wrap the caller's sequence callbacks with our per-partition versions. */
  m_part_seq_if.get_key_info= (seq->get_key_info ?
                               partition_multi_range_key_get_key_info :
                               NULL);
  m_part_seq_if.init= partition_multi_range_key_init;
  m_part_seq_if.next= partition_multi_range_key_next;
  m_part_seq_if.skip_record= (seq->skip_record ?
                              partition_multi_range_key_skip_record :
                              NULL);
  m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ?
                                   partition_multi_range_key_skip_index_tuple :
                                   NULL);

  /* m_mrr_new_full_buffer_size was calculated in multi_range_read_info */
  if (m_mrr_full_buffer_size < m_mrr_new_full_buffer_size)
  {
    /* Grow the shared buffer that is sliced up among partitions. */
    if (m_mrr_full_buffer)
      my_free(m_mrr_full_buffer);
    if (!(m_mrr_full_buffer=
          (uchar *) my_malloc(PSI_INSTRUMENT_ME, m_mrr_new_full_buffer_size, MYF(MY_WME))))
    {
      m_mrr_full_buffer_size= 0;
      error= HA_ERR_OUT_OF_MEM;
      goto error;
    }
    m_mrr_full_buffer_size= m_mrr_new_full_buffer_size;
  }

  tmp_buffer= m_mrr_full_buffer;
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    DBUG_PRINT("info",("partition part_id: %u", i));
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      if (m_mrr_new_full_buffer_size)
      {
        /* Give this partition its slice of the shared buffer. */
        if (m_mrr_buffer_size[i])
        {
          m_mrr_buffer[i].buffer= tmp_buffer;
          m_mrr_buffer[i].end_of_used_area= tmp_buffer;
          tmp_buffer+= m_mrr_buffer_size[i];
          m_mrr_buffer[i].buffer_end= tmp_buffer;
        }
      }
      else
        /* No per-partition sizes known: share the caller's buffer. */
        m_mrr_buffer[i]= *buf;

      if (unlikely((error= (*file)->
                    multi_range_read_init(&m_part_seq_if,
                                          &m_partition_part_key_multi_range_hld[i],
                                          m_part_mrr_range_length[i],
                                          mrr_mode,
                                          &m_mrr_buffer[i]))))
        goto error;
      m_stock_range_seq[i]= 0;
    }
  } while (*(++file));

  m_multi_range_read_first= TRUE;
  m_mrr_range_current= m_mrr_range_first;
  m_index_scan_type= partition_read_multi_range;
  m_mrr_mode= mrr_mode;
  m_mrr_n_ranges= n_ranges;
  DBUG_RETURN(0);

error:
  DBUG_RETURN(error);
}
6671
6672
/**
  Fetch the next row of the MRR scan.

  Sorted mode uses the ordered (merge) scan machinery; unsorted mode uses
  the unordered scan.  The caller's original range id is returned through
  *range_info via the wrapper stored in PARTITION_KEY_MULTI_RANGE::ptr.

  @retval 0     Success, row in table->record[0]
  @retval other Error code (including HA_ERR_END_OF_FILE)
*/
int ha_partition::multi_range_read_next(range_id_t *range_info)
{
  int error;
  DBUG_ENTER("ha_partition::multi_range_read_next");
  DBUG_PRINT("enter", ("partition this: %p partition m_mrr_mode: %u",
                       this, m_mrr_mode));

  if ((m_mrr_mode & HA_MRR_SORTED))
  {
    /* Rows must come back in key order: use the ordered index scan. */
    if (m_multi_range_read_first)
    {
      if (unlikely((error= handle_ordered_index_scan(table->record[0],
                                                     FALSE))))
        DBUG_RETURN(error);
      /* In pre-call mode the real scan has not started yet; keep flag. */
      if (!m_pre_calling)
        m_multi_range_read_first= FALSE;
    }
    else if (unlikely((error= handle_ordered_next(table->record[0],
                                                  eq_range))))
      DBUG_RETURN(error);
    *range_info= m_mrr_range_current->ptr;
  }
  else
  {
    if (unlikely(m_multi_range_read_first))
    {
      if (unlikely((error=
                    handle_unordered_scan_next_partition(table->record[0]))))
        DBUG_RETURN(error);
      if (!m_pre_calling)
        m_multi_range_read_first= FALSE;
    }
    else if (unlikely((error= handle_unordered_next(table->record[0], FALSE))))
      DBUG_RETURN(error);

    if (!(m_mrr_mode & HA_MRR_NO_ASSOCIATION))
    {
      /* Unwrap the caller's range id stored for the producing partition. */
      *range_info=
        ((PARTITION_KEY_MULTI_RANGE *) m_range_info[m_last_part])->ptr;
    }
  }
  DBUG_RETURN(0);
}
6716
6717
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)6718 int ha_partition::multi_range_read_explain_info(uint mrr_mode, char *str,
6719 size_t size)
6720 {
6721 DBUG_ENTER("ha_partition::multi_range_read_explain_info");
6722 DBUG_RETURN(get_open_file_sample()->
6723 multi_range_read_explain_info(mrr_mode, str, size));
6724 }
6725
6726
6727 /**
6728 Find and retrieve the Full Text Search relevance ranking for a search string
6729 in a full text index.
6730
6731 @param handler Full Text Search handler
6732 @param record Search string
6733 @param length Length of the search string
6734
6735 @retval Relevance value
6736 */
6737
partition_ft_find_relevance(FT_INFO * handler,uchar * record,uint length)6738 float partition_ft_find_relevance(FT_INFO *handler,
6739 uchar *record, uint length)
6740 {
6741 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6742 uint m_last_part= ((ha_partition*) info->file)->last_part();
6743 FT_INFO *m_handler= info->part_ft_info[m_last_part];
6744 DBUG_ENTER("partition_ft_find_relevance");
6745 if (!m_handler)
6746 DBUG_RETURN((float)-1.0);
6747 DBUG_RETURN(m_handler->please->find_relevance(m_handler, record, length));
6748 }
6749
6750
6751 /**
6752 Retrieve the Full Text Search relevance ranking for the current
6753 full text search.
6754
6755 @param handler Full Text Search handler
6756
6757 @retval Relevance value
6758 */
6759
partition_ft_get_relevance(FT_INFO * handler)6760 float partition_ft_get_relevance(FT_INFO *handler)
6761 {
6762 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6763 uint m_last_part= ((ha_partition*) info->file)->last_part();
6764 FT_INFO *m_handler= info->part_ft_info[m_last_part];
6765 DBUG_ENTER("partition_ft_get_relevance");
6766 if (!m_handler)
6767 DBUG_RETURN((float)-1.0);
6768 DBUG_RETURN(m_handler->please->get_relevance(m_handler));
6769 }
6770
6771
6772 /**
6773 Free the memory for a full text search handler.
6774
6775 @param handler Full Text Search handler
6776 */
6777
partition_ft_close_search(FT_INFO * handler)6778 void partition_ft_close_search(FT_INFO *handler)
6779 {
6780 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6781 info->file->ft_close_search(handler);
6782 }
6783
6784
6785 /**
6786 Free the memory for a full text search handler.
6787
6788 @param handler Full Text Search handler
6789 */
6790
ft_close_search(FT_INFO * handler)6791 void ha_partition::ft_close_search(FT_INFO *handler)
6792 {
6793 uint i;
6794 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6795 DBUG_ENTER("ha_partition::ft_close_search");
6796
6797 for (i= 0; i < m_tot_parts; i++)
6798 {
6799 FT_INFO *m_handler= info->part_ft_info[i];
6800 DBUG_ASSERT(!m_handler ||
6801 (m_handler->please && m_handler->please->close_search));
6802 if (m_handler &&
6803 m_handler->please &&
6804 m_handler->please->close_search)
6805 m_handler->please->close_search(m_handler);
6806 }
6807 DBUG_VOID_RETURN;
6808 }
6809
6810
6811 /* Partition Full Text search function table */
_ft_vft partition_ft_vft =
{
  NULL,                        // ft_read_next: no implementation provided
  partition_ft_find_relevance, // find_relevance
  partition_ft_close_search,   // close_search
  partition_ft_get_relevance,  // get_relevance
  NULL                         // reinit_search: no implementation provided
};
6820
6821
6822 /**
6823 Initialize a full text search.
6824 */
6825
/**
  Initialize a full text search across all read partitions.

  Extends the read set when an update may move rows between partitions,
  then calls ft_init()/pre_ft_init() on every read partition.  On a child
  failure, already-initialized partitions are rolled back with ft_end().

  @retval 0     Success
  @retval other Error code from a child handler
*/
int ha_partition::ft_init()
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::ft_init");
  DBUG_PRINT("info", ("partition this: %p", this));

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
      bitmap_set_all(table->read_set);
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: %p",
                      (void *) m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part %u", (uint) part_id));

  if (part_id == MY_BIT_NONE)
  {
    /* No partition to read: succeed but mark the scan as not started. */
    error= 0;
    goto err1;
  }

  DBUG_PRINT("info", ("ft_init on partition %u", (uint) part_id));
  /*
    ft_end() is needed for partitioning to reset internal data if scan
    is already in use
  */
  if (m_pre_calling)
  {
    if (unlikely((error= pre_ft_end())))
      goto err1;
  }
  else
    ft_end();
  m_index_scan_type= partition_ft_read;
  for (i= part_id; i < m_tot_parts; i++)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i))
    {
      error= m_pre_calling ? m_file[i]->pre_ft_init() : m_file[i]->ft_init();
      if (unlikely(error))
        goto err2;
    }
  }
  /* m_scan_value == 1 marks an active scan (see ft_end()). */
  m_scan_value= 1;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  m_ft_init_and_first= TRUE;
  DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value));
  DBUG_RETURN(0);

err2:
  /* Roll back the partitions initialized before the failure. */
  late_extra_no_cache(part_id);
  while ((int)--i >= (int)part_id)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i))
    {
      if (m_pre_calling)
        m_file[i]->pre_ft_end();
      else
        m_file[i]->ft_end();
    }
  }
err1:
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
6920
6921
6922 /**
6923 Initialize a full text search during a bulk access request.
6924 */
6925
pre_ft_init()6926 int ha_partition::pre_ft_init()
6927 {
6928 bool save_m_pre_calling;
6929 int error;
6930 DBUG_ENTER("ha_partition::pre_ft_init");
6931 save_m_pre_calling= m_pre_calling;
6932 m_pre_calling= TRUE;
6933 error= ft_init();
6934 m_pre_calling= save_m_pre_calling;
6935 DBUG_RETURN(error);
6936 }
6937
6938
6939 /**
6940 Terminate a full text search.
6941 */
6942
ft_end()6943 void ha_partition::ft_end()
6944 {
6945 handler **file;
6946 DBUG_ENTER("ha_partition::ft_end");
6947 DBUG_PRINT("info", ("partition this: %p", this));
6948
6949 switch (m_scan_value) {
6950 case 2: // Error
6951 break;
6952 case 1: // Table scan
6953 if (NO_CURRENT_PART_ID != m_part_spec.start_part)
6954 late_extra_no_cache(m_part_spec.start_part);
6955 file= m_file;
6956 do
6957 {
6958 if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
6959 {
6960 if (m_pre_calling)
6961 (*file)->pre_ft_end();
6962 else
6963 (*file)->ft_end();
6964 }
6965 } while (*(++file));
6966 break;
6967 }
6968 m_scan_value= 2;
6969 m_part_spec.start_part= NO_CURRENT_PART_ID;
6970 ft_current= 0;
6971 DBUG_VOID_RETURN;
6972 }
6973
6974
6975 /**
6976 Terminate a full text search during a bulk access request.
6977 */
6978
pre_ft_end()6979 int ha_partition::pre_ft_end()
6980 {
6981 bool save_m_pre_calling;
6982 DBUG_ENTER("ha_partition::pre_ft_end");
6983 save_m_pre_calling= m_pre_calling;
6984 m_pre_calling= TRUE;
6985 ft_end();
6986 m_pre_calling= save_m_pre_calling;
6987 DBUG_RETURN(0);
6988 }
6989
6990
/**
  Swap cached blob values between the table's fields and external storage.

  Used by the ordered scan machinery to preserve blob caches while record
  buffers are exchanged.  With restore == false the fields' cached blob
  strings are moved into *storage; with restore == true they are moved
  back from *storage into the fields.

  @param rec_buf  Record buffer the fields are temporarily pointed at
  @param storage  One Ordered_blob_storage slot per blob field
  @param restore  Direction of the swap (see above)
*/
void ha_partition::swap_blobs(uchar * rec_buf, Ordered_blob_storage ** storage, bool restore)
{
  uint *ptr, *end;
  uint blob_n= 0;
  /* Point the fields at rec_buf while we manipulate their blob caches. */
  table->move_fields(table->field, rec_buf, table->record[0]);
  for (ptr= table->s->blob_field, end= ptr + table->s->blob_fields;
       ptr != end; ++ptr, ++blob_n)
  {
    DBUG_ASSERT(*ptr < table->s->fields);
    Field_blob *blob= (Field_blob*) table->field[*ptr];
    DBUG_ASSERT(blob->flags & BLOB_FLAG);
    DBUG_ASSERT(blob->field_index == *ptr);
    /* Skip fields not being read or holding NULL: nothing cached to swap. */
    if (!bitmap_is_set(table->read_set, *ptr) || blob->is_null())
      continue;

    Ordered_blob_storage &s= *storage[blob_n];

    if (restore)
    {
      /*
        We protect only blob cache (value or read_value). If the cache was
        empty that doesn't mean the blob was empty. Blobs allocated by a
        storage engine should work just fine.
      */
      if (!s.blob.is_empty())
        blob->swap(s.blob, s.set_read_value);
    }
    else
    {
      bool set_read_value;
      String *cached= blob->cached(&set_read_value);
      if (cached)
      {
        /* Move the cached value out and remember which cache it came from. */
        cached->swap(s.blob);
        s.set_read_value= set_read_value;
      }
    }
  }
  /* Restore the fields to the original record buffer. */
  table->move_fields(table->field, table->record[0], rec_buf);
}
7031
7032
7033 /**
7034 Initialize a full text search using the extended API.
7035
7036 @param flags Search flags
7037 @param inx Key number
7038 @param key Key value
7039
7040 @return FT_INFO structure if successful
7041 NULL otherwise
7042 */
7043
/**
  Initialize a full text search using the extended API.

  Allocates (or reuses from the ft_first list) a wrapper FT_INFO holding
  one child FT_INFO per partition, then calls ft_init_ext() on every read
  partition.

  @param flags  Search flags
  @param inx    Key number
  @param key    Key value

  @return FT_INFO structure if successful
          NULL otherwise
*/
FT_INFO *ha_partition::ft_init_ext(uint flags, uint inx, String *key)
{
  FT_INFO *ft_handler;
  handler **file;
  st_partition_ft_info *ft_target, **parent;
  DBUG_ENTER("ha_partition::ft_init_ext");

  /* Wrappers form a list (ft_first); append after the current one. */
  if (ft_current)
    parent= &ft_current->next;
  else
    parent= &ft_first;

  if (!(ft_target= *parent))
  {
    /* No reusable wrapper: allocate one plus its per-partition array. */
    FT_INFO **tmp_ft_info;
    if (!(ft_target= (st_partition_ft_info *)
          my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_ZEROFILL),
                          &ft_target, sizeof(st_partition_ft_info),
                          &tmp_ft_info, sizeof(FT_INFO *) * m_tot_parts,
                          NullS)))
    {
      my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL));
      DBUG_RETURN(NULL);
    }
    ft_target->part_ft_info= tmp_ft_info;
    (*parent)= ft_target;
  }

  ft_current= ft_target;
  file= m_file;
  do
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
    {
      if ((ft_handler= (*file)->ft_init_ext(flags, inx, key)))
        (*file)->ft_handler= ft_handler;
      else
        (*file)->ft_handler= NULL;
      ft_target->part_ft_info[file - m_file]= ft_handler;
    }
    else
    {
      /* Partition not read: clear any stale handler reference. */
      (*file)->ft_handler= NULL;
      ft_target->part_ft_info[file - m_file]= NULL;
    }
  } while (*(++file));

  ft_target->please= &partition_ft_vft;
  ft_target->file= this;
  DBUG_RETURN((FT_INFO*)ft_target);
}
7095
7096
7097 /**
7098 Return the next record from the FT result set during an ordered index
7099 pre-scan
7100
7101 @param use_parallel Is it a parallel search
7102
7103 @return >0 Error code
7104 0 Success
7105 */
7106
pre_ft_read(bool use_parallel)7107 int ha_partition::pre_ft_read(bool use_parallel)
7108 {
7109 bool save_m_pre_calling;
7110 int error;
7111 DBUG_ENTER("ha_partition::pre_ft_read");
7112 DBUG_PRINT("info", ("partition this: %p", this));
7113 save_m_pre_calling= m_pre_calling;
7114 m_pre_calling= TRUE;
7115 m_pre_call_use_parallel= use_parallel;
7116 error= ft_read(table->record[0]);
7117 m_pre_calling= save_m_pre_calling;
7118 DBUG_RETURN(error);
7119 }
7120
7121
7122 /**
7123 Return the first or next record in a full text search.
7124
7125 @param buf Buffer where the record should be returned
7126
7127 @return >0 Error code
7128 0 Success
7129 */
7130
/**
  Return the first or next record in a full text search.

  Reads from the current partition (m_part_spec.start_part) until it is
  exhausted, then advances to the next read partition, managing the
  late extra cache per partition.

  @param buf  Buffer where the record should be returned

  @return >0   Error code
          0    Success
*/
int ha_partition::ft_read(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE, error;
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::ft_read");
  DBUG_PRINT("info", ("partition this: %p", this));
  DBUG_PRINT("info", ("part_id: %u", part_id));

  if (part_id == NO_CURRENT_PART_ID)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    DBUG_PRINT("info", ("NO_CURRENT_PART_ID"));
    goto end;
  }

  DBUG_ASSERT(m_scan_value == 1);

  if (m_ft_init_and_first)                    // First call to ft_read()
  {
    m_ft_init_and_first= FALSE;
    if (!bulk_access_executing)
    {
      error= handle_pre_scan(FALSE, check_parallel_search());
      /* In pre-call mode the pre-scan is the whole job; return its result. */
      if (m_pre_calling || error)
        DBUG_RETURN(error);
    }
    late_extra_cache(part_id);
  }

  file= m_file[part_id];

  while (TRUE)
  {
    if (!(result= file->ft_read(buf)))
    {
      /* Found row: remember position and return it. */
      m_part_spec.start_part= m_last_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ft_next returned failure
    */
    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;         // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    DBUG_PRINT("info", ("stopping using partition %u", (uint) part_id));

    /* Shift to next partition */
    while (++part_id < m_tot_parts &&
           !bitmap_is_set(&(m_part_info->read_partitions), part_id))
      ;
    if (part_id >= m_tot_parts)
    {
      /* All partitions exhausted. */
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_part_spec.start_part= m_last_part= part_id;
    file= m_file[part_id];
    DBUG_PRINT("info", ("now using partition %u", (uint) part_id));
    late_extra_cache(part_id);
  }

end:
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  table->status= STATUS_NOT_FOUND;
  DBUG_RETURN(result);
}
7207
7208
7209 /*
7210 Common routine to set up index scans
7211
7212 SYNOPSIS
7213 ha_partition::partition_scan_set_up()
    buf            Buffer to later return record in (this function
                   needs it to calculate partitioning function
                   values)
7217
7218 idx_read_flag TRUE <=> m_start_key has range start endpoint which
7219 probably can be used to determine the set of partitions
7220 to scan.
7221 FALSE <=> there is no start endpoint.
7222
7223 DESCRIPTION
7224 Find out which partitions we'll need to read when scanning the specified
7225 range.
7226
7227 If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
7228 as we will not need to do merge ordering.
7229
7230 RETURN VALUE
7231 >0 Error code
7232 0 Success
7233 */
7234
partition_scan_set_up(uchar * buf,bool idx_read_flag)7235 int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
7236 {
7237 DBUG_ENTER("ha_partition::partition_scan_set_up");
7238
7239 if (idx_read_flag)
7240 get_partition_set(table, buf, active_index, &m_start_key, &m_part_spec);
7241 else
7242 {
7243 m_part_spec.start_part= 0;
7244 m_part_spec.end_part= m_tot_parts - 1;
7245 }
7246 if (m_part_spec.start_part > m_part_spec.end_part)
7247 {
7248 /*
7249 We discovered a partition set but the set was empty so we report
7250 key not found.
7251 */
7252 DBUG_PRINT("info", ("scan with no partition to scan"));
7253 DBUG_RETURN(HA_ERR_END_OF_FILE);
7254 }
7255 if (m_part_spec.start_part == m_part_spec.end_part)
7256 {
7257 /*
7258 We discovered a single partition to scan, this never needs to be
7259 performed using the ordered index scan.
7260 */
7261 DBUG_PRINT("info", ("index scan using the single partition %u",
7262 (uint) m_part_spec.start_part));
7263 m_ordered_scan_ongoing= FALSE;
7264 }
7265 else
7266 {
7267 /*
7268 Set m_ordered_scan_ongoing according how the scan should be done
7269 Only exact partitions are discovered atm by get_partition_set.
7270 Verify this, also bitmap must have at least one bit set otherwise
7271 the result from this table is the empty set.
7272 */
7273 uint start_part= bitmap_get_first_set(&(m_part_info->read_partitions));
7274 if (start_part == MY_BIT_NONE)
7275 {
7276 DBUG_PRINT("info", ("scan with no partition to scan"));
7277 DBUG_RETURN(HA_ERR_END_OF_FILE);
7278 }
7279 if (start_part > m_part_spec.start_part)
7280 m_part_spec.start_part= start_part;
7281 DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
7282 m_ordered_scan_ongoing= m_ordered;
7283 }
7284 DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
7285 DBUG_ASSERT(m_part_spec.end_part < m_tot_parts);
7286 DBUG_RETURN(0);
7287 }
7288
7289 /**
7290 Check if we can search partitions in parallel
7291
7292 @retval TRUE yes
7293 @retval FALSE no
7294 */
7295
check_parallel_search()7296 bool ha_partition::check_parallel_search()
7297 {
7298 TABLE_LIST *table_list= table->pos_in_table_list;
7299 st_select_lex *select_lex;
7300 JOIN *join;
7301 DBUG_ENTER("ha_partition::check_parallel_search");
7302 if (!table_list)
7303 goto not_parallel;
7304
7305 while (table_list->parent_l)
7306 table_list= table_list->parent_l;
7307
7308 select_lex= table_list->select_lex;
7309 DBUG_PRINT("info",("partition select_lex: %p", select_lex));
7310 if (!select_lex)
7311 goto not_parallel;
7312 if (!select_lex->explicit_limit)
7313 {
7314 DBUG_PRINT("info",("partition not using explicit_limit"));
7315 goto parallel;
7316 }
7317
7318 join= select_lex->join;
7319 DBUG_PRINT("info",("partition join: %p", join));
7320 if (join && join->skip_sort_order)
7321 {
7322 DBUG_PRINT("info",("partition order_list.elements: %u",
7323 select_lex->order_list.elements));
7324 if (select_lex->order_list.elements)
7325 {
7326 Item *item= *select_lex->order_list.first->item;
7327 DBUG_PRINT("info",("partition item: %p", item));
7328 DBUG_PRINT("info",("partition item->type(): %u", item->type()));
7329 DBUG_PRINT("info",("partition m_part_info->part_type: %u",
7330 m_part_info->part_type));
7331 DBUG_PRINT("info",("partition m_is_sub_partitioned: %s",
7332 m_is_sub_partitioned ? "TRUE" : "FALSE"));
7333 DBUG_PRINT("info",("partition m_part_info->part_expr: %p",
7334 m_part_info->part_expr));
7335 if (item->type() == Item::FIELD_ITEM &&
7336 m_part_info->part_type == RANGE_PARTITION &&
7337 !m_is_sub_partitioned &&
7338 (!m_part_info->part_expr ||
7339 m_part_info->part_expr->type() == Item::FIELD_ITEM))
7340 {
7341 Field *order_field= ((Item_field *)item)->field;
7342 DBUG_PRINT("info",("partition order_field: %p", order_field));
7343 if (order_field && order_field->table == table_list->table)
7344 {
7345 Field *part_field= m_part_info->full_part_field_array[0];
7346 DBUG_PRINT("info",("partition order_field: %p", order_field));
7347 DBUG_PRINT("info",("partition part_field: %p", part_field));
7348 if (part_field == order_field)
7349 {
7350 /*
7351 We are using ORDER BY partition_field LIMIT #
7352 In this case, let's not do things in parallel as it's
7353 likely that the query can be satisfied from the first
7354 partition
7355 */
7356 DBUG_PRINT("info",("partition with ORDER on partition field"));
7357 goto not_parallel;
7358 }
7359 }
7360 }
7361 DBUG_PRINT("info",("partition have order"));
7362 goto parallel;
7363 }
7364
7365 DBUG_PRINT("info",("partition group_list.elements: %u",
7366 select_lex->group_list.elements));
7367 if (select_lex->group_list.elements)
7368 {
7369 Item *item= *select_lex->group_list.first->item;
7370 DBUG_PRINT("info",("partition item: %p", item));
7371 DBUG_PRINT("info",("partition item->type(): %u", item->type()));
7372 DBUG_PRINT("info",("partition m_part_info->part_type: %u",
7373 m_part_info->part_type));
7374 DBUG_PRINT("info",("partition m_is_sub_partitioned: %s",
7375 m_is_sub_partitioned ? "TRUE" : "FALSE"));
7376 DBUG_PRINT("info",("partition m_part_info->part_expr: %p",
7377 m_part_info->part_expr));
7378 if (item->type() == Item::FIELD_ITEM &&
7379 m_part_info->part_type == RANGE_PARTITION &&
7380 !m_is_sub_partitioned &&
7381 (!m_part_info->part_expr ||
7382 m_part_info->part_expr->type() == Item::FIELD_ITEM))
7383 {
7384 Field *group_field= ((Item_field *)item)->field;
7385 DBUG_PRINT("info",("partition group_field: %p", group_field));
7386 if (group_field && group_field->table == table_list->table)
7387 {
7388 Field *part_field= m_part_info->full_part_field_array[0];
7389 DBUG_PRINT("info",("partition group_field: %p", group_field));
7390 DBUG_PRINT("info",("partition part_field: %p", part_field));
7391 if (part_field == group_field)
7392 {
7393 DBUG_PRINT("info",("partition with GROUP BY on partition field"));
7394 goto not_parallel;
7395 }
7396 }
7397 }
7398 DBUG_PRINT("info",("partition with GROUP BY"));
7399 goto parallel;
7400 }
7401 }
7402 else if (select_lex->order_list.elements ||
7403 select_lex->group_list.elements)
7404 {
7405 DBUG_PRINT("info",("partition is not skip_order"));
7406 DBUG_PRINT("info",("partition order_list.elements: %u",
7407 select_lex->order_list.elements));
7408 DBUG_PRINT("info",("partition group_list.elements: %u",
7409 select_lex->group_list.elements));
7410 goto parallel;
7411 }
7412 DBUG_PRINT("info",("partition is not skip_order"));
7413
7414 not_parallel:
7415 DBUG_PRINT("return",("partition FALSE"));
7416 DBUG_RETURN(FALSE);
7417
7418 parallel:
7419 DBUG_PRINT("return",("partition TRUE"));
7420 DBUG_RETURN(TRUE);
7421 }
7422
7423
/**
  Propagate a "pre" variant of the current scan type to every child handler
  in the partition set, so engines that support it (e.g. Spider) can start
  the scans in the background before the real calls arrive.

  @param reverse_order  Not used in this function; kept to mirror the
                        ordered-scan call signature
  @param use_parallel   TRUE if the children may search in parallel

  @return Operation status
    @retval 0      Success. HA_ERR_END_OF_FILE from a child is also mapped
                   to success: an empty partition is not an error here.
    @retval other  Error code from the first failing child
*/
int ha_partition::handle_pre_scan(bool reverse_order, bool use_parallel)
{
  uint i;
  DBUG_ENTER("ha_partition::handle_pre_scan");
  DBUG_PRINT("enter",
             ("m_part_spec.start_part: %u m_part_spec.end_part: %u",
              (uint) m_part_spec.start_part, (uint) m_part_spec.end_part));

  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    /* Skip partitions pruned away from this statement */
    if (!(bitmap_is_set(&(m_part_info->read_partitions), i)))
      continue;
    int error;
    handler *file= m_file[i];

    /* Issue the pre-call matching the scan type set up by the caller */
    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->pre_index_read_map(m_start_key.key,
                                      m_start_key.keypart_map,
                                      m_start_key.flag,
                                      use_parallel);
      break;
    case partition_index_first:
      error= file->pre_index_first(use_parallel);
      break;
    case partition_index_last:
      error= file->pre_index_last(use_parallel);
      break;
    case partition_index_read_last:
      error= file->pre_index_read_last_map(m_start_key.key,
                                           m_start_key.keypart_map,
                                           use_parallel);
      break;
    case partition_read_range:
      error= file->pre_read_range_first(m_start_key.key? &m_start_key: NULL,
                                        end_range, eq_range, TRUE, use_parallel);
      break;
    case partition_read_multi_range:
      /* Only partitions participating in the current MRR scan */
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      error= file->pre_multi_range_read_next(use_parallel);
      break;
    case partition_ft_read:
      error= file->pre_ft_read(use_parallel);
      break;
    case partition_no_index_scan:
      error= file->pre_rnd_next(use_parallel);
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(0);
    }
    /* Empty child is fine; only real errors abort the pre-scan */
    if (error == HA_ERR_END_OF_FILE)
      error= 0;
    if (unlikely(error))
      DBUG_RETURN(error);
  }
  table->status= 0;
  DBUG_RETURN(0);
}
7484
7485
7486 /****************************************************************************
7487 Unordered Index Scan Routines
7488 ****************************************************************************/
7489 /*
7490 Common routine to handle index_next with unordered results
7491
7492 SYNOPSIS
7493 handle_unordered_next()
7494 out:buf Read row in MySQL Row Format
7495 next_same Called from index_next_same
7496
7497 RETURN VALUE
7498 HA_ERR_END_OF_FILE End of scan
7499 0 Success
7500 other Error code
7501
7502 DESCRIPTION
7503 These routines are used to scan partitions without considering order.
7504 This is performed in two situations.
7505 1) In read_multi_range this is the normal case
7506 2) When performing any type of index_read, index_first, index_last where
7507 all fields in the partition function is bound. In this case the index
7508 scan is performed on only one partition and thus it isn't necessary to
7509 perform any sort.
7510 */
7511
handle_unordered_next(uchar * buf,bool is_next_same)7512 int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
7513 {
7514 handler *file;
7515 int error;
7516 DBUG_ENTER("ha_partition::handle_unordered_next");
7517
7518 if (m_part_spec.start_part >= m_tot_parts)
7519 {
7520 /* Should never happen! */
7521 DBUG_ASSERT(0);
7522 DBUG_RETURN(HA_ERR_END_OF_FILE);
7523 }
7524 file= m_file[m_part_spec.start_part];
7525
7526 /*
7527 We should consider if this should be split into three functions as
7528 partition_read_range is_next_same are always local constants
7529 */
7530
7531 if (m_index_scan_type == partition_read_multi_range)
7532 {
7533 if (likely(!(error= file->
7534 multi_range_read_next(&m_range_info[m_part_spec.start_part]))))
7535 {
7536 m_last_part= m_part_spec.start_part;
7537 DBUG_RETURN(0);
7538 }
7539 }
7540 else if (m_index_scan_type == partition_read_range)
7541 {
7542 if (likely(!(error= file->read_range_next())))
7543 {
7544 m_last_part= m_part_spec.start_part;
7545 DBUG_RETURN(0);
7546 }
7547 }
7548 else if (is_next_same)
7549 {
7550 if (likely(!(error= file->ha_index_next_same(buf, m_start_key.key,
7551 m_start_key.length))))
7552 {
7553 m_last_part= m_part_spec.start_part;
7554 DBUG_RETURN(0);
7555 }
7556 }
7557 else
7558 {
7559 if (likely(!(error= file->ha_index_next(buf))))
7560 {
7561 m_last_part= m_part_spec.start_part;
7562 DBUG_RETURN(0); // Row was in range
7563 }
7564 }
7565
7566 if (unlikely(error == HA_ERR_END_OF_FILE))
7567 {
7568 m_part_spec.start_part++; // Start using next part
7569 error= handle_unordered_scan_next_partition(buf);
7570 }
7571 DBUG_RETURN(error);
7572 }
7573
7574
7575 /*
7576 Handle index_next when changing to new partition
7577
7578 SYNOPSIS
7579 handle_unordered_scan_next_partition()
7580 buf Read row in MariaDB Row Format
7581
7582 RETURN VALUE
7583 HA_ERR_END_OF_FILE End of scan
7584 0 Success
7585 other Error code
7586
7587 DESCRIPTION
7588 This routine is used to start the index scan on the next partition.
7589 Both initial start and after completing scan on one partition.
7590 */
7591
int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
{
  uint i= m_part_spec.start_part;
  /* Default if no used partition delivers a row */
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");

  /* Read next partition that includes start_part */
  if (i)
    i= bitmap_get_next_set(&m_part_info->read_partitions, i - 1);
  else
    i= bitmap_get_first_set(&m_part_info->read_partitions);

  /* Try each remaining used partition until one returns a row */
  for (;
       i <= m_part_spec.end_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    int error;
    handler *file= m_file[i];
    /* Record the partition being scanned for subsequent *_next calls */
    m_part_spec.start_part= i;

    /* (Re)start the scan in this partition according to the scan type */
    switch (m_index_scan_type) {
    case partition_read_multi_range:
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      DBUG_PRINT("info", ("read_multi_range on partition %u", i));
      error= file->multi_range_read_next(&m_range_info[i]);
      break;
    case partition_read_range:
      DBUG_PRINT("info", ("read_range_first on partition %u", i));
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, FALSE);
      break;
    case partition_index_read:
      DBUG_PRINT("info", ("index_read on partition %u", i));
      error= file->ha_index_read_map(buf, m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      break;
    case partition_index_first:
      DBUG_PRINT("info", ("index_first on partition %u", i));
      error= file->ha_index_first(buf);
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
    }
    if (likely(!error))
    {
      m_last_part= i;
      DBUG_RETURN(0);
    }
    /* Any error other than "empty partition"/"no match" aborts the scan */
    if (likely((error != HA_ERR_END_OF_FILE) &&
               (error != HA_ERR_KEY_NOT_FOUND)))
      DBUG_RETURN(error);

    /*
      If HA_ERR_KEY_NOT_FOUND, we must return that error instead of
      HA_ERR_END_OF_FILE, to be able to continue search.
    */
    if (saved_error != HA_ERR_KEY_NOT_FOUND)
      saved_error= error;
    DBUG_PRINT("info", ("END_OF_FILE/KEY_NOT_FOUND on partition %u", i));
  }
  /* Fully exhausted scan: mark that no partition is current any more */
  if (saved_error == HA_ERR_END_OF_FILE)
    m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(saved_error);
}
7659
7660
7661 /**
7662 Common routine to start index scan with ordered results.
7663
7664 @param[out] buf Read row in MariaDB Row Format
7665
7666 @return Operation status
7667 @retval HA_ERR_END_OF_FILE End of scan
  @retval HA_ERR_KEY_NOT_FOUND End of scan
7669 @retval 0 Success
7670 @retval other Error code
7671
7672 @details
7673 This part contains the logic to handle index scans that require ordered
7674 output. This includes all except those started by read_range_first with
7675 the flag ordered set to FALSE. Thus most direct index_read and all
7676 index_first and index_last.
7677
7678 We implement ordering by keeping one record plus a key buffer for each
7679 partition. Every time a new entry is requested we will fetch a new
7680 entry from the partition that is currently not filled with an entry.
7681 Then the entry is put into its proper sort position.
7682
7683 Returning a record is done by getting the top record, copying the
7684 record to the request buffer and setting the partition as empty on
7685 entries.
7686 */
7687
int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
{
  int error;
  uint i;
  /* j indexes the next free slot in the priority queue */
  uint j= queue_first_element(&m_queue);
  /* Smallest MRR range id seen among rows not in the first range (0 = none) */
  uint smallest_range_seq= 0;
  bool found= FALSE;
  uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /* Let child engines start their scans (possibly in parallel) first */
  if (m_pre_calling)
    error= handle_pre_scan(reverse_order, m_pre_call_use_parallel);
  else
    error= handle_pre_scan(reverse_order, check_parallel_search());
  if (unlikely(error))
    DBUG_RETURN(error);

  if (m_key_not_found)
  {
    /* m_key_not_found was set in the previous call to this function */
    m_key_not_found= false;
    bitmap_clear_all(&m_key_not_found_partitions);
  }
  m_top_entry= NO_CURRENT_PART_ID;
  DBUG_PRINT("info", ("partition queue_remove_all(1)"));
  queue_remove_all(&m_queue);
  DBUG_ASSERT(bitmap_is_set(&m_part_info->read_partitions,
                            m_part_spec.start_part));

  /*
    Position part_rec_buf_ptr to point to the first used partition >=
    start_part. There may be partitions marked by used_partitions,
    but before start_part. Those partitions have allocated record buffers
    but are dynamically pruned, so their buffers must be skipped.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_part_spec.start_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    part_rec_buf_ptr+= m_priority_queue_rec_len;
  }
  DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
                      m_part_spec.start_part, i));
  /* Fetch the first candidate row from every used partition in the range */
  for (/* continue from above */ ;
       i <= m_part_spec.end_part ;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i),
       part_rec_buf_ptr+= m_priority_queue_rec_len)
  {
    DBUG_PRINT("info", ("reading from part %u (scan_type: %u)",
                        i, m_index_scan_type));
    DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr + ORDERED_PART_NUM_OFFSET));
    /* Each queue slot holds [blob storage ptr][part num][record image] */
    uchar *rec_buf_ptr= part_rec_buf_ptr + ORDERED_REC_OFFSET;
    handler *file= m_file[i];

    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->ha_index_read_map(rec_buf_ptr,
                                     m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      /* Caller has specified reverse_order */
      break;
    case partition_index_first:
      error= file->ha_index_first(rec_buf_ptr);
      reverse_order= FALSE;
      break;
    case partition_index_last:
      error= file->ha_index_last(rec_buf_ptr);
      reverse_order= TRUE;
      break;
    case partition_read_range:
    {
      /*
        This can only read record to table->record[0], as it was set when
        the table was being opened. We have to memcpy data ourselves.
      */
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, TRUE);
      if (likely(!error))
        memcpy(rec_buf_ptr, table->record[0], m_rec_length);
      reverse_order= FALSE;
      break;
    }
    case partition_read_multi_range:
    {
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      DBUG_PRINT("info", ("partition %u", i));
      error= file->multi_range_read_next(&m_range_info[i]);
      DBUG_PRINT("info", ("error: %d", error));
      if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
      {
        /* This partition has no more MRR rows at all */
        bitmap_clear_bit(&m_mrr_used_partitions, i);
        continue;
      }
      if (likely(!error))
      {
        memcpy(rec_buf_ptr, table->record[0], m_rec_length);
        reverse_order= FALSE;
        m_stock_range_seq[i]= (((PARTITION_KEY_MULTI_RANGE *)
                                m_range_info[i])->id);
        /* Test if the key is in the first key range */
        if (m_stock_range_seq[i] != m_mrr_range_current->id)
        {
          /*
            smallest_range_seq contains the smallest key range we have seen
            so far
          */
          if (!smallest_range_seq || smallest_range_seq > m_stock_range_seq[i])
            smallest_range_seq= m_stock_range_seq[i];
          /* Row belongs to a later range; keep it "in stock" for now */
          continue;
        }
      }
      break;
    }
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    if (likely(!error))
    {
      found= TRUE;
      if (!m_using_extended_keys)
      {
        /* Append the row reference so equal keys sort deterministically */
        file->position(rec_buf_ptr);
        memcpy(rec_buf_ptr + m_rec_length, file->ref, file->ref_length);
      }
      /*
        Initialize queue without order first, simply insert
      */
      queue_element(&m_queue, j++)= part_rec_buf_ptr;
      if (table->s->blob_fields)
      {
        /* Park blob pointers in side storage while the row sits queued */
        Ordered_blob_storage **storage=
          *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf_ptr, storage, false);
      }
    }
    else if (error == HA_ERR_KEY_NOT_FOUND)
    {
      /* Remember this partition so index_next can pick it up later */
      DBUG_PRINT("info", ("HA_ERR_KEY_NOT_FOUND from partition %u", i));
      bitmap_set_bit(&m_key_not_found_partitions, i);
      m_key_not_found= true;
      saved_error= error;
    }
    else if (error != HA_ERR_END_OF_FILE)
    {
      DBUG_RETURN(error);
    }
  }

  if (!found && smallest_range_seq)
  {
    /* We know that there is an existing row based on code above */
    found= TRUE;
    part_rec_buf_ptr= m_ordered_rec_buffer;

    /*
      No key found in the first key range
      Collect all partitions that have a key in smallest_range_seq
    */
    DBUG_PRINT("info", ("partition !found && smallest_range_seq"));
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i <= m_part_spec.end_part;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      DBUG_PRINT("info", ("partition current_part: %u", i));
      if (i < m_part_spec.start_part)
      {
        /* Pruned partition: skip its buffer slot but keep walking */
        part_rec_buf_ptr+= m_priority_queue_rec_len;
        DBUG_PRINT("info", ("partition i < m_part_spec.start_part"));
        continue;
      }
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
      {
        part_rec_buf_ptr+= m_priority_queue_rec_len;
        DBUG_PRINT("info", ("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
        continue;
      }
      DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr + ORDERED_PART_NUM_OFFSET));
      if (smallest_range_seq == m_stock_range_seq[i])
      {
        /* This partition's stocked row becomes usable in this range */
        m_stock_range_seq[i]= 0;
        queue_element(&m_queue, j++)= (uchar *) part_rec_buf_ptr;
        DBUG_PRINT("info", ("partition smallest_range_seq == m_stock_range_seq[i]"));
      }
      part_rec_buf_ptr+= m_priority_queue_rec_len;
    }

    /* Update global m_mrr_range_current to the current range */
    while (m_mrr_range_current->id < smallest_range_seq)
      m_mrr_range_current= m_mrr_range_current->next;
  }
  if (found)
  {
    /*
      We found at least one partition with data, now sort all entries and
      after that read the first entry and copy it to the buffer to return in.
    */
    queue_set_max_at_top(&m_queue, reverse_order);
    queue_set_cmp_arg(&m_queue, (void*) this);
    m_queue.elements= j - queue_first_element(&m_queue);
    queue_fix(&m_queue);
    return_top_record(buf);
    DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
    DBUG_RETURN(0);
  }
  DBUG_RETURN(saved_error);
}
7899
7900
7901 /*
7902 Return the top record in sort order
7903
7904 SYNOPSIS
7905 return_top_record()
7906 out:buf Row returned in MySQL Row Format
7907
7908 RETURN VALUE
7909 NONE
7910 */
7911
return_top_record(uchar * buf)7912 void ha_partition::return_top_record(uchar *buf)
7913 {
7914 uint part_id;
7915 uchar *key_buffer= queue_top(&m_queue);
7916 uchar *rec_buffer= key_buffer + ORDERED_REC_OFFSET;
7917 DBUG_ENTER("ha_partition::return_top_record");
7918 DBUG_PRINT("enter", ("partition this: %p", this));
7919
7920 part_id= uint2korr(key_buffer + ORDERED_PART_NUM_OFFSET);
7921 memcpy(buf, rec_buffer, m_rec_length);
7922 if (table->s->blob_fields)
7923 {
7924 Ordered_blob_storage **storage= *((Ordered_blob_storage ***) key_buffer);
7925 swap_blobs(buf, storage, true);
7926 }
7927 m_last_part= part_id;
7928 DBUG_PRINT("info", ("partition m_last_part: %u", m_last_part));
7929 m_top_entry= part_id;
7930 table->status= 0; // Found an existing row
7931 m_file[part_id]->return_record_by_parent();
7932 DBUG_VOID_RETURN;
7933 }
7934
7935 /*
7936 This function is only used if the partitioned table has own partitions.
7937 This can happen if the partitioned VP engine is used (part of spider).
7938 */
7939
void ha_partition::return_record_by_parent()
{
  /* Forward to the child handler that produced the last returned row */
  m_file[m_last_part]->return_record_by_parent();
  /*
    NOTE(review): per the comment above, only engines with their own
    partition handling (partitioned VP/spider) use this path; the assert
    appears to flag that plain ha_partition is never expected to reach
    here in debug builds -- confirm intent.
  */
  DBUG_ASSERT(0);
}
7945
7946
7947 /**
7948 Add index_next/prev from partitions without exact match.
7949
7950 If there where any partitions that returned HA_ERR_KEY_NOT_FOUND when
7951 ha_index_read_map was done, those partitions must be included in the
7952 following index_next/prev call.
7953 */
7954
int ha_partition::handle_ordered_index_scan_key_not_found()
{
  int error;
  uint i, old_elements= m_queue.elements;
  uchar *part_buf= m_ordered_rec_buffer;
  uchar *curr_rec_buf= NULL;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan_key_not_found");
  DBUG_PRINT("enter", ("partition this: %p", this));
  DBUG_ASSERT(m_key_not_found);
  /*
    Loop over all used partitions to get the correct offset
    into m_ordered_rec_buffer.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (bitmap_is_set(&m_key_not_found_partitions, i))
    {
      /*
        This partition is used and did return HA_ERR_KEY_NOT_FOUND
        in index_read_map.
      */
      curr_rec_buf= part_buf + ORDERED_REC_OFFSET;
      error= m_file[i]->ha_index_next(curr_rec_buf);
      /* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
      DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
      if (likely(!error))
      {
        DBUG_PRINT("info", ("partition queue_insert(1)"));
        queue_insert(&m_queue, part_buf);
      }
      else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
        DBUG_RETURN(error);
    }
    /* Every used partition owns one fixed-size slot in the buffer */
    part_buf += m_priority_queue_rec_len;
  }
  /* m_key_not_found implies at least one partition was visited above */
  DBUG_ASSERT(curr_rec_buf);
  bitmap_clear_all(&m_key_not_found_partitions);
  m_key_not_found= false;

  if (m_queue.elements > old_elements)
  {
    /* Update m_top_entry, which may have changed. */
    uchar *key_buffer= queue_top(&m_queue);
    m_top_entry= uint2korr(key_buffer);
  }
  DBUG_RETURN(0);
}
8004
8005
8006 /*
8007 Common routine to handle index_next with ordered results
8008
8009 SYNOPSIS
8010 handle_ordered_next()
8011 out:buf Read row in MySQL Row Format
8012 next_same Called from index_next_same
8013
8014 RETURN VALUE
8015 HA_ERR_END_OF_FILE End of scan
8016 0 Success
8017 other Error code
8018 */
8019
int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
{
  int error;
  DBUG_ENTER("ha_partition::handle_ordered_next");

  if (m_top_entry == NO_CURRENT_PART_ID)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  /* The next row must come from the partition currently on queue top */
  uint part_id= m_top_entry;
  uchar *part_rec_buf_ptr= queue_top(&m_queue);
  uchar *rec_buf= part_rec_buf_ptr + ORDERED_REC_OFFSET;
  handler *file;

  if (m_key_not_found)
  {
    if (is_next_same)
    {
      /* Only rows which match the key. */
      m_key_not_found= false;
      bitmap_clear_all(&m_key_not_found_partitions);
    }
    else
    {
      /* There are partitions not included in the index record queue. */
      uint old_elements= m_queue.elements;
      if (unlikely((error= handle_ordered_index_scan_key_not_found())))
        DBUG_RETURN(error);
      /*
        If the queue top changed, i.e. one of the partitions that gave
        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
        return it.
        Otherwise replace the old with a call to index_next (fall through).
      */
      if (old_elements != m_queue.elements && part_id != m_top_entry)
      {
        return_top_record(buf);
        DBUG_RETURN(0);
      }
    }
  }
  if (part_id >= m_tot_parts)
  {
    /* This should never happen! */
    DBUG_ASSERT(0);
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  file= m_file[part_id];

  if (m_index_scan_type == partition_read_range)
  {
    error= file->read_range_next();
    if (likely(!error))
    {
      /* Child reads into table->record[0]; copy into this queue slot */
      memcpy(rec_buf, table->record[0], m_rec_length);
      if (table->s->blob_fields)
      {
        /* Park blob pointers in side storage while the row is queued */
        Ordered_blob_storage **storage=
          *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf, storage, false);
      }
    }
  }
  else if (m_index_scan_type == partition_read_multi_range)
  {
    DBUG_PRINT("info", ("partition_read_multi_range route"));
    DBUG_PRINT("info", ("part_id: %u", part_id));
    bool get_next= FALSE;
    error= file->multi_range_read_next(&m_range_info[part_id]);
    DBUG_PRINT("info", ("error: %d", error));
    if (unlikely(error == HA_ERR_KEY_NOT_FOUND))
      error= HA_ERR_END_OF_FILE;
    if (unlikely(error == HA_ERR_END_OF_FILE))
    {
      /* This partition is done with MRR; drop it from the queue */
      bitmap_clear_bit(&m_mrr_used_partitions, part_id);
      DBUG_PRINT("info", ("partition m_queue.elements: %u", m_queue.elements));
      if (m_queue.elements)
      {
        DBUG_PRINT("info", ("partition queue_remove_top(1)"));
        queue_remove_top(&m_queue);
        if (m_queue.elements)
        {
          return_top_record(buf);
          DBUG_PRINT("info", ("Record returned from partition %u (3)",
                              m_top_entry));
          DBUG_RETURN(0);
        }
      }
      /* Queue is empty: try to refill it from the next key range */
      get_next= TRUE;
    }
    else if (likely(!error))
    {
      DBUG_PRINT("info", ("m_range_info[%u])->id: %u", part_id,
                          ((PARTITION_KEY_MULTI_RANGE *)
                           m_range_info[part_id])->id));
      DBUG_PRINT("info", ("m_mrr_range_current->id: %u",
                          m_mrr_range_current->id));
      memcpy(rec_buf, table->record[0], m_rec_length);
      if (table->s->blob_fields)
      {
        Ordered_blob_storage **storage= *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf, storage, false);
      }
      /*
        A row from a later key range cannot be merged with the current
        range: stock it for later and remove the partition from the queue.
      */
      if (((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id !=
          m_mrr_range_current->id)
      {
        m_stock_range_seq[part_id]=
          ((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id;
        DBUG_PRINT("info", ("partition queue_remove_top(2)"));
        queue_remove_top(&m_queue);
        if (!m_queue.elements)
          get_next= TRUE;
      }
    }
    if (get_next)
    {
      /*
        The queue ran dry for the current key range. Find the smallest
        stocked range id among the remaining MRR partitions and rebuild
        the queue from the rows stocked for that range.
      */
      DBUG_PRINT("info", ("get_next route"));
      uint i, j= 0, smallest_range_seq= UINT_MAX32;
      for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
      {
        if (!(bitmap_is_set(&(m_part_info->read_partitions), i)))
          continue;
        if (!bitmap_is_set(&m_mrr_used_partitions, i))
          continue;
        if (smallest_range_seq > m_stock_range_seq[i])
          smallest_range_seq= m_stock_range_seq[i];
      }

      DBUG_PRINT("info", ("smallest_range_seq: %u", smallest_range_seq));
      if (smallest_range_seq != UINT_MAX32)
      {
        /* Note: intentionally shadows the outer part_rec_buf_ptr */
        uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
        DBUG_PRINT("info", ("partition queue_remove_all(2)"));
        queue_remove_all(&m_queue);
        DBUG_PRINT("info", ("m_part_spec.start_part: %u",
                            m_part_spec.start_part));

        for (i= bitmap_get_first_set(&m_part_info->read_partitions);
             i <= m_part_spec.end_part;
             i= bitmap_get_next_set(&m_part_info->read_partitions, i),
             part_rec_buf_ptr+= m_priority_queue_rec_len)
        {
          DBUG_PRINT("info",("partition part_id: %u", i));
          if (i < m_part_spec.start_part)
          {
            DBUG_PRINT("info",("partition i < m_part_spec.start_part"));
            continue;
          }
          if (!bitmap_is_set(&m_mrr_used_partitions, i))
          {
            DBUG_PRINT("info",("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
            continue;
          }
          DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr +
                                     ORDERED_PART_NUM_OFFSET));
          DBUG_PRINT("info", ("partition m_stock_range_seq[%u]: %u",
                              i, m_stock_range_seq[i]));
          if (smallest_range_seq == m_stock_range_seq[i])
          {
            /* This partition's stocked row belongs to the new range */
            m_stock_range_seq[i]= 0;
            DBUG_PRINT("info", ("partition queue_insert(2)"));
            queue_insert(&m_queue, part_rec_buf_ptr);
            j++;
          }
        }
        /* Advance the global current range to the one we switched to */
        while (m_mrr_range_current->id < smallest_range_seq)
          m_mrr_range_current= m_mrr_range_current->next;

        DBUG_PRINT("info",("partition m_mrr_range_current: %p",
                           m_mrr_range_current));
        DBUG_PRINT("info",("partition m_mrr_range_current->id: %u",
                           m_mrr_range_current ? m_mrr_range_current->id : 0));
        queue_set_max_at_top(&m_queue, FALSE);
        queue_set_cmp_arg(&m_queue, (void*) this);
        m_queue.elements= j;
        queue_fix(&m_queue);
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (4)",
                            m_top_entry));
        DBUG_RETURN(0);
      }
    }
  }
  else if (!is_next_same)
    error= file->ha_index_next(rec_buf);
  else
    error= file->ha_index_next_same(rec_buf, m_start_key.key,
                                    m_start_key.length);

  if (unlikely(error))
  {
    if (error == HA_ERR_END_OF_FILE && m_queue.elements)
    {
      /* Return next buffered row */
      DBUG_PRINT("info", ("partition queue_remove_top(3)"));
      queue_remove_top(&m_queue);
      if (m_queue.elements)
      {
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (2)",
                            m_top_entry));
        error= 0;
      }
    }
    DBUG_RETURN(error);
  }

  if (!m_using_extended_keys)
  {
    /* Append row reference so equal keys keep a deterministic order */
    file->position(rec_buf);
    memcpy(rec_buf + m_rec_length, file->ref, file->ref_length);
  }

  /* Re-sort the queue top with the freshly fetched row, then return top */
  queue_replace_top(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
  DBUG_RETURN(0);
}
8238
8239
8240 /*
8241 Common routine to handle index_prev with ordered results
8242
8243 SYNOPSIS
8244 handle_ordered_prev()
8245 out:buf Read row in MySQL Row Format
8246
8247 RETURN VALUE
8248 HA_ERR_END_OF_FILE End of scan
8249 0 Success
8250 other Error code
8251 */
8252
handle_ordered_prev(uchar * buf)8253 int ha_partition::handle_ordered_prev(uchar *buf)
8254 {
8255 int error;
8256 DBUG_ENTER("ha_partition::handle_ordered_prev");
8257 DBUG_PRINT("enter", ("partition: %p", this));
8258
8259 if (m_top_entry == NO_CURRENT_PART_ID)
8260 DBUG_RETURN(HA_ERR_END_OF_FILE);
8261
8262 uint part_id= m_top_entry;
8263 uchar *rec_buf= queue_top(&m_queue) + ORDERED_REC_OFFSET;
8264 handler *file= m_file[part_id];
8265
8266 if (unlikely((error= file->ha_index_prev(rec_buf))))
8267 {
8268 if (error == HA_ERR_END_OF_FILE && m_queue.elements)
8269 {
8270 DBUG_PRINT("info", ("partition queue_remove_top(4)"));
8271 queue_remove_top(&m_queue);
8272 if (m_queue.elements)
8273 {
8274 return_top_record(buf);
8275 DBUG_PRINT("info", ("Record returned from partition %u (2)",
8276 m_top_entry));
8277 error= 0;
8278 }
8279 }
8280 DBUG_RETURN(error);
8281 }
8282 queue_replace_top(&m_queue);
8283 return_top_record(buf);
8284 DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
8285 DBUG_RETURN(0);
8286 }
8287
8288
8289 /****************************************************************************
8290 MODULE information calls
8291 ****************************************************************************/
8292
8293 /*
8294 These are all first approximations of the extra, info, scan_time
8295 and read_time calls
8296 */
8297
8298 /**
8299 Helper function for sorting according to number of rows in descending order.
8300 */
8301
compare_number_of_records(ha_partition * me,const uint32 * a,const uint32 * b)8302 int ha_partition::compare_number_of_records(ha_partition *me,
8303 const uint32 *a,
8304 const uint32 *b)
8305 {
8306 handler **file= me->m_file;
8307 /* Note: sorting in descending order! */
8308 if (file[*a]->stats.records > file[*b]->stats.records)
8309 return -1;
8310 if (file[*a]->stats.records < file[*b]->stats.records)
8311 return 1;
8312 return 0;
8313 }
8314
8315
8316 /*
8317 General method to gather info from handler
8318
8319 SYNOPSIS
8320 info()
8321 flag Specifies what info is requested
8322
8323 RETURN VALUE
8324 NONE
8325
8326 DESCRIPTION
8327 ::info() is used to return information to the optimizer.
8328 Currently this table handler doesn't implement most of the fields
8329 really needed. SHOW also makes use of this data
8330 Another note, if your handler doesn't provide exact record count,
8331 you will probably want to have the following in your code:
8332 if (records < 2)
8333 records = 2;
8334 The reason is that the server will optimize for cases of only a single
8335 record. If in a table scan you don't know the number of records
8336 it will probably be better to set records to two so you can return
8337 as many records as you need.
8338
8339 Along with records a few more variables you may wish to set are:
8340 records
8341 deleted
8342 data_file_length
8343 index_file_length
8344 delete_length
8345 check_time
8346 Take a look at the public variables in handler.h for more information.
8347
8348 Called in:
8349 filesort.cc
8350 ha_heap.cc
8351 item_sum.cc
8352 opt_sum.cc
8353 sql_delete.cc
8354 sql_delete.cc
8355 sql_derived.cc
8356 sql_select.cc
8357 sql_select.cc
8358 sql_select.cc
8359 sql_select.cc
8360 sql_select.cc
8361 sql_show.cc
8362 sql_show.cc
8363 sql_show.cc
8364 sql_show.cc
8365 sql_table.cc
8366 sql_union.cc
8367 sql_update.cc
8368
8369 Some flags that are not implemented
8370 HA_STATUS_POS:
8371 This parameter is never used from the MySQL Server. It is checked in a
8372 place in MyISAM so could potentially be used by MyISAM specific
8373 programs.
8374 HA_STATUS_NO_LOCK:
8375 This is declared and often used. It's only used by MyISAM.
8376 It means that MySQL doesn't need the absolute latest statistics
8377 information. This may save the handler from doing internal locks while
8378 retrieving statistics data.
8379 */
8380
int ha_partition::info(uint flag)
{
  uint no_lock_flag= flag & HA_STATUS_NO_LOCK;
  uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA;
  DBUG_ENTER("ha_partition::info");

#ifndef DBUG_OFF
  if (bitmap_is_set_all(&(m_part_info->read_partitions)))
    DBUG_PRINT("info", ("All partitions are used"));
#endif /* DBUG_OFF */
  if (flag & HA_STATUS_AUTO)
  {
    bool auto_inc_is_first_in_idx= (table_share->next_number_keypart == 0);
    bool all_parts_opened= true;
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (!table->found_next_number_field)
      stats.auto_increment_value= 0;
    else if (part_share->auto_inc_initialized)
    {
      /* Cached value is valid; read it under the auto-inc mutex. */
      lock_auto_increment();
      stats.auto_increment_value= part_share->next_auto_inc_val;
      unlock_auto_increment();
    }
    else
    {
      lock_auto_increment();
      /* to avoid two concurrent initializations, check again when locked */
      if (part_share->auto_inc_initialized)
        stats.auto_increment_value= part_share->next_auto_inc_val;
      else
      {
        /*
          The auto-inc mutex in the table_share is locked, so we do not need
          to have the handlers locked.
          HA_STATUS_NO_LOCK is not checked, since we cannot skip locking
          the mutex, because it is initialized.
        */
        handler *file, **file_array;
        ulonglong auto_increment_value= 0;
        file_array= m_file;
        DBUG_PRINT("info",
                   ("checking all partitions for auto_increment_value"));
        do
        {
          if (!bitmap_is_set(&m_opened_partitions, (uint)(file_array - m_file)))
          {
            /*
              Some partitions aren't opened.
              So we can't calculate the autoincrement.
            */
            all_parts_opened= false;
            break;
          }
          file= *file_array;
          file->info(HA_STATUS_AUTO | no_lock_flag);
          /* Table-level auto-inc is the maximum over all partitions. */
          set_if_bigger(auto_increment_value,
                        file->stats.auto_increment_value);
        } while (*(++file_array));

        DBUG_ASSERT(auto_increment_value);
        stats.auto_increment_value= auto_increment_value;
        /*
          Only cache the value in the share when every partition was
          consulted and auto-inc is the first keypart of its index.
        */
        if (all_parts_opened && auto_inc_is_first_in_idx)
        {
          set_if_bigger(part_share->next_auto_inc_val,
                        auto_increment_value);
          if (can_use_for_auto_inc_init())
            part_share->auto_inc_initialized= true;
          DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu",
                              (ulong) part_share->next_auto_inc_val));
        }
      }
      unlock_auto_increment();
    }
  }
  if (flag & HA_STATUS_VARIABLE)
  {
    uint i;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    /*
      Calculates statistical variables
      records:           Estimate of number records in table
                         We report sum (always at least 2 if not empty)
      deleted:           Estimate of number holes in the table due to
                         deletes
                         We report sum
      data_file_length:  Length of data file, in principle bytes in table
                         We report sum
      index_file_length: Length of index file, in principle bytes in
                         indexes in the table
                         We report sum
      delete_length:     Length of free space easily used by new records
                         in table
                         We report sum
      mean_record_length:Mean record length in the table
                         We calculate this
      check_time:        Time of last check (only applicable to MyISAM)
                         We report last time of all underlying handlers
    */
    handler *file;
    stats.records= 0;
    stats.deleted= 0;
    stats.data_file_length= 0;
    stats.index_file_length= 0;
    stats.delete_length= 0;
    stats.check_time= 0;
    stats.checksum= 0;
    stats.checksum_null= TRUE;
    /* Accumulate over the partitions the query will actually read. */
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      file= m_file[i];
      file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      stats.records+= file->stats.records;
      stats.deleted+= file->stats.deleted;
      stats.data_file_length+= file->stats.data_file_length;
      stats.index_file_length+= file->stats.index_file_length;
      stats.delete_length+= file->stats.delete_length;
      if (file->stats.check_time > stats.check_time)
        stats.check_time= file->stats.check_time;
      if (!file->stats.checksum_null)
      {
        stats.checksum+= file->stats.checksum;
        stats.checksum_null= FALSE;
      }
    }
    /*
      The optimizer special-cases tables with < 2 rows; report at least 2
      for a non-empty table unless the engine gives exact counts.
    */
    if (stats.records && stats.records < 2 &&
        !(m_file[0]->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
      stats.records= 2;
    if (stats.records > 0)
      stats.mean_rec_length= (ulong) (stats.data_file_length / stats.records);
    else
      stats.mean_rec_length= 0;
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    /*
      Recalculate loads of constant variables. MyISAM also sets things
      directly on the table share object.

      Check whether this should be fixed since handlers should not
      change things directly on the table object.

      Monty comment: This should NOT be changed! It's the handlers
      responsibility to correct table->s->keys_xxxx information if keys
      have been disabled.

      The most important parameters set here is records per key on
      all indexes. block_size and primary key ref_length.

      For each index there is an array of rec_per_key.
      As an example if we have an index with three attributes a,b and c
      we will have an array of 3 rec_per_key.
      rec_per_key[0] is an estimate of number of records divided by
      number of unique values of the field a.
      rec_per_key[1] is an estimate of the number of records divided
      by the number of unique combinations of the fields a and b.
      rec_per_key[2] is an estimate of the number of records divided
      by the number of unique combinations of the fields a,b and c.

      Many handlers only set the value of rec_per_key when all fields
      are bound (rec_per_key[2] in the example above).

      If the handler doesn't support statistics, it should set all of the
      above to 0.

      We first scan through all partitions to get the one holding most rows.
      We will then allow the handler with the most rows to set
      the rec_per_key and use this as an estimate on the total table.

      max_data_file_length:  Maximum data file length
                             We ignore it, is only used in
                             SHOW TABLE STATUS
      max_index_file_length: Maximum index file length
                             We ignore it since it is never used
      block_size:            Block size used
                             We set it to the value of the first handler
      ref_length:            We set this to the value calculated
                             and stored in local object
      create_time:           Creation time of table

      So we calculate these constants by using the variables from the
      handler with most rows.
    */
    handler *file, **file_array;
    ulonglong max_records= 0;
    uint32 i= 0;
    uint32 handler_instance= 0;
    bool handler_instance_set= 0;

    file_array= m_file;
    do
    {
      file= *file_array;
      if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
      {
        /* Get variables if not already done */
        if (!(flag & HA_STATUS_VARIABLE) ||
            !bitmap_is_set(&(m_part_info->read_partitions),
                           (uint) (file_array - m_file)))
          file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
        if (file->stats.records > max_records || !handler_instance_set)
        {
          handler_instance_set= 1;
          max_records= file->stats.records;
          handler_instance= i;
        }
      }
      i++;
    } while (*(++file_array));
    /*
      Sort the array of part_ids by number of records in
      in descending order.
    */
    my_qsort2((void*) m_part_ids_sorted_by_num_of_records,
              m_tot_parts,
              sizeof(uint32),
              (qsort2_cmp) compare_number_of_records,
              this);

    /* Take constant statistics from the partition holding the most rows. */
    file= m_file[handler_instance];
    file->info(HA_STATUS_CONST | no_lock_flag);
    stats.block_size= file->stats.block_size;
    stats.create_time= file->stats.create_time;
    ref_length= m_ref_length;
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    handler *file= m_file[m_last_part];
    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
    /*
      This flag is used to get index number of the unique index that
      reported duplicate key
      We will report the errkey on the last handler used and ignore the rest
      Note: not all engines support HA_STATUS_ERRKEY, so set errkey first.
    */
    file->errkey= errkey;
    file->info(HA_STATUS_ERRKEY | no_lock_flag);
    errkey= file->errkey;
  }
  if (flag & HA_STATUS_TIME)
  {
    handler *file, **file_array;
    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
    /*
      This flag is used to set the latest update time of the table.
      Used by SHOW commands
      We will report the maximum of these times
    */
    stats.update_time= 0;
    file_array= m_file;
    do
    {
      file= *file_array;
      file->info(HA_STATUS_TIME | no_lock_flag);
      if (file->stats.update_time > stats.update_time)
        stats.update_time= file->stats.update_time;
    } while (*(++file_array));
  }
  DBUG_RETURN(0);
}
8642
8643
get_dynamic_partition_info(PARTITION_STATS * stat_info,uint part_id)8644 void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
8645 uint part_id)
8646 {
8647 handler *file= m_file[part_id];
8648 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
8649 file->info(HA_STATUS_TIME | HA_STATUS_VARIABLE |
8650 HA_STATUS_VARIABLE_EXTRA | HA_STATUS_NO_LOCK);
8651
8652 stat_info->records= file->stats.records;
8653 stat_info->mean_rec_length= file->stats.mean_rec_length;
8654 stat_info->data_file_length= file->stats.data_file_length;
8655 stat_info->max_data_file_length= file->stats.max_data_file_length;
8656 stat_info->index_file_length= file->stats.index_file_length;
8657 stat_info->max_index_file_length= file->stats.max_index_file_length;
8658 stat_info->delete_length= file->stats.delete_length;
8659 stat_info->create_time= file->stats.create_time;
8660 stat_info->update_time= file->stats.update_time;
8661 stat_info->check_time= file->stats.check_time;
8662 stat_info->check_sum= file->stats.checksum;
8663 stat_info->check_sum_null= file->stats.checksum_null;
8664 }
8665
8666
void ha_partition::set_partitions_to_open(List<String> *partition_names)
{
  /*
    Remember the list of partition names requested for opening; it is
    consumed later when the partitions are actually opened.
    NULL presumably means "all partitions" -- confirm against callers.
  */
  m_partitions_to_open= partition_names;
}
8671
8672
open_read_partitions(char * name_buff,size_t name_buff_size)8673 int ha_partition::open_read_partitions(char *name_buff, size_t name_buff_size)
8674 {
8675 handler **file;
8676 char *name_buffer_ptr;
8677 int error= 0;
8678
8679 name_buffer_ptr= m_name_buffer_ptr;
8680 file= m_file;
8681 m_file_sample= NULL;
8682 do
8683 {
8684 int n_file= (int)(file-m_file);
8685 int is_open= bitmap_is_set(&m_opened_partitions, n_file);
8686 int should_be_open= bitmap_is_set(&m_part_info->read_partitions, n_file);
8687
8688 /*
8689 TODO: we can close some opened partitions if they're not
8690 used in the query. It probably should be syncronized with the
8691 table_open_cache value.
8692
8693 if (is_open && !should_be_open)
8694 {
8695 if (unlikely((error= (*file)->ha_close())))
8696 goto err_handler;
8697 bitmap_clear_bit(&m_opened_partitions, n_file);
8698 }
8699 else
8700 */
8701 if (!is_open && should_be_open)
8702 {
8703 LEX_CSTRING save_connect_string= table->s->connect_string;
8704 if (unlikely((error=
8705 create_partition_name(name_buff, name_buff_size,
8706 table->s->normalized_path.str,
8707 name_buffer_ptr, NORMAL_PART_NAME,
8708 FALSE))))
8709 goto err_handler;
8710 if (!((*file)->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
8711 table->s->connect_string= m_connect_string[(uint)(file-m_file)];
8712 error= (*file)->ha_open(table, name_buff, m_mode,
8713 m_open_test_lock | HA_OPEN_NO_PSI_CALL);
8714 table->s->connect_string= save_connect_string;
8715 if (error)
8716 goto err_handler;
8717 bitmap_set_bit(&m_opened_partitions, n_file);
8718 m_last_part= n_file;
8719 }
8720 if (!m_file_sample && should_be_open)
8721 m_file_sample= *file;
8722 name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
8723 } while (*(++file));
8724
8725 err_handler:
8726 return error;
8727 }
8728
8729
change_partitions_to_open(List<String> * partition_names)8730 int ha_partition::change_partitions_to_open(List<String> *partition_names)
8731 {
8732 char name_buff[FN_REFLEN+1];
8733 int error= 0;
8734
8735 if (m_is_clone_of)
8736 return 0;
8737
8738 m_partitions_to_open= partition_names;
8739 if (unlikely((error= m_part_info->set_partition_bitmaps(partition_names))))
8740 goto err_handler;
8741
8742 if (m_lock_type != F_UNLCK)
8743 {
8744 /*
8745 That happens after the LOCK TABLE statement.
8746 Do nothing in this case.
8747 */
8748 return 0;
8749 }
8750
8751 check_insert_autoincrement();
8752 if (bitmap_cmp(&m_opened_partitions, &m_part_info->read_partitions) != 0)
8753 return 0;
8754
8755 if (unlikely((error= read_par_file(table->s->normalized_path.str)) ||
8756 (error= open_read_partitions(name_buff, sizeof(name_buff)))))
8757 goto err_handler;
8758
8759 clear_handler_file();
8760
8761 err_handler:
8762 return error;
8763 }
8764
8765
extra_cb(handler * h,void * operation)8766 static int extra_cb(handler *h, void *operation)
8767 {
8768 return h->extra(*(enum ha_extra_function*)operation);
8769 }
8770
8771
start_keyread_cb(handler * h,void * p)8772 static int start_keyread_cb(handler* h, void *p)
8773 {
8774 return h->ha_start_keyread(*(uint*)p);
8775 }
8776
8777
end_keyread_cb(handler * h,void * unused)8778 static int end_keyread_cb(handler* h, void *unused)
8779 {
8780 return h->ha_end_keyread();
8781 }
8782
8783
8784 /**
8785 General function to prepare handler for certain behavior.
8786
8787 @param[in] operation operation to execute
8788
8789 @return status
8790 @retval 0 success
8791 @retval >0 error code
8792
8793 @detail
8794
8795 extra() is called whenever the server wishes to send a hint to
8796 the storage engine. The MyISAM engine implements the most hints.
8797
8798 We divide the parameters into the following categories:
8799 1) Operations used by most handlers
8800 2) Operations used by some non-MyISAM handlers
8801 3) Operations used only by MyISAM
8802 4) Operations only used by temporary tables for query processing
8803 5) Operations only used by MyISAM internally
8804 6) Operations not used at all
8805 7) Operations only used by federated tables for query processing
8806 8) Operations only used by NDB
8807 9) Operations only used by MERGE
8808
8809 The partition handler need to handle category 1), 2) and 3).
8810
8811 1) Operations used by most handlers
8812 -----------------------------------
8813 HA_EXTRA_RESET:
8814 This option is used by most handlers and it resets the handler state
8815 to the same state as after an open call. This includes releasing
8816 any READ CACHE or WRITE CACHE or other internal buffer used.
8817
8818 It is called from the reset method in the handler interface. There are
8819 three instances where this is called.
8820 1) After completing a INSERT ... SELECT ... query the handler for the
8821 table inserted into is reset
8822 2) It is called from close_thread_table which in turn is called from
8823 close_thread_tables except in the case where the tables are locked
8824 in which case ha_commit_stmt is called instead.
8825 It is only called from here if refresh_version hasn't changed and the
8826 table is not an old table when calling close_thread_table.
8827 close_thread_tables is called from many places as a general clean up
8828 function after completing a query.
8829 3) It is called when deleting the QUICK_RANGE_SELECT object if the
8830 QUICK_RANGE_SELECT object had its own handler object. It is called
8831 immediately before close of this local handler object.
8832 HA_EXTRA_KEYREAD:
8833 HA_EXTRA_NO_KEYREAD:
8834 These parameters are used to provide an optimisation hint to the handler.
8835 If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
8836 many handlers this means that the index-only scans can be used and it
8837 is not necessary to use the real records to satisfy this part of the
8838 query. Index-only scans is a very important optimisation for disk-based
8839 indexes. For main-memory indexes most indexes contain a reference to the
8840 record and thus KEYREAD only says that it is enough to read key fields.
8841 HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET
8842 will disable this option.
8843 The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
8844 feature is supported.
8845 HA_EXTRA_FLUSH:
8846 Indication to flush tables to disk, is supposed to be used to
8847 ensure disk based tables are flushed at end of query execution.
8848 Currently is never used.
8849
8850 HA_EXTRA_FORCE_REOPEN:
8851 Only used by MyISAM and Archive, called when altering table,
8852 closing tables to enforce a reopen of the table files.
8853
8854 2) Operations used by some non-MyISAM handlers
8855 ----------------------------------------------
8856 HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
8857 This is a strictly InnoDB feature that is more or less undocumented.
8858 When it is activated InnoDB copies field by field from its fetch
8859 cache instead of all fields in one memcpy. Have no idea what the
8860 purpose of this is.
8861 Cut from include/my_base.h:
8862 When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
8863 other fields intact. When this is off (by default) InnoDB will use memcpy
8864 to overwrite entire row.
8865 HA_EXTRA_IGNORE_DUP_KEY:
8866 HA_EXTRA_NO_IGNORE_DUP_KEY:
    Informs the handler that we will not stop the transaction if we get
    duplicate key errors during insert/update.
8869 Always called in pair, triggered by INSERT IGNORE and other similar
8870 SQL constructs.
8871 Not used by MyISAM.
8872
8873 3) Operations used only by MyISAM
8874 ---------------------------------
8875 HA_EXTRA_NORMAL:
8876 Only used in MyISAM to reset quick mode, not implemented by any other
8877 handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
8878
8879 It is called after completing a successful DELETE query if the QUICK
8880 option is set.
8881
8882 HA_EXTRA_QUICK:
8883 When the user does DELETE QUICK FROM table where-clause; this extra
8884 option is called before the delete query is performed and
8885 HA_EXTRA_NORMAL is called after the delete query is completed.
8886 Temporary tables used internally in MySQL always set this option
8887
8888 The meaning of quick mode is that when deleting in a B-tree no merging
8889 of leafs is performed. This is a common method and many large DBMS's
8890 actually only support this quick mode since it is very difficult to
8891 merge leaves in a tree used by many threads concurrently.
8892
8893 HA_EXTRA_CACHE:
8894 This flag is usually set with extra_opt along with a cache size.
8895 The size of this buffer is set by the user variable
8896 record_buffer_size. The value of this cache size is the amount of
8897 data read from disk in each fetch when performing a table scan.
8898 This means that before scanning a table it is normal to call
8899 extra with HA_EXTRA_CACHE and when the scan is completed to call
8900 HA_EXTRA_NO_CACHE to release the cache memory.
8901
8902 Some special care is taken when using this extra parameter since there
8903 could be a write ongoing on the table in the same statement. In this
8904 one has to take special care since there might be a WRITE CACHE as
8905 well. HA_EXTRA_CACHE specifies using a READ CACHE and using
8906 READ CACHE and WRITE CACHE at the same time is not possible.
8907
8908 Only MyISAM currently use this option.
8909
8910 It is set when doing full table scans using rr_sequential and
8911 reset when completing such a scan with end_read_record
8912 (resetting means calling extra with HA_EXTRA_NO_CACHE).
8913
8914 It is set in filesort.cc for MyISAM internal tables and it is set in
8915 a multi-update where HA_EXTRA_CACHE is called on a temporary result
8916 table and after that ha_rnd_init(0) on table to be updated
8917 and immediately after that HA_EXTRA_NO_CACHE on table to be updated.
8918
8919 Apart from that it is always used from init_read_record but not when
8920 used from UPDATE statements. It is not used from DELETE statements
8921 with ORDER BY and LIMIT but it is used in normal scan loop in DELETE
8922 statements. The reason here is that DELETE's in MyISAM doesn't move
    existing data rows.
8924
8925 It is also set in copy_data_between_tables when scanning the old table
8926 to copy over to the new table.
8927 And it is set in join_init_read_record where quick objects are used
8928 to perform a scan on the table. In this case the full table scan can
8929 even be performed multiple times as part of the nested loop join.
8930
8931 For purposes of the partition handler it is obviously necessary to have
8932 special treatment of this extra call. If we would simply pass this
8933 extra call down to each handler we would allocate
8934 cache size * no of partitions amount of memory and this is not
8935 necessary since we will only scan one partition at a time when doing
8936 full table scans.
8937
8938 Thus we treat it by first checking whether we have MyISAM handlers in
8939 the table, if not we simply ignore the call and if we have we will
8940 record the call but will not call any underlying handler yet. Then
8941 when performing the sequential scan we will check this recorded value
8942 and call extra_opt whenever we start scanning a new partition.
8943
8944 HA_EXTRA_NO_CACHE:
8945 When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
8946 flush method in the select_union class.
8947 It is used to some extent when insert delayed inserts.
8948 See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
8949
8950 It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
8951 if they are MyISAM handlers. Other handlers we can ignore the call
8952 for. If no cache is in use they will quickly return after finding
8953 this out. And we also ensure that all caches are disabled and no one
8954 is left by mistake.
8955 In the future this call will probably be deleted and we will instead call
8956 ::reset();
8957
8958 HA_EXTRA_WRITE_CACHE:
8959 See above, called from various places. It is mostly used when we
8960 do INSERT ... SELECT
8961 No special handling to save cache space is developed currently.
8962
8963 HA_EXTRA_PREPARE_FOR_UPDATE:
8964 This is called as part of a multi-table update. When the table to be
8965 updated is also scanned then this informs MyISAM handler to drop any
8966 caches if dynamic records are used (fixed size records do not care
8967 about this call). We pass this along to the first partition to scan, and
8968 flag that it is to be called after HA_EXTRA_CACHE when moving to the next
8969 partition to scan.
8970
8971 HA_EXTRA_PREPARE_FOR_DROP:
8972 Only used by MyISAM, called in preparation for a DROP TABLE.
8973 It's used mostly by Windows that cannot handle dropping an open file.
8974 On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
8975
8976 HA_EXTRA_PREPARE_FOR_RENAME:
8977 Informs the handler we are about to attempt a rename of the table.
8978 For handlers that have share open files (MyISAM key-file and
8979 Archive writer) they must close the files before rename is possible
8980 on Windows.
8981
8982 HA_EXTRA_READCHECK:
8983 HA_EXTRA_NO_READCHECK:
8984 Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
8985 this is not needed in SQL. The reason for this call is that MyISAM sets
8986 the READ_CHECK_USED in the open call so the call is needed for MyISAM
8987 to reset this feature.
8988 The idea with this parameter was to inform of doing/not doing a read
8989 check before applying an update. Since SQL always performs a read before
8990 applying the update No Read Check is needed in MyISAM as well.
8991
8992 This is a cut from Docs/myisam.txt
8993 Sometimes you might want to force an update without checking whether
8994 another user has changed the record since you last read it. This is
8995 somewhat dangerous, so it should ideally not be used. That can be
8996 accomplished by wrapping the mi_update() call in two calls to mi_extra(),
8997 using these functions:
8998 HA_EXTRA_NO_READCHECK=5 No readcheck on update
8999 HA_EXTRA_READCHECK=6 Use readcheck (def)
9000
9001 HA_EXTRA_REMEMBER_POS:
9002 HA_EXTRA_RESTORE_POS:
9003 System versioning needs this for MyISAM and Aria tables.
9004 On DELETE using PRIMARY KEY:
9005 1) handler::ha_index_read_map() saves rowid used for row delete/update
9006 2) handler::ha_update_row() can rewrite saved rowid
9007 3) handler::ha_delete_row()/handler::ha_update_row() expects saved but got
9008 different rowid and operation fails
9009 Using those flags prevents harmful side effect of 2)
9010
9011 4) Operations only used by temporary tables for query processing
9012 ----------------------------------------------------------------
9013 HA_EXTRA_RESET_STATE:
9014 Same as reset() except that buffers are not released. If there is
9015 a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
9016 or to change type of cache between READ CACHE and WRITE CACHE.
9017
9018 This extra function is always called immediately before calling
9019 delete_all_rows on the handler for temporary tables.
9020 There are cases however when HA_EXTRA_RESET_STATE isn't called in
9021 a similar case for a temporary table in sql_union.cc and in two other
9022 cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
9023 called afterwards.
9024 The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
9025 disable caching, delete all rows and enable WRITE CACHE. This is
9026 used for temporary tables containing distinct sums and a
9027 functional group.
9028
9029 The only case that delete_all_rows is called on non-temporary tables
9030 is in sql_delete.cc when DELETE FROM table; is called by a user.
9031 In this case no special extra calls are performed before or after this
9032 call.
9033
9034 The partition handler should not need to bother about this one. It
9035 should never be called.
9036
9037 HA_EXTRA_NO_ROWS:
9038 Don't insert rows indication to HEAP and MyISAM, only used by temporary
9039 tables used in query processing.
9040 Not handled by partition handler.
9041
9042 5) Operations only used by MyISAM internally
9043 --------------------------------------------
9044 HA_EXTRA_REINIT_CACHE:
9045 This call reinitializes the READ CACHE described above if there is one
9046 and otherwise the call is ignored.
9047
9048 We can thus safely call it on all underlying handlers if they are
9049 MyISAM handlers. It is however never called so we don't handle it at all.
9050 HA_EXTRA_FLUSH_CACHE:
9051 Flush WRITE CACHE in MyISAM. It is only from one place in the code.
9052 This is in sql_insert.cc where it is called if the table_flags doesn't
9053 contain HA_DUPLICATE_POS. The only handler having the HA_DUPLICATE_POS
9054 set is the MyISAM handler and so the only handler not receiving this
9055 call is MyISAM.
9056 Thus in effect this call is called but never used. Could be removed
9057 from sql_insert.cc
9058 HA_EXTRA_NO_USER_CHANGE:
9059 Only used by MyISAM, never called.
9060 Simulates lock_type as locked.
9061 HA_EXTRA_WAIT_LOCK:
9062 HA_EXTRA_WAIT_NOLOCK:
9063 Only used by MyISAM, called from MyISAM handler but never from server
9064 code on top of the handler.
9065 Sets lock_wait on/off
9066 HA_EXTRA_NO_KEYS:
Only used by MyISAM, only used internally in the MyISAM handler, never
called from the server level.
9069 HA_EXTRA_KEYREAD_CHANGE_POS:
9070 HA_EXTRA_PRELOAD_BUFFER_SIZE:
9071 HA_EXTRA_CHANGE_KEY_TO_DUP:
9072 HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
9073 Only used by MyISAM, never called.
9074
9075 6) Operations not used at all
9076 -----------------------------
9077 HA_EXTRA_KEY_CACHE:
9078 HA_EXTRA_NO_KEY_CACHE:
These parameters are no longer used and could be removed.
9080
9081 7) Operations only used by federated tables for query processing
9082 ----------------------------------------------------------------
9083 HA_EXTRA_INSERT_WITH_UPDATE:
9084 Inform handler that an "INSERT...ON DUPLICATE KEY UPDATE" will be
9085 executed. This condition is unset by HA_EXTRA_NO_IGNORE_DUP_KEY.
9086
9087 8) Operations only used by NDB
9088 ------------------------------
9089 HA_EXTRA_DELETE_CANNOT_BATCH:
9090 HA_EXTRA_UPDATE_CANNOT_BATCH:
9091 Inform handler that delete_row()/update_row() cannot batch deletes/updates
and should perform them immediately. This may be needed when the table has
AFTER DELETE/UPDATE triggers which access the subject table.
9094 These flags are reset by the handler::extra(HA_EXTRA_RESET) call.
9095
9096 9) Operations only used by MERGE
9097 ------------------------------
9098 HA_EXTRA_ADD_CHILDREN_LIST:
9099 HA_EXTRA_ATTACH_CHILDREN:
9100 HA_EXTRA_IS_ATTACHED_CHILDREN:
9101 HA_EXTRA_DETACH_CHILDREN:
9102 Special actions for MERGE tables. Ignore.
9103 */
9104
extra(enum ha_extra_function operation)9105 int ha_partition::extra(enum ha_extra_function operation)
9106 {
9107 DBUG_ENTER("ha_partition:extra");
9108 DBUG_PRINT("enter", ("operation: %d", (int) operation));
9109
9110 switch (operation) {
9111 /* Category 1), used by most handlers */
9112 case HA_EXTRA_NO_KEYREAD:
9113 DBUG_RETURN(loop_partitions(end_keyread_cb, NULL));
9114 case HA_EXTRA_KEYREAD:
9115 case HA_EXTRA_FLUSH:
9116 case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
9117 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9118 case HA_EXTRA_PREPARE_FOR_RENAME:
9119 case HA_EXTRA_FORCE_REOPEN:
9120 DBUG_RETURN(loop_extra_alter(operation));
9121 break;
9122
9123 /* Category 2), used by non-MyISAM handlers */
9124 case HA_EXTRA_IGNORE_DUP_KEY:
9125 case HA_EXTRA_NO_IGNORE_DUP_KEY:
9126 case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
9127 {
9128 if (!m_myisam)
9129 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9130 }
9131 break;
9132
9133 /* Category 3), used by MyISAM handlers */
9134 case HA_EXTRA_PREPARE_FOR_UPDATE:
9135 /*
9136 Needs to be run on the first partition in the range now, and
9137 later in late_extra_cache, when switching to a new partition to scan.
9138 */
9139 m_extra_prepare_for_update= TRUE;
9140 if (m_part_spec.start_part != NO_CURRENT_PART_ID)
9141 {
9142 if (!m_extra_cache)
9143 m_extra_cache_part_id= m_part_spec.start_part;
9144 DBUG_ASSERT(m_extra_cache_part_id == m_part_spec.start_part);
9145 (void) m_file[m_part_spec.start_part]->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
9146 }
9147 break;
9148 case HA_EXTRA_NORMAL:
9149 case HA_EXTRA_QUICK:
9150 case HA_EXTRA_PREPARE_FOR_DROP:
9151 case HA_EXTRA_FLUSH_CACHE:
9152 case HA_EXTRA_PREPARE_FOR_ALTER_TABLE:
9153 case HA_EXTRA_REMEMBER_POS:
9154 case HA_EXTRA_RESTORE_POS:
9155 {
9156 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9157 }
9158 case HA_EXTRA_NO_READCHECK:
9159 {
9160 /*
9161 This is only done as a part of ha_open, which is also used in
9162 ha_partition::open, so no need to do anything.
9163 */
9164 break;
9165 }
9166 case HA_EXTRA_CACHE:
9167 {
9168 prepare_extra_cache(0);
9169 break;
9170 }
9171 case HA_EXTRA_NO_CACHE:
9172 {
9173 int ret= 0;
9174 if (m_extra_cache_part_id != NO_CURRENT_PART_ID)
9175 ret= m_file[m_extra_cache_part_id]->extra(HA_EXTRA_NO_CACHE);
9176 m_extra_cache= FALSE;
9177 m_extra_cache_size= 0;
9178 m_extra_prepare_for_update= FALSE;
9179 m_extra_cache_part_id= NO_CURRENT_PART_ID;
9180 DBUG_RETURN(ret);
9181 }
9182 case HA_EXTRA_WRITE_CACHE:
9183 {
9184 m_extra_cache= FALSE;
9185 m_extra_cache_size= 0;
9186 m_extra_prepare_for_update= FALSE;
9187 m_extra_cache_part_id= NO_CURRENT_PART_ID;
9188 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9189 }
9190 case HA_EXTRA_IGNORE_NO_KEY:
9191 case HA_EXTRA_NO_IGNORE_NO_KEY:
9192 {
9193 /*
9194 Ignore as these are specific to NDB for handling
9195 idempotency
9196 */
9197 break;
9198 }
9199 case HA_EXTRA_WRITE_CAN_REPLACE:
9200 case HA_EXTRA_WRITE_CANNOT_REPLACE:
9201 {
9202 /*
9203 Informs handler that write_row() can replace rows which conflict
9204 with row being inserted by PK/unique key without reporting error
9205 to the SQL-layer.
9206
9207 At this time, this is safe by limitation of ha_partition
9208 */
9209 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9210 }
9211 /* Category 7), used by federated handlers */
9212 case HA_EXTRA_INSERT_WITH_UPDATE:
9213 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9214 /* Category 8) Operations only used by NDB */
9215 case HA_EXTRA_DELETE_CANNOT_BATCH:
9216 case HA_EXTRA_UPDATE_CANNOT_BATCH:
9217 {
9218 /* Currently only NDB use the *_CANNOT_BATCH */
9219 break;
9220 }
9221 /* Category 9) Operations only used by MERGE */
9222 case HA_EXTRA_ADD_CHILDREN_LIST:
9223 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9224 case HA_EXTRA_ATTACH_CHILDREN:
9225 {
9226 int result;
9227 uint num_locks;
9228 handler **file;
9229 if ((result= loop_partitions(extra_cb, &operation)))
9230 DBUG_RETURN(result);
9231
9232 /* Recalculate lock count as each child may have different set of locks */
9233 num_locks= 0;
9234 file= m_file;
9235 do
9236 {
9237 num_locks+= (*file)->lock_count();
9238 } while (*(++file));
9239
9240 m_num_locks= num_locks;
9241 break;
9242 }
9243 case HA_EXTRA_IS_ATTACHED_CHILDREN:
9244 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9245 case HA_EXTRA_DETACH_CHILDREN:
9246 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9247 case HA_EXTRA_MARK_AS_LOG_TABLE:
9248 /*
9249 http://dev.mysql.com/doc/refman/5.1/en/partitioning-limitations.html
9250 says we no longer support logging to partitioned tables, so we fail
9251 here.
9252 */
9253 DBUG_RETURN(ER_UNSUPORTED_LOG_ENGINE);
9254 case HA_EXTRA_STARTING_ORDERED_INDEX_SCAN:
9255 case HA_EXTRA_BEGIN_ALTER_COPY:
9256 case HA_EXTRA_END_ALTER_COPY:
9257 DBUG_RETURN(loop_partitions(extra_cb, &operation));
9258 default:
9259 {
9260 /* Temporary crash to discover what is wrong */
9261 DBUG_ASSERT(0);
9262 break;
9263 }
9264 }
9265 DBUG_RETURN(1);
9266 }
9267
9268
9269 /**
9270 Special extra call to reset extra parameters
9271
9272 @return Operation status.
9273 @retval >0 Error code
9274 @retval 0 Success
9275
9276 @note Called at end of each statement to reset buffers.
9277 To avoid excessive calls, the m_partitions_to_reset bitmap keep records
9278 of which partitions that have been used in extra(), external_lock() or
9279 start_stmt() and is needed to be called.
9280 */
9281
reset(void)9282 int ha_partition::reset(void)
9283 {
9284 int result= 0;
9285 int tmp;
9286 uint i;
9287 DBUG_ENTER("ha_partition::reset");
9288
9289 for (i= bitmap_get_first_set(&m_partitions_to_reset);
9290 i < m_tot_parts;
9291 i= bitmap_get_next_set(&m_partitions_to_reset, i))
9292 {
9293 if (bitmap_is_set(&m_opened_partitions, i) &&
9294 (tmp= m_file[i]->ha_reset()))
9295 result= tmp;
9296 }
9297 bitmap_clear_all(&m_partitions_to_reset);
9298 m_extra_prepare_for_update= FALSE;
9299 DBUG_RETURN(result);
9300 }
9301
9302 /**
9303 Special extra method with additional parameter
9304 See @ref ha_partition::extra
9305
9306 @param[in] operation operation to execute
9307 @param[in] arg extra argument
9308
9309 @return status
9310 @retval 0 success
9311 @retval >0 error code
9312
9313 @detail
9314 Operations supported by extra_opt:
9315 HA_EXTRA_KEYREAD:
9316 arg is interpreted as key index
9317 HA_EXTRA_CACHE:
9318 arg is interpreted as size of cache in full table scan
9319
9320 For detailed description refer to @ref ha_partition::extra
9321 */
9322
extra_opt(enum ha_extra_function operation,ulong arg)9323 int ha_partition::extra_opt(enum ha_extra_function operation, ulong arg)
9324 {
9325 DBUG_ENTER("ha_partition::extra_opt");
9326
9327 switch (operation)
9328 {
9329 case HA_EXTRA_KEYREAD:
9330 DBUG_RETURN(loop_partitions(start_keyread_cb, &arg));
9331 case HA_EXTRA_CACHE:
9332 prepare_extra_cache(arg);
9333 DBUG_RETURN(0);
9334 default:
9335 DBUG_ASSERT(0);
9336 }
9337 DBUG_RETURN(1);
9338 }
9339
9340
9341 /*
9342 Call extra on handler with HA_EXTRA_CACHE and cachesize
9343
9344 SYNOPSIS
9345 prepare_extra_cache()
9346 cachesize Size of cache for full table scan
9347
9348 RETURN VALUE
9349 NONE
9350 */
9351
void ha_partition::prepare_extra_cache(uint cachesize)
{
  DBUG_ENTER("ha_partition::prepare_extra_cache");
  DBUG_PRINT("enter", ("cachesize %u", cachesize));

  /* Remember the request; it is applied lazily per scanned partition. */
  m_extra_cache= TRUE;
  m_extra_cache_size= cachesize;
  if (m_part_spec.start_part != NO_CURRENT_PART_ID)
  {
    /*
      A scan is already positioned on a partition: enable the cache on it
      right away instead of waiting for late_extra_cache(). The assert
      states the bit should already be set; bitmap_set_bit() is still done
      so release builds (where DBUG_ASSERT compiles away) guarantee it.
    */
    DBUG_ASSERT(bitmap_is_set(&m_partitions_to_reset,
                              m_part_spec.start_part));
    bitmap_set_bit(&m_partitions_to_reset, m_part_spec.start_part);
    late_extra_cache(m_part_spec.start_part);
  }
  DBUG_VOID_RETURN;
}
9368
9369
9370 /**
9371 Prepares our new and reorged handlers for rename or delete.
9372
9373 @param operation Operation to forward
9374
9375 @return Operation status
9376 @retval 0 Success
9377 @retval !0 Error
9378 */
9379
loop_extra_alter(enum ha_extra_function operation)9380 int ha_partition::loop_extra_alter(enum ha_extra_function operation)
9381 {
9382 int result= 0, tmp;
9383 handler **file;
9384 DBUG_ENTER("ha_partition::loop_extra_alter");
9385 DBUG_ASSERT(operation == HA_EXTRA_PREPARE_FOR_RENAME ||
9386 operation == HA_EXTRA_FORCE_REOPEN);
9387
9388 if (m_new_file != NULL)
9389 {
9390 for (file= m_new_file; *file; file++)
9391 if ((tmp= (*file)->extra(operation)))
9392 result= tmp;
9393 }
9394 if (m_reorged_file != NULL)
9395 {
9396 for (file= m_reorged_file; *file; file++)
9397 if ((tmp= (*file)->extra(operation)))
9398 result= tmp;
9399 }
9400 if ((tmp= loop_partitions(extra_cb, &operation)))
9401 result= tmp;
9402 DBUG_RETURN(result);
9403 }
9404
9405
9406 /**
9407 Call callback(part, param) on all partitions
9408
9409 @param callback a callback to call for each partition
9410 @param param a void*-parameter passed to callback
9411
9412 @return Operation status
9413 @retval >0 Error code
9414 @retval 0 Success
9415 */
9416
loop_partitions(handler_callback callback,void * param)9417 int ha_partition::loop_partitions(handler_callback callback, void *param)
9418 {
9419 int result= 0, tmp;
9420 uint i;
9421 DBUG_ENTER("ha_partition::loop_partitions");
9422
9423 for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
9424 i < m_tot_parts;
9425 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
9426 {
9427 /*
9428 This can be called after an error in ha_open.
9429 In this case calling 'extra' can crash.
9430 */
9431 if (bitmap_is_set(&m_opened_partitions, i) &&
9432 (tmp= callback(m_file[i], param)))
9433 result= tmp;
9434 }
9435 /* Add all used partitions to be called in reset(). */
9436 bitmap_union(&m_partitions_to_reset, &m_part_info->lock_partitions);
9437 DBUG_RETURN(result);
9438 }
9439
9440
9441 /*
9442 Call extra(HA_EXTRA_CACHE) on next partition_id
9443
9444 SYNOPSIS
9445 late_extra_cache()
9446 partition_id Partition id to call extra on
9447
9448 RETURN VALUE
9449 NONE
9450 */
9451
late_extra_cache(uint partition_id)9452 void ha_partition::late_extra_cache(uint partition_id)
9453 {
9454 handler *file;
9455 DBUG_ENTER("ha_partition::late_extra_cache");
9456 DBUG_PRINT("enter", ("extra_cache %u prepare %u partid %u size %u",
9457 m_extra_cache, m_extra_prepare_for_update,
9458 partition_id, m_extra_cache_size));
9459
9460 if (!m_extra_cache && !m_extra_prepare_for_update)
9461 DBUG_VOID_RETURN;
9462 file= m_file[partition_id];
9463 if (m_extra_cache)
9464 {
9465 if (m_extra_cache_size == 0)
9466 (void) file->extra(HA_EXTRA_CACHE);
9467 else
9468 (void) file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size);
9469 }
9470 if (m_extra_prepare_for_update)
9471 {
9472 (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
9473 }
9474 m_extra_cache_part_id= partition_id;
9475 DBUG_VOID_RETURN;
9476 }
9477
9478
9479 /*
9480 Call extra(HA_EXTRA_NO_CACHE) on next partition_id
9481
9482 SYNOPSIS
9483 late_extra_no_cache()
9484 partition_id Partition id to call extra on
9485
9486 RETURN VALUE
9487 NONE
9488 */
9489
late_extra_no_cache(uint partition_id)9490 void ha_partition::late_extra_no_cache(uint partition_id)
9491 {
9492 handler *file;
9493 DBUG_ENTER("ha_partition::late_extra_no_cache");
9494
9495 if (!m_extra_cache && !m_extra_prepare_for_update)
9496 DBUG_VOID_RETURN;
9497 file= m_file[partition_id];
9498 (void) file->extra(HA_EXTRA_NO_CACHE);
9499 DBUG_ASSERT(partition_id == m_extra_cache_part_id);
9500 m_extra_cache_part_id= NO_CURRENT_PART_ID;
9501 DBUG_VOID_RETURN;
9502 }
9503
9504
9505 /****************************************************************************
9506 MODULE optimiser support
9507 ****************************************************************************/
9508
9509 /**
9510 Get keys to use for scanning.
9511
9512 @return key_map of keys usable for scanning
9513
9514 @note No need to use read_partitions here, since it does not depend on
9515 which partitions is used, only which storage engine used.
9516 */
9517
keys_to_use_for_scanning()9518 const key_map *ha_partition::keys_to_use_for_scanning()
9519 {
9520 DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
9521 DBUG_RETURN(get_open_file_sample()->keys_to_use_for_scanning());
9522 }
9523
9524
9525 /**
9526 Minimum number of rows to base optimizer estimate on.
9527 */
9528
min_rows_for_estimate()9529 ha_rows ha_partition::min_rows_for_estimate()
9530 {
9531 uint i, max_used_partitions, tot_used_partitions;
9532 DBUG_ENTER("ha_partition::min_rows_for_estimate");
9533
9534 tot_used_partitions= bitmap_bits_set(&m_part_info->read_partitions);
9535
9536 /*
9537 All partitions might have been left as unused during partition pruning
9538 due to, for example, an impossible WHERE condition. Nonetheless, the
9539 optimizer might still attempt to perform (e.g. range) analysis where an
9540 estimate of the the number of rows is calculated using records_in_range.
9541 Hence, to handle this and other possible cases, use zero as the minimum
9542 number of rows to base the estimate on if no partition is being used.
9543 */
9544 if (!tot_used_partitions)
9545 DBUG_RETURN(0);
9546
9547 /*
9548 Allow O(log2(tot_partitions)) increase in number of used partitions.
9549 This gives O(tot_rows/log2(tot_partitions)) rows to base the estimate on.
9550 I.e when the total number of partitions doubles, allow one more
9551 partition to be checked.
9552 */
9553 i= 2;
9554 max_used_partitions= 1;
9555 while (i < m_tot_parts)
9556 {
9557 max_used_partitions++;
9558 i= i << 1;
9559 }
9560 if (max_used_partitions > tot_used_partitions)
9561 max_used_partitions= tot_used_partitions;
9562
9563 /* stats.records is already updated by the info(HA_STATUS_VARIABLE) call. */
9564 DBUG_PRINT("info", ("max_used_partitions: %u tot_rows: %lu",
9565 max_used_partitions,
9566 (ulong) stats.records));
9567 DBUG_PRINT("info", ("tot_used_partitions: %u min_rows_to_check: %lu",
9568 tot_used_partitions,
9569 (ulong) stats.records * max_used_partitions
9570 / tot_used_partitions));
9571 DBUG_RETURN(stats.records * max_used_partitions / tot_used_partitions);
9572 }
9573
9574
9575 /**
9576 Get the biggest used partition.
9577
9578 Starting at the N:th biggest partition and skips all non used
9579 partitions, returning the biggest used partition found
9580
9581 @param[in,out] part_index Skip the *part_index biggest partitions
9582
9583 @return The biggest used partition with index not lower than *part_index.
9584 @retval NO_CURRENT_PART_ID No more partition used.
9585 @retval != NO_CURRENT_PART_ID partition id of biggest used partition with
9586 index >= *part_index supplied. Note that
9587 *part_index will be updated to the next
9588 partition index to use.
9589 */
9590
get_biggest_used_partition(uint * part_index)9591 uint ha_partition::get_biggest_used_partition(uint *part_index)
9592 {
9593 uint part_id;
9594 while ((*part_index) < m_tot_parts)
9595 {
9596 part_id= m_part_ids_sorted_by_num_of_records[(*part_index)++];
9597 if (bitmap_is_set(&m_part_info->read_partitions, part_id))
9598 return part_id;
9599 }
9600 return NO_CURRENT_PART_ID;
9601 }
9602
9603
9604 /*
9605 Return time for a scan of the table
9606
9607 SYNOPSIS
9608 scan_time()
9609
9610 RETURN VALUE
9611 time for scan
9612 */
9613
scan_time()9614 double ha_partition::scan_time()
9615 {
9616 double scan_time= 0;
9617 uint i;
9618 DBUG_ENTER("ha_partition::scan_time");
9619
9620 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9621 i < m_tot_parts;
9622 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9623 scan_time+= m_file[i]->scan_time();
9624 DBUG_RETURN(scan_time);
9625 }
9626
9627
9628 /**
9629 @brief
9630 Caculate time to scan the given index (index only scan)
9631
9632 @param inx Index number to scan
9633
9634 @return time for scanning index inx
9635 */
9636
key_scan_time(uint inx)9637 double ha_partition::key_scan_time(uint inx)
9638 {
9639 double scan_time= 0;
9640 uint i;
9641 DBUG_ENTER("ha_partition::key_scan_time");
9642 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9643 i < m_tot_parts;
9644 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9645 scan_time+= m_file[i]->key_scan_time(inx);
9646 DBUG_RETURN(scan_time);
9647 }
9648
9649
keyread_time(uint inx,uint ranges,ha_rows rows)9650 double ha_partition::keyread_time(uint inx, uint ranges, ha_rows rows)
9651 {
9652 double read_time= 0;
9653 uint i;
9654 DBUG_ENTER("ha_partition::keyread_time");
9655 if (!ranges)
9656 DBUG_RETURN(handler::keyread_time(inx, ranges, rows));
9657 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9658 i < m_tot_parts;
9659 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9660 read_time+= m_file[i]->keyread_time(inx, ranges, rows);
9661 DBUG_RETURN(read_time);
9662 }
9663
9664
9665 /**
9666 Find number of records in a range.
9667 @param inx Index number
9668 @param min_key Start of range
9669 @param max_key End of range
9670
9671 @return Number of rows in range.
9672
9673 Given a starting key, and an ending key estimate the number of rows that
9674 will exist between the two. max_key may be empty which in case determine
9675 if start_key matches any rows.
9676 */
9677
ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key,
                                       const key_range *max_key,
                                       page_range *pages)
{
  ha_rows min_rows_to_check, rows, estimated_rows=0, checked_rows= 0;
  uint partition_index= 0, part_id;
  page_range ignore_pages;
  DBUG_ENTER("ha_partition::records_in_range");

  /* Don't calculate pages of more than one active partition */
  if (bitmap_bits_set(&m_part_info->read_partitions) != 1)
    pages= &ignore_pages;

  /* Minimum number of rows to sample before trusting an extrapolation. */
  min_rows_to_check= min_rows_for_estimate();

  /* Sample partitions in descending size order so we converge quickly. */
  while ((part_id= get_biggest_used_partition(&partition_index))
         != NO_CURRENT_PART_ID)
  {
    rows= m_file[part_id]->records_in_range(inx, min_key, max_key, pages);

    DBUG_PRINT("info", ("part %u match %lu rows of %lu", part_id, (ulong) rows,
                        (ulong) m_file[part_id]->stats.records));

    if (rows == HA_POS_ERROR)
      DBUG_RETURN(HA_POS_ERROR);
    estimated_rows+= rows;
    checked_rows+= m_file[part_id]->stats.records;
    /*
      Returning 0 means no rows can be found, so we must continue
      this loop as long as we have estimated_rows == 0.
      Also many engines return 1 to indicate that there may exist
      a matching row, we do not normalize this by dividing by number of
      used partitions, but leave it to be returned as a sum, which will
      reflect that we will need to scan each partition's index.

      Note that this statistics may not always be correct, so we must
      continue even if the current partition has 0 rows, since we might have
      deleted rows from the current partition, or inserted to the next
      partition.
    */
    if (estimated_rows && checked_rows &&
        checked_rows >= min_rows_to_check)
    {
      /* Enough sampled: scale the observed match ratio to the full table. */
      DBUG_PRINT("info",
                 ("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
                  inx,
                  (ulong) (estimated_rows * stats.records / checked_rows),
                  (ulong) estimated_rows,
                  (ulong) stats.records,
                  (ulong) checked_rows));
      DBUG_RETURN(estimated_rows * stats.records / checked_rows);
    }
  }
  /* All used partitions sampled: return the exact sum of the estimates. */
  DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
                      inx,
                      (ulong) estimated_rows));
  DBUG_RETURN(estimated_rows);
}
9736
9737
9738 /**
9739 Estimate upper bound of number of rows.
9740
9741 @return Number of rows.
9742 */
9743
estimate_rows_upper_bound()9744 ha_rows ha_partition::estimate_rows_upper_bound()
9745 {
9746 ha_rows rows, tot_rows= 0;
9747 handler **file= m_file;
9748 DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
9749
9750 do
9751 {
9752 if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
9753 {
9754 rows= (*file)->estimate_rows_upper_bound();
9755 if (rows == HA_POS_ERROR)
9756 DBUG_RETURN(HA_POS_ERROR);
9757 tot_rows+= rows;
9758 }
9759 } while (*(++file));
9760 DBUG_RETURN(tot_rows);
9761 }
9762
9763
9764 /*
9765 Get time to read
9766
9767 SYNOPSIS
9768 read_time()
9769 index Index number used
9770 ranges Number of ranges
9771 rows Number of rows
9772
9773 RETURN VALUE
9774 time for read
9775
9776 DESCRIPTION
9777 This will be optimised later to include whether or not the index can
9778 be used with partitioning. To achieve we need to add another parameter
9779 that specifies how many of the index fields that are bound in the ranges.
9780 Possibly added as a new call to handlers.
9781 */
9782
read_time(uint index,uint ranges,ha_rows rows)9783 double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
9784 {
9785 DBUG_ENTER("ha_partition::read_time");
9786
9787 DBUG_RETURN(get_open_file_sample()->read_time(index, ranges, rows));
9788 }
9789
9790
9791 /**
9792 Number of rows in table. see handler.h
9793
9794 @return Number of records in the table (after pruning!)
9795 */
9796
records()9797 ha_rows ha_partition::records()
9798 {
9799 ha_rows tot_rows= 0;
9800 uint i;
9801 DBUG_ENTER("ha_partition::records");
9802
9803 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9804 i < m_tot_parts;
9805 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9806 {
9807 if (unlikely(m_file[i]->pre_records()))
9808 DBUG_RETURN(HA_POS_ERROR);
9809 const ha_rows rows= m_file[i]->records();
9810 if (unlikely(rows == HA_POS_ERROR))
9811 DBUG_RETURN(HA_POS_ERROR);
9812 tot_rows+= rows;
9813 }
9814 DBUG_PRINT("exit", ("records: %lld", (longlong) tot_rows));
9815 DBUG_RETURN(tot_rows);
9816 }
9817
9818
9819 /*
9820 Is it ok to switch to a new engine for this table
9821
9822 SYNOPSIS
9823 can_switch_engine()
9824
9825 RETURN VALUE
9826 TRUE Ok
9827 FALSE Not ok
9828
9829 DESCRIPTION
9830 Used to ensure that tables with foreign key constraints are not moved
9831 to engines without foreign key support.
9832 */
9833
can_switch_engines()9834 bool ha_partition::can_switch_engines()
9835 {
9836 handler **file;
9837 DBUG_ENTER("ha_partition::can_switch_engines");
9838
9839 file= m_file;
9840 do
9841 {
9842 if (!(*file)->can_switch_engines())
9843 DBUG_RETURN(FALSE);
9844 } while (*(++file));
9845 DBUG_RETURN(TRUE);
9846 }
9847
9848
9849 /*
9850 Is table cache supported
9851
9852 SYNOPSIS
9853 table_cache_type()
9854
9855 */
9856
table_cache_type()9857 uint8 ha_partition::table_cache_type()
9858 {
9859 DBUG_ENTER("ha_partition::table_cache_type");
9860
9861 DBUG_RETURN(get_open_file_sample()->table_cache_type());
9862 }
9863
9864
9865 /**
9866 Calculate hash value for KEY partitioning using an array of fields.
9867
9868 @param field_array An array of the fields in KEY partitioning
9869
9870 @return hash_value calculated
9871
9872 @note Uses the hash function on the character set of the field.
9873 Integer and floating point fields use the binary character set by default.
9874 */
9875
uint32 ha_partition::calculate_key_hash_value(Field **field_array)
{
  ulong nr1= 1;
  ulong nr2= 4;
  bool use_51_hash;
  /*
    Tables created with the 5.1 KEY algorithm must keep producing the same
    hash values after upgrade so rows stay in their original partitions;
    for those, several field types are hashed with a fixed character set
    below instead of the field's own collation.
  */
  use_51_hash= MY_TEST((*field_array)->table->part_info->key_algorithm ==
                       partition_info::KEY_ALGORITHM_51);

  do
  {
    Field *field= *field_array;
    if (use_51_hash)
    {
      switch (field->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
        {
          if (field->is_null())
          {
            /* Fold a NULL marker into the hash state (as 5.1 did). */
            nr1^= (nr1 << 1) | 1;
            /* NOTE: "continue" advances the enclosing do-while loop. */
            continue;
          }
          /* Force this to my_hash_sort_bin, which was used in 5.1! */
          uint len= field->pack_length();
          my_charset_bin.hash_sort(field->ptr, len, &nr1, &nr2);
          /* Done with this field, continue with next one. */
          continue;
        }
      case MYSQL_TYPE_STRING:
      case MYSQL_TYPE_VARCHAR:
      case MYSQL_TYPE_BIT:
        /* Not affected, same in 5.1 and 5.5 */
        break;
      /*
        ENUM/SET uses my_hash_sort_simple in 5.1 (i.e. my_charset_latin1)
        and my_hash_sort_bin in 5.5!
      */
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          if (field->is_null())
          {
            /* Same NULL marker as for the numeric/temporal types above. */
            nr1^= (nr1 << 1) | 1;
            continue;
          }
          /* Force this to my_hash_sort_bin, which was used in 5.1! */
          uint len= field->pack_length();
          my_charset_latin1.hash_sort(field->ptr, len, &nr1, &nr2);
          continue;
        }
      /* New types in mysql-5.6. */
      case MYSQL_TYPE_DATETIME2:
      case MYSQL_TYPE_TIME2:
      case MYSQL_TYPE_TIMESTAMP2:
        /* Not affected, 5.6+ only! */
        break;

      /* These types should not be allowed for partitioning! */
      case MYSQL_TYPE_NULL:
      case MYSQL_TYPE_DECIMAL:
      case MYSQL_TYPE_DATE:
      case MYSQL_TYPE_TINY_BLOB:
      case MYSQL_TYPE_MEDIUM_BLOB:
      case MYSQL_TYPE_LONG_BLOB:
      case MYSQL_TYPE_BLOB:
      case MYSQL_TYPE_VAR_STRING:
      case MYSQL_TYPE_GEOMETRY:
        /* fall through */
      default:
        DBUG_ASSERT(0);                    // New type?
        /* Fall through for default hashing (5.5). */
      }
      /* fall through, use collation based hashing. */
    }
    /* Default (5.5+) hashing: use the field's own collation. */
    field->hash(&nr1, &nr2);
  } while (*(++field_array));
  return (uint32) nr1;
}
9965
9966
9967 /****************************************************************************
9968 MODULE print messages
9969 ****************************************************************************/
9970
index_type(uint inx)9971 const char *ha_partition::index_type(uint inx)
9972 {
9973 uint first_used_partition;
9974 DBUG_ENTER("ha_partition::index_type");
9975
9976 first_used_partition= bitmap_get_first_set(&(m_part_info->read_partitions));
9977
9978 if (first_used_partition == MY_BIT_NONE)
9979 {
9980 DBUG_ASSERT(0); // How can this happen?
9981 DBUG_RETURN(handler::index_type(inx));
9982 }
9983
9984 DBUG_RETURN(m_file[first_used_partition]->index_type(inx));
9985 }
9986
9987
get_row_type() const9988 enum row_type ha_partition::get_row_type() const
9989 {
9990 uint i;
9991 enum row_type type;
9992 DBUG_ENTER("ha_partition::get_row_type");
9993
9994 i= bitmap_get_first_set(&m_part_info->read_partitions);
9995 DBUG_ASSERT(i < m_tot_parts);
9996 if (i >= m_tot_parts)
9997 DBUG_RETURN(ROW_TYPE_NOT_USED);
9998
9999 type= m_file[i]->get_row_type();
10000 DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
10001
10002 for (i= bitmap_get_next_set(&m_part_info->lock_partitions, i);
10003 i < m_tot_parts;
10004 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
10005 {
10006 enum row_type part_type= m_file[i]->get_row_type();
10007 DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
10008 if (part_type != type)
10009 DBUG_RETURN(ROW_TYPE_NOT_USED);
10010 }
10011
10012 DBUG_RETURN(type);
10013 }
10014
10015
/*
  Append the field values of the current (or error) row to str, for use
  in diagnostic messages. Uses the primary key fields when the table has
  a PK, otherwise the full partition field array.
*/
void ha_partition::append_row_to_str(String &str)
{
  const uchar *rec;
  /* Use record[0] unless an error record from another buffer is set. */
  bool is_rec0= !m_err_rec || m_err_rec == table->record[0];
  if (is_rec0)
    rec= table->record[0];
  else
    rec= m_err_rec;
  // If PK, use full PK instead of full part field array!
  if (table->s->primary_key != MAX_KEY)
  {
    KEY *key= table->key_info + table->s->primary_key;
    KEY_PART_INFO *key_part= key->key_part;
    KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
    /*
      Field pointers normally point into record[0]; repoint them at the
      error record before unpacking, and restore them afterwards.
    */
    if (!is_rec0)
      set_key_field_ptr(key, rec, table->record[0]);
    for (; key_part != key_part_end; key_part++)
    {
      Field *field= key_part->field;
      str.append(" ");
      str.append(&field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    /* Restore field pointers to record[0]. */
    if (!is_rec0)
      set_key_field_ptr(key, table->record[0], rec);
  }
  else
  {
    Field **field_ptr;
    /* Same repoint/restore dance, but for the partition field array. */
    if (!is_rec0)
      table->move_fields(m_part_info->full_part_field_array, rec,
                         table->record[0]);
    /* No primary key, use full partition field array. */
    for (field_ptr= m_part_info->full_part_field_array;
         *field_ptr;
         field_ptr++)
    {
      Field *field= *field_ptr;
      str.append(" ");
      str.append(&field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    if (!is_rec0)
      table->move_fields(m_part_info->full_part_field_array, table->record[0],
                         rec);
  }
}
10065
10066
print_error(int error,myf errflag)10067 void ha_partition::print_error(int error, myf errflag)
10068 {
10069 THD *thd= ha_thd();
10070 DBUG_ENTER("ha_partition::print_error");
10071 DBUG_PRINT("enter", ("error: %d", error));
10072
10073 /* Should probably look for my own errors first */
10074 if ((error == HA_ERR_NO_PARTITION_FOUND) &&
10075 ! (thd->lex->alter_info.partition_flags & ALTER_PARTITION_TRUNCATE))
10076 {
10077 m_part_info->print_no_partition_found(table, errflag);
10078 DBUG_VOID_RETURN;
10079 }
10080 else if (error == HA_ERR_ROW_IN_WRONG_PARTITION)
10081 {
10082 /* Should only happen on DELETE or UPDATE! */
10083 DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_DELETE ||
10084 thd_sql_command(thd) == SQLCOM_DELETE_MULTI ||
10085 thd_sql_command(thd) == SQLCOM_UPDATE ||
10086 thd_sql_command(thd) == SQLCOM_UPDATE_MULTI);
10087 DBUG_ASSERT(m_err_rec);
10088 if (m_err_rec)
10089 {
10090 uint max_length;
10091 char buf[MAX_KEY_LENGTH];
10092 String str(buf,sizeof(buf),system_charset_info);
10093 uint32 part_id;
10094 str.length(0);
10095 str.append("(");
10096 str.append_ulonglong(m_last_part);
10097 str.append(" != ");
10098 if (get_part_for_buf(m_err_rec, m_rec0, m_part_info, &part_id))
10099 str.append("?");
10100 else
10101 str.append_ulonglong(part_id);
10102 str.append(")");
10103 append_row_to_str(str);
10104
10105 /* Log this error, so the DBA can notice it and fix it! */
10106 sql_print_error("Table '%-192s' corrupted: row in wrong partition: %s"
10107 "Please REPAIR the table!",
10108 table->s->table_name.str,
10109 str.c_ptr_safe());
10110
10111 max_length= (MYSQL_ERRMSG_SIZE -
10112 (uint) strlen(ER_THD(thd, ER_ROW_IN_WRONG_PARTITION)));
10113 if (str.length() >= max_length)
10114 {
10115 str.length(max_length-4);
10116 str.append(STRING_WITH_LEN("..."));
10117 }
10118 my_error(ER_ROW_IN_WRONG_PARTITION, MYF(0), str.c_ptr_safe());
10119 m_err_rec= NULL;
10120 DBUG_VOID_RETURN;
10121 }
10122 /* fall through to generic error handling. */
10123 }
10124
10125 /*
10126 We choose a main handler's print_error if:
10127 * m_file has not been initialized, like in bug#42438
10128 * lookup_errkey is set, which means that an error has occured in the
10129 main handler, not in individual partitions
10130 */
10131 if (m_file && lookup_errkey == (uint)-1)
10132 {
10133 if (m_last_part >= m_tot_parts)
10134 {
10135 DBUG_ASSERT(0);
10136 m_last_part= 0;
10137 }
10138 m_file[m_last_part]->print_error(error, errflag);
10139 }
10140 else
10141 handler::print_error(error, errflag);
10142 DBUG_VOID_RETURN;
10143 }
10144
10145
get_error_message(int error,String * buf)10146 bool ha_partition::get_error_message(int error, String *buf)
10147 {
10148 DBUG_ENTER("ha_partition::get_error_message");
10149
10150 /* Should probably look for my own errors first */
10151
10152 /* In case m_file has not been initialized, like in bug#42438 */
10153 if (m_file)
10154 DBUG_RETURN(m_file[m_last_part]->get_error_message(error, buf));
10155 DBUG_RETURN(handler::get_error_message(error, buf));
10156
10157 }
10158
10159
10160 /****************************************************************************
10161 MODULE in-place ALTER
10162 ****************************************************************************/
10163 /**
10164 Get table flags.
10165 */
10166
table_flags() const10167 handler::Table_flags ha_partition::table_flags() const
10168 {
10169 uint first_used_partition= 0;
10170 DBUG_ENTER("ha_partition::table_flags");
10171 if (m_handler_status < handler_initialized ||
10172 m_handler_status >= handler_closed)
10173 DBUG_RETURN(PARTITION_ENABLED_TABLE_FLAGS);
10174
10175 if (get_lock_type() != F_UNLCK)
10176 {
10177 /*
10178 The flags are cached after external_lock, and may depend on isolation
10179 level. So we should use a locked partition to get the correct flags.
10180 */
10181 first_used_partition= bitmap_get_first_set(&m_part_info->lock_partitions);
10182 if (first_used_partition == MY_BIT_NONE)
10183 first_used_partition= 0;
10184 }
10185 DBUG_RETURN((m_file[first_used_partition]->ha_table_flags() &
10186 ~(PARTITION_DISABLED_TABLE_FLAGS)) |
10187 (PARTITION_ENABLED_TABLE_FLAGS));
10188 }
10189
10190
10191 /**
10192 alter_table_flags must be on handler/table level, not on hton level
10193 due to the ha_partition hton does not know what the underlying hton is.
10194 */
10195
alter_table_flags(alter_table_operations flags)10196 alter_table_operations ha_partition::alter_table_flags(alter_table_operations flags)
10197 {
10198 alter_table_operations flags_to_return;
10199 DBUG_ENTER("ha_partition::alter_table_flags");
10200
10201 flags_to_return= ht->alter_table_flags(flags);
10202 flags_to_return|= m_file[0]->alter_table_flags(flags);
10203
10204 DBUG_RETURN(flags_to_return);
10205 }
10206
10207
10208 /**
10209 check if copy of data is needed in alter table.
10210 */
check_if_incompatible_data(HA_CREATE_INFO * create_info,uint table_changes)10211 bool ha_partition::check_if_incompatible_data(HA_CREATE_INFO *create_info,
10212 uint table_changes)
10213 {
10214 /*
10215 The check for any partitioning related changes have already been done
10216 in mysql_alter_table (by fix_partition_func), so it is only up to
10217 the underlying handlers.
10218 */
10219 List_iterator<partition_element> part_it(m_part_info->partitions);
10220 HA_CREATE_INFO dummy_info= *create_info;
10221 uint i=0;
10222 while (partition_element *part_elem= part_it++)
10223 {
10224 if (m_is_sub_partitioned)
10225 {
10226 List_iterator<partition_element> subpart_it(part_elem->subpartitions);
10227 while (partition_element *sub_elem= subpart_it++)
10228 {
10229 dummy_info.data_file_name= sub_elem->data_file_name;
10230 dummy_info.index_file_name= sub_elem->index_file_name;
10231 if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes))
10232 return COMPATIBLE_DATA_NO;
10233 }
10234 }
10235 else
10236 {
10237 dummy_info.data_file_name= part_elem->data_file_name;
10238 dummy_info.index_file_name= part_elem->index_file_name;
10239 if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes))
10240 return COMPATIBLE_DATA_NO;
10241 }
10242 }
10243 return COMPATIBLE_DATA_YES;
10244 }
10245
10246
10247 /**
10248 Support of in-place alter table.
10249 */
10250
10251 /**
10252 Helper class for in-place alter, see handler.h
10253 */
10254
10255 class ha_partition_inplace_ctx : public inplace_alter_handler_ctx
10256 {
10257 public:
10258 inplace_alter_handler_ctx **handler_ctx_array;
10259 private:
10260 uint m_tot_parts;
10261
10262 public:
ha_partition_inplace_ctx(THD * thd,uint tot_parts)10263 ha_partition_inplace_ctx(THD *thd, uint tot_parts)
10264 : inplace_alter_handler_ctx(),
10265 handler_ctx_array(NULL),
10266 m_tot_parts(tot_parts)
10267 {}
10268
~ha_partition_inplace_ctx()10269 ~ha_partition_inplace_ctx()
10270 {
10271 if (handler_ctx_array)
10272 {
10273 for (uint index= 0; index < m_tot_parts; index++)
10274 delete handler_ctx_array[index];
10275 }
10276 }
10277 };
10278
10279
/**
  Check whether the requested ALTER can be done in-place.

  Every partition handler is asked; the least capable answer wins.  The
  whole loop is re-run if any partition modified
  ha_alter_info->handler_flags, so that all partitions are asked with
  the final flag set.

  @param altered_table  Table definition after the ALTER.
  @param ha_alter_info  Description of the requested changes.

  @return Supported in-place level, or HA_ALTER_ERROR on failure.
*/

enum_alter_inplace_result
ha_partition::check_if_supported_inplace_alter(TABLE *altered_table,
                                               Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  enum_alter_inplace_result result;
  alter_table_operations orig_ops;
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool first_is_set= false;
  THD *thd= ha_thd();

  DBUG_ENTER("ha_partition::check_if_supported_inplace_alter");
  /*
    Support inplace change of KEY () -> KEY ALGORITHM = N ().
    Any other change would set partition_changed in
    prep_alter_part_table() in mysql_alter_table().
  */
  if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
  {
    DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
    DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
  }

  /* Context and ctx array live on the THD mem_root, freed with the THD. */
  part_inplace_ctx=
    new (thd->mem_root) ha_partition_inplace_ctx(thd, m_tot_parts);
  if (!part_inplace_ctx)
    DBUG_RETURN(HA_ALTER_ERROR);

  part_inplace_ctx->handler_ctx_array= (inplace_alter_handler_ctx **)
    thd->alloc(sizeof(inplace_alter_handler_ctx *) * (m_tot_parts + 1));
  if (!part_inplace_ctx->handler_ctx_array)
    DBUG_RETURN(HA_ALTER_ERROR);

  do {
    result= HA_ALTER_INPLACE_NO_LOCK;
    /* Set all to NULL, including the terminating one. */
    for (index= 0; index <= m_tot_parts; index++)
      part_inplace_ctx->handler_ctx_array[index]= NULL;

    ha_alter_info->handler_flags |= ALTER_PARTITIONED;
    orig_ops= ha_alter_info->handler_flags;
    for (index= 0; index < m_tot_parts; index++)
    {
      enum_alter_inplace_result p_result=
        m_file[index]->check_if_supported_inplace_alter(altered_table,
                                                        ha_alter_info);
      /* Save whatever per-partition ctx the engine may have created. */
      part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;

      if (index == 0)
        first_is_set= (ha_alter_info->handler_ctx != NULL);
      else if (first_is_set != (ha_alter_info->handler_ctx != NULL))
      {
        /* Either none or all partitions must set handler_ctx! */
        DBUG_ASSERT(0);
        DBUG_RETURN(HA_ALTER_ERROR);
      }
      /* Keep the most restrictive (lowest) capability level seen. */
      if (p_result < result)
        result= p_result;
      if (result == HA_ALTER_ERROR)
        break;
    }
    /* Retry the whole round if a partition changed handler_flags. */
  } while (orig_ops != ha_alter_info->handler_flags);

  ha_alter_info->handler_ctx= part_inplace_ctx;
  /*
    To indicate for future inplace calls that there are several
    partitions/handlers that need to be committed together,
    we set group_commit_ctx to the NULL terminated array of
    the partitions handlers.
  */
  ha_alter_info->group_commit_ctx= part_inplace_ctx->handler_ctx_array;

  DBUG_RETURN(result);
}
10354
10355
prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)10356 bool ha_partition::prepare_inplace_alter_table(TABLE *altered_table,
10357 Alter_inplace_info *ha_alter_info)
10358 {
10359 uint index= 0;
10360 bool error= false;
10361 ha_partition_inplace_ctx *part_inplace_ctx;
10362
10363 DBUG_ENTER("ha_partition::prepare_inplace_alter_table");
10364
10365 /*
10366 Changing to similar partitioning, only update metadata.
10367 Non allowed changes would be caought in prep_alter_part_table().
10368 */
10369 if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
10370 {
10371 DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
10372 DBUG_RETURN(false);
10373 }
10374
10375 part_inplace_ctx=
10376 static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
10377
10378 for (index= 0; index < m_tot_parts && !error; index++)
10379 {
10380 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
10381 if (m_file[index]->ha_prepare_inplace_alter_table(altered_table,
10382 ha_alter_info))
10383 error= true;
10384 part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
10385 }
10386 ha_alter_info->handler_ctx= part_inplace_ctx;
10387
10388 DBUG_RETURN(error);
10389 }
10390
10391
/**
  Run the main in-place ALTER phase on every partition.

  From the second partition on, the engine may share data computed for
  the previous partition through set_shared_data() — the order of the
  ctx swap, the chaining call, and the store-back below is significant.

  @return true on error, false on success.
*/

bool ha_partition::inplace_alter_table(TABLE *altered_table,
                                       Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  bool error= false;
  ha_partition_inplace_ctx *part_inplace_ctx;

  DBUG_ENTER("ha_partition::inplace_alter_table");

  /*
    Changing to similar partitioning, only update metadata.
    Non allowed changes would be caught in prep_alter_part_table().
  */
  if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
  {
    DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
    DBUG_RETURN(false);
  }

  part_inplace_ctx=
    static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);

  for (index= 0; index < m_tot_parts && !error; index++)
  {
    /* Chain shared data from the previous partition's ctx, if any. */
    if ((ha_alter_info->handler_ctx=
           part_inplace_ctx->handler_ctx_array[index]) != NULL
        && index != 0)
      ha_alter_info->handler_ctx->set_shared_data
        (*part_inplace_ctx->handler_ctx_array[index - 1]);

    if (m_file[index]->ha_inplace_alter_table(altered_table,
                                              ha_alter_info))
      error= true;
    part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
  }
  /* Restore the aggregated ctx for the commit/rollback phase. */
  ha_alter_info->handler_ctx= part_inplace_ctx;

  DBUG_RETURN(error);
}
10431
10432
10433 /*
10434 Note that this function will try rollback failed ADD INDEX by
10435 executing DROP INDEX for the indexes that were committed (if any)
10436 before the error occurred. This means that the underlying storage
10437 engine must be able to drop index in-place with X-lock held.
10438 (As X-lock will be held here if new indexes are to be committed)
10439 */
/**
  Commit or roll back the in-place ALTER on all partitions.

  @param altered_table  Table definition after the ALTER.
  @param ha_alter_info  Description of the requested changes.
  @param commit         true = commit, false = roll back.

  @return true on error, false on success.
*/

bool ha_partition::commit_inplace_alter_table(TABLE *altered_table,
                                              Alter_inplace_info *ha_alter_info,
                                              bool commit)
{
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool error= false;

  DBUG_ENTER("ha_partition::commit_inplace_alter_table");

  /*
    Changing to similar partitioning, only update metadata.
    Non allowed changes would be caught in prep_alter_part_table().
  */
  if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
  {
    DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
    DBUG_RETURN(false);
  }

  part_inplace_ctx=
    static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);

  if (commit)
  {
    DBUG_ASSERT(ha_alter_info->group_commit_ctx ==
                part_inplace_ctx->handler_ctx_array);
    /*
      Commit via the first partition; an engine that understands group
      commit handles all partitions here and resets group_commit_ctx.
    */
    ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[0];
    error= m_file[0]->ha_commit_inplace_alter_table(altered_table,
                                                    ha_alter_info, commit);
    if (unlikely(error))
      goto end;
    if (ha_alter_info->group_commit_ctx)
    {
      /*
        If ha_alter_info->group_commit_ctx is not set to NULL,
        then the engine did only commit the first partition!
        The engine is probably new, since both innodb and the default
        implementation of handler::commit_inplace_alter_table sets it to NULL
        and simply return false, since it allows metadata changes only.
        Loop over all other partitions as to follow the protocol!
      */
      uint i;
      DBUG_ASSERT(0);
      for (i= 1; i < m_tot_parts; i++)
      {
        ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
        error|= m_file[i]->ha_commit_inplace_alter_table(altered_table,
                                                         ha_alter_info,
                                                         true);
      }
    }
  }
  else
  {
    uint i;
    for (i= 0; i < m_tot_parts; i++)
    {
      /* Rollback, commit == false, is done for each partition! */
      ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
      if (m_file[i]->ha_commit_inplace_alter_table(altered_table,
                                                   ha_alter_info, false))
        error= true;
    }
  }
end:
  /* Restore the aggregated ctx so the caller can destroy it. */
  ha_alter_info->handler_ctx= part_inplace_ctx;

  DBUG_RETURN(error);
}
10509
10510
min_of_the_max_uint(uint (handler::* operator_func)(void)const) const10511 uint ha_partition::min_of_the_max_uint(
10512 uint (handler::*operator_func)(void) const) const
10513 {
10514 handler **file;
10515 uint min_of_the_max= ((*m_file)->*operator_func)();
10516
10517 for (file= m_file+1; *file; file++)
10518 {
10519 uint tmp= ((*file)->*operator_func)();
10520 set_if_smaller(min_of_the_max, tmp);
10521 }
10522 return min_of_the_max;
10523 }
10524
10525
/* Smallest max_supported_key_parts() among all partition handlers. */
uint ha_partition::max_supported_key_parts() const
{
  return min_of_the_max_uint(&handler::max_supported_key_parts);
}
10530
10531
/* Smallest max_supported_key_length() among all partition handlers. */
uint ha_partition::max_supported_key_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_length);
}
10536
10537
/* Smallest max_supported_key_part_length() among all partition handlers. */
uint ha_partition::max_supported_key_part_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_part_length);
}
10542
10543
/* Smallest max_supported_record_length() among all partition handlers. */
uint ha_partition::max_supported_record_length() const
{
  return min_of_the_max_uint(&handler::max_supported_record_length);
}
10548
10549
/* Smallest max_supported_keys() among all partition handlers. */
uint ha_partition::max_supported_keys() const
{
  return min_of_the_max_uint(&handler::max_supported_keys);
}
10554
10555
min_record_length(uint options) const10556 uint ha_partition::min_record_length(uint options) const
10557 {
10558 handler **file;
10559 uint max= (*m_file)->min_record_length(options);
10560
10561 for (file= m_file, file++; *file; file++)
10562 if (max < (*file)->min_record_length(options))
10563 max= (*file)->min_record_length(options);
10564 return max;
10565 }
10566
10567 /****************************************************************************
10568 MODULE compare records
10569 ****************************************************************************/
10570 /*
10571 Compare two positions
10572
10573 SYNOPSIS
10574 cmp_ref()
10575 ref1 First position
10576 ref2 Second position
10577
10578 RETURN VALUE
10579 <0 ref1 < ref2
10580 0 Equal
10581 >0 ref1 > ref2
10582
10583 DESCRIPTION
10584 We get two references and need to check if those records are the same.
10585 If they belong to different partitions we decide that they are not
10586 the same record. Otherwise we use the particular handler to decide if
10587 they are the same. Sort in partition id order if not equal.
10588
  MariaDB note:
    Please don't merge the MySQL version of this function into this one:
    it is incorrect. MariaDB has an alternative fix — the underlying row
    references are compared first, and only references that compare equal
    are then ordered by partition id.
10598 */
10599
int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
{
  int cmp;
  uint32 diff1, diff2;
  DBUG_ENTER("ha_partition::cmp_ref");

  /*
    Each ref starts with PARTITION_BYTES_IN_POS bytes of partition id
    followed by the engine-level row reference.  Compare the engine-level
    references first.
  */
  cmp= get_open_file_sample()->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
                                       (ref2 + PARTITION_BYTES_IN_POS));
  if (cmp)
    DBUG_RETURN(cmp);

  diff2= uint2korr(ref2);
  diff1= uint2korr(ref1);

  if (diff1 == diff2)
  {
    /* This means that the references are same and are in same partition.*/
    DBUG_RETURN(0);
  }

  /*
    In Innodb we compare with either primary key value or global DB_ROW_ID so
    it is not possible that the two references are equal and are in different
    partitions, but in myisam it is possible since we are comparing offsets.
    Remove this assert if DB_ROW_ID is changed to be per partition.
  */
  DBUG_ASSERT(!m_innodb);
  /* Equal engine refs in different partitions: order by partition id. */
  DBUG_RETURN(diff2 > diff1 ? -1 : 1);
}
10629
10630
10631 /****************************************************************************
10632 MODULE auto increment
10633 ****************************************************************************/
10634
10635
/**
  Retrieve new values for part_share->next_auto_inc_val if needed

  This is needed if the value has not been initialized or if one of
  the underlying partitions require that the value should be re-calculated
*/
10642
void ha_partition::update_next_auto_inc_val()
{
  /*
    Re-read the auto_increment state from the partitions when it has not
    been initialized yet, or when a partition invalidated it (the
    need_info_for_auto_inc() call also resets auto_inc_initialized).
  */
  if (!part_share->auto_inc_initialized ||
      need_info_for_auto_inc())
    info(HA_STATUS_AUTO);
}
10649
10650
10651 /**
10652 Determine whether a partition needs auto-increment initialization.
10653
10654 @return
10655 TRUE A partition needs auto-increment initialization
10656 FALSE No partition needs auto-increment initialization
10657
10658 Resets part_share->auto_inc_initialized if next auto_increment needs to be
10659 recalculated.
10660 */
10661
need_info_for_auto_inc()10662 bool ha_partition::need_info_for_auto_inc()
10663 {
10664 handler **file= m_file;
10665 DBUG_ENTER("ha_partition::need_info_for_auto_inc");
10666
10667 do
10668 {
10669 if ((*file)->need_info_for_auto_inc())
10670 {
10671 /* We have to get new auto_increment values from handler */
10672 part_share->auto_inc_initialized= FALSE;
10673 DBUG_RETURN(TRUE);
10674 }
10675 } while (*(++file));
10676 DBUG_RETURN(FALSE);
10677 }
10678
10679
10680 /**
10681 Determine if all partitions can use the current auto-increment value for
10682 auto-increment initialization.
10683
10684 @return
10685 TRUE All partitions can use the current auto-increment
10686 value for auto-increment initialization
10687 FALSE All partitions cannot use the current
10688 auto-increment value for auto-increment
10689 initialization
10690
10691 Notes
10692 This function is only called for ::info(HA_STATUS_AUTO) and is
10693 mainly used by the Spider engine, which returns false
10694 except in the case of DROP TABLE or ALTER TABLE when it returns TRUE.
10695 Other engines always returns TRUE for this call.
10696 */
10697
can_use_for_auto_inc_init()10698 bool ha_partition::can_use_for_auto_inc_init()
10699 {
10700 handler **file= m_file;
10701 DBUG_ENTER("ha_partition::can_use_for_auto_inc_init");
10702
10703 do
10704 {
10705 if (!(*file)->can_use_for_auto_inc_init())
10706 DBUG_RETURN(FALSE);
10707 } while (*(++file));
10708 DBUG_RETURN(TRUE);
10709 }
10710
10711
reset_auto_increment(ulonglong value)10712 int ha_partition::reset_auto_increment(ulonglong value)
10713 {
10714 handler **file= m_file;
10715 int res;
10716 DBUG_ENTER("ha_partition::reset_auto_increment");
10717 lock_auto_increment();
10718 part_share->auto_inc_initialized= false;
10719 part_share->next_auto_inc_val= 0;
10720 do
10721 {
10722 if ((res= (*file)->ha_reset_auto_increment(value)) != 0)
10723 break;
10724 } while (*(++file));
10725 unlock_auto_increment();
10726 DBUG_RETURN(res);
10727 }
10728
10729
10730 /**
10731 This method is called by update_auto_increment which in turn is called
10732 by the individual handlers as part of write_row. We use the
10733 part_share->next_auto_inc_val, or search all
10734 partitions for the highest auto_increment_value if not initialized or
10735 if auto_increment field is a secondary part of a key, we must search
10736 every partition when holding a mutex to be sure of correctness.
10737 */
10738
void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment,
                                      ulonglong nb_desired_values,
                                      ulonglong *first_value,
                                      ulonglong *nb_reserved_values)
{
  DBUG_ENTER("ha_partition::get_auto_increment");
  DBUG_PRINT("enter", ("offset: %lu inc: %lu desired_values: %lu "
                       "first_value: %lu", (ulong) offset, (ulong) increment,
                       (ulong) nb_desired_values, (ulong) *first_value));
  DBUG_ASSERT(increment);
  DBUG_ASSERT(nb_desired_values);
  *first_value= 0;
  if (table->s->next_number_keypart)
  {
    /*
      next_number_keypart is != 0 if the auto_increment column is a secondary
      column in the index (it is allowed in MyISAM)
    */
    DBUG_PRINT("info", ("next_number_keypart != 0"));
    ulonglong nb_reserved_values_part;
    ulonglong first_value_part, max_first_value;
    handler **file= m_file;
    first_value_part= max_first_value= *first_value;
    /* Must find highest value among all partitions. */
    do
    {
      /* Only nb_desired_values = 1 makes sense */
      (*file)->get_auto_increment(offset, increment, 1,
                                  &first_value_part, &nb_reserved_values_part);
      if (unlikely(first_value_part == ULONGLONG_MAX)) // error in one partition
      {
        /* Propagate the error sentinel value to the caller. */
        *first_value= first_value_part;
        /* log that the error was between table/partition handler */
        sql_print_error("Partition failed to reserve auto_increment value");
        DBUG_VOID_RETURN;
      }
      DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part));
      set_if_bigger(max_first_value, first_value_part);
    } while (*(++file));
    *first_value= max_first_value;
    *nb_reserved_values= 1;
  }
  else
  {
    THD *thd= ha_thd();
    /*
      This is initialized in the beginning of the first write_row call.
    */
    DBUG_ASSERT(part_share->auto_inc_initialized);
    /*
      Get a lock for handling the auto_increment in part_share
      for avoiding two concurrent statements getting the same number.
    */

    lock_auto_increment();

    /*
      In a multi-row insert statement like INSERT SELECT and LOAD DATA
      where the number of candidate rows to insert is not known in advance
      we must hold a lock/mutex for the whole statement if we have statement
      based replication. Because the statement-based binary log contains
      only the first generated value used by the statement, and slaves assumes
      all other generated values used by this statement were consecutive to
      this first one, we must exclusively lock the generator until the
      statement is done.
    */
    if (!auto_increment_safe_stmt_log_lock &&
        thd->lex->sql_command != SQLCOM_INSERT &&
        mysql_bin_log.is_open() &&
        !thd->is_current_stmt_binlog_format_row() &&
        (thd->variables.option_bits & OPTION_BIN_LOG))
    {
      DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock"));
      auto_increment_safe_stmt_log_lock= TRUE;
    }

    /* this gets corrected (for offset/increment) in update_auto_increment */
    *first_value= part_share->next_auto_inc_val;
    part_share->next_auto_inc_val+= nb_desired_values * increment;

    unlock_auto_increment();
    DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value));
    *nb_reserved_values= nb_desired_values;
  }
  DBUG_VOID_RETURN;
}
10825
/**
  Release reserved but unused auto_increment values.

  When possible, lowers part_share->next_auto_inc_val back to the unused
  next_insert_id so those values can be handed out again, and releases
  the statement-wide lock taken in get_auto_increment().
*/

void ha_partition::release_auto_increment()
{
  DBUG_ENTER("ha_partition::release_auto_increment");

  if (table->s->next_number_keypart)
  {
    /*
      Auto_increment is a secondary key part: each locked partition
      reserved its own values, so release in each of them.
    */
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
    {
      m_file[i]->ha_release_auto_increment();
    }
  }
  else
  {
    lock_auto_increment();
    if (next_insert_id)
    {
      ulonglong next_auto_inc_val= part_share->next_auto_inc_val;
      /*
        If the current auto_increment values is lower than the reserved
        value, and the reserved value was reserved by this thread,
        we can lower the reserved value.
      */
      if (next_insert_id < next_auto_inc_val &&
          auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val)
      {
        THD *thd= ha_thd();
        /*
          Check that we do not lower the value because of a failed insert
          with SET INSERT_ID, i.e. forced/non generated values.
        */
        if (thd->auto_inc_intervals_forced.maximum() < next_insert_id)
          part_share->next_auto_inc_val= next_insert_id;
      }
      DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu",
                          (ulong) part_share->next_auto_inc_val));
    }
    /*
      Unlock the multi-row statement lock taken in get_auto_increment.
      These actions must be performed even if the next_insert_id field
      contains zero, otherwise if the update_auto_increment fails then
      an unnecessary lock will remain:
    */
    if (auto_increment_safe_stmt_log_lock)
    {
      auto_increment_safe_stmt_log_lock= FALSE;
      DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock"));
    }

    unlock_auto_increment();
  }
  DBUG_VOID_RETURN;
}
10881
10882 /****************************************************************************
10883 MODULE initialize handler for HANDLER call
10884 ****************************************************************************/
10885
init_table_handle_for_HANDLER()10886 void ha_partition::init_table_handle_for_HANDLER()
10887 {
10888 return;
10889 }
10890
10891
10892 /**
10893 Calculate the checksum of the table (all partitions)
10894 */
10895
/**
  Run the pre-phase of checksum calculation on all partitions.

  @return 0 on success, else the first partition error.
*/

int ha_partition::pre_calculate_checksum()
{
  int error;
  DBUG_ENTER("ha_partition::pre_calculate_checksum");
  /* Tell calculate_checksum() the pre-phase has already been run. */
  m_pre_calling= TRUE;
  /* Only engines advertising a checksum flavour have a pre-phase. */
  if ((table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM)))
  {
    handler **file= m_file;
    do
    {
      if ((error= (*file)->pre_calculate_checksum()))
      {
        /*
          NOTE(review): m_pre_calling stays TRUE on this error path;
          calculate_checksum() resets it, but confirm no other caller
          depends on it being cleared here.
        */
        DBUG_RETURN(error);
      }
    } while (*(++file));
  }
  DBUG_RETURN(0);
}
10914
10915
calculate_checksum()10916 int ha_partition::calculate_checksum()
10917 {
10918 int error;
10919 stats.checksum= 0;
10920 stats.checksum_null= TRUE;
10921
10922 DBUG_ENTER("ha_partition::calculate_checksum");
10923 if (!m_pre_calling)
10924 {
10925 if ((error= pre_calculate_checksum()))
10926 {
10927 m_pre_calling= FALSE;
10928 DBUG_RETURN(error);
10929 }
10930 }
10931 m_pre_calling= FALSE;
10932
10933 handler **file= m_file;
10934 do
10935 {
10936 if ((error= (*file)->calculate_checksum()))
10937 {
10938 DBUG_RETURN(error);
10939 }
10940 if (!(*file)->stats.checksum_null)
10941 {
10942 stats.checksum+= (*file)->stats.checksum;
10943 stats.checksum_null= FALSE;
10944 }
10945 } while (*(++file));
10946 DBUG_RETURN(0);
10947 }
10948
10949
10950 /****************************************************************************
10951 MODULE enable/disable indexes
10952 ****************************************************************************/
10953
10954 /*
10955 Disable indexes for a while
10956 SYNOPSIS
10957 disable_indexes()
10958 mode Mode
10959 RETURN VALUES
10960 0 Success
10961 != 0 Error
10962 */
10963
disable_indexes(uint mode)10964 int ha_partition::disable_indexes(uint mode)
10965 {
10966 handler **file;
10967 int error= 0;
10968
10969 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
10970 for (file= m_file; *file; file++)
10971 {
10972 if (unlikely((error= (*file)->ha_disable_indexes(mode))))
10973 break;
10974 }
10975 return error;
10976 }
10977
10978
10979 /*
10980 Enable indexes again
10981 SYNOPSIS
10982 enable_indexes()
10983 mode Mode
10984 RETURN VALUES
10985 0 Success
10986 != 0 Error
10987 */
10988
enable_indexes(uint mode)10989 int ha_partition::enable_indexes(uint mode)
10990 {
10991 handler **file;
10992 int error= 0;
10993
10994 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
10995 for (file= m_file; *file; file++)
10996 {
10997 if (unlikely((error= (*file)->ha_enable_indexes(mode))))
10998 break;
10999 }
11000 return error;
11001 }
11002
11003
11004 /*
11005 Check if indexes are disabled
11006 SYNOPSIS
11007 indexes_are_disabled()
11008
11009 RETURN VALUES
11010 0 Indexes are enabled
11011 != 0 Indexes are disabled
11012 */
11013
indexes_are_disabled(void)11014 int ha_partition::indexes_are_disabled(void)
11015 {
11016 handler **file;
11017 int error= 0;
11018
11019 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
11020 for (file= m_file; *file; file++)
11021 {
11022 if (unlikely((error= (*file)->indexes_are_disabled())))
11023 break;
11024 }
11025 return error;
11026 }
11027
11028
11029 /**
11030 Check/fix misplaced rows.
11031
11032 @param read_part_id Partition to check/fix.
  @param do_repair       If true, move misplaced rows to correct partition.
11034
11035 @return Operation status.
11036 @retval HA_ADMIN_OK Success
11037 @retval != HA_ADMIN_OK Error
11038 */
11039
check_misplaced_rows(uint read_part_id,bool do_repair)11040 int ha_partition::check_misplaced_rows(uint read_part_id, bool do_repair)
11041 {
11042 int result= 0;
11043 uint32 correct_part_id;
11044 longlong func_value;
11045 longlong num_misplaced_rows= 0;
11046
11047 DBUG_ENTER("ha_partition::check_misplaced_rows");
11048
11049 DBUG_ASSERT(m_file);
11050
11051 if (m_part_info->vers_info &&
11052 read_part_id != m_part_info->vers_info->now_part->id &&
11053 !m_part_info->vers_info->interval.is_set())
11054 {
11055 /* Skip this check as it is not supported for non-INTERVAL history partitions. */
11056 DBUG_RETURN(HA_ADMIN_OK);
11057 }
11058
11059 if (do_repair)
11060 {
11061 /* We must read the full row, if we need to move it! */
11062 bitmap_set_all(table->read_set);
11063 bitmap_set_all(table->write_set);
11064 }
11065 else
11066 {
11067 /* Only need to read the partitioning fields. */
11068 bitmap_union(table->read_set, &m_part_info->full_part_field_set);
11069 }
11070
11071 if ((result= m_file[read_part_id]->ha_rnd_init(1)))
11072 DBUG_RETURN(result);
11073
11074 while (true)
11075 {
11076 if ((result= m_file[read_part_id]->ha_rnd_next(m_rec0)))
11077 {
11078 if (result != HA_ERR_END_OF_FILE)
11079 break;
11080
11081 if (num_misplaced_rows > 0)
11082 {
11083 print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "warning",
11084 table_share->db.str, table->alias,
11085 opt_op_name[REPAIR_PARTS],
11086 "Moved %lld misplaced rows",
11087 num_misplaced_rows);
11088 }
11089 /* End-of-file reached, all rows are now OK, reset result and break. */
11090 result= 0;
11091 break;
11092 }
11093
11094 result= m_part_info->get_partition_id(m_part_info, &correct_part_id,
11095 &func_value);
11096 if (result)
11097 break;
11098
11099 if (correct_part_id != read_part_id)
11100 {
11101 num_misplaced_rows++;
11102 if (!do_repair)
11103 {
11104 /* Check. */
11105 print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "error",
11106 table_share->db.str, table->alias,
11107 opt_op_name[CHECK_PARTS],
11108 "Found a misplaced row");
11109 /* Break on first misplaced row! */
11110 result= HA_ADMIN_NEEDS_UPGRADE;
11111 break;
11112 }
11113 else
11114 {
11115 DBUG_PRINT("info", ("Moving row from partition %u to %u",
11116 (uint) read_part_id, (uint) correct_part_id));
11117
11118 /*
11119 Insert row into correct partition. Notice that there are no commit
11120 for every N row, so the repair will be one large transaction!
11121 */
11122 if ((result= m_file[correct_part_id]->ha_write_row(m_rec0)))
11123 {
11124 /*
11125 We have failed to insert a row, it might have been a duplicate!
11126 */
11127 char buf[MAX_KEY_LENGTH];
11128 String str(buf,sizeof(buf),system_charset_info);
11129 str.length(0);
11130 if (result == HA_ERR_FOUND_DUPP_KEY)
11131 {
11132 str.append("Duplicate key found, "
11133 "please update or delete the record:\n");
11134 result= HA_ADMIN_CORRUPT;
11135 }
11136 m_err_rec= NULL;
11137 append_row_to_str(str);
11138
11139 /*
11140 If the engine supports transactions, the failure will be
11141 rolled back
11142 */
11143 if (!m_file[correct_part_id]->has_transactions_and_rollback())
11144 {
11145 /* Log this error, so the DBA can notice it and fix it! */
11146 sql_print_error("Table '%-192s' failed to move/insert a row"
11147 " from part %u into part %u:\n%s",
11148 table->s->table_name.str,
11149 (uint) read_part_id,
11150 (uint) correct_part_id,
11151 str.c_ptr_safe());
11152 }
11153 print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "error",
11154 table_share->db.str, table->alias,
11155 opt_op_name[REPAIR_PARTS],
11156 "Failed to move/insert a row"
11157 " from part %u into part %u:\n%s",
11158 (uint) read_part_id,
11159 (uint) correct_part_id,
11160 str.c_ptr_safe());
11161 break;
11162 }
11163
11164 /* Delete row from wrong partition. */
11165 if ((result= m_file[read_part_id]->ha_delete_row(m_rec0)))
11166 {
11167 if (m_file[correct_part_id]->has_transactions_and_rollback())
11168 break;
11169 /*
11170 We have introduced a duplicate, since we failed to remove it
11171 from the wrong partition.
11172 */
11173 char buf[MAX_KEY_LENGTH];
11174 String str(buf,sizeof(buf),system_charset_info);
11175 str.length(0);
11176 m_err_rec= NULL;
11177 append_row_to_str(str);
11178
11179 /* Log this error, so the DBA can notice it and fix it! */
11180 sql_print_error("Table '%-192s': Delete from part %u failed with"
11181 " error %d. But it was already inserted into"
11182 " part %u, when moving the misplaced row!"
11183 "\nPlease manually fix the duplicate row:\n%s",
11184 table->s->table_name.str,
11185 (uint) read_part_id,
11186 result,
11187 (uint) correct_part_id,
11188 str.c_ptr_safe());
11189 break;
11190 }
11191 }
11192 }
11193 }
11194
11195 int tmp_result= m_file[read_part_id]->ha_rnd_end();
11196 DBUG_RETURN(result ? result : tmp_result);
11197 }
11198
11199
/* Admin message template used when KEY () hashing changed between versions. */
#define KEY_PARTITIONING_CHANGED_STR \
  "KEY () partitioning changed, please run:\n" \
  "ALTER TABLE %s.%s ALGORITHM = INPLACE %s"

/**
  Check whether the table needs upgrading.

  Detects pre-5.5.3 tables using KEY (sub)partitioning over field types
  whose hash calculation changed (bug#14521864) and emits an admin message
  with the suggested ALTER TABLE statement.

  @param check_opt  Check options; only acts when TT_FOR_UPGRADE is set.
  @return HA_ADMIN_NEEDS_CHECK, HA_ADMIN_FAILED on affected tables, or the
          default error status.
*/
int ha_partition::check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  int error= HA_ADMIN_NEEDS_CHECK;
  DBUG_ENTER("ha_partition::check_for_upgrade");

  /*
    This is called even without FOR UPGRADE,
    if the .frm version is lower than the current version.
    In that case return that it needs checking!
  */
  if (!(check_opt->sql_flags & TT_FOR_UPGRADE))
    DBUG_RETURN(error);

  /*
    Partitions will be checked for during their ha_check!

    Check if KEY (sub)partitioning was used and any field's hash calculation
    differs from 5.1, see bug#14521864.
  */
  if (table->s->mysql_version < 50503 &&              // 5.1 table (<5.5.3)
      ((m_part_info->part_type == HASH_PARTITION &&   // KEY partitioned
        m_part_info->list_of_part_fields) ||
       (m_is_sub_partitioned &&                       // KEY subpartitioned
        m_part_info->list_of_subpart_fields)))
  {
    Field **field;
    if (m_is_sub_partitioned)
    {
      field= m_part_info->subpart_field_array;
    }
    else
    {
      field= m_part_info->part_field_array;
    }
    for (; *field; field++)
    {
      /* Only these field types had their KEY hash changed after 5.1. */
      switch ((*field)->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          THD *thd= ha_thd();
          char *part_buf;
          String db_name, table_name;
          uint part_buf_len;
          bool skip_generation= false;
          partition_info::enum_key_algorithm old_algorithm;
          old_algorithm= m_part_info->key_algorithm;
          error= HA_ADMIN_FAILED;
          append_identifier(ha_thd(), &db_name, &table_share->db);
          append_identifier(ha_thd(), &table_name, &table_share->table_name);
          if (m_part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE)
          {
            /*
              Only possible when someone tampered with .frm files,
              like during tests :)
            */
            skip_generation= true;
          }
          /* Temporarily switch to the 5.1 algorithm to print the old syntax. */
          m_part_info->key_algorithm= partition_info::KEY_ALGORITHM_51;
          if (skip_generation ||
              !(part_buf= generate_partition_syntax_for_frm(thd, m_part_info,
                                                            &part_buf_len,
                                                            NULL, NULL)) ||
              print_admin_msg(thd, SQL_ADMIN_MSG_TEXT_SIZE + 1, "error",
                              table_share->db.str,
                              table->alias,
                              opt_op_name[CHECK_PARTS],
                              KEY_PARTITIONING_CHANGED_STR,
                              db_name.c_ptr_safe(),
                              table_name.c_ptr_safe(),
                              part_buf))
          {
            /* Error creating admin message (too long string?). */
            print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[CHECK_PARTS],
                            KEY_PARTITIONING_CHANGED_STR,
                            db_name.c_ptr_safe(), table_name.c_ptr_safe(),
                            "<old partition clause>, but add ALGORITHM = 1"
                            " between 'KEY' and '(' to change the metadata"
                            " without the need of a full table rebuild.");
          }
          /* Restore the real algorithm before returning. */
          m_part_info->key_algorithm= old_algorithm;
          DBUG_RETURN(error);
        }
      default:
        /* Not affected! */
        ;
      }
    }
  }

  DBUG_RETURN(error);
}
11311
11312
get_next_global_for_child()11313 TABLE_LIST *ha_partition::get_next_global_for_child()
11314 {
11315 handler **file;
11316 DBUG_ENTER("ha_partition::get_next_global_for_child");
11317 for (file= m_file; *file; file++)
11318 {
11319 TABLE_LIST *table_list;
11320 if ((table_list= (*file)->get_next_global_for_child()))
11321 DBUG_RETURN(table_list);
11322 }
11323 DBUG_RETURN(0);
11324 }
11325
11326
11327 /**
11328 Push an engine condition to the condition stack of the storage engine
11329 for each partition.
11330
11331 @param cond Pointer to the engine condition to be pushed.
11332
11333 @return NULL Underlying engine will not return rows that
11334 do not match the passed condition.
11335 <> NULL 'Remainder' condition that the caller must use
11336 to filter out records.
11337 */
11338
cond_push(const COND * cond)11339 const COND *ha_partition::cond_push(const COND *cond)
11340 {
11341 uint i;
11342 COND *res_cond= NULL;
11343 DBUG_ENTER("ha_partition::cond_push");
11344
11345 for (i= bitmap_get_first_set(&m_partitions_to_reset);
11346 i < m_tot_parts;
11347 i= bitmap_get_next_set(&m_partitions_to_reset, i))
11348 {
11349 if (bitmap_is_set(&m_opened_partitions, i))
11350 {
11351 if (m_file[i]->pushed_cond != cond)
11352 {
11353 if (m_file[i]->cond_push(cond))
11354 res_cond= (COND *) cond;
11355 else
11356 m_file[i]->pushed_cond= cond;
11357 }
11358 }
11359 }
11360 DBUG_RETURN(res_cond);
11361 }
11362
11363
11364 /**
11365 Pop the top condition from the condition stack of the storage engine
11366 for each partition.
11367 */
11368
cond_pop()11369 void ha_partition::cond_pop()
11370 {
11371 uint i;
11372 DBUG_ENTER("ha_partition::cond_pop");
11373
11374 for (i= bitmap_get_first_set(&m_partitions_to_reset);
11375 i < m_tot_parts;
11376 i= bitmap_get_next_set(&m_partitions_to_reset, i))
11377 {
11378 if (bitmap_is_set(&m_opened_partitions, i))
11379 {
11380 m_file[i]->cond_pop();
11381 }
11382 }
11383 DBUG_VOID_RETURN;
11384 }
11385
11386
11387 /**
11388 Perform bulk update preparation on each partition.
11389
11390 SYNOPSIS
11391 start_bulk_update()
11392
11393 RETURN VALUE
11394 TRUE Error
11395 FALSE Success
11396 */
11397
start_bulk_update()11398 bool ha_partition::start_bulk_update()
11399 {
11400 handler **file= m_file;
11401 DBUG_ENTER("ha_partition::start_bulk_update");
11402
11403 if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
11404 table->write_set))
11405 DBUG_RETURN(TRUE);
11406
11407 do
11408 {
11409 bzero(&(*file)->copy_info, sizeof((*file)->copy_info));
11410 if ((*file)->start_bulk_update())
11411 DBUG_RETURN(TRUE);
11412 } while (*(++file));
11413 DBUG_RETURN(FALSE);
11414 }
11415
11416
11417 /**
11418 Perform bulk update execution on each partition. A bulk update allows
11419 a handler to batch the updated rows instead of performing the updates
11420 one row at a time.
11421
11422 SYNOPSIS
11423 exec_bulk_update()
11424
11425 RETURN VALUE
11426 TRUE Error
11427 FALSE Success
11428 */
11429
exec_bulk_update(ha_rows * dup_key_found)11430 int ha_partition::exec_bulk_update(ha_rows *dup_key_found)
11431 {
11432 int error;
11433 handler **file= m_file;
11434 DBUG_ENTER("ha_partition::exec_bulk_update");
11435
11436 do
11437 {
11438 if (unlikely((error= (*file)->exec_bulk_update(dup_key_found))))
11439 DBUG_RETURN(error);
11440 } while (*(++file));
11441 DBUG_RETURN(0);
11442 }
11443
11444
11445 /**
11446 Perform bulk update cleanup on each partition.
11447
11448 SYNOPSIS
11449 end_bulk_update()
11450
11451 RETURN VALUE
11452 NONE
11453 */
11454
end_bulk_update()11455 int ha_partition::end_bulk_update()
11456 {
11457 int error= 0;
11458 handler **file= m_file;
11459 DBUG_ENTER("ha_partition::end_bulk_update");
11460
11461 do
11462 {
11463 int tmp;
11464 if ((tmp= (*file)->end_bulk_update()))
11465 error= tmp;
11466 } while (*(++file));
11467 sum_copy_infos();
11468 DBUG_RETURN(error);
11469 }
11470
11471
11472 /**
11473 Add the row to the bulk update on the partition on which the row is stored.
11474 A bulk update allows a handler to batch the updated rows instead of
11475 performing the updates one row at a time.
11476
11477 SYNOPSIS
11478 bulk_update_row()
11479 old_data Old record
11480 new_data New record
11481 dup_key_found Number of duplicate keys found
11482
11483 RETURN VALUE
11484 >1 Error
11485 1 Bulk update not used, normal operation used
11486 0 Bulk update used by handler
11487 */
11488
bulk_update_row(const uchar * old_data,const uchar * new_data,ha_rows * dup_key_found)11489 int ha_partition::bulk_update_row(const uchar *old_data, const uchar *new_data,
11490 ha_rows *dup_key_found)
11491 {
11492 int error= 0;
11493 uint32 part_id;
11494 longlong func_value;
11495 DBUG_ENTER("ha_partition::bulk_update_row");
11496
11497 MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->read_set);
11498 error= m_part_info->get_partition_id(m_part_info, &part_id,
11499 &func_value);
11500 dbug_tmp_restore_column_map(&table->read_set, old_map);
11501 if (unlikely(error))
11502 {
11503 m_part_info->err_value= func_value;
11504 goto end;
11505 }
11506
11507 error= m_file[part_id]->ha_bulk_update_row(old_data, new_data,
11508 dup_key_found);
11509
11510 end:
11511 DBUG_RETURN(error);
11512 }
11513
11514
11515 /**
11516 Perform bulk delete preparation on each partition.
11517
11518 SYNOPSIS
11519 start_bulk_delete()
11520
11521 RETURN VALUE
11522 TRUE Error
11523 FALSE Success
11524 */
11525
start_bulk_delete()11526 bool ha_partition::start_bulk_delete()
11527 {
11528 handler **file= m_file;
11529 DBUG_ENTER("ha_partition::start_bulk_delete");
11530
11531 do
11532 {
11533 if ((*file)->start_bulk_delete())
11534 DBUG_RETURN(TRUE);
11535 } while (*(++file));
11536 DBUG_RETURN(FALSE);
11537 }
11538
11539
11540 /**
11541 Perform bulk delete cleanup on each partition.
11542
11543 SYNOPSIS
11544 end_bulk_delete()
11545
11546 RETURN VALUE
11547 >0 Error
11548 0 Success
11549 */
11550
end_bulk_delete()11551 int ha_partition::end_bulk_delete()
11552 {
11553 int error= 0;
11554 handler **file= m_file;
11555 DBUG_ENTER("ha_partition::end_bulk_delete");
11556
11557 do
11558 {
11559 int tmp;
11560 if ((tmp= (*file)->end_bulk_delete()))
11561 error= tmp;
11562 } while (*(++file));
11563 sum_copy_infos();
11564 DBUG_RETURN(error);
11565 }
11566
11567
check_if_updates_are_ignored(const char * op) const11568 bool ha_partition::check_if_updates_are_ignored(const char *op) const
11569 {
11570 return (handler::check_if_updates_are_ignored(op) ||
11571 ha_check_if_updates_are_ignored(table->in_use, partition_ht(), op));
11572 }
11573
11574 /**
11575 Perform initialization for a direct update request.
11576
11577 SYNOPSIS
11578 direct_update_rows_init()
11579 update fields Pointer to the list of fields to update
11580
11581 RETURN VALUE
11582 >0 Error
11583 0 Success
11584 */
11585
direct_update_rows_init(List<Item> * update_fields)11586 int ha_partition::direct_update_rows_init(List<Item> *update_fields)
11587 {
11588 int error;
11589 uint i, found;
11590 handler *file;
11591 DBUG_ENTER("ha_partition::direct_update_rows_init");
11592
11593 if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
11594 table->write_set))
11595 {
11596 DBUG_PRINT("info", ("partition FALSE by updating part_key"));
11597 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
11598 }
11599
11600 m_part_spec.start_part= 0;
11601 m_part_spec.end_part= m_tot_parts - 1;
11602 m_direct_update_part_spec= m_part_spec;
11603
11604 found= 0;
11605 for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
11606 {
11607 if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
11608 bitmap_is_set(&(m_part_info->lock_partitions), i))
11609 {
11610 file= m_file[i];
11611 if (unlikely((error= (m_pre_calling ?
11612 file->pre_direct_update_rows_init(update_fields) :
11613 file->direct_update_rows_init(update_fields)))))
11614 {
11615 DBUG_PRINT("info", ("partition FALSE by storage engine"));
11616 DBUG_RETURN(error);
11617 }
11618 found++;
11619 }
11620 }
11621
11622 TABLE_LIST *table_list= table->pos_in_table_list;
11623 if (found != 1 && table_list)
11624 {
11625 while (table_list->parent_l)
11626 table_list= table_list->parent_l;
11627 st_select_lex *select_lex= table_list->select_lex;
11628 DBUG_PRINT("info", ("partition select_lex: %p", select_lex));
11629 if (select_lex && select_lex->explicit_limit)
11630 {
11631 DBUG_PRINT("info", ("partition explicit_limit=TRUE"));
11632 DBUG_PRINT("info", ("partition offset_limit: %p",
11633 select_lex->offset_limit));
11634 DBUG_PRINT("info", ("partition select_limit: %p",
11635 select_lex->select_limit));
11636 DBUG_PRINT("info", ("partition FALSE by select_lex"));
11637 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
11638 }
11639 }
11640 DBUG_PRINT("info", ("partition OK"));
11641 DBUG_RETURN(0);
11642 }
11643
11644
11645 /**
11646 Do initialization for performing parallel direct update
11647 for a handlersocket update request.
11648
11649 SYNOPSIS
11650 pre_direct_update_rows_init()
11651 update fields Pointer to the list of fields to update
11652
11653 RETURN VALUE
11654 >0 Error
11655 0 Success
11656 */
11657
pre_direct_update_rows_init(List<Item> * update_fields)11658 int ha_partition::pre_direct_update_rows_init(List<Item> *update_fields)
11659 {
11660 bool save_m_pre_calling;
11661 int error;
11662 DBUG_ENTER("ha_partition::pre_direct_update_rows_init");
11663 save_m_pre_calling= m_pre_calling;
11664 m_pre_calling= TRUE;
11665 error= direct_update_rows_init(update_fields);
11666 m_pre_calling= save_m_pre_calling;
11667 DBUG_RETURN(error);
11668 }
11669
11670
11671 /**
11672 Execute a direct update request. A direct update request updates all
11673 qualified rows in a single operation, rather than one row at a time.
11674 The direct update operation is pushed down to each individual
11675 partition.
11676
11677 SYNOPSIS
11678 direct_update_rows()
11679 update_rows Number of updated rows
11680
11681 RETURN VALUE
11682 >0 Error
11683 0 Success
11684 */
11685
direct_update_rows(ha_rows * update_rows_result,ha_rows * found_rows_result)11686 int ha_partition::direct_update_rows(ha_rows *update_rows_result,
11687 ha_rows *found_rows_result)
11688 {
11689 int error;
11690 bool rnd_seq= FALSE;
11691 ha_rows update_rows= 0;
11692 ha_rows found_rows= 0;
11693 uint32 i;
11694 DBUG_ENTER("ha_partition::direct_update_rows");
11695
11696 /* If first call to direct_update_rows with RND scan */
11697 if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1)
11698 {
11699 rnd_seq= TRUE;
11700 m_scan_value= 2;
11701 }
11702
11703 *update_rows_result= 0;
11704 *found_rows_result= 0;
11705 for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
11706 {
11707 handler *file= m_file[i];
11708 if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
11709 bitmap_is_set(&(m_part_info->lock_partitions), i))
11710 {
11711 if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE)
11712 {
11713 if (unlikely((error= (m_pre_calling ?
11714 file->ha_pre_rnd_init(TRUE) :
11715 file->ha_rnd_init(TRUE)))))
11716 DBUG_RETURN(error);
11717 }
11718 if (unlikely((error= (m_pre_calling ?
11719 (file)->pre_direct_update_rows() :
11720 (file)->ha_direct_update_rows(&update_rows,
11721 &found_rows)))))
11722 {
11723 if (rnd_seq)
11724 {
11725 if (m_pre_calling)
11726 file->ha_pre_rnd_end();
11727 else
11728 file->ha_rnd_end();
11729 }
11730 DBUG_RETURN(error);
11731 }
11732 *update_rows_result+= update_rows;
11733 *found_rows_result+= found_rows;
11734 }
11735 if (rnd_seq)
11736 {
11737 if (unlikely((error= (m_pre_calling ?
11738 file->ha_pre_index_or_rnd_end() :
11739 file->ha_index_or_rnd_end()))))
11740 DBUG_RETURN(error);
11741 }
11742 }
11743 DBUG_RETURN(0);
11744 }
11745
11746
11747 /**
11748 Start parallel execution of a direct update for a handlersocket update
11749 request. A direct update request updates all qualified rows in a single
11750 operation, rather than one row at a time. The direct update operation
11751 is pushed down to each individual partition.
11752
11753 SYNOPSIS
11754 pre_direct_update_rows()
11755
11756 RETURN VALUE
11757 >0 Error
11758 0 Success
11759 */
11760
pre_direct_update_rows()11761 int ha_partition::pre_direct_update_rows()
11762 {
11763 bool save_m_pre_calling;
11764 int error;
11765 ha_rows not_used= 0;
11766 DBUG_ENTER("ha_partition::pre_direct_update_rows");
11767 save_m_pre_calling= m_pre_calling;
11768 m_pre_calling= TRUE;
11769 error= direct_update_rows(¬_used, ¬_used);
11770 m_pre_calling= save_m_pre_calling;
11771 DBUG_RETURN(error);
11772 }
11773
11774
11775 /**
11776 Perform initialization for a direct delete request.
11777
11778 SYNOPSIS
11779 direct_delete_rows_init()
11780
11781 RETURN VALUE
11782 >0 Error
11783 0 Success
11784 */
11785
direct_delete_rows_init()11786 int ha_partition::direct_delete_rows_init()
11787 {
11788 int error;
11789 uint i, found;
11790 DBUG_ENTER("ha_partition::direct_delete_rows_init");
11791
11792 m_part_spec.start_part= 0;
11793 m_part_spec.end_part= m_tot_parts - 1;
11794 m_direct_update_part_spec= m_part_spec;
11795
11796 found= 0;
11797 for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
11798 {
11799 if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
11800 bitmap_is_set(&(m_part_info->lock_partitions), i))
11801 {
11802 handler *file= m_file[i];
11803 if (unlikely((error= (m_pre_calling ?
11804 file->pre_direct_delete_rows_init() :
11805 file->direct_delete_rows_init()))))
11806 {
11807 DBUG_PRINT("exit", ("error in direct_delete_rows_init"));
11808 DBUG_RETURN(error);
11809 }
11810 found++;
11811 }
11812 }
11813
11814 TABLE_LIST *table_list= table->pos_in_table_list;
11815 if (found != 1 && table_list)
11816 {
11817 while (table_list->parent_l)
11818 table_list= table_list->parent_l;
11819 st_select_lex *select_lex= table_list->select_lex;
11820 DBUG_PRINT("info", ("partition select_lex: %p", select_lex));
11821 if (select_lex && select_lex->explicit_limit)
11822 {
11823 DBUG_PRINT("info", ("partition explicit_limit: TRUE"));
11824 DBUG_PRINT("info", ("partition offset_limit: %p",
11825 select_lex->offset_limit));
11826 DBUG_PRINT("info", ("partition select_limit: %p",
11827 select_lex->select_limit));
11828 DBUG_PRINT("info", ("partition FALSE by select_lex"));
11829 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
11830 }
11831 }
11832 DBUG_PRINT("exit", ("OK"));
11833 DBUG_RETURN(0);
11834 }
11835
11836
11837 /**
11838 Do initialization for performing parallel direct delete
11839 for a handlersocket delete request.
11840
11841 SYNOPSIS
11842 pre_direct_delete_rows_init()
11843
11844 RETURN VALUE
11845 >0 Error
11846 0 Success
11847 */
11848
pre_direct_delete_rows_init()11849 int ha_partition::pre_direct_delete_rows_init()
11850 {
11851 bool save_m_pre_calling;
11852 int error;
11853 DBUG_ENTER("ha_partition::pre_direct_delete_rows_init");
11854 save_m_pre_calling= m_pre_calling;
11855 m_pre_calling= TRUE;
11856 error= direct_delete_rows_init();
11857 m_pre_calling= save_m_pre_calling;
11858 DBUG_RETURN(error);
11859 }
11860
11861
11862 /**
11863 Execute a direct delete request. A direct delete request deletes all
11864 qualified rows in a single operation, rather than one row at a time.
11865 The direct delete operation is pushed down to each individual
11866 partition.
11867
11868 SYNOPSIS
11869 direct_delete_rows()
11870 delete_rows Number of deleted rows
11871
11872 RETURN VALUE
11873 >0 Error
11874 0 Success
11875 */
11876
direct_delete_rows(ha_rows * delete_rows_result)11877 int ha_partition::direct_delete_rows(ha_rows *delete_rows_result)
11878 {
11879 int error;
11880 bool rnd_seq= FALSE;
11881 ha_rows delete_rows= 0;
11882 uint32 i;
11883 handler *file;
11884 DBUG_ENTER("ha_partition::direct_delete_rows");
11885
11886 if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1)
11887 {
11888 rnd_seq= TRUE;
11889 m_scan_value= 2;
11890 }
11891
11892 *delete_rows_result= 0;
11893 m_part_spec= m_direct_update_part_spec;
11894 for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
11895 {
11896 file= m_file[i];
11897 if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
11898 bitmap_is_set(&(m_part_info->lock_partitions), i))
11899 {
11900 if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE)
11901 {
11902 if (unlikely((error= (m_pre_calling ?
11903 file->ha_pre_rnd_init(TRUE) :
11904 file->ha_rnd_init(TRUE)))))
11905 DBUG_RETURN(error);
11906 }
11907 if ((error= (m_pre_calling ?
11908 file->pre_direct_delete_rows() :
11909 file->ha_direct_delete_rows(&delete_rows))))
11910 {
11911 if (m_pre_calling)
11912 file->ha_pre_rnd_end();
11913 else
11914 file->ha_rnd_end();
11915 DBUG_RETURN(error);
11916 }
11917 delete_rows_result+= delete_rows;
11918 }
11919 if (rnd_seq)
11920 {
11921 if (unlikely((error= (m_pre_calling ?
11922 file->ha_pre_index_or_rnd_end() :
11923 file->ha_index_or_rnd_end()))))
11924 DBUG_RETURN(error);
11925 }
11926 }
11927 DBUG_RETURN(0);
11928 }
11929
11930
11931 /**
11932 Start parallel execution of a direct delete for a handlersocket delete
11933 request. A direct delete request deletes all qualified rows in a single
11934 operation, rather than one row at a time. The direct delete operation
11935 is pushed down to each individual partition.
11936
11937 SYNOPSIS
11938 pre_direct_delete_rows()
11939
11940 RETURN VALUE
11941 >0 Error
11942 0 Success
11943 */
11944
pre_direct_delete_rows()11945 int ha_partition::pre_direct_delete_rows()
11946 {
11947 bool save_m_pre_calling;
11948 int error;
11949 ha_rows not_used;
11950 DBUG_ENTER("ha_partition::pre_direct_delete_rows");
11951 save_m_pre_calling= m_pre_calling;
11952 m_pre_calling= TRUE;
11953 error= direct_delete_rows(¬_used);
11954 m_pre_calling= save_m_pre_calling;
11955 DBUG_RETURN(error);
11956 }
11957
11958 /**
11959 Push metadata for the current operation down to each partition.
11960
11961 SYNOPSIS
11962 info_push()
11963
11964 RETURN VALUE
11965 >0 Error
11966 0 Success
11967 */
11968
info_push(uint info_type,void * info)11969 int ha_partition::info_push(uint info_type, void *info)
11970 {
11971 int error= 0, tmp;
11972 uint i;
11973 DBUG_ENTER("ha_partition::info_push");
11974
11975 for (i= bitmap_get_first_set(&m_partitions_to_reset);
11976 i < m_tot_parts;
11977 i= bitmap_get_next_set(&m_partitions_to_reset, i))
11978 {
11979 if (bitmap_is_set(&m_opened_partitions, i))
11980 {
11981 if ((tmp= m_file[i]->info_push(info_type, info)))
11982 {
11983 error= tmp;
11984 }
11985 }
11986 }
11987 DBUG_RETURN(error);
11988 }
11989
11990
11991 bool
can_convert_string(const Field_string * field,const Column_definition & new_type) const11992 ha_partition::can_convert_string(const Field_string* field,
11993 const Column_definition& new_type) const
11994 {
11995 for (uint index= 0; index < m_tot_parts; index++)
11996 {
11997 if (!m_file[index]->can_convert_string(field, new_type))
11998 return false;
11999 }
12000 return true;
12001 }
12002
12003 bool
can_convert_varstring(const Field_varstring * field,const Column_definition & new_type) const12004 ha_partition::can_convert_varstring(const Field_varstring* field,
12005 const Column_definition& new_type) const{
12006 for (uint index= 0; index < m_tot_parts; index++)
12007 {
12008 if (!m_file[index]->can_convert_varstring(field, new_type))
12009 return false;
12010 }
12011 return true;
12012 }
12013
12014 bool
can_convert_blob(const Field_blob * field,const Column_definition & new_type) const12015 ha_partition::can_convert_blob(const Field_blob* field,
12016 const Column_definition& new_type) const
12017 {
12018 for (uint index= 0; index < m_tot_parts; index++)
12019 {
12020 if (!m_file[index]->can_convert_blob(field, new_type))
12021 return false;
12022 }
12023 return true;
12024 }
12025
/* Storage-engine descriptor exposed to the plugin interface. */
struct st_mysql_storage_engine partition_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };

/* MariaDB plugin declaration for the partition storage-engine helper. */
maria_declare_plugin(partition)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &partition_storage_engine,
  "partition",
  "Mikael Ronstrom, MySQL AB",
  "Partition Storage Engine Helper",
  PLUGIN_LICENSE_GPL,
  partition_initialize, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100, /* 1.0 */
  NULL, /* status variables */
  NULL, /* system variables */
  "1.0", /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
}
maria_declare_plugin_end;

#endif