1 /*
2   Copyright (c) 2005, 2019, Oracle and/or its affiliates.
3   Copyright (c) 2009, 2021, MariaDB
4 
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; version 2 of the License.
8 
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13 
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software
16   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA
17 */
18 
19 /*
20   This handler was developed by Mikael Ronstrom for version 5.1 of MySQL.
21   It is an abstraction layer on top of other handlers such as MyISAM,
22   InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
23   be handled by a storage engine. The current example of this is NDB
  Cluster that has internally handled partitioning. This has benefits in
  that many loops needed in the partition handler can be avoided.
26 
27   Partitioning has an inherent feature which in some cases is positive and
28   in some cases is negative. It splits the data into chunks. This makes
29   the data more manageable, queries can easily be parallelised towards the
30   parts and indexes are split such that there are less levels in the
31   index trees. The inherent disadvantage is that to use a split index
32   one has to scan all index parts which is ok for large queries but for
33   small queries it can be a disadvantage.
34 
35   Partitioning lays the foundation for more manageable databases that are
36   extremely large. It does also lay the foundation for more parallelism
37   in the execution of queries. This functionality will grow with later
38   versions of MySQL/MariaDB.
39 
  The partition is setup to use table locks. It implements a partition "SHARE"
41   that is inserted into a hash by table name. You can use this to store
42   information of state that any partition handler object will be able to see
43   if it is using the same table.
44 
45   Please read the object definition in ha_partition.h before reading the rest
  of this file.
47 */
48 
49 #include "mariadb.h"
50 #include "sql_priv.h"
51 #include "sql_parse.h"                          // append_file_to_dir
52 #include "create_options.h"
53 
54 #ifdef WITH_PARTITION_STORAGE_ENGINE
55 #include "ha_partition.h"
56 #include "sql_table.h"                        // tablename_to_filename
57 #include "key.h"
58 #include "sql_plugin.h"
59 #include "sql_show.h"                        // append_identifier
60 #include "sql_admin.h"                       // SQL_ADMIN_MSG_TEXT_SIZE
61 #include "sql_select.h"
62 
63 #include "debug_sync.h"
64 
65 /* First 4 bytes in the .par file is the number of 32-bit words in the file */
66 #define PAR_WORD_SIZE 4
67 /* offset to the .par file checksum */
68 #define PAR_CHECKSUM_OFFSET 4
69 /* offset to the total number of partitions */
70 #define PAR_NUM_PARTS_OFFSET 8
71 /* offset to the engines array */
72 #define PAR_ENGINES_OFFSET 12
73 #define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | \
74                                        HA_REC_NOT_IN_SEQ | \
75                                        HA_CAN_REPAIR | \
76                                        HA_REUSES_FILE_NAMES)
77 #define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
78                                         HA_DUPLICATE_POS | \
79                                         HA_CAN_INSERT_DELAYED | \
80                                         HA_READ_BEFORE_WRITE_REMOVAL |\
81                                         HA_CAN_TABLES_WITHOUT_ROLLBACK)
82 
83 static const char *ha_par_ext= PAR_EXT;
84 
85 /****************************************************************************
86                 MODULE create/delete handler object
87 ****************************************************************************/
88 
89 static handler *partition_create_handler(handlerton *hton,
90                                          TABLE_SHARE *share,
91                                          MEM_ROOT *mem_root);
92 static uint partition_flags();
93 static alter_table_operations alter_table_flags(alter_table_operations flags);
94 
95 
/**
  Notify every underlying partition engine that the table definition changed.

  The per-partition file name (with the #P#<name> suffix) is built for each
  partition and passed to its engine's notify_tabledef_changed hook.

  @param db              Database name
  @param org_table_name  Original table name (not used here; each partition
                         is notified under its generated partition name)
  @param frm             New frm image
  @param version         New table version

  @return 0 on success, 1 if a partition name could not be built or any
          underlying engine reported failure
*/
int ha_partition::notify_tabledef_changed(LEX_CSTRING *db,
                                          LEX_CSTRING *org_table_name,
                                          LEX_CUSTRING *frm,
                                          LEX_CUSTRING *version)
{
  char from_buff[FN_REFLEN + 1], from_lc_buff[FN_REFLEN + 1];
  const char *from_path, *name_buffer_ptr, *from;
  int res= 0;
  handler **file= m_file;
  DBUG_ENTER("ha_partition::notify_tabledef_changed");

  from= table->s->normalized_path.str;

  /* setup m_name_buffer_ptr */
  if (read_par_file(table->s->normalized_path.str))
    DBUG_RETURN(1);

  /* Adjust path case for engines that are not file based */
  from_path= get_canonical_filename(*file, from, from_lc_buff);
  name_buffer_ptr= m_name_buffer_ptr;
  do
  {
    LEX_CSTRING table_name;
    const char *table_name_ptr;
    /*
      On name-build failure we record the error but keep iterating.
      NOTE(review): from_buff may then hold stale/truncated contents yet is
      still passed to the engine below — confirm this is intentional.
    */
    if (create_partition_name(from_buff, sizeof(from_buff),
                              from_path, name_buffer_ptr,
                              NORMAL_PART_NAME, FALSE))
      res=1;
    /* Strip the directory; the engine is given the bare partition name */
    table_name_ptr= from_buff + dirname_length(from_buff);

    lex_string_set3(&table_name, table_name_ptr, strlen(table_name_ptr));

    if (((*file)->ht)->notify_tabledef_changed((*file)->ht, db, &table_name,
                                               frm, version, *file))
      res=1;
    /* Partition names are stored back to back, each zero-terminated */
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  } while (*(++file));
  DBUG_RETURN(res);
}
134 
135 
136 static int
partition_notify_tabledef_changed(handlerton *,LEX_CSTRING * db,LEX_CSTRING * table,LEX_CUSTRING * frm,LEX_CUSTRING * version,handler * file)137 partition_notify_tabledef_changed(handlerton *,
138                                   LEX_CSTRING *db,
139                                   LEX_CSTRING *table,
140                                   LEX_CUSTRING *frm,
141                                   LEX_CUSTRING *version,
142                                   handler *file)
143 {
144   DBUG_ENTER("partition_notify_tabledef_changed");
145   DBUG_RETURN(static_cast<ha_partition*>
146               (file)->notify_tabledef_changed(db, table, frm, version));
147 }
148 
149 
150 /*
  If frm_error() is called then we will use this to find out what file
152   extensions exist for the storage engine. This is also used by the default
153   rename_table and delete_table method in handler.cc.
154 */
155 static const char *ha_partition_ext[]=
156 {
157   ha_par_ext, NullS
158 };
159 
160 static PSI_memory_key key_memory_Partition_share;
161 static PSI_memory_key key_memory_partition_sort_buffer;
162 static PSI_memory_key key_memory_Partition_admin;
163 
164 static PSI_memory_key key_memory_ha_partition_file;
165 //static PSI_memory_key key_memory_ha_partition_engine_array;
166 static PSI_memory_key key_memory_ha_partition_part_ids;
167 
168 #ifdef HAVE_PSI_INTERFACE
169 PSI_mutex_key key_partition_auto_inc_mutex;
170 PSI_file_key key_file_ha_partition_par;
171 
172 static PSI_mutex_info all_partition_mutexes[]=
173 {
174   { &key_partition_auto_inc_mutex, "Partition_share::auto_inc_mutex", 0}
175 };
176 static PSI_memory_info all_partitioning_memory[]=
177 { { &key_memory_Partition_share, "Partition_share", 0},
178   { &key_memory_partition_sort_buffer, "partition_sort_buffer", 0},
179   { &key_memory_Partition_admin, "Partition_admin", 0},
180   { &key_memory_ha_partition_file, "ha_partition::file", 0},
181 //  { &key_memory_ha_partition_engine_array, "ha_partition::engine_array", 0},
182   { &key_memory_ha_partition_part_ids, "ha_partition::part_ids", 0} };
183 static PSI_file_info all_partition_file[]=
184 { { &key_file_ha_partition_par, "ha_partition::parfile", 0} };
185 
init_partition_psi_keys(void)186 static void init_partition_psi_keys(void)
187 {
188   const char* category= "partition";
189   int count;
190 
191   count= array_elements(all_partitioning_memory);
192   mysql_memory_register(category, all_partitioning_memory, count);
193   count= array_elements(all_partition_mutexes);
194   mysql_mutex_register(category, all_partition_mutexes, count);
195   count= array_elements(all_partition_file);
196   mysql_file_register(category, all_partition_file, count);
197 }
198 #endif /* HAVE_PSI_INTERFACE */
199 
partition_initialize(void * p)200 static int partition_initialize(void *p)
201 {
202   handlerton *partition_hton;
203   partition_hton= (handlerton *)p;
204 
205   partition_hton->db_type= DB_TYPE_PARTITION_DB;
206   partition_hton->create= partition_create_handler;
207 
208   partition_hton->partition_flags= partition_flags;
209   partition_hton->notify_tabledef_changed= partition_notify_tabledef_changed;
210   partition_hton->alter_table_flags= alter_table_flags;
211   partition_hton->flags= HTON_NOT_USER_SELECTABLE |
212                          HTON_HIDDEN |
213                          HTON_TEMPORARY_NOT_SUPPORTED;
214   partition_hton->tablefile_extensions= ha_partition_ext;
215 
216 #ifdef HAVE_PSI_INTERFACE
217   init_partition_psi_keys();
218 #endif
219   return 0;
220 }
221 
222 
223 /**
224   Initialize and allocate space for partitions shares.
225 
226   @param num_parts  Number of partitions to allocate storage for.
227 
228   @return Operation status.
229     @retval true  Failure (out of memory).
230     @retval false Success.
231 */
232 
init(uint num_parts)233 bool Partition_share::init(uint num_parts)
234 {
235   DBUG_ENTER("Partition_share::init");
236   auto_inc_initialized= false;
237   partition_name_hash_initialized= false;
238   next_auto_inc_val= 0;
239   if (partitions_share_refs.init(num_parts))
240   {
241     DBUG_RETURN(true);
242   }
243   DBUG_RETURN(false);
244 }
245 
246 
247 /*
248   Create new partition handler
249 
250   SYNOPSIS
251     partition_create_handler()
252     table                       Table object
253 
254   RETURN VALUE
255     New partition object
256 */
257 
partition_create_handler(handlerton * hton,TABLE_SHARE * share,MEM_ROOT * mem_root)258 static handler *partition_create_handler(handlerton *hton,
259                                          TABLE_SHARE *share,
260                                          MEM_ROOT *mem_root)
261 {
262   ha_partition *file= new (mem_root) ha_partition(hton, share);
263   if (file && file->initialize_partition(mem_root))
264   {
265     delete file;
266     file= 0;
267   }
268   return file;
269 }
270 
/* Capability flags reported for the partition engine itself */
static uint partition_flags()
{
  return HA_CAN_PARTITION;
}
275 
/*
  ALTER TABLE capabilities: partition functions are supported and
  fast (in-place) partition changes are allowed.  The requested flags
  argument is not consulted.
*/
static alter_table_operations alter_table_flags(alter_table_operations flags __attribute__((unused)))
{
  return (HA_PARTITION_FUNCTION_SUPPORTED |
          HA_FAST_CHANGE_PARTITION);
}
281 
282 /*
283   Constructor method
284 
285   SYNOPSIS
286     ha_partition()
287     table                       Table object
288 
289   RETURN VALUE
290     NONE
291 */
292 
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
  :handler(hton, share)
{
  DBUG_ENTER("ha_partition::ha_partition(table)");
  /* Allocate the private MEM_ROOT and reset all member variables */
  ha_partition_init();
  DBUG_VOID_RETURN;
}
300 
301 
302 /* Initialize all partition variables */
303 
void ha_partition::ha_partition_init()
{
  /* Private MEM_ROOT living as long as this handler object */
  init_alloc_root(PSI_INSTRUMENT_ME, &m_mem_root, 512, 512, MYF(0));
  init_handler_variables();
}
309 
310 /*
311   Constructor method
312 
313   SYNOPSIS
314     ha_partition()
315     part_info                       Partition info
316 
317   RETURN VALUE
318     NONE
319 */
320 
ha_partition::ha_partition(handlerton *hton, partition_info *part_info)
  :handler(hton, NULL)
{
  DBUG_ENTER("ha_partition::ha_partition(part_info)");
  DBUG_ASSERT(part_info);
  ha_partition_init();
  /*
    Partition info is supplied directly (no table share yet), so mark this
    handler as created from partition info.
  */
  m_part_info= part_info;
  m_create_handler= TRUE;
  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
  DBUG_VOID_RETURN;
}
332 
333 /**
334   ha_partition constructor method used by ha_partition::clone()
335 
336   @param hton               Handlerton (partition_hton)
337   @param share              Table share object
338   @param part_info_arg      partition_info to use
339   @param clone_arg          ha_partition to clone
  @param clone_mem_root_arg  MEM_ROOT to use
341 
342   @return New partition handler
343 */
344 
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share,
                           partition_info *part_info_arg,
                           ha_partition *clone_arg,
                           MEM_ROOT *clone_mem_root_arg)
  :handler(hton, share)
{
  DBUG_ENTER("ha_partition::ha_partition(clone)");
  ha_partition_init();
  m_part_info= part_info_arg;
  m_create_handler= TRUE;
  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
  /* Remember the handler this one is a clone of and the MEM_ROOT to use */
  m_is_clone_of= clone_arg;
  m_clone_mem_root= clone_mem_root_arg;
  /* Reuse the source handler's shared state and partition count */
  part_share= clone_arg->part_share;
  m_tot_parts= clone_arg->m_tot_parts;
  DBUG_VOID_RETURN;
}
362 
363 /*
364   Initialize handler object
365 
366   SYNOPSIS
367     init_handler_variables()
368 
369   RETURN VALUE
370     NONE
371 */
372 
/* Reset every member variable to its "not yet opened" default */
void ha_partition::init_handler_variables()
{
  active_index= MAX_KEY;
  m_mode= 0;
  m_open_test_lock= 0;
  /* .par file buffer and the arrays/pointers derived from it */
  m_file_buffer= NULL;
  m_name_buffer_ptr= NULL;
  m_engine_array= NULL;
  m_connect_string= NULL;
  m_file= NULL;
  m_file_tot_parts= 0;
  m_reorged_file= NULL;
  m_new_file= NULL;
  m_reorged_parts= 0;
  m_added_file= NULL;
  m_tot_parts= 0;
  /* Scan/index state */
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_scan_value= 2;
  m_ref_length= 0;
  m_part_spec.end_part= NO_CURRENT_PART_ID;
  m_index_scan_type= partition_no_index_scan;
  m_start_key.key= NULL;
  m_start_key.length= 0;
  m_myisam= FALSE;
  m_innodb= FALSE;
  /* extra()/cache state forwarded to underlying handlers */
  m_extra_cache= FALSE;
  m_extra_cache_size= 0;
  m_extra_prepare_for_update= FALSE;
  m_extra_cache_part_id= NO_CURRENT_PART_ID;
  m_handler_status= handler_not_initialized;
  m_part_field_array= NULL;
  m_ordered_rec_buffer= NULL;
  m_top_entry= NO_CURRENT_PART_ID;
  m_rec_length= 0;
  m_last_part= 0;
  m_rec0= 0;
  m_err_rec= NULL;
  m_curr_key_info[0]= NULL;
  m_curr_key_info[1]= NULL;
  m_part_func_monotonicity_info= NON_MONOTONIC;
  m_key_not_found= FALSE;
  auto_increment_lock= FALSE;
  auto_increment_safe_stmt_log_lock= FALSE;
  /*
    this allows blackhole to work properly
  */
  m_num_locks= 0;
  m_part_info= NULL;
  m_create_handler= FALSE;
  m_is_sub_partitioned= 0;
  m_is_clone_of= NULL;
  m_clone_mem_root= NULL;
  part_share= NULL;
  m_new_partitions_share_refs.empty();
  m_part_ids_sorted_by_num_of_records= NULL;
  m_partitions_to_open= NULL;

  /* Multi-range-read state */
  m_range_info= NULL;
  m_mrr_full_buffer_size= 0;
  m_mrr_new_full_buffer_size= 0;
  m_mrr_full_buffer= NULL;
  m_mrr_range_first= NULL;

  m_pre_calling= FALSE;
  m_pre_call_use_parallel= FALSE;

  ft_first= ft_current=  NULL;
  bulk_access_executing= FALSE;                 // For future

  /*
    Clear bitmaps to allow anyone to call my_bitmap_free() on them at any time
  */
  my_bitmap_clear(&m_bulk_insert_started);
  my_bitmap_clear(&m_locked_partitions);
  my_bitmap_clear(&m_partitions_to_reset);
  my_bitmap_clear(&m_key_not_found_partitions);
  my_bitmap_clear(&m_mrr_used_partitions);
  my_bitmap_clear(&m_opened_partitions);
  m_file_sample= NULL;

#ifdef DONT_HAVE_TO_BE_INITALIZED
  m_start_key.flag= 0;
  m_ordered= TRUE;
#endif
}
458 
459 
table_type() const460 const char *ha_partition::table_type() const
461 {
462   // we can do this since we only support a single engine type
463   return m_file[0]->table_type();
464 }
465 
466 
467 /*
468   Destructor method
469 
470   SYNOPSIS
471     ~ha_partition()
472 
473   RETURN VALUE
474     NONE
475 */
476 
ha_partition::~ha_partition()
{
  DBUG_ENTER("ha_partition::~ha_partition");
  if (m_new_partitions_share_refs.elements)
    m_new_partitions_share_refs.delete_elements();
  if (m_file != NULL)
  {
    uint i;
    /* Delete every underlying partition handler */
    for (i= 0; i < m_tot_parts; i++)
      delete m_file[i];
  }
  destroy_record_priority_queue();
  my_free(m_part_ids_sorted_by_num_of_records);

  if (m_added_file)
  {
    /* Handlers allocated for partitions being added (NULL-terminated) */
    for (handler **ph= m_added_file; *ph; ph++)
      delete (*ph);
  }
  clear_handler_file();
  /* Releases everything allocated on this handler's private MEM_ROOT */
  free_root(&m_mem_root, MYF(0));

  DBUG_VOID_RETURN;
}
501 
502 
503 /*
504   Initialize partition handler object
505 
506   SYNOPSIS
507     initialize_partition()
508     mem_root			Allocate memory through this
509 
510   RETURN VALUE
511     1                         Error
512     0                         Success
513 
514   DESCRIPTION
515 
516   The partition handler is only a layer on top of other engines. Thus it
517   can't really perform anything without the underlying handlers. Thus we
518   add this method as part of the allocation of a handler object.
519 
520   1) Allocation of underlying handlers
521      If we have access to the partition info we will allocate one handler
522      instance for each partition.
523   2) Allocation without partition info
524      The cases where we don't have access to this information is when called
525      in preparation for delete_table and rename_table and in that case we
526      only need to set HA_FILE_BASED. In that case we will use the .par file
527      that contains information about the partitions and their engines and
528      the names of each partition.
529   3) Table flags initialisation
530      We need also to set table flags for the partition handler. This is not
531      static since it depends on what storage engines are used as underlying
532      handlers.
533      The table flags is set in this routine to simulate the behaviour of a
534      normal storage engine
535      The flag HA_FILE_BASED will be set independent of the underlying handlers
536   4) Index flags initialisation
537      When knowledge exists on the indexes it is also possible to initialize the
538      index flags. Again the index flags must be initialized by using the under-
539      lying handlers since this is storage engine dependent.
540      The flag HA_READ_ORDER will be reset for the time being to indicate no
541      ordered output is available from partition handler indexes. Later a merge
542      sort will be performed using the underlying handlers.
543   5) has_transactions are calculated here.
544 
545 */
546 
bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
{
  handler **file_array, *file;
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::initialize_partition");

  if (m_create_handler)
  {
    /* Partition info was supplied directly; build handlers from it */
    m_tot_parts= m_part_info->get_tot_partitions();
    DBUG_ASSERT(m_tot_parts > 0);
    if (new_handlers_from_part_info(mem_root))
      DBUG_RETURN(1);
  }
  else if (!table_share || !table_share->normalized_path.str)
  {
    /*
      Called with dummy table share (delete, rename and alter table).
      Don't need to set-up anything.
    */
    DBUG_RETURN(0);
  }
  else if (get_from_handler_file(table_share->normalized_path.str,
                                 mem_root, false))
  {
    /* Normal open path: handlers are read from the .par file */
    my_error(ER_FAILED_READ_FROM_PAR_FILE, MYF(0));
    DBUG_RETURN(1);
  }
  /*
    We create all underlying table handlers here. We do it in this special
    method to be able to report allocation errors.

    Set up has_transactions since they are called often in all kinds of places,
    other parameters are calculated on demand.
    Verify that all partitions have the same table_flags.
  */
  check_table_flags= m_file[0]->ha_table_flags();
  file_array= m_file;
  do
  {
    file= *file_array;
    /* Mixing engines with differing table flags is not supported */
    if (check_table_flags != file->ha_table_flags())
    {
      my_error(ER_MIX_HANDLER_ERROR, MYF(0));
      DBUG_RETURN(1);
    }
  } while (*(++file_array));
  m_handler_status= handler_initialized;
  DBUG_RETURN(0);
}
596 
597 /****************************************************************************
598                 MODULE meta data changes
599 ****************************************************************************/
600 /*
601   Delete a table
602 
603   SYNOPSIS
604     delete_table()
605     name                    Full path of table name
606 
607   RETURN VALUE
608     >0                        Error
609     0                         Success
610 
611   DESCRIPTION
612     Used to delete a table. By the time delete_table() has been called all
613     opened references to this table will have been closed (and your globally
614     shared references released. The variable name will just be the name of
615     the table. You will need to remove any files you have created at this
616     point.
617 
618     If you do not implement this, the default delete_table() is called from
619     handler.cc and it will delete all files with the file extensions returned
620     by bas_ext().
621 
622     Called from handler.cc by delete_table and  ha_create_table(). Only used
623     during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
624     the storage engine.
625 */
626 
delete_table(const char * name)627 int ha_partition::delete_table(const char *name)
628 {
629   DBUG_ENTER("ha_partition::delete_table");
630 
631   DBUG_RETURN(del_ren_table(name, NULL));
632 }
633 
634 
635 /*
636   Rename a table
637 
638   SYNOPSIS
639     rename_table()
640     from                      Full path of old table name
641     to                        Full path of new table name
642 
643   RETURN VALUE
644     >0                        Error
645     0                         Success
646 
647   DESCRIPTION
648     Renames a table from one name to another from alter table call.
649 
650     If you do not implement this, the default rename_table() is called from
651     handler.cc and it will rename all files with the file extensions returned
652     by bas_ext().
653 
654     Called from sql_table.cc by mysql_rename_table().
655 */
656 
rename_table(const char * from,const char * to)657 int ha_partition::rename_table(const char *from, const char *to)
658 {
659   DBUG_ENTER("ha_partition::rename_table");
660 
661   DBUG_RETURN(del_ren_table(from, to));
662 }
663 
664 
665 /*
666   Create the handler file (.par-file)
667 
668   SYNOPSIS
669     create_partitioning_metadata()
670     path                              Path to the new frm file (without ext)
671     old_p                             Path to the old frm file (without ext)
672     create_info                       Create info generated for CREATE TABLE
673 
674   RETURN VALUE
675     >0                        Error
676     0                         Success
677 
678   DESCRIPTION
679     create_partitioning_metadata is called to create any handler specific files
680     before opening the file with openfrm to later call ::create on the
681     file object.
682     In the partition handler this is used to store the names of partitions
683     and types of engines in the partitions.
684 */
685 
int ha_partition::create_partitioning_metadata(const char *path,
                                               const char *old_path,
                                               chf_create_flags action_flag)
{
  partition_element *part;
  DBUG_ENTER("ha_partition::create_partitioning_metadata");

  /*
    We need to update total number of parts since we might write the handler
    file as part of a partition management command
  */
  if (action_flag == CHF_DELETE_FLAG ||
      action_flag == CHF_RENAME_FLAG)
  {
    char name[FN_REFLEN];
    char old_name[FN_REFLEN];

    /* Build the .par file names for the new and old table paths */
    strxmov(name, path, ha_par_ext, NullS);
    strxmov(old_name, old_path, ha_par_ext, NullS);
    if ((action_flag == CHF_DELETE_FLAG &&
         mysql_file_delete(key_file_ha_partition_par, name, MYF(MY_WME))) ||
        (action_flag == CHF_RENAME_FLAG &&
         mysql_file_rename(key_file_ha_partition_par, old_name, name,
                           MYF(MY_WME))))
    {
      DBUG_RETURN(TRUE);
    }
  }
  else if (action_flag == CHF_CREATE_FLAG)
  {
    /* Write a fresh .par file describing partitions and their engines */
    if (create_handler_file(path))
    {
      my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
      DBUG_RETURN(1);
    }
  }

  /* m_part_info is only NULL when we failed to create a partition table */
  if (m_part_info)
  {
    /* Forward the operation to the underlying engine's hook, if any */
    part= m_part_info->partitions.head();
    /* part->engine_type may be 0 when we failed to create the partition */
    if (part->engine_type &&
        (part->engine_type)->create_partitioning_metadata &&
        ((part->engine_type)->create_partitioning_metadata)(path, old_path,
                                                            action_flag))
    {
      my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
      DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}
739 
740 
741 /*
742   Create a partitioned table
743 
744   SYNOPSIS
745     create()
746     name                              Full path of table name
747     table_arg                         Table object
748     create_info                       Create info generated for CREATE TABLE
749 
750   RETURN VALUE
751     >0                        Error
752     0                         Success
753 
754   DESCRIPTION
755     create() is called to create a table. The variable name will have the name
756     of the table. When create() is called you do not need to worry about
757     opening the table. Also, the FRM file will have already been created so
758     adjusting create_info will not do you any good. You can overwrite the frm
759     file at this point if you wish to change the table definition, but there
760     are no methods currently provided for doing that.
761 
762     Called from handler.cc by ha_create_table().
763 */
764 
int ha_partition::create(const char *name, TABLE *table_arg,
			 HA_CREATE_INFO *create_info)
{
  int error;
  char name_buff[FN_REFLEN + 1], name_lc_buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *path;
  uint i;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem;
  handler **file, **abort_file;
  DBUG_ENTER("ha_partition::create");
  DBUG_PRINT("enter", ("name: '%s'", name));

  DBUG_ASSERT(!fn_frm_ext(name));

  /* Not allowed to create temporary partitioned tables */
  if (create_info && create_info->tmp_table())
  {
    my_error(ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING, MYF(0), "CREATE TEMPORARY TABLE");
    DBUG_RETURN(TRUE);
  }

  /* Loads partition names/handlers; also sets m_file and m_name_buffer_ptr */
  if (get_from_handler_file(name, ha_thd()->mem_root, false))
    DBUG_RETURN(TRUE);
  DBUG_ASSERT(m_file_buffer);
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  path= get_canonical_filename(*file, name, name_lc_buff);
  for (i= 0; i < m_part_info->num_parts; i++)
  {
    part_elem= part_it++;
    if (m_is_sub_partitioned)
    {
      /* Create one table per subpartition of this partition */
      uint j;
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        part_elem= sub_it++;
        if (unlikely((error= create_partition_name(name_buff,
                                                   sizeof(name_buff), path,
                                                   name_buffer_ptr,
                                                   NORMAL_PART_NAME, FALSE))))
          goto create_error;
        if (unlikely((error= set_up_table_before_create(table_arg, name_buff,
                                                        create_info,
                                                        part_elem)) ||
                     ((error= (*file)->ha_create(name_buff, table_arg,
                                                 create_info)))))
          goto create_error;

        /* Names are packed back to back, each zero-terminated */
        name_buffer_ptr= strend(name_buffer_ptr) + 1;
        file++;
      }
    }
    else
    {
      if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff),
                                                 path, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto create_error;
      if (unlikely((error= set_up_table_before_create(table_arg, name_buff,
                                                      create_info,
                                                      part_elem)) ||
                   ((error= (*file)->ha_create(name_buff, table_arg,
                                               create_info)))))
        goto create_error;

      name_buffer_ptr= strend(name_buffer_ptr) + 1;
      file++;
    }
  }
  DBUG_RETURN(0);

create_error:
  /* Roll back: drop every partition table created so far, best effort */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    if (!create_partition_name(name_buff, sizeof(name_buff), path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE))
      (void) (*file)->delete_table((const char*) name_buff);
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  /* Also remove the .par file created for this table */
  handler::delete_table(name);
  DBUG_RETURN(error);
}
858 
859 
860 /*
861   Drop partitions as part of ALTER TABLE of partitions
862 
863   SYNOPSIS
864     drop_partitions()
865     path                        Complete path of db and table name
866 
867   RETURN VALUE
868     >0                          Failure
869     0                           Success
870 
871   DESCRIPTION
872     Use part_info object on handler object to deduce which partitions to
873     drop (each partition has a state attached to it)
874 */
875 
drop_partitions(const char * path)876 int ha_partition::drop_partitions(const char *path)
877 {
878   List_iterator<partition_element> part_it(m_part_info->partitions);
879   char part_name_buff[FN_REFLEN + 1];
880   uint num_parts= m_part_info->partitions.elements;
881   uint num_subparts= m_part_info->num_subparts;
882   uint i= 0;
883   uint name_variant;
884   int  ret_error;
885   int  error= 0;
886   DBUG_ENTER("ha_partition::drop_partitions");
887 
888   /*
889     Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
890     We use m_file[0] as long as all partitions have the same storage engine.
891   */
892   DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
893                                                    part_name_buff)));
894   do
895   {
896     partition_element *part_elem= part_it++;
897     if (part_elem->part_state == PART_TO_BE_DROPPED)
898     {
899       handler *file;
900       /*
901         This part is to be dropped, meaning the part or all its subparts.
902       */
903       name_variant= NORMAL_PART_NAME;
904       if (m_is_sub_partitioned)
905       {
906         List_iterator<partition_element> sub_it(part_elem->subpartitions);
907         uint j= 0, part;
908         do
909         {
910           partition_element *sub_elem= sub_it++;
911           part= i * num_subparts + j;
912           if (unlikely((ret_error=
913                         create_subpartition_name(part_name_buff,
914                                                  sizeof(part_name_buff), path,
915                                                  part_elem->partition_name,
916                                                  sub_elem->partition_name,
917                                                  name_variant))))
918             error= ret_error;
919           file= m_file[part];
920           DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff));
921           if (unlikely((ret_error= file->delete_table(part_name_buff))))
922             error= ret_error;
923           if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
924                                                 entry_pos)))
925             error= 1;
926         } while (++j < num_subparts);
927       }
928       else
929       {
930         if ((ret_error= create_partition_name(part_name_buff,
931                           sizeof(part_name_buff), path,
932                           part_elem->partition_name, name_variant, TRUE)))
933           error= ret_error;
934         else
935         {
936           file= m_file[i];
937           DBUG_PRINT("info", ("Drop partition %s", part_name_buff));
938           if (unlikely((ret_error= file->delete_table(part_name_buff))))
939             error= ret_error;
940           if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
941                                                 entry_pos)))
942             error= 1;
943         }
944       }
945       if (part_elem->part_state == PART_IS_CHANGED)
946         part_elem->part_state= PART_NORMAL;
947       else
948         part_elem->part_state= PART_IS_DROPPED;
949     }
950   } while (++i < num_parts);
951   (void) sync_ddl_log();
952   DBUG_RETURN(error);
953 }
954 
955 
956 /*
957   Rename partitions as part of ALTER TABLE of partitions
958 
959   SYNOPSIS
960     rename_partitions()
961     path                        Complete path of db and table name
962 
963   RETURN VALUE
964     TRUE                        Failure
965     FALSE                       Success
966 
967   DESCRIPTION
968     When reorganising partitions, adding hash partitions and coalescing
969     partitions it can be necessary to rename partitions while holding
970     an exclusive lock on the table.
971     Which partitions to rename is given by state of partitions found by the
972     partition info struct referenced from the handler object
973 */
974 
int ha_partition::rename_partitions(const char *path)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  List_iterator<partition_element> temp_it(m_part_info->temp_partitions);
  char part_name_buff[FN_REFLEN + 1];
  char norm_name_buff[FN_REFLEN + 1];
  uint num_parts= m_part_info->partitions.elements;
  uint part_count= 0;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint j= 0;
  int error= 0;
  int ret_error;
  uint temp_partitions= m_part_info->temp_partitions.elements;
  handler *file;
  partition_element *part_elem, *sub_elem;
  DBUG_ENTER("ha_partition::rename_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   norm_name_buff)));

  DEBUG_SYNC(ha_thd(), "before_rename_partitions");
  /* Phase 1: delete the already-copied reorganised (temp) partitions. */
  if (temp_partitions)
  {
    /*
      These are the reorganised partitions that have already been copied.
      We delete the partitions and log the delete by inactivating the
      delete log entry in the table log. We only need to synchronise
      these writes before moving to the next loop since there is no
      interaction among reorganised partitions, they cannot have the
      same name.
    */
    do
    {
      part_elem= temp_it++;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        j= 0;
        do
        {
          sub_elem= sub_it++;
          file= m_reorged_file[part_count++];
          /* Errors are remembered but do not stop the loop */
          if (unlikely((ret_error=
                        create_subpartition_name(norm_name_buff,
                                                 sizeof(norm_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 NORMAL_PART_NAME))))
            error= ret_error;
          DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
          if (unlikely((ret_error= file->delete_table(norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            sub_elem->log_entry= NULL; /* Indicate success */
        } while (++j < num_subparts);
      }
      else
      {
        file= m_reorged_file[part_count++];
        if (unlikely((ret_error=
                      create_partition_name(norm_name_buff,
                                            sizeof(norm_name_buff), path,
                                            part_elem->partition_name,
                                            NORMAL_PART_NAME, TRUE))))
          error= ret_error;
        else
        {
          DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
          if (unlikely((ret_error= file->delete_table(norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            part_elem->log_entry= NULL; /* Indicate success */
        }
      }
    } while (++i < temp_partitions);
    /* Make the deletes durable before starting the renames below */
    (void) sync_ddl_log();
  }
  /* Phase 2: delete replaced partitions and rename TEMP ones into place. */
  i= 0;
  do
  {
    /*
       When state is PART_IS_CHANGED it means that we have created a new
       TEMP partition that is to be renamed to normal partition name and
       we are to delete the old partition with currently the normal name.

       We perform this operation by
       1) Delete old partition with normal partition name
       2) Signal this in table log entry
       3) Synch table log to ensure we have consistency in crashes
       4) Rename temporary partition name to normal partition name
       5) Signal this to table log entry
       It is not necessary to synch the last state since a new rename
       should not corrupt things if there was no temporary partition.

       The only other parts we need to cater for are new parts that
       replace reorganised parts. The reorganised parts were deleted
       by the code above that goes through the temp_partitions list.
       Thus the synch above makes it safe to simply perform step 4 and 5
       for those entries.
    */
    part_elem= part_it++;
    if (part_elem->part_state == PART_IS_CHANGED ||
        part_elem->part_state == PART_TO_BE_DROPPED ||
        (part_elem->part_state == PART_IS_ADDED && temp_partitions))
    {
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint part;

        j= 0;
        do
        {
          sub_elem= sub_it++;
          part= i * num_subparts + j;
          /* norm_name_buff: the final (normal) name of this subpartition */
          if (unlikely((ret_error=
                        create_subpartition_name(norm_name_buff,
                                                 sizeof(norm_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 NORMAL_PART_NAME))))
            error= ret_error;
          if (part_elem->part_state == PART_IS_CHANGED)
          {
            /* Steps 1-3: drop the old subpartition and sync the DDL log */
            file= m_reorged_file[part_count++];
            DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
            if (unlikely((ret_error= file->delete_table(norm_name_buff))))
              error= ret_error;
            else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                       entry_pos)))
              error= 1;
            (void) sync_ddl_log();
          }
          /* Steps 4-5: rename the TEMP subpartition to its normal name */
          file= m_new_file[part];
          if (unlikely((ret_error=
                        create_subpartition_name(part_name_buff,
                                                 sizeof(part_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 TEMP_PART_NAME))))
            error= ret_error;
          DBUG_PRINT("info", ("Rename subpartition from %s to %s",
                     part_name_buff, norm_name_buff));
          if (unlikely((ret_error= file->ha_rename_table(part_name_buff,
                                                         norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            sub_elem->log_entry= NULL;
        } while (++j < num_subparts);
      }
      else
      {
        /* Build both the normal (target) and TEMP (source) names up front */
        if (unlikely((ret_error=
                      create_partition_name(norm_name_buff,
                                            sizeof(norm_name_buff), path,
                                            part_elem->partition_name,
                                            NORMAL_PART_NAME, TRUE)) ||
                     (ret_error= create_partition_name(part_name_buff,
                                                       sizeof(part_name_buff),
                                                       path,
                                                       part_elem->
                                                       partition_name,
                                                       TEMP_PART_NAME, TRUE))))
          error= ret_error;
        else
        {
          if (part_elem->part_state == PART_IS_CHANGED)
          {
            /* Steps 1-3: drop the old partition and sync the DDL log */
            file= m_reorged_file[part_count++];
            DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
            if (unlikely((ret_error= file->delete_table(norm_name_buff))))
              error= ret_error;
            else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                       entry_pos)))
              error= 1;
            (void) sync_ddl_log();
          }
          /* Steps 4-5: rename the TEMP partition to its normal name */
          file= m_new_file[i];
          DBUG_PRINT("info", ("Rename partition from %s to %s",
                     part_name_buff, norm_name_buff));
          if (unlikely((ret_error= file->ha_rename_table(part_name_buff,
                                                         norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            part_elem->log_entry= NULL;
        }
      }
    }
  } while (++i < num_parts);
  (void) sync_ddl_log();
  DBUG_RETURN(error);
}
1184 
1185 
/*
  Operation codes for handle_opt_partitions()/handle_opt_part(), i.e. the
  per-partition admin commands (OPTIMIZE/ANALYZE/CHECK/REPAIR TABLE etc.).
*/
#define OPTIMIZE_PARTS 1
#define ANALYZE_PARTS 2
#define CHECK_PARTS   3
#define REPAIR_PARTS 4
#define ASSIGN_KEYCACHE_PARTS 5
#define PRELOAD_KEYS_PARTS 6

/* Operation names for admin result messages, indexed by the codes above
   (index 0 is unused). */
static const char *opt_op_name[]= {NULL,
                                   "optimize", "analyze", "check", "repair",
                                   "assign_to_keycache", "preload_keys"};
1196 
1197 /*
1198   Optimize table
1199 
1200   SYNOPSIS
1201     optimize()
1202     thd               Thread object
1203     check_opt         Check/analyze/repair/optimize options
1204 
1205   RETURN VALUES
1206     >0                Error
1207     0                 Success
1208 */
1209 
optimize(THD * thd,HA_CHECK_OPT * check_opt)1210 int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt)
1211 {
1212   DBUG_ENTER("ha_partition::optimize");
1213 
1214   DBUG_RETURN(handle_opt_partitions(thd, check_opt, OPTIMIZE_PARTS));
1215 }
1216 
1217 
1218 /*
1219   Analyze table
1220 
1221   SYNOPSIS
1222     analyze()
1223     thd               Thread object
1224     check_opt         Check/analyze/repair/optimize options
1225 
1226   RETURN VALUES
1227     >0                Error
1228     0                 Success
1229 */
1230 
analyze(THD * thd,HA_CHECK_OPT * check_opt)1231 int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt)
1232 {
1233   DBUG_ENTER("ha_partition::analyze");
1234 
1235   int result= handle_opt_partitions(thd, check_opt, ANALYZE_PARTS);
1236 
1237   if ((result == 0) && m_file[0]
1238       && (m_file[0]->ha_table_flags() & HA_ONLINE_ANALYZE))
1239   {
1240     /* If this is ANALYZE TABLE that will not force table definition cache
1241        eviction, update statistics for the partition handler. */
1242     this->info(HA_STATUS_CONST | HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
1243   }
1244 
1245   DBUG_RETURN(result);
1246 }
1247 
1248 
1249 /*
1250   Check table
1251 
1252   SYNOPSIS
1253     check()
1254     thd               Thread object
1255     check_opt         Check/analyze/repair/optimize options
1256 
1257   RETURN VALUES
1258     >0                Error
1259     0                 Success
1260 */
1261 
check(THD * thd,HA_CHECK_OPT * check_opt)1262 int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt)
1263 {
1264   DBUG_ENTER("ha_partition::check");
1265 
1266   DBUG_RETURN(handle_opt_partitions(thd, check_opt, CHECK_PARTS));
1267 }
1268 
1269 
1270 /*
1271   Repair table
1272 
1273   SYNOPSIS
1274     repair()
1275     thd               Thread object
1276     check_opt         Check/analyze/repair/optimize options
1277 
1278   RETURN VALUES
1279     >0                Error
1280     0                 Success
1281 */
1282 
repair(THD * thd,HA_CHECK_OPT * check_opt)1283 int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt)
1284 {
1285   DBUG_ENTER("ha_partition::repair");
1286 
1287   int res= handle_opt_partitions(thd, check_opt, REPAIR_PARTS);
1288   DBUG_RETURN(res);
1289 }
1290 
1291 /**
1292   Assign to keycache
1293 
1294   @param thd          Thread object
1295   @param check_opt    Check/analyze/repair/optimize options
1296 
1297   @return
1298     @retval >0        Error
1299     @retval 0         Success
1300 */
1301 
assign_to_keycache(THD * thd,HA_CHECK_OPT * check_opt)1302 int ha_partition::assign_to_keycache(THD *thd, HA_CHECK_OPT *check_opt)
1303 {
1304   DBUG_ENTER("ha_partition::assign_to_keycache");
1305 
1306   DBUG_RETURN(handle_opt_partitions(thd, check_opt, ASSIGN_KEYCACHE_PARTS));
1307 }
1308 
1309 
1310 /**
1311   Preload to keycache
1312 
1313   @param thd          Thread object
1314   @param check_opt    Check/analyze/repair/optimize options
1315 
1316   @return
1317     @retval >0        Error
1318     @retval 0         Success
1319 */
1320 
preload_keys(THD * thd,HA_CHECK_OPT * check_opt)1321 int ha_partition::preload_keys(THD *thd, HA_CHECK_OPT *check_opt)
1322 {
1323   DBUG_ENTER("ha_partition::preload_keys");
1324 
1325   DBUG_RETURN(handle_opt_partitions(thd, check_opt, PRELOAD_KEYS_PARTS));
1326 }
1327 
1328 
1329 /*
1330   Handle optimize/analyze/check/repair of one partition
1331 
1332   SYNOPSIS
1333     handle_opt_part()
1334     thd                      Thread object
1335     check_opt                Options
1336     file                     Handler object of partition
1337     flag                     Optimize/Analyze/Check/Repair flag
1338 
1339   RETURN VALUE
1340     >0                        Failure
1341     0                         Success
1342 */
1343 
handle_opt_part(THD * thd,HA_CHECK_OPT * check_opt,uint part_id,uint flag)1344 int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt,
1345                                   uint part_id, uint flag)
1346 {
1347   int error;
1348   handler *file= m_file[part_id];
1349   DBUG_ENTER("handle_opt_part");
1350   DBUG_PRINT("enter", ("flag: %u", flag));
1351 
1352   if (flag == OPTIMIZE_PARTS)
1353     error= file->ha_optimize(thd, check_opt);
1354   else if (flag == ANALYZE_PARTS)
1355     error= file->ha_analyze(thd, check_opt);
1356   else if (flag == CHECK_PARTS)
1357   {
1358     error= file->ha_check(thd, check_opt);
1359     if (!error ||
1360         error == HA_ADMIN_ALREADY_DONE ||
1361         error == HA_ADMIN_NOT_IMPLEMENTED)
1362     {
1363       if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1364         error= check_misplaced_rows(part_id, false);
1365     }
1366   }
1367   else if (flag == REPAIR_PARTS)
1368   {
1369     error= file->ha_repair(thd, check_opt);
1370     if (!error ||
1371         error == HA_ADMIN_ALREADY_DONE ||
1372         error == HA_ADMIN_NOT_IMPLEMENTED)
1373     {
1374       if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1375         error= check_misplaced_rows(part_id, true);
1376     }
1377   }
1378   else if (flag == ASSIGN_KEYCACHE_PARTS)
1379     error= file->assign_to_keycache(thd, check_opt);
1380   else if (flag == PRELOAD_KEYS_PARTS)
1381     error= file->preload_keys(thd, check_opt);
1382   else
1383   {
1384     DBUG_ASSERT(FALSE);
1385     error= 1;
1386   }
1387   if (error == HA_ADMIN_ALREADY_DONE)
1388     error= 0;
1389   DBUG_RETURN(error);
1390 }
1391 
1392 
1393 /*
1394    print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE
1395    (modelled after mi_check_print_msg)
1396    TODO: move this into the handler, or rewrite mysql_admin_table.
1397 */
bool print_admin_msg(THD* thd, uint len,
                            const char* msg_type,
                            const char* db_name, String &table_name,
                            const char* op_name, const char *fmt, ...)
  ATTRIBUTE_FORMAT(printf, 7, 8);
/*
  Format a message and send it to the client as one row of the
  ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE result set ("db.table", op, type, text).
  len is the size of the temporary message buffer.
  Returns true on failure (allocation, truncation or network write error),
  false on success.  If there is no client connection the message goes to
  the error log instead.
*/
bool print_admin_msg(THD* thd, uint len,
                            const char* msg_type,
                            const char* db_name, String &table_name,
                            const char* op_name, const char *fmt, ...)
{
  va_list args;
  Protocol *protocol= thd->protocol;
  size_t length;
  size_t msg_length;
  char name[NAME_LEN*2+2];
  char *msgbuf;
  bool error= true;

  if (!(msgbuf= (char*) my_malloc(key_memory_Partition_admin, len, MYF(0))))
    return true;
  va_start(args, fmt);
  msg_length= my_vsnprintf(msgbuf, len, fmt, args);
  va_end(args);
  /* Treat a truncated message as failure rather than sending partial text */
  if (msg_length >= (len - 1))
    goto err;
  msgbuf[len - 1]= 0; // healthy paranoia


  /* No client connection (e.g. called from a background context):
     log to the server error log instead */
  if (!thd->vio_ok())
  {
    sql_print_error("%s", msgbuf);
    goto err;
  }

  length=(size_t)(strxmov(name, db_name, ".", table_name.c_ptr_safe(), NullS) - name);
  /*
     TODO: switch from protocol to push_warning here. The main reason we didn't
     it yet is parallel repair, which threads have no THD object accessible via
     current_thd.

     Also we likely need to lock mutex here (in both cases with protocol and
     push_warning).
  */
  DBUG_PRINT("info",("print_admin_msg:  %s, %s, %s, %s", name, op_name,
                     msg_type, msgbuf));
  /* Column order must match the admin-statement result set:
     Table, Op, Msg_type, Msg_text */
  protocol->prepare_for_resend();
  protocol->store(name, length, system_charset_info);
  protocol->store(op_name, system_charset_info);
  protocol->store(msg_type, system_charset_info);
  protocol->store(msgbuf, msg_length, system_charset_info);
  if (protocol->write())
  {
    sql_print_error("Failed on my_net_write, writing to stderr instead: %s",
                    msgbuf);
    goto err;
  }
  error= false;
err:
  my_free(msgbuf);
  return error;
}
1459 
1460 
1461 /*
1462   Handle optimize/analyze/check/repair of partitions
1463 
1464   SYNOPSIS
1465     handle_opt_partitions()
1466     thd                      Thread object
1467     check_opt                Options
1468     flag                     Optimize/Analyze/Check/Repair flag
1469 
1470   RETURN VALUE
1471     >0                        Failure
1472     0                         Success
1473 */
1474 
int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
                                        uint flag)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  int error;
  DBUG_ENTER("ha_partition::handle_opt_partitions");
  DBUG_PRINT("enter", ("flag= %u", flag));

  do
  {
    partition_element *part_elem= part_it++;
    /*
      when ALTER TABLE <CMD> PARTITION ...
      it should only do named partitions, otherwise all partitions
    */
    if (!(thd->lex->alter_info.partition_flags & ALTER_PARTITION_ADMIN) ||
        part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          /* Flat partition index of subpartition j of partition i */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("Optimize subpartition %u (%s)",
                     part, sub_elem->partition_name));
          if (unlikely((error= handle_opt_part(thd, check_opt, part, flag))))
          {
            /* print a line which partition the error belongs to */
            if (error != HA_ADMIN_NOT_IMPLEMENTED &&
                error != HA_ADMIN_ALREADY_DONE &&
                error != HA_ADMIN_TRY_ALTER &&
                error != HA_ERR_TABLE_READONLY)
            {
	      print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
                              table_share->db.str, table->alias,
                              opt_op_name[flag],
                              "Subpartition %s returned error",
                              sub_elem->partition_name);
            }
            /* reset part_state for the remaining partitions */
            /* (drain the iterator so no partition is left in PART_ADMIN) */
            do
            {
              if (part_elem->part_state == PART_ADMIN)
                part_elem->part_state= PART_NORMAL;
            } while ((part_elem= part_it++));
            DBUG_RETURN(error);
          }
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("Optimize partition %u (%s)", i,
                            part_elem->partition_name));
        if (unlikely((error= handle_opt_part(thd, check_opt, i, flag))))
        {
          /* print a line which partition the error belongs to */
          if (error != HA_ADMIN_NOT_IMPLEMENTED &&
              error != HA_ADMIN_ALREADY_DONE &&
              error != HA_ADMIN_TRY_ALTER)
          {
	    print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[flag], "Partition %s returned error",
                            part_elem->partition_name);
          }
          /* reset part_state for the remaining partitions */
          /* (drain the iterator so no partition is left in PART_ADMIN) */
          do
          {
            if (part_elem->part_state == PART_ADMIN)
              part_elem->part_state= PART_NORMAL;
          } while ((part_elem= part_it++));
          DBUG_RETURN(error);
        }
      }
      /* This partition finished successfully; clear its admin mark */
      part_elem->part_state= PART_NORMAL;
    }
  } while (++i < num_parts);
  DBUG_RETURN(FALSE);
}
1561 
1562 
1563 /**
1564   @brief Check and repair the table if necessary
1565 
1566   @param thd    Thread object
1567 
1568   @retval TRUE  Error/Not supported
1569   @retval FALSE Success
1570 
1571   @note Called if open_table_from_share fails and ::is_crashed().
1572 */
1573 
check_and_repair(THD * thd)1574 bool ha_partition::check_and_repair(THD *thd)
1575 {
1576   handler **file= m_file;
1577   DBUG_ENTER("ha_partition::check_and_repair");
1578 
1579   do
1580   {
1581     if ((*file)->ha_check_and_repair(thd))
1582       DBUG_RETURN(TRUE);
1583   } while (*(++file));
1584   DBUG_RETURN(FALSE);
1585 }
1586 
1587 
1588 /**
  @brief Check if the table can be automatically repaired
1590 
1591   @retval TRUE  Can be auto repaired
1592   @retval FALSE Cannot be auto repaired
1593 */
1594 
auto_repair(int error) const1595 bool ha_partition::auto_repair(int error) const
1596 {
1597   DBUG_ENTER("ha_partition::auto_repair");
1598 
1599   /*
1600     As long as we only support one storage engine per table,
1601     we can use the first partition for this function.
1602   */
1603   DBUG_RETURN(m_file[0]->auto_repair(error));
1604 }
1605 
1606 
1607 /**
  @brief Check if the table is crashed
1609 
1610   @retval TRUE  Crashed
1611   @retval FALSE Not crashed
1612 */
1613 
is_crashed() const1614 bool ha_partition::is_crashed() const
1615 {
1616   handler **file= m_file;
1617   DBUG_ENTER("ha_partition::is_crashed");
1618 
1619   do
1620   {
1621     if ((*file)->is_crashed())
1622       DBUG_RETURN(TRUE);
1623   } while (*(++file));
1624   DBUG_RETURN(FALSE);
1625 }
1626 
1627 
1628 /*
1629   Prepare by creating a new partition
1630 
1631   SYNOPSIS
1632     prepare_new_partition()
1633     table                      Table object
1634     create_info                Create info from CREATE TABLE
1635     file                       Handler object of new partition
1636     part_name                  partition name
1637 
1638   RETURN VALUE
1639     >0                         Error
1640     0                          Success
1641 */
1642 
int ha_partition::prepare_new_partition(TABLE *tbl,
                                        HA_CREATE_INFO *create_info,
                                        handler *file, const char *part_name,
                                        partition_element *p_elem,
                                        uint disable_non_uniq_indexes)
{
  int error;
  DBUG_ENTER("prepare_new_partition");

  /*
    This call to set_up_table_before_create() is done for an alter table.
    So this may be the second time around for this partition_element,
    depending on how many partitions and subpartitions there were before,
    and how many there are now.
    The first time, on the CREATE, data_file_name and index_file_name
    came from the parser.  They did not have the file name attached to
    the end.  But if this partition is less than the total number of
    previous partitions, it's data_file_name has the filename attached.
    So we need to take the partition filename off if it exists.
    That file name may be different from part_name, which will be
    attached in append_file_to_dir().
  */
  truncate_partition_filename((char*) p_elem->data_file_name);
  truncate_partition_filename((char*) p_elem->index_file_name);

  if (unlikely((error= set_up_table_before_create(tbl, part_name, create_info,
                                                  p_elem))))
    goto error_create;

  /* Pass the per-partition connect string to engines that cannot read it
     from the partition definition themselves */
  if (!(file->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
    tbl->s->connect_string= p_elem->connect_string;
  create_info->options|= HA_CREATE_TMP_ALTER;
  if ((error= file->ha_create(part_name, tbl, create_info)))
  {
    /*
      Added for safety, InnoDB reports HA_ERR_FOUND_DUPP_KEY
      if the table/partition already exists.
      If we return that error code, then print_error would try to
      get_dup_key on a non-existing partition.
      So return a more reasonable error code.
    */
    if (error == HA_ERR_FOUND_DUPP_KEY)
      error= HA_ERR_TABLE_EXIST;
    goto error_create;
  }
  DBUG_PRINT("info", ("partition %s created", part_name));
  if (unlikely((error= file->ha_open(tbl, part_name, m_mode,
                                     m_open_test_lock | HA_OPEN_NO_PSI_CALL |
                                     HA_OPEN_FOR_CREATE))))
    goto error_open;
  DBUG_PRINT("info", ("partition %s opened", part_name));

  /*
    Note: if you plan to add another call that may return failure,
    better to do it before external_lock() as cleanup_new_partition()
    assumes that external_lock() is last call that may fail here.
    Otherwise see description for cleanup_new_partition().
  */
  if (unlikely((error= file->ha_external_lock(ha_thd(), F_WRLCK))))
    goto error_external_lock;
  DBUG_PRINT("info", ("partition %s external locked", part_name));

  /* Speed up bulk copy into the new partition by deferring non-unique
     index maintenance */
  if (disable_non_uniq_indexes)
    file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);

  DBUG_RETURN(0);
  /* Error unwind: each label undoes exactly the steps completed above it */
error_external_lock:
  (void) file->ha_close();
error_open:
  (void) file->delete_table(part_name);
error_create:
  DBUG_RETURN(error);
}
1716 
1717 
1718 /*
1719   Cleanup by removing all created partitions after error
1720 
1721   SYNOPSIS
1722     cleanup_new_partition()
1723     part_count             Number of partitions to remove
1724 
1725   RETURN VALUE
1726     NONE
1727 
1728   DESCRIPTION
1729     This function is called immediately after prepare_new_partition() in
1730     case the latter fails.
1731 
1732     In prepare_new_partition() last call that may return failure is
1733     external_lock(). That means if prepare_new_partition() fails,
1734     partition does not have external lock. Thus no need to call
1735     external_lock(F_UNLCK) here.
1736 
1737   TODO:
1738     We must ensure that in the case that we get an error during the process
1739     that we call external_lock with F_UNLCK, close the table and delete the
1740     table in the case where we have been successful with prepare_handler.
1741     We solve this by keeping an array of successful calls to prepare_handler
1742     which can then be used to undo the call.
1743 */
1744 
cleanup_new_partition(uint part_count)1745 void ha_partition::cleanup_new_partition(uint part_count)
1746 {
1747   DBUG_ENTER("ha_partition::cleanup_new_partition");
1748 
1749   if (m_added_file)
1750   {
1751     THD *thd= ha_thd();
1752     handler **file= m_added_file;
1753     while ((part_count > 0) && (*file))
1754     {
1755       (*file)->ha_external_unlock(thd);
1756       (*file)->ha_close();
1757 
1758       /* Leave the (*file)->delete_table(part_name) to the ddl-log */
1759 
1760       file++;
1761       part_count--;
1762     }
1763     m_added_file= NULL;
1764   }
1765   DBUG_VOID_RETURN;
1766 }
1767 
1768 /*
1769   Implement the partition changes defined by ALTER TABLE of partitions
1770 
1771   SYNOPSIS
1772     change_partitions()
1773     create_info                 HA_CREATE_INFO object describing all
1774                                 fields and indexes in table
1775     path                        Complete path of db and table name
1776     out: copied                 Output parameter where number of copied
1777                                 records are added
1778     out: deleted                Output parameter where number of deleted
1779                                 records are added
1780     pack_frm_data               Reference to packed frm file
1781     pack_frm_len                Length of packed frm file
1782 
1783   RETURN VALUE
1784     >0                        Failure
1785     0                         Success
1786 
1787   DESCRIPTION
1788     Add and copy if needed a number of partitions, during this operation
1789     no other operation is ongoing in the server. This is used by
1790     ADD PARTITION all types as well as by REORGANIZE PARTITION. For
1791     one-phased implementations it is used also by DROP and COALESCE
1792     PARTITIONs.
1793     One-phased implementation needs the new frm file, other handlers will
1794     get zero length and a NULL reference here.
1795 */
1796 
int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
                                    const char *path,
                                    ulonglong * const copied,
                                    ulonglong * const deleted,
                                    const uchar *pack_frm_data
                                    __attribute__((unused)),
                                    size_t pack_frm_len
                                    __attribute__((unused)))
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  List_iterator <partition_element> t_it(m_part_info->temp_partitions);
  char part_name_buff[FN_REFLEN + 1];
  uint num_parts= m_part_info->partitions.elements;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint num_remain_partitions, part_count, orig_count;
  handler **new_file_array;
  int error= 1;
  bool first;
  /* Non-empty only for REORGANIZE PARTITION: the partitions being replaced */
  uint temp_partitions= m_part_info->temp_partitions.elements;
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::change_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   part_name_buff)));
  m_reorged_parts= 0;
  /* With no subpartitioning, treat each partition as one physical part */
  if (!m_part_info->is_sub_partitioned())
    num_subparts= 1;

  /*
    Step 1:
      Calculate number of reorganised partitions and allocate space for
      their handler references.
  */
  if (temp_partitions)
  {
    m_reorged_parts= temp_partitions * num_subparts;
  }
  else
  {
    /* Count physical parts of partitions being changed or dropped */
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_CHANGED ||
          part_elem->part_state == PART_REORGED_DROPPED)
      {
        m_reorged_parts+= num_subparts;
      }
    } while (++i < num_parts);
  }
  /* +1 slot for the NULL terminator; calloc zero-fills the array */
  if (m_reorged_parts &&
      !(m_reorged_file= (handler**) thd->calloc(sizeof(handler*)*
                                                (m_reorged_parts + 1))))
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

  /*
    Step 2:
      Calculate number of partitions after change and allocate space for
      their handler references.
  */
  num_remain_partitions= 0;
  if (temp_partitions)
  {
    num_remain_partitions= num_parts * num_subparts;
  }
  else
  {
    part_it.rewind();
    i= 0;
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_NORMAL ||
          part_elem->part_state == PART_TO_BE_ADDED ||
          part_elem->part_state == PART_CHANGED)
      {
        num_remain_partitions+= num_subparts;
      }
    } while (++i < num_parts);
  }
  /*
    One allocation holds two NULL-terminated arrays: the first half is
    new_file_array (all partitions after the change), the second half is
    m_added_file (only the newly created ones).
  */
  if (!(new_file_array= ((handler**)
                         thd->calloc(sizeof(handler*)*
                                     (2*(num_remain_partitions + 1))))))
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
  m_added_file= &new_file_array[num_remain_partitions + 1];

  /*
    Step 3:
      Fill m_reorged_file with handler references and NULL at the end
  */
  if (m_reorged_parts)
  {
    i= 0;
    part_count= 0;
    first= TRUE;
    part_it.rewind();
    do
    {
      partition_element *part_elem= part_it++;
      if (part_elem->part_state == PART_CHANGED ||
          part_elem->part_state == PART_REORGED_DROPPED)
      {
        /* Copy this partition's existing handlers into m_reorged_file */
        memcpy((void*)&m_reorged_file[part_count],
               (void*)&m_file[i*num_subparts],
               sizeof(handler*)*num_subparts);
        part_count+= num_subparts;
      }
      else if (first && temp_partitions &&
               part_elem->part_state == PART_TO_BE_ADDED)
      {
        /*
          When doing an ALTER TABLE REORGANIZE PARTITION a number of
          partitions is to be reorganised into a set of new partitions.
          The reorganised partitions are in this case in the temp_partitions
          list. We copy all of them in one batch and thus we only do this
          until we find the first partition with state PART_TO_BE_ADDED
          since this is where the new partitions go in and where the old
          ones used to be.
        */
        first= FALSE;
        DBUG_ASSERT(((i*num_subparts) + m_reorged_parts) <= m_file_tot_parts);
        memcpy((void*)m_reorged_file, &m_file[i*num_subparts],
               sizeof(handler*)*m_reorged_parts);
      }
    } while (++i < num_parts);
  }

  /*
    Step 4:
      Fill new_array_file with handler references. Create the handlers if
      needed.
  */
  i= 0;
  part_count= 0;           /* Index into new_file_array */
  orig_count= 0;           /* Index into m_file (original handlers) */
  first= TRUE;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_NORMAL)
    {
      /* Unchanged partition: reuse its existing handlers */
      DBUG_ASSERT(orig_count + num_subparts <= m_file_tot_parts);
      memcpy((void*)&new_file_array[part_count], (void*)&m_file[orig_count],
             sizeof(handler*)*num_subparts);
      part_count+= num_subparts;
      orig_count+= num_subparts;
    }
    else if (part_elem->part_state == PART_CHANGED ||
             part_elem->part_state == PART_TO_BE_ADDED)
    {
      uint j= 0;
      Parts_share_refs *p_share_refs;
      /*
        The Handler_shares for each partition's handler can be allocated
        within this handler, since there will not be any more instances of the
        new partitions, until the table is reopened after the ALTER succeeded.
      */
      p_share_refs= new Parts_share_refs;
      if (!p_share_refs)
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      if (p_share_refs->init(num_subparts))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      if (m_new_partitions_share_refs.push_back(p_share_refs, thd->mem_root))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
      /* Instantiate a fresh handler per (sub)partition of this element */
      do
      {
        handler **new_file= &new_file_array[part_count++];
        if (!(*new_file=
              get_new_handler(table->s,
                              thd->mem_root,
                              part_elem->engine_type)))
        {
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        }
        if ((*new_file)->set_ha_share_ref(&p_share_refs->ha_shares[j]))
        {
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        }
      } while (++j < num_subparts);
      /* Advance orig_count past the original handlers this element replaces */
      if (part_elem->part_state == PART_CHANGED)
        orig_count+= num_subparts;
      else if (temp_partitions && first)
      {
        /* REORGANIZE: skip over all reorganised originals in one step */
        orig_count+= (num_subparts * temp_partitions);
        first= FALSE;
      }
    }
  } while (++i < num_parts);
  first= FALSE;
  /*
    Step 5:
      Create the new partitions and also open, lock and call external_lock
      on them to prepare them for copy phase and also for later close
      calls
  */

  /*
     Before creating new partitions check whether indexes are disabled
     in the  partitions.
  */

  uint disable_non_uniq_indexes= indexes_are_disabled();

  i= 0;
  part_count= 0;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_ADDED ||
        part_elem->part_state == PART_CHANGED)
    {
      /*
        A new partition needs to be created PART_TO_BE_ADDED means an
        entirely new partition and PART_CHANGED means a changed partition
        that will still exist with either more or less data in it.
      */
      uint name_variant= NORMAL_PART_NAME;
      /*
        Changed partitions, and added ones during REORGANIZE, are first
        created under a temporary name and renamed when the ALTER commits.
      */
      if (part_elem->part_state == PART_CHANGED ||
          (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions))
        name_variant= TEMP_PART_NAME;
      if (m_part_info->is_sub_partitioned())
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint j= 0, part;
        do
        {
          partition_element *sub_elem= sub_it++;
          if (unlikely((error=
                        create_subpartition_name(part_name_buff,
                                                 sizeof(part_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 name_variant))))
          {
            /* Unwind everything prepared so far before returning */
            cleanup_new_partition(part_count);
            DBUG_RETURN(error);
          }
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("Add subpartition %s", part_name_buff));
          if (unlikely((error=
                        prepare_new_partition(table, create_info,
                                              new_file_array[part],
                                              (const char *)part_name_buff,
                                              sub_elem,
                                              disable_non_uniq_indexes))))
          {
            cleanup_new_partition(part_count);
            DBUG_RETURN(error);
          }

          /* Record success so cleanup_new_partition() can undo it */
          m_added_file[part_count++]= new_file_array[part];
        } while (++j < num_subparts);
      }
      else
      {
        if (unlikely((error=
                      create_partition_name(part_name_buff,
                                            sizeof(part_name_buff), path,
                                            part_elem->partition_name,
                                            name_variant, TRUE))))
        {
          cleanup_new_partition(part_count);
          DBUG_RETURN(error);
        }

        DBUG_PRINT("info", ("Add partition %s", part_name_buff));
        if (unlikely((error=
                      prepare_new_partition(table, create_info,
                                            new_file_array[i],
                                            (const char *)part_name_buff,
                                            part_elem,
                                            disable_non_uniq_indexes))))
        {
          cleanup_new_partition(part_count);
          DBUG_RETURN(error);
        }

        m_added_file[part_count++]= new_file_array[i];
      }
    }
  } while (++i < num_parts);

  /*
    Step 6:
      State update to prepare for next write of the frm file.
  */
  i= 0;
  part_it.rewind();
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_ADDED)
      part_elem->part_state= PART_IS_ADDED;
    else if (part_elem->part_state == PART_CHANGED)
      part_elem->part_state= PART_IS_CHANGED;
    else if (part_elem->part_state == PART_REORGED_DROPPED)
      part_elem->part_state= PART_TO_BE_DROPPED;
  } while (++i < num_parts);
  /* Reorganised originals (temp_partitions list) will be dropped later */
  for (i= 0; i < temp_partitions; i++)
  {
    partition_element *part_elem= t_it++;
    DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED);
    part_elem->part_state= PART_TO_BE_DROPPED;
  }
  DBUG_ASSERT(m_new_file == 0);
  m_new_file= new_file_array;
  if (unlikely((error= copy_partitions(copied, deleted))))
  {
    /*
      Close and unlock the new temporary partitions.
      They will later be deleted through the ddl-log.
    */
    cleanup_new_partition(part_count);
    m_new_file= 0;
  }
  DBUG_RETURN(error);
}
2124 
2125 
2126 /*
2127   Copy partitions as part of ALTER TABLE of partitions
2128 
2129   SYNOPSIS
2130     copy_partitions()
2131     out:copied                 Number of records copied
2132     out:deleted                Number of records deleted
2133 
2134   RETURN VALUE
2135     >0                         Error code
2136     0                          Success
2137 
2138   DESCRIPTION
2139     change_partitions has done all the preparations, now it is time to
2140     actually copy the data from the reorganised partitions to the new
2141     partitions.
2142 */
2143 
int ha_partition::copy_partitions(ulonglong * const copied,
                                  ulonglong * const deleted)
{
  uint reorg_part= 0;
  int result= 0;
  longlong func_value;
  DBUG_ENTER("ha_partition::copy_partitions");

  /*
    Recompute partitioning bookkeeping for the NEW layout before routing
    rows: the linear hash mask depends on the (changed) number of parts,
    and VERSIONING partitioning re-derives its constants.
  */
  if (m_part_info->linear_hash_ind)
  {
    if (m_part_info->part_type == HASH_PARTITION)
      set_linear_hash_mask(m_part_info, m_part_info->num_parts);
    else
      set_linear_hash_mask(m_part_info, m_part_info->num_subparts);
  }
  else if (m_part_info->part_type == VERSIONING_PARTITION)
  {
    if (m_part_info->check_constants(ha_thd(), m_part_info))
      goto init_error;
  }

  /* Full scan of each reorganised partition, re-inserting every row */
  while (reorg_part < m_reorged_parts)
  {
    handler *file= m_reorged_file[reorg_part];
    uint32 new_part;

    late_extra_cache(reorg_part);
    if (unlikely((result= file->ha_rnd_init_with_error(1))))
      goto init_error;
    while (TRUE)
    {
      if ((result= file->ha_rnd_next(m_rec0)))
      {
        if (result != HA_ERR_END_OF_FILE)
          goto error;
        /*
          End-of-file reached, break out to continue with next partition or
          end the copy process.
        */
        break;
      }
      /* Found record to insert into new handler */
      if (m_part_info->get_partition_id(m_part_info, &new_part,
                                        &func_value))
      {
        /*
           This record is in the original table but will not be in the new
           table since it doesn't fit into any partition any longer due to
           changed partitioning ranges or list values.
        */
        (*deleted)++;
      }
      else
      {
        /* Copy record to new handler */
        (*copied)++;
        /* Row events are logged by ha_partition itself, not the new parts */
        DBUG_ASSERT(!m_new_file[new_part]->row_logging);
        result= m_new_file[new_part]->ha_write_row(m_rec0);
        if (result)
          goto error;
      }
    }
    late_extra_no_cache(reorg_part);
    file->ha_rnd_end();
    reorg_part++;
  }
  DBUG_EXECUTE_IF("debug_abort_copy_partitions",
                  DBUG_RETURN(HA_ERR_UNSUPPORTED); );
  DBUG_RETURN(FALSE);
error:
  /* Scan was initialized for the current partition; end it before leaving */
  m_reorged_file[reorg_part]->ha_rnd_end();
init_error:
  DBUG_RETURN(result);
}
2218 
2219 /*
2220   Update create info as part of ALTER TABLE
2221 
2222   SYNOPSIS
2223     update_create_info()
2224     create_info                   Create info from ALTER TABLE
2225 
2226   RETURN VALUE
2227     NONE
2228 
2229   DESCRIPTION
2230   Forward this handler call to the storage engine foreach
2231   partition handler.  The data_file_name for each partition may
2232   need to be reset if the tablespace was moved.  Use a dummy
2233   HA_CREATE_INFO structure and transfer necessary data.
2234 */
2235 
void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_partition::update_create_info");

  /*
    Fix for bug#38751, some engines needs info-calls in ALTER.
    Archive need this since it flushes in ::info.
    HA_STATUS_AUTO is optimized so it will not always be forwarded
    to all partitions, but HA_STATUS_VARIABLE will.
  */
  info(HA_STATUS_VARIABLE | HA_STATUS_OPEN);

  info(HA_STATUS_AUTO);

  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
    create_info->auto_increment_value= stats.auto_increment_value;

  /*
    DATA DIRECTORY and INDEX DIRECTORY are never applied to the whole
    partitioned table, only its parts.
  */
  /* (const char*) -1 is a sentinel set by ALTER to mark that caller */
  my_bool from_alter= (create_info->data_file_name == (const char*) -1);
  create_info->data_file_name= create_info->index_file_name= NULL;

  if (!(m_file[0]->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
    create_info->connect_string= null_clex_str;

  /*
    We do not need to update the individual partition DATA DIRECTORY settings
    since they can be changed by ALTER TABLE ... REORGANIZE PARTITIONS.
  */
  if (from_alter)
    DBUG_VOID_RETURN;

  /*
    send Handler::update_create_info() to the storage engine for each
    partition that currently has a handler object.  Using a dummy
    HA_CREATE_INFO structure to collect DATA and INDEX DIRECTORYs.
  */

  List_iterator<partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem, *sub_elem;
  uint num_subparts= m_part_info->num_subparts;
  uint num_parts= (num_subparts ? m_file_tot_parts / num_subparts :
                   m_file_tot_parts);
  HA_CREATE_INFO dummy_info;
  dummy_info.init();

  /*
    Since update_create_info() can be called from mysql_prepare_alter_table()
    when not all handlers are set up, we look for that condition first.
    If all handlers are not available, do not call update_create_info for any.
  */
  /* Pass 1: validate that every (sub)partition element and handler exists */
  uint i, j, part;
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (!part_elem)
      DBUG_VOID_RETURN;
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        if (!sub_elem)
          DBUG_VOID_RETURN;
        part= i * num_subparts + j;
        if (part >= m_file_tot_parts || !m_file[part])
          DBUG_VOID_RETURN;
      }
    }
    else
    {
      if (!m_file[i])
        DBUG_VOID_RETURN;
    }
  }
  part_it.rewind();

  /* Pass 2: collect each partition's DATA/INDEX DIRECTORY via dummy_info */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    DBUG_ASSERT(part_elem);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        DBUG_ASSERT(sub_elem);
        part= i * num_subparts + j;
        DBUG_ASSERT(part < m_file_tot_parts);
        DBUG_ASSERT(m_file[part]);
        /* Reset before each call so stale names never leak between parts */
        dummy_info.data_file_name= dummy_info.index_file_name = NULL;
        m_file[part]->update_create_info(&dummy_info);
        sub_elem->data_file_name = (char*) dummy_info.data_file_name;
        sub_elem->index_file_name = (char*) dummy_info.index_file_name;
      }
    }
    else
    {
      DBUG_ASSERT(m_file[i]);
      dummy_info.data_file_name= dummy_info.index_file_name= NULL;
      m_file[i]->update_create_info(&dummy_info);
      part_elem->data_file_name = (char*) dummy_info.data_file_name;
      part_elem->index_file_name = (char*) dummy_info.index_file_name;
    }
  }
  DBUG_VOID_RETURN;
}
2347 
2348 
2349 /**
2350   Change the internal TABLE_SHARE pointer
2351 
2352   @param table_arg    TABLE object
2353   @param share        New share to use
2354 
2355   @note Is used in error handling in delete_table.
2356   All handlers should exist (lock_partitions should not be used)
2357 */
2358 
change_table_ptr(TABLE * table_arg,TABLE_SHARE * share)2359 void ha_partition::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2360 {
2361   handler **file_array;
2362   table= table_arg;
2363   table_share= share;
2364   /*
2365     m_file can be NULL when using an old cached table in DROP TABLE, when the
2366     table just has REMOVED PARTITIONING, see Bug#42438
2367   */
2368   if (m_file)
2369   {
2370     file_array= m_file;
2371     DBUG_ASSERT(*file_array);
2372     do
2373     {
2374       (*file_array)->change_table_ptr(table_arg, share);
2375     } while (*(++file_array));
2376   }
2377 
2378   if (m_added_file && m_added_file[0])
2379   {
2380     /* if in middle of a drop/rename etc */
2381     file_array= m_added_file;
2382     do
2383     {
2384       (*file_array)->change_table_ptr(table_arg, share);
2385     } while (*(++file_array));
2386   }
2387 }
2388 
2389 
2390 /**
2391   Handle delete and rename table
2392 
2393     @param from         Full path of old table
2394     @param to           Full path of new table. May be NULL in case of delete
2395 
2396   @return Operation status
2397     @retval >0  Error
2398     @retval 0   Success
2399 
2400   @note  Common routine to handle delete_table and rename_table.
2401   The routine uses the partition handler file to get the
2402   names of the partition instances. Both these routines
2403   are called after creating the handler without table
2404   object and thus the file is needed to discover the
2405   names of the partitions and the underlying storage engines.
2406 */
2407 
uint ha_partition::del_ren_table(const char *from, const char *to)
{
  int save_error= 0;
  int error;
  char from_buff[FN_REFLEN + 1], to_buff[FN_REFLEN + 1],
       from_lc_buff[FN_REFLEN], to_lc_buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *from_path;
  const char *to_path= NULL;
  uint i;
  handler **file, **abort_file;
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::del_ren_table");

  /* Load partition names/engines from the .par file (no TABLE object here) */
  if (get_from_handler_file(from, thd->mem_root, false))
    DBUG_RETURN(my_errno ? my_errno : ENOENT);
  DBUG_ASSERT(m_file_buffer);
  DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)"));
  name_buffer_ptr= m_name_buffer_ptr;

  file= m_file;
  /* The command should be logged with IF EXISTS if using a shared table */
  if (m_file[0]->ht->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
    thd->replication_flags|= OPTION_IF_EXISTS;

  if (to == NULL)
  {
    /*
      Delete table, start by delete the .par file. If error, break, otherwise
      delete as much as possible.
    */
    if (unlikely((error= handler::delete_table(from))))
      DBUG_RETURN(error);
  }

  if (ha_check_if_updates_are_ignored(thd, partition_ht(),
                                      to ? "RENAME" : "DROP"))
    DBUG_RETURN(0);

  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  from_path= get_canonical_filename(*file, from, from_lc_buff);
  if (to != NULL)
    to_path= get_canonical_filename(*file, to, to_lc_buff);
  i= 0;
  /* Rename/delete each partition; name_buffer_ptr walks the packed names */
  do
  {
    if (unlikely((error= create_partition_name(from_buff, sizeof(from_buff),
                                               from_path, name_buffer_ptr,
                                               NORMAL_PART_NAME, FALSE))))
      goto rename_error;

    if (to != NULL)
    {                                           // Rename branch
      if (unlikely((error= create_partition_name(to_buff, sizeof(to_buff),
                                                 to_path, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto rename_error;
      error= (*file)->ha_rename_table(from_buff, to_buff);
      /* A rename failure aborts and reverts; a delete failure continues */
      if (unlikely(error))
        goto rename_error;
    }
    else                                        // delete branch
    {
      error= (*file)->delete_table(from_buff);
    }
    /* Names are NUL-separated; advance to the next partition name */
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
    if (unlikely(error))
      save_error= error;
    i++;
  } while (*(++file));
  if (to != NULL)
  {
    if (unlikely((error= handler::rename_table(from, to))))
    {
      /* Try to revert everything, ignore errors */
      (void) handler::rename_table(to, from);
      goto rename_error;
    }
  }

  /* Update .par file in the handlers that supports it */
  if ((*m_file)->ht->create_partitioning_metadata)
  {
    error= (*m_file)->ht->create_partitioning_metadata(to, from,
                                                       to == NULL ?
                                                       CHF_DELETE_FLAG :
                                                       CHF_RENAME_FLAG);
    DBUG_EXECUTE_IF("failed_create_partitioning_metadata",
                    { my_message_sql(ER_OUT_OF_RESOURCES,"Simulated crash",MYF(0));
                      error= 1;
                    });
    if (error)
    {
      if (to)
      {
        /* Revert the .par rename and the table-level rename done above */
        (void) handler::rename_table(to, from);
        (void) (*m_file)->ht->create_partitioning_metadata(from, to,
                                                           CHF_RENAME_FLAG);
        goto rename_error;
      }
      else
        save_error=error;
    }
  }
  DBUG_RETURN(save_error);

rename_error:
  /* Undo the partition renames completed before the failure point */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    /* Revert the rename, back from 'to' to the original 'from' */
    if (!create_partition_name(from_buff, sizeof(from_buff), from_path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE) &&
        !create_partition_name(to_buff, sizeof(to_buff), to_path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE))
    {
      /* Ignore error here */
      (void) (*file)->ha_rename_table(to_buff, from_buff);
    }
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  DBUG_RETURN(error);
}
2537 
count_query_cache_dependant_tables(uint8 * tables_type)2538 uint ha_partition::count_query_cache_dependant_tables(uint8 *tables_type)
2539 {
2540   DBUG_ENTER("ha_partition::count_query_cache_dependant_tables");
2541   /* Here we rely on the fact that all tables are of the same type */
2542   uint8 type= m_file[0]->table_cache_type();
2543   (*tables_type)|= type;
2544   DBUG_PRINT("enter", ("cnt: %u", (uint) m_tot_parts));
2545   /*
2546     We need save underlying tables only for HA_CACHE_TBL_ASKTRANSACT:
2547     HA_CACHE_TBL_NONTRANSACT - because all changes goes through partition table
2548     HA_CACHE_TBL_NOCACHE - because will not be cached
2549     HA_CACHE_TBL_TRANSACT - QC need to know that such type present
2550   */
2551   DBUG_RETURN(type == HA_CACHE_TBL_ASKTRANSACT ? m_tot_parts : 0);
2552 }
2553 
2554 my_bool ha_partition::
reg_query_cache_dependant_table(THD * thd,char * engine_key,uint engine_key_len,char * cache_key,uint cache_key_len,uint8 type,Query_cache * cache,Query_cache_block_table ** block_table,handler * file,uint * n)2555 reg_query_cache_dependant_table(THD *thd,
2556                                 char *engine_key, uint engine_key_len,
2557                                 char *cache_key, uint cache_key_len,
2558                                 uint8 type,
2559                                 Query_cache *cache,
2560                                 Query_cache_block_table **block_table,
2561                                 handler *file,
2562                                 uint *n)
2563 {
2564   DBUG_ENTER("ha_partition::reg_query_cache_dependant_table");
2565   qc_engine_callback engine_callback;
2566   ulonglong engine_data;
2567   /* ask undelying engine */
2568   if (!file->register_query_cache_table(thd, engine_key,
2569                                         engine_key_len,
2570                                         &engine_callback,
2571                                         &engine_data))
2572   {
2573     DBUG_PRINT("qcache", ("Handler does not allow caching for %.*s",
2574                           engine_key_len, engine_key));
2575     /*
2576       As this can change from call to call, don't reset set
2577       thd->lex->safe_to_cache_query
2578     */
2579     thd->query_cache_is_applicable= 0;        // Query can't be cached
2580     DBUG_RETURN(TRUE);
2581   }
2582   (++(*block_table))->n= ++(*n);
2583   if (!cache->insert_table(thd, cache_key_len,
2584                            cache_key, (*block_table),
2585                            (uint32) table_share->db.length,
2586                            (uint8) (cache_key_len -
2587                                     table_share->table_cache_key.length),
2588                            type,
2589                            engine_callback, engine_data,
2590                            FALSE))
2591     DBUG_RETURN(TRUE);
2592   DBUG_RETURN(FALSE);
2593 }
2594 
2595 
my_bool ha_partition::
register_query_cache_dependant_tables(THD *thd,
                                      Query_cache *cache,
                                      Query_cache_block_table **block_table,
                                      uint *n)
{
  char *engine_key_end, *query_cache_key_end;
  uint i;
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  int diff_length;
  List_iterator<partition_element> part_it(m_part_info->partitions);
  char engine_key[FN_REFLEN], query_cache_key[FN_REFLEN];
  DBUG_ENTER("ha_partition::register_query_cache_dependant_tables");

  /* see ha_partition::count_query_cache_dependant_tables */
  if (m_file[0]->table_cache_type() != HA_CACHE_TBL_ASKTRANSACT)
    DBUG_RETURN(FALSE); // nothing to register

  /* prepare static part of the key */
  memcpy(engine_key, table_share->normalized_path.str,
         table_share->normalized_path.length);
  memcpy(query_cache_key, table_share->table_cache_key.str,
         table_share->table_cache_key.length);

  /*
    Length difference between the two key prefixes; added to an engine
    key length below it yields the matching query cache key length.
    table_cache_key contains a terminating zero, hence the -1.
  */
  diff_length= ((int) table_share->table_cache_key.length -
                (int) table_share->normalized_path.length -1);

  engine_key_end= engine_key + table_share->normalized_path.length;
  query_cache_key_end= query_cache_key + table_share->table_cache_key.length -1;

  /* Append the "#P#" partition marker to both key prefixes */
  engine_key_end[0]= engine_key_end[2]= query_cache_key_end[0]=
    query_cache_key_end[2]= '#';
  query_cache_key_end[1]= engine_key_end[1]= 'P';
  engine_key_end+= 3;
  query_cache_key_end+= 3;

  i= 0;
  do
  {
    partition_element *part_elem= part_it++;
    /* Write the partition name directly after the "#P#" marker */
    char *engine_pos= strmov(engine_key_end, part_elem->partition_name);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      partition_element *sub_elem;
      uint j= 0, part;
      /* Append the "#SP#" subpartition marker after the partition name */
      engine_pos[0]= engine_pos[3]= '#';
      engine_pos[1]= 'S';
      engine_pos[2]= 'P';
      engine_pos += 4;
      do
      {
        char *end;
        uint length;
        sub_elem= subpart_it++;
        /* Linear index of this subpartition within m_file */
        part= i * num_subparts + j;
        /* we store the end \0 as part of the key */
        end= strmov(engine_pos, sub_elem->partition_name) + 1;
        length= (uint)(end - engine_key);
        /* Copy the suffix and end 0 to query cache key */
        memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end));
        if (reg_query_cache_dependant_table(thd, engine_key, length,
                                            query_cache_key,
                                            length + diff_length,
                                            m_file[part]->table_cache_type(),
                                            cache,
                                            block_table, m_file[part],
                                            n))
          DBUG_RETURN(TRUE);
      } while (++j < num_subparts);
    }
    else
    {
      char *end= engine_pos+1;                  // copy end \0
      uint length= (uint)(end - engine_key);
      /* Copy the suffix and end 0 to query cache key */
      memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end));
      if (reg_query_cache_dependant_table(thd, engine_key, length,
                                          query_cache_key,
                                          length + diff_length,
                                          m_file[i]->table_cache_type(),
                                          cache,
                                          block_table, m_file[i],
                                          n))
        DBUG_RETURN(TRUE);
    }
  } while (++i < num_parts);
  DBUG_PRINT("info", ("cnt: %u", (uint)m_tot_parts));
  DBUG_RETURN(FALSE);
}
2687 
2688 
2689 /**
2690   Set up table share object before calling create on underlying handler
2691 
2692   @param table             Table object
2693   @param info              Create info
2694   @param part_elem[in,out] Pointer to used partition_element, searched if NULL
2695 
2696   @return    status
2697     @retval  TRUE  Error
2698     @retval  FALSE Success
2699 
2700   @details
2701     Set up
2702     1) Comment on partition
2703     2) MAX_ROWS, MIN_ROWS on partition
2704     3) Index file name on partition
2705     4) Data file name on partition
2706 */
2707 
set_up_table_before_create(TABLE * tbl,const char * partition_name_with_path,HA_CREATE_INFO * info,partition_element * part_elem)2708 int ha_partition::set_up_table_before_create(TABLE *tbl,
2709                     const char *partition_name_with_path,
2710                     HA_CREATE_INFO *info,
2711                     partition_element *part_elem)
2712 {
2713   int error= 0;
2714   LEX_CSTRING part_name;
2715   THD *thd= ha_thd();
2716   DBUG_ENTER("set_up_table_before_create");
2717 
2718   DBUG_ASSERT(part_elem);
2719 
2720   if (!part_elem)
2721     DBUG_RETURN(1);
2722   tbl->s->max_rows= part_elem->part_max_rows;
2723   tbl->s->min_rows= part_elem->part_min_rows;
2724   part_name.str= strrchr(partition_name_with_path, FN_LIBCHAR)+1;
2725   part_name.length= strlen(part_name.str);
2726   if ((part_elem->index_file_name &&
2727       (error= append_file_to_dir(thd,
2728                                  (const char**)&part_elem->index_file_name,
2729                                  &part_name))) ||
2730       (part_elem->data_file_name &&
2731       (error= append_file_to_dir(thd,
2732                                  (const char**)&part_elem->data_file_name,
2733                                  &part_name))))
2734   {
2735     DBUG_RETURN(error);
2736   }
2737   info->index_file_name= part_elem->index_file_name;
2738   info->data_file_name= part_elem->data_file_name;
2739   info->connect_string= part_elem->connect_string;
2740   if (info->connect_string.length)
2741     info->used_fields|= HA_CREATE_USED_CONNECTION;
2742   tbl->s->connect_string= part_elem->connect_string;
2743   DBUG_RETURN(0);
2744 }
2745 
2746 
2747 /*
2748   Add two names together
2749 
2750   SYNOPSIS
2751     name_add()
2752     out:dest                          Destination string
2753     first_name                        First name
2754     sec_name                          Second name
2755 
2756   RETURN VALUE
2757     >0                                Error
2758     0                                 Success
2759 
2760   DESCRIPTION
2761     Routine used to add two names with '_' in between then. Service routine
2762     to create_handler_file
2763     Include the NULL in the count of characters since it is needed as separator
2764     between the partition names.
2765 */
2766 
name_add(char * dest,const char * first_name,const char * sec_name)2767 static uint name_add(char *dest, const char *first_name, const char *sec_name)
2768 {
2769   return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1;
2770 }
2771 
2772 
2773 /**
2774   Create the special .par file
2775 
2776   @param name  Full path of table name
2777 
2778   @return Operation status
2779     @retval FALSE  Error code
2780     @retval TRUE   Success
2781 
2782   @note
2783     Method used to create handler file with names of partitions, their
2784     engine types and the number of partitions.
2785 */
2786 
bool ha_partition::create_handler_file(const char *name)
{
  partition_element *part_elem, *subpart_elem;
  size_t i, j, part_name_len, subpart_name_len;
  size_t tot_partition_words, tot_name_len, num_parts;
  size_t tot_parts= 0;
  size_t tot_len_words, tot_len_byte, chksum, tot_name_words;
  char *name_buffer_ptr;
  uchar *file_buffer, *engine_array;
  bool result= TRUE;
  char file_name[FN_REFLEN];
  char part_name[FN_REFLEN];
  char subpart_name[FN_REFLEN];
  File file;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("create_handler_file");

  num_parts= m_part_info->partitions.elements;
  DBUG_PRINT("enter", ("table name: %s  num_parts: %zu", name, num_parts));
  /* First pass: compute total name length and number of (sub)partitions */
  tot_name_len= 0;
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    /* Skip partitions that will not exist after the ongoing operation */
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    tablename_to_filename(part_elem->partition_name, part_name,
                          FN_REFLEN);
    part_name_len= strlen(part_name);
    if (!m_is_sub_partitioned)
    {
      /* +1 for the terminating zero stored with each name */
      tot_name_len+= part_name_len + 1;
      tot_parts++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
	subpart_elem= sub_it++;
        tablename_to_filename(subpart_elem->partition_name,
                              subpart_name,
                              FN_REFLEN);
	subpart_name_len= strlen(subpart_name);
        /* +5 covers the "#SP#" separator plus the terminating zero */
	tot_name_len+= part_name_len + subpart_name_len + 5;
        tot_parts++;
      }
    }
  }
  /*
     File format:
     Length in words              4 byte
     Checksum                     4 byte
     Total number of partitions   4 byte
     Array of engine types        n * 4 bytes where
     n = (m_tot_parts + 3)/4
     Length of name part in bytes 4 bytes
     (Names in filename format)
     Name part                    m * 4 bytes where
     m = ((length_name_part + 3)/4)*4

     All padding bytes are zeroed
  */
  tot_partition_words= (tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  tot_name_words= (tot_name_len + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  /* 4 static words (tot words, checksum, tot partitions, name length) */
  tot_len_words= 4 + tot_partition_words + tot_name_words;
  tot_len_byte= PAR_WORD_SIZE * tot_len_words;
  /* MY_ZEROFILL guarantees the padding bytes described above are zero */
  if (!(file_buffer= (uchar *) my_malloc(key_memory_ha_partition_file,
                                         tot_len_byte, MYF(MY_ZEROFILL))))
    DBUG_RETURN(TRUE);
  engine_array= (file_buffer + PAR_ENGINES_OFFSET);
  name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE
                            + PAR_WORD_SIZE);
  /* Second pass: fill in engine types and partition names */
  part_it.rewind();
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    if (!m_is_sub_partitioned)
    {
      tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN);
      name_buffer_ptr= strmov(name_buffer_ptr, part_name)+1;
      *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
      DBUG_PRINT("info", ("engine: %u", *engine_array));
      engine_array++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
	subpart_elem= sub_it++;
        tablename_to_filename(part_elem->partition_name, part_name,
                              FN_REFLEN);
        tablename_to_filename(subpart_elem->partition_name, subpart_name,
                              FN_REFLEN);
	name_buffer_ptr+= name_add(name_buffer_ptr,
				   part_name,
				   subpart_name);
        *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type);
        DBUG_PRINT("info", ("engine: %u", *engine_array));
	engine_array++;
      }
    }
  }
  /* Checksum is chosen so that XOR over all words (incl. itself) is zero */
  chksum= 0;
  int4store(file_buffer, tot_len_words);
  int4store(file_buffer + PAR_NUM_PARTS_OFFSET, tot_parts);
  int4store(file_buffer + PAR_ENGINES_OFFSET +
            (tot_partition_words * PAR_WORD_SIZE),
            tot_name_len);
  for (i= 0; i < tot_len_words; i++)
    chksum^= uint4korr(file_buffer + PAR_WORD_SIZE * i);
  int4store(file_buffer + PAR_CHECKSUM_OFFSET, chksum);
  /*
    Add .par extension to the file name.
    Create and write and close file
    to be used at open, delete_table and rename_table
  */
  fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT);
  if ((file= mysql_file_create(key_file_ha_partition_par,
                               file_name, CREATE_MODE, O_RDWR | O_TRUNC,
                               MYF(MY_WME))) >= 0)
  {
    result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte,
                             MYF(MY_WME | MY_NABP)) != 0;

    /* Write connection information (for federatedx engine) */
    part_it.rewind();
    for (i= 0; i < num_parts && !result; i++)
    {
      uchar buffer[4];
      part_elem= part_it++;
      size_t length= part_elem->connect_string.length;
      /* Each connect string is stored length-prefixed (4-byte word) */
      int4store(buffer, length);
      if (my_write(file, buffer, 4, MYF(MY_WME | MY_NABP)) ||
          my_write(file, (uchar *) part_elem->connect_string.str, length,
                   MYF(MY_WME | MY_NABP)))
      {
        result= TRUE;
        break;
      }
    }
    (void) mysql_file_close(file, MYF(0));
    /* Do not leave a half-written .par file behind on error */
    if (result)
      mysql_file_delete(key_file_ha_partition_par, file_name, MYF(MY_WME));
  }
  else
    result= TRUE;
  my_free(file_buffer);
  DBUG_RETURN(result);
}
2944 
2945 
2946 /**
2947   Clear handler variables and free some memory
2948 */
2949 
clear_handler_file()2950 void ha_partition::clear_handler_file()
2951 {
2952   if (m_engine_array)
2953     plugin_unlock_list(NULL, m_engine_array, m_tot_parts);
2954   free_root(&m_mem_root, MYF(MY_KEEP_PREALLOC));
2955   m_file_buffer= NULL;
2956   m_engine_array= NULL;
2957   m_connect_string= NULL;
2958 }
2959 
2960 
2961 /**
2962   Create underlying handler objects
2963 
2964   @param mem_root  Allocate memory through this
2965 
2966   @return Operation status
2967     @retval TRUE   Error
2968     @retval FALSE  Success
2969 */
2970 
create_handlers(MEM_ROOT * mem_root)2971 bool ha_partition::create_handlers(MEM_ROOT *mem_root)
2972 {
2973   uint i;
2974   uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
2975   handlerton *hton0;
2976   DBUG_ENTER("create_handlers");
2977 
2978   if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
2979     DBUG_RETURN(TRUE);
2980   m_file_tot_parts= m_tot_parts;
2981   bzero((char*) m_file, alloc_len);
2982   for (i= 0; i < m_tot_parts; i++)
2983   {
2984     handlerton *hton= plugin_data(m_engine_array[i], handlerton*);
2985     if (!(m_file[i]= get_new_handler(table_share, mem_root, hton)))
2986       DBUG_RETURN(TRUE);
2987     DBUG_PRINT("info", ("engine_type: %u", hton->db_type));
2988   }
2989   /* For the moment we only support partition over the same table engine */
2990   hton0= plugin_data(m_engine_array[0], handlerton*);
2991   if (hton0 == myisam_hton)
2992   {
2993     DBUG_PRINT("info", ("MyISAM"));
2994     m_myisam= TRUE;
2995   }
2996   /* INNODB may not be compiled in... */
2997   else if (ha_legacy_type(hton0) == DB_TYPE_INNODB)
2998   {
2999     DBUG_PRINT("info", ("InnoDB"));
3000     m_innodb= TRUE;
3001   }
3002   DBUG_RETURN(FALSE);
3003 }
3004 
3005 
3006 /*
3007   Create underlying handler objects from partition info
3008 
3009   SYNOPSIS
3010     new_handlers_from_part_info()
3011     mem_root		Allocate memory through this
3012 
3013   RETURN VALUE
3014     TRUE                  Error
3015     FALSE                 Success
3016 */
3017 
new_handlers_from_part_info(MEM_ROOT * mem_root)3018 bool ha_partition::new_handlers_from_part_info(MEM_ROOT *mem_root)
3019 {
3020   uint i, j, part_count;
3021   partition_element *part_elem;
3022   uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
3023   List_iterator_fast <partition_element> part_it(m_part_info->partitions);
3024   DBUG_ENTER("ha_partition::new_handlers_from_part_info");
3025 
3026   if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
3027     goto error;
3028 
3029   m_file_tot_parts= m_tot_parts;
3030   bzero((char*) m_file, alloc_len);
3031   DBUG_ASSERT(m_part_info->num_parts > 0);
3032 
3033   i= 0;
3034   part_count= 0;
3035   /*
3036     Don't know the size of the underlying storage engine, invent a number of
3037     bytes allocated for error message if allocation fails
3038   */
3039   do
3040   {
3041     part_elem= part_it++;
3042     if (m_is_sub_partitioned)
3043     {
3044       for (j= 0; j < m_part_info->num_subparts; j++)
3045       {
3046 	if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
3047                                                     part_elem->engine_type)))
3048           goto error;
3049 	DBUG_PRINT("info", ("engine_type: %u",
3050                    (uint) ha_legacy_type(part_elem->engine_type)));
3051       }
3052     }
3053     else
3054     {
3055       if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
3056                                                   part_elem->engine_type)))
3057         goto error;
3058       DBUG_PRINT("info", ("engine_type: %u",
3059                  (uint) ha_legacy_type(part_elem->engine_type)));
3060     }
3061   } while (++i < m_part_info->num_parts);
3062   if (part_elem->engine_type == myisam_hton)
3063   {
3064     DBUG_PRINT("info", ("MyISAM"));
3065     m_myisam= TRUE;
3066   }
3067   DBUG_RETURN(FALSE);
3068 error:
3069   DBUG_RETURN(TRUE);
3070 }
3071 
3072 
3073 /**
3074   Read the .par file to get the partitions engines and names
3075 
3076   @param name  Name of table file (without extension)
3077 
3078   @return Operation status
3079     @retval true   Failure
3080     @retval false  Success
3081 
3082   @note On success, m_file_buffer is allocated and must be
3083   freed by the caller. m_name_buffer_ptr and m_tot_parts is also set.
3084 */
3085 
read_par_file(const char * name)3086 bool ha_partition::read_par_file(const char *name)
3087 {
3088   char buff[FN_REFLEN];
3089   uchar *tot_name_len_offset;
3090   File file;
3091   uchar *file_buffer;
3092   uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
3093   DBUG_ENTER("ha_partition::read_par_file");
3094   DBUG_PRINT("enter", ("table name: '%s'", name));
3095 
3096   if (m_file_buffer)
3097     DBUG_RETURN(false);
3098   fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT);
3099 
3100   /* Following could be done with mysql_file_stat to read in whole file */
3101   if ((file= mysql_file_open(key_file_ha_partition_par,
3102                              buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
3103     DBUG_RETURN(TRUE);
3104   if (mysql_file_read(file, (uchar *) &buff[0], PAR_WORD_SIZE, MYF(MY_NABP)))
3105     goto err1;
3106   len_words= uint4korr(buff);
3107   len_bytes= PAR_WORD_SIZE * len_words;
3108   if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
3109     goto err1;
3110   if (!(file_buffer= (uchar*) alloc_root(&m_mem_root, len_bytes)))
3111     goto err1;
3112   if (mysql_file_read(file, file_buffer, len_bytes, MYF(MY_NABP)))
3113     goto err2;
3114 
3115   chksum= 0;
3116   for (i= 0; i < len_words; i++)
3117     chksum ^= uint4korr((file_buffer) + PAR_WORD_SIZE * i);
3118   if (chksum)
3119     goto err2;
3120   m_tot_parts= uint4korr((file_buffer) + PAR_NUM_PARTS_OFFSET);
3121   DBUG_PRINT("info", ("No of parts: %u", m_tot_parts));
3122   tot_partition_words= (m_tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
3123 
3124   tot_name_len_offset= file_buffer + PAR_ENGINES_OFFSET +
3125                        PAR_WORD_SIZE * tot_partition_words;
3126   tot_name_words= (uint4korr(tot_name_len_offset) + PAR_WORD_SIZE - 1) /
3127                   PAR_WORD_SIZE;
3128   /*
3129     Verify the total length = tot size word, checksum word, num parts word +
3130     engines array + name length word + name array.
3131   */
3132   if (len_words != (tot_partition_words + tot_name_words + 4))
3133     goto err2;
3134   m_file_buffer= file_buffer;          // Will be freed in clear_handler_file()
3135   m_name_buffer_ptr= (char*) (tot_name_len_offset + PAR_WORD_SIZE);
3136 
3137   if (!(m_connect_string= (LEX_CSTRING*)
3138         alloc_root(&m_mem_root, m_tot_parts * sizeof(LEX_CSTRING))))
3139     goto err2;
3140   bzero(m_connect_string, m_tot_parts * sizeof(LEX_CSTRING));
3141 
3142   /* Read connection arguments (for federated X engine) */
3143   for (i= 0; i < m_tot_parts; i++)
3144   {
3145     LEX_CSTRING connect_string;
3146     uchar buffer[4];
3147     char *tmp;
3148     if (my_read(file, buffer, 4, MYF(MY_NABP)))
3149     {
3150       /* No extra options; Probably not a federatedx engine */
3151       break;
3152     }
3153     connect_string.length= uint4korr(buffer);
3154     connect_string.str= tmp= (char*) alloc_root(&m_mem_root,
3155                                                 connect_string.length+1);
3156     if (my_read(file, (uchar*) connect_string.str, connect_string.length,
3157                 MYF(MY_NABP)))
3158       break;
3159     tmp[connect_string.length]= 0;
3160     m_connect_string[i]= connect_string;
3161   }
3162 
3163   (void) mysql_file_close(file, MYF(0));
3164   DBUG_RETURN(false);
3165 
3166 err2:
3167 err1:
3168   (void) mysql_file_close(file, MYF(0));
3169   DBUG_RETURN(true);
3170 }
3171 
3172 
3173 /**
3174   Setup m_engine_array
3175 
3176   @param mem_root  MEM_ROOT to use for allocating new handlers
3177 
3178   @return Operation status
3179     @retval false  Success
3180     @retval true   Failure
3181 */
3182 
setup_engine_array(MEM_ROOT * mem_root)3183 bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
3184 {
3185   uint i;
3186   uchar *buff;
3187   handlerton **engine_array, *first_engine;
3188   enum legacy_db_type db_type, first_db_type;
3189 
3190   DBUG_ASSERT(!m_file);
3191   DBUG_ENTER("ha_partition::setup_engine_array");
3192   engine_array= (handlerton **) my_alloca(m_tot_parts * sizeof(handlerton*));
3193   if (!engine_array)
3194     DBUG_RETURN(true);
3195 
3196   buff= (uchar *) (m_file_buffer + PAR_ENGINES_OFFSET);
3197   first_db_type= (enum legacy_db_type) buff[0];
3198   first_engine= ha_resolve_by_legacy_type(ha_thd(), first_db_type);
3199   if (!first_engine)
3200     goto err;
3201 
3202   if (!(m_engine_array= (plugin_ref*)
3203         alloc_root(&m_mem_root, m_tot_parts * sizeof(plugin_ref))))
3204     goto err;
3205 
3206   for (i= 0; i < m_tot_parts; i++)
3207   {
3208     db_type= (enum legacy_db_type) buff[i];
3209     if (db_type != first_db_type)
3210     {
3211       DBUG_PRINT("error", ("partition %u engine %d is not same as "
3212                            "first partition %d", i, db_type,
3213                            (int) first_db_type));
3214       DBUG_ASSERT(0);
3215       clear_handler_file();
3216       goto err;
3217     }
3218     m_engine_array[i]= ha_lock_engine(NULL, first_engine);
3219     if (!m_engine_array[i])
3220     {
3221       clear_handler_file();
3222       goto err;
3223     }
3224   }
3225 
3226   my_afree(engine_array);
3227 
3228   if (create_handlers(mem_root))
3229   {
3230     clear_handler_file();
3231     DBUG_RETURN(true);
3232   }
3233 
3234   DBUG_RETURN(false);
3235 
3236 err:
3237   my_afree(engine_array);
3238   DBUG_RETURN(true);
3239 }
3240 
3241 
3242 /**
3243   Get info about partition engines and their names from the .par file
3244 
3245   @param name      Full path of table name
3246   @param mem_root  Allocate memory through this
3247   @param is_clone  If it is a clone, don't create new handlers
3248 
3249   @return Operation status
3250     @retval true   Error
3251     @retval false  Success
3252 
3253   @note Open handler file to get partition names, engine types and number of
3254   partitions.
3255 */
3256 
get_from_handler_file(const char * name,MEM_ROOT * mem_root,bool is_clone)3257 bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root,
3258                                          bool is_clone)
3259 {
3260   DBUG_ENTER("ha_partition::get_from_handler_file");
3261   DBUG_PRINT("enter", ("table name: '%s'", name));
3262 
3263   if (m_file_buffer)
3264     DBUG_RETURN(false);
3265 
3266   if (read_par_file(name))
3267     DBUG_RETURN(true);
3268 
3269   if (!is_clone && setup_engine_array(mem_root))
3270     DBUG_RETURN(true);
3271 
3272   DBUG_RETURN(false);
3273 }
3274 
3275 
3276 /****************************************************************************
3277                 MODULE open/close object
3278 ****************************************************************************/
3279 
3280 /**
3281   Get the partition name.
3282 
3283   @param       part   Struct containing name and length
3284   @param[out]  length Length of the name
3285 
3286   @return Partition name
3287 */
3288 
/* Hash callback: return the stored partition name and its cached length */
static uchar *get_part_name(PART_NAME_DEF *part, size_t *length,
                            my_bool not_used __attribute__((unused)))
{
  *length= part->length;
  return part->partition_name;
}
3295 
3296 
3297 /**
3298   Insert a partition name in the partition_name_hash.
3299 
3300   @param name        Name of partition
3301   @param part_id     Partition id (number)
3302   @param is_subpart  Set if the name belongs to a subpartition
3303 
3304   @return Operation status
3305     @retval true   Failure
3306     @retval false  Success
3307 */
3308 
insert_partition_name_in_hash(const char * name,uint part_id,bool is_subpart)3309 bool ha_partition::insert_partition_name_in_hash(const char *name, uint part_id,
3310                                                  bool is_subpart)
3311 {
3312   PART_NAME_DEF *part_def;
3313   uchar *part_name;
3314   size_t part_name_length;
3315   DBUG_ENTER("ha_partition::insert_partition_name_in_hash");
3316   /*
3317     Calculate and store the length here, to avoid doing it when
3318     searching the hash.
3319   */
3320   part_name_length= strlen(name);
3321   /*
3322     Must use memory that lives as long as table_share.
3323     Freed in the Partition_share destructor.
3324     Since we use my_multi_malloc, then my_free(part_def) will also free
3325     part_name, as a part of my_hash_free.
3326   */
3327   if (!my_multi_malloc(key_memory_Partition_share, MY_WME,
3328                        &part_def, sizeof(PART_NAME_DEF),
3329                        &part_name, part_name_length + 1,
3330                        NULL))
3331     DBUG_RETURN(true);
3332   memcpy(part_name, name, part_name_length + 1);
3333   part_def->partition_name= part_name;
3334   part_def->length= (uint)part_name_length;
3335   part_def->part_id= part_id;
3336   part_def->is_subpart= is_subpart;
3337   if (my_hash_insert(&part_share->partition_name_hash, (uchar *) part_def))
3338   {
3339     my_free(part_def);
3340     DBUG_RETURN(true);
3341   }
3342   DBUG_RETURN(false);
3343 }
3344 
3345 
3346 /**
3347   Populate the partition_name_hash in part_share.
3348 */
3349 
bool ha_partition::populate_partition_name_hash()
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_is_sub_partitioned ? m_part_info->num_subparts : 1;
  uint tot_names;
  uint i= 0;
  DBUG_ASSERT(part_share);

  DBUG_ENTER("ha_partition::populate_partition_name_hash");

  /*
    partition_name_hash is only set once and never changed
    -> OK to check without locking.
  */

  if (part_share->partition_name_hash_initialized)
    DBUG_RETURN(false);
  lock_shared_ha_data();
  /* Re-check under the lock: another thread may have initialized it */
  if (part_share->partition_name_hash_initialized)
  {
    unlock_shared_ha_data();
    DBUG_RETURN(false);
  }
  /* With subpartitions, both partition and subpartition names are stored */
  tot_names= m_is_sub_partitioned ? m_tot_parts + num_parts : num_parts;
  if (my_hash_init(key_memory_Partition_share,
                   &part_share->partition_name_hash, system_charset_info,
                   tot_names, 0, 0, (my_hash_get_key) get_part_name, my_free,
                   HASH_UNIQUE))
  {
    unlock_shared_ha_data();
    DBUG_RETURN(TRUE);
  }

  do
  {
    partition_element *part_elem= part_it++;
    DBUG_ASSERT(part_elem->part_state == PART_NORMAL);
    if (part_elem->part_state == PART_NORMAL)
    {
      /* A partition entry maps to the id of its first subpartition */
      if (insert_partition_name_in_hash(part_elem->partition_name,
                                        i * num_subparts, false))
        goto err;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element>
                                    subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0;
        do
        {
          sub_elem= subpart_it++;
          if (insert_partition_name_in_hash(sub_elem->partition_name,
                                            i * num_subparts + j, true))
            goto err;

        } while (++j < num_subparts);
      }
    }
  } while (++i < num_parts);

  part_share->partition_name_hash_initialized= true;
  unlock_shared_ha_data();

  DBUG_RETURN(FALSE);
err:
  /* Tear down the partially filled hash so a later attempt can retry */
  my_hash_free(&part_share->partition_name_hash);
  unlock_shared_ha_data();

  DBUG_RETURN(TRUE);
}
3421 
3422 
3423 /**
3424   Set Handler_share pointer and allocate Handler_share pointers
3425   for each partition and set those.
3426 
3427   @param ha_share_arg  Where to store/retrieve the Partitioning_share pointer
3428                        to be shared by all instances of the same table.
3429 
3430   @return Operation status
3431     @retval true  Failure
3432     @retval false Success
3433 */
3434 
set_ha_share_ref(Handler_share ** ha_share_arg)3435 bool ha_partition::set_ha_share_ref(Handler_share **ha_share_arg)
3436 {
3437   Handler_share **ha_shares;
3438   uint i;
3439   DBUG_ENTER("ha_partition::set_ha_share_ref");
3440 
3441   DBUG_ASSERT(!part_share);
3442   DBUG_ASSERT(table_share);
3443   DBUG_ASSERT(!m_is_clone_of);
3444   DBUG_ASSERT(m_tot_parts);
3445   if (handler::set_ha_share_ref(ha_share_arg))
3446     DBUG_RETURN(true);
3447   if (!(part_share= get_share()))
3448     DBUG_RETURN(true);
3449   DBUG_ASSERT(part_share->partitions_share_refs.num_parts >= m_tot_parts);
3450   ha_shares= part_share->partitions_share_refs.ha_shares;
3451   for (i= 0; i < m_tot_parts; i++)
3452   {
3453     if (m_file[i]->set_ha_share_ref(&ha_shares[i]))
3454       DBUG_RETURN(true);
3455   }
3456   DBUG_RETURN(false);
3457 }
3458 
3459 
3460 /**
3461   Get the PARTITION_SHARE for the table.
3462 
3463   @return Operation status
3464     @retval true   Error
3465     @retval false  Success
3466 
3467   @note Gets or initializes the Partition_share object used by partitioning.
3468   The Partition_share is used for handling the auto_increment etc.
3469 */
3470 
get_share()3471 Partition_share *ha_partition::get_share()
3472 {
3473   Partition_share *tmp_share;
3474   DBUG_ENTER("ha_partition::get_share");
3475   DBUG_ASSERT(table_share);
3476 
3477   lock_shared_ha_data();
3478   if (!(tmp_share= static_cast<Partition_share*>(get_ha_share_ptr())))
3479   {
3480     tmp_share= new Partition_share;
3481     if (!tmp_share)
3482       goto err;
3483     if (tmp_share->init(m_tot_parts))
3484     {
3485       delete tmp_share;
3486       tmp_share= NULL;
3487       goto err;
3488     }
3489     set_ha_share_ptr(static_cast<Handler_share*>(tmp_share));
3490   }
3491 err:
3492   unlock_shared_ha_data();
3493   DBUG_RETURN(tmp_share);
3494 }
3495 
3496 
3497 
/**
  Helper function for freeing all internal bitmaps.

  @note Also called for partially initialized bitmap sets (see the note on
  init_partition_bitmaps()), so each bitmap may or may not be allocated.
*/

void ha_partition::free_partition_bitmaps()
{
  /* Free the bitmap used to minimize ha_start_bulk_insert calls */
  my_bitmap_free(&m_bulk_insert_started);
  my_bitmap_free(&m_locked_partitions);
  my_bitmap_free(&m_partitions_to_reset);
  my_bitmap_free(&m_key_not_found_partitions);
  my_bitmap_free(&m_opened_partitions);
  my_bitmap_free(&m_mrr_used_partitions);
}
3512 
3513 
3514 /**
3515   Helper function for initializing all internal bitmaps.
3516 
3517   Note:
3518   All bitmaps, including partially allocated, are freed in
3519   free_partion_bitmaps()
3520 */
3521 
init_partition_bitmaps()3522 bool ha_partition::init_partition_bitmaps()
3523 {
3524   DBUG_ENTER("ha_partition::init_partition_bitmaps");
3525 
3526   /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
3527   if (my_bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
3528     DBUG_RETURN(true);
3529 
3530   /* Initialize the bitmap we use to keep track of locked partitions */
3531   if (my_bitmap_init(&m_locked_partitions, NULL, m_tot_parts, FALSE))
3532     DBUG_RETURN(true);
3533 
3534   /*
3535     Initialize the bitmap we use to keep track of partitions which may have
3536     something to reset in ha_reset().
3537   */
3538   if (my_bitmap_init(&m_partitions_to_reset, NULL, m_tot_parts, FALSE))
3539     DBUG_RETURN(true);
3540 
3541   /*
3542     Initialize the bitmap we use to keep track of partitions which returned
3543     HA_ERR_KEY_NOT_FOUND from index_read_map.
3544   */
3545   if (my_bitmap_init(&m_key_not_found_partitions, NULL, m_tot_parts, FALSE))
3546     DBUG_RETURN(true);
3547 
3548   if (bitmap_init(&m_mrr_used_partitions, NULL, m_tot_parts, TRUE))
3549     DBUG_RETURN(true);
3550 
3551   if (my_bitmap_init(&m_opened_partitions, NULL, m_tot_parts, FALSE))
3552     DBUG_RETURN(true);
3553 
3554   m_file_sample= NULL;
3555 
3556   /* Initialize the bitmap for read/lock_partitions */
3557   if (!m_is_clone_of)
3558   {
3559     DBUG_ASSERT(!m_clone_mem_root);
3560     if (m_part_info->set_partition_bitmaps(NULL))
3561       DBUG_RETURN(true);
3562   }
3563   DBUG_RETURN(false);
3564 }
3565 
3566 
3567 /*
3568   Open handler object
3569 SYNOPSIS
3570     open()
3571     name                  Full path of table name
3572     mode                  Open mode flags
3573     test_if_locked        ?
3574 
3575   RETURN VALUE
3576     >0                    Error
3577     0                     Success
3578 
3579   DESCRIPTION
3580     Used for opening tables. The name will be the name of the file.
3581     A table is opened when it needs to be opened. For instance
3582     when a request comes in for a select on the table (tables are not
3583     open and closed for each request, they are cached).
3584 
3585     Called from handler.cc by handler::ha_open(). The server opens all tables
3586     by calling ha_open() which then calls the handler specific open().
3587 */
3588 
int ha_partition::open(const char *name, int mode, uint test_if_locked)
{
  int error= HA_ERR_INITIALIZATION;
  handler **file;
  char name_buff[FN_REFLEN + 1];
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::open");

  DBUG_ASSERT(table->s == table_share);
  ref_length= 0;
  m_mode= mode;
  m_open_test_lock= test_if_locked;
  m_part_field_array= m_part_info->full_part_field_array;
  /* Read partition metadata (.par file) or reuse it when this is a clone. */
  if (get_from_handler_file(name, &table->mem_root, MY_TEST(m_is_clone_of)))
    DBUG_RETURN(error);
  if (populate_partition_name_hash())
  {
    DBUG_RETURN(HA_ERR_INITIALIZATION);
  }
  m_start_key.length= 0;
  m_rec0= table->record[0];
  m_rec_length= table_share->reclength;
  if (!m_part_ids_sorted_by_num_of_records)
  {
    if (!(m_part_ids_sorted_by_num_of_records=
            (uint32*) my_malloc(key_memory_ha_partition_part_ids,
                                m_tot_parts * sizeof(uint32), MYF(MY_WME))))
      DBUG_RETURN(error);
    uint32 i;
    /* Initialize it with all partition ids. */
    for (i= 0; i < m_tot_parts; i++)
      m_part_ids_sorted_by_num_of_records[i]= i;
  }

  if (init_partition_bitmaps())
    goto err_alloc;

  /* Clones inherit the originals' bitmaps; only non-clones prune here. */
  if (!MY_TEST(m_is_clone_of) &&
      unlikely((error=
                m_part_info->set_partition_bitmaps(m_partitions_to_open))))
    goto err_alloc;

  /* Allocate memory used with MMR */
  if (!(m_range_info= (void **)
        my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
                        &m_range_info, sizeof(range_id_t) * m_tot_parts,
                        &m_stock_range_seq, sizeof(uint) * m_tot_parts,
                        &m_mrr_buffer, sizeof(HANDLER_BUFFER) * m_tot_parts,
                        &m_mrr_buffer_size, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_length, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_first,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_part_mrr_range_current,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_partition_part_key_multi_range_hld,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE_HLD) * m_tot_parts,
                        NullS)))
    goto err_alloc;

  bzero(m_mrr_buffer, m_tot_parts * sizeof(HANDLER_BUFFER));
  bzero(m_part_mrr_range_first,
        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts);

  if (m_is_clone_of)
  {
    uint i, alloc_len;
    char *name_buffer_ptr;
    DBUG_ASSERT(m_clone_mem_root);
    /* Allocate an array of handler pointers for the partitions handlers. */
    alloc_len= (m_tot_parts + 1) * sizeof(handler*);
    if (!(m_file= (handler **) alloc_root(m_clone_mem_root, alloc_len)))
    {
      error= HA_ERR_INITIALIZATION;
      goto err_alloc;
    }
    memset(m_file, 0, alloc_len);
    name_buffer_ptr= m_name_buffer_ptr;
    /*
      Populate them by cloning the original partitions. This also opens them.
      Note that file->ref is allocated too.
    */
    file= m_is_clone_of->m_file;
    for (i= 0; i < m_tot_parts; i++)
    {
      /* Only clone partitions that the original actually has open. */
      if (!bitmap_is_set(&m_is_clone_of->m_opened_partitions, i))
        continue;

      if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff),
                                                 name, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto err_handler;
      /* ::clone() will also set ha_share from the original. */
      if (!(m_file[i]= file[i]->clone(name_buff, m_clone_mem_root)))
      {
        error= HA_ERR_INITIALIZATION;
        file= &m_file[i];
        goto err_handler;
      }
      if (!m_file_sample)
        m_file_sample= m_file[i];
      name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
      bitmap_set_bit(&m_opened_partitions, i);
    }
  }
  else
  {
    check_insert_autoincrement();
    if (unlikely((error= open_read_partitions(name_buff, sizeof(name_buff)))))
      goto err_handler;
    m_num_locks= m_file_sample->lock_count();
  }
  /*
    We want to know the upper bound for locks, to allocate enough memory.
    There is no performance lost if we simply return in lock_count() the
    maximum number locks needed, only some minor over allocation of memory
    in get_lock_data().
  */
  m_num_locks*= m_tot_parts;

  file= m_file;
  ref_length= get_open_file_sample()->ref_length;
  check_table_flags= ((get_open_file_sample()->ha_table_flags() &
                       ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                      (PARTITION_ENABLED_TABLE_FLAGS));
  while (*(++file))
  {
    if (!bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      continue;
    /* MyISAM can have smaller ref_length for partitions with MAX_ROWS set */
    set_if_bigger(ref_length, ((*file)->ref_length));
    /*
      Verify that all partitions have the same set of table flags.
      Mask all flags that partitioning enables/disables.
    */
    if (check_table_flags != (((*file)->ha_table_flags() &
                               ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                              (PARTITION_ENABLED_TABLE_FLAGS)))
    {
      error= HA_ERR_INITIALIZATION;
      /* set file to last handler, so all of them are closed */
      file= &m_file[m_tot_parts - 1];
      goto err_handler;
    }
  }
  key_used_on_scan= get_open_file_sample()->key_used_on_scan;
  implicit_emptied= get_open_file_sample()->implicit_emptied;
  /*
    Add 2 bytes for partition id in position ref length.
    ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
  */
  ref_length+= PARTITION_BYTES_IN_POS;
  m_ref_length= ref_length;

  /*
    Release buffer read from .par file. It will not be reused again after
    being opened once.
  */
  clear_handler_file();

  /*
    Some handlers update statistics as part of the open call. This will in
    some cases corrupt the statistics of the partition handler and thus
    to ensure we have correct statistics we call info from open after
    calling open on all individual handlers.
  */
  m_handler_status= handler_opened;
  if (m_part_info->part_expr)
    m_part_func_monotonicity_info=
                            m_part_info->part_expr->get_monotonicity_info();
  else if (m_part_info->list_of_part_fields)
    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
  info(HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_OPEN);
  DBUG_RETURN(0);

err_handler:
  DEBUG_SYNC(ha_thd(), "partition_open_error");
  DBUG_ASSERT(m_tot_parts > 0);
  /* Close every partition that was opened before the failure. */
  for (uint i= m_tot_parts - 1; ; --i)
  {
    if (bitmap_is_set(&m_opened_partitions, i))
      m_file[i]->ha_close();
    if (!i)
      break;
  }
err_alloc:
  free_partition_bitmaps();
  my_free(m_range_info);
  m_range_info= 0;

  DBUG_RETURN(error);
}
3780 
3781 
3782 /*
3783   Disabled since it is not possible to prune yet.
  without pruning, it would need to rebind/unbind every partition for every
  statement which uses a table from the table cache. It would also use
3786   as many PSI_tables as there are partitions.
3787 */
3788 
3789 #ifdef HAVE_M_PSI_PER_PARTITION
unbind_psi()3790 void ha_partition::unbind_psi()
3791 {
3792   uint i;
3793 
3794   DBUG_ENTER("ha_partition::unbind_psi");
3795   handler::unbind_psi();
3796   for (i= 0; i < m_tot_parts; i++)
3797   {
3798     DBUG_ASSERT(m_file[i] != NULL);
3799     m_file[i]->unbind_psi();
3800   }
3801   DBUG_VOID_RETURN;
3802 }
3803 
rebind()3804 int ha_partition::rebind()
3805 {
3806   uint i;
3807 
3808   DBUG_ENTER("ha_partition::rebind");
3809   if (int error= handler::rebind())
3810     DBUG_RETURN(error);
3811   for (i= 0; i < m_tot_parts; i++)
3812   {
3813     DBUG_ASSERT(m_file[i] != NULL);
3814     if (int error= m_file[i]->rebind())
3815     {
3816       while (i)
3817         m_file[--i]->unbind_psi();
3818       handler::unbind_psi();
3819       DBUG_RETURN(error);
3820     }
3821   }
3822   DBUG_RETURN(0);
3823 }
3824 #endif /* HAVE_M_PSI_PER_PARTITION */
3825 
3826 
3827 /*
3828   Check if the table definition has changed for the part tables
3829   We use the first partition for the check.
3830 */
3831 
int ha_partition::discover_check_version()
{
  /* Delegate to the first partition; see the comment above this function. */
  return m_file[0]->discover_check_version();
}
3836 
3837 /**
3838   Clone the open and locked partitioning handler.
3839 
3840   @param  mem_root  MEM_ROOT to use.
3841 
3842   @return Pointer to the successfully created clone or NULL
3843 
3844   @details
3845   This function creates a new ha_partition handler as a clone/copy. The
3846   original (this) must already be opened and locked. The clone will use
3847   the originals m_part_info.
3848   It also allocates memory for ref + ref_dup.
3849   In ha_partition::open() it will clone its original handlers partitions
3850   which will allocate then on the correct MEM_ROOT and also open them.
3851 */
3852 
clone(const char * name,MEM_ROOT * mem_root)3853 handler *ha_partition::clone(const char *name, MEM_ROOT *mem_root)
3854 {
3855   ha_partition *new_handler;
3856 
3857   DBUG_ENTER("ha_partition::clone");
3858   new_handler= new (mem_root) ha_partition(ht, table_share, m_part_info,
3859                                            this, mem_root);
3860   if (!new_handler)
3861     DBUG_RETURN(NULL);
3862 
3863   /*
3864     We will not clone each partition's handler here, it will be done in
3865     ha_partition::open() for clones. Also set_ha_share_ref is not needed
3866     here, since 1) ha_share is copied in the constructor used above
3867     2) each partition's cloned handler will set it from its original.
3868   */
3869 
3870   /*
3871     Allocate new_handler->ref here because otherwise ha_open will allocate it
3872     on this->table->mem_root and we will not be able to reclaim that memory
3873     when the clone handler object is destroyed.
3874   */
3875   if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
3876                                               ALIGN_SIZE(m_ref_length)*2)))
3877     goto err;
3878 
3879   if (new_handler->ha_open(table, name,
3880                            table->db_stat,
3881                            HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL))
3882     goto err;
3883 
3884   DBUG_RETURN((handler*) new_handler);
3885 
3886 err:
3887   delete new_handler;
3888   DBUG_RETURN(NULL);
3889 }
3890 
3891 
3892 /*
3893   Close handler object
3894 
3895   SYNOPSIS
3896     close()
3897 
3898   RETURN VALUE
3899     >0                   Error code
3900     0                    Success
3901 
3902   DESCRIPTION
3903     Called from sql_base.cc, sql_select.cc, and table.cc.
3904     In sql_select.cc it is only used to close up temporary tables or during
3905     the process where a temporary table is converted over to being a
3906     myisam table.
3907     For sql_base.cc look at close_data_tables().
3908 */
3909 
int ha_partition::close(void)
{
  bool first= TRUE;
  handler **file;
  uint i;
  st_partition_ft_info *tmp_ft_info;
  DBUG_ENTER("ha_partition::close");
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_part_info);

  destroy_record_priority_queue();

  /* Free the linked list of fulltext info objects. */
  for (; ft_first ; ft_first= tmp_ft_info)
  {
    tmp_ft_info= ft_first->next;
    my_free(ft_first);
  }

  /* Free active mrr_ranges */
  for (i= 0; i < m_tot_parts; i++)
  {
    if (m_part_mrr_range_first[i])
    {
      PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_first=
        m_part_mrr_range_first[i];
      do
      {
        PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_current;
        tmp_mrr_range_current= tmp_mrr_range_first;
        tmp_mrr_range_first= tmp_mrr_range_first->next;
        my_free(tmp_mrr_range_current);
      } while (tmp_mrr_range_first);
    }
  }
  /* Free the table-level MRR range list, including both key copies. */
  if (m_mrr_range_first)
  {
    do
    {
      m_mrr_range_current= m_mrr_range_first;
      m_mrr_range_first= m_mrr_range_first->next;
      if (m_mrr_range_current->key[0])
        my_free(m_mrr_range_current->key[0]);
      if (m_mrr_range_current->key[1])
        my_free(m_mrr_range_current->key[1]);
      my_free(m_mrr_range_current);
    } while (m_mrr_range_first);
  }
  my_free(m_range_info);
  m_range_info= NULL;                           // Safety

  if (m_mrr_full_buffer)
  {
    my_free(m_mrr_full_buffer);
    m_mrr_full_buffer= NULL;
    m_mrr_full_buffer_size= 0;
  }
  file= m_file;

repeat:
  do
  {
    /*
      First pass (over m_file): only close partitions recorded as opened
      in m_opened_partitions; the index into the bitmap is the handler's
      position in m_file. Second pass (over m_added_file, first == FALSE)
      closes unconditionally — the bitmap does not apply to that array.
    */
    if (!first || bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      (*file)->ha_close();
  } while (*(++file));

  free_partition_bitmaps();

  /* Repeat the close loop for m_added_file handlers, if any exist. */
  if (first && m_added_file && m_added_file[0])
  {
    file= m_added_file;
    first= FALSE;
    goto repeat;
  }

  m_handler_status= handler_closed;
  DBUG_RETURN(0);
}
3987 
3988 /****************************************************************************
3989                 MODULE start/end statement
3990 ****************************************************************************/
3991 /*
3992   A number of methods to define various constants for the handler. In
3993   the case of the partition handler we need to use some max and min
3994   of the underlying handlers in most cases.
3995 */
3996 
3997 /*
3998   Set external locks on table
3999 
4000   SYNOPSIS
4001     external_lock()
4002     thd                    Thread object
4003     lock_type              Type of external lock
4004 
4005   RETURN VALUE
4006     >0                   Error code
4007     0                    Success
4008 
4009   DESCRIPTION
4010     First you should go read the section "locking functions for mysql" in
4011     lock.cc to understand this.
4012     This create a lock on the table. If you are implementing a storage engine
4013     that can handle transactions look at ha_berkeley.cc to see how you will
4014     want to go about doing this. Otherwise you should consider calling
4015     flock() here.
4016     Originally this method was used to set locks on file level to enable
4017     several MySQL Servers to work on the same data. For transactional
4018     engines it has been "abused" to also mean start and end of statements
4019     to enable proper rollback of statements and transactions. When LOCK
4020     TABLES has been issued the start_stmt method takes over the role of
4021     indicating start of statement but in this case there is no end of
4022     statement indicator(?).
4023 
4024     Called from lock.cc by lock_external() and unlock_external(). Also called
4025     from sql_table.cc by copy_data_between_tables().
4026 */
4027 
int ha_partition::external_lock(THD *thd, int lock_type)
{
  int error;
  uint i, first_used_partition;
  MY_BITMAP *used_partitions;
  DBUG_ENTER("ha_partition::external_lock");

  DBUG_ASSERT(!auto_increment_lock);
  DBUG_ASSERT(!auto_increment_safe_stmt_log_lock);

  /*
    Unlock operates on what was actually locked (m_locked_partitions);
    locking operates on the pruned lock set from m_part_info.
  */
  if (lock_type == F_UNLCK)
    used_partitions= &m_locked_partitions;
  else
    used_partitions= &(m_part_info->lock_partitions);

  first_used_partition= bitmap_get_first_set(used_partitions);

  for (i= first_used_partition;
       i < m_tot_parts;
       i= bitmap_get_next_set(used_partitions, i))
  {
    DBUG_PRINT("info", ("external_lock(thd, %d) part %u", lock_type, i));
    if (unlikely((error= m_file[i]->ha_external_lock(thd, lock_type))))
    {
      /* Errors while unlocking are ignored; lock errors roll back below. */
      if (lock_type != F_UNLCK)
        goto err_handler;
    }
    DBUG_PRINT("info", ("external_lock part %u lock %d", i, lock_type));
    if (lock_type != F_UNLCK)
      bitmap_set_bit(&m_locked_partitions, i);
  }
  if (lock_type == F_UNLCK)
  {
    bitmap_clear_all(used_partitions);
  }
  else
  {
    /* Add touched partitions to be included in reset(). */
    bitmap_union(&m_partitions_to_reset, used_partitions);
  }

  /* m_added_file handlers are only ever unlocked here (see the assert). */
  if (m_added_file && m_added_file[0])
  {
    handler **file= m_added_file;
    DBUG_ASSERT(lock_type == F_UNLCK);
    do
    {
      (void) (*file)->ha_external_lock(thd, lock_type);
    } while (*(++file));
  }
  if (lock_type == F_WRLCK)
  {
    if (m_part_info->part_expr)
      m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);
    if (m_part_info->part_type == VERSIONING_PARTITION &&
      /* TODO: MDEV-20345 exclude more inappropriate commands like INSERT.
         These commands may be excluded because the working history partition
         is needed only for versioned DML. */
      thd->lex->sql_command != SQLCOM_SELECT &&
      thd->lex->sql_command != SQLCOM_INSERT_SELECT &&
      (error= m_part_info->vers_set_hist_part(thd)))
      goto err_handler;
  }
  DBUG_RETURN(0);

err_handler:
  /* Roll back: unlock every partition that was successfully locked. */
  uint j;
  for (j= first_used_partition;
       j < i;
       j= bitmap_get_next_set(&m_locked_partitions, j))
  {
    (void) m_file[j]->ha_external_unlock(thd);
  }
  bitmap_clear_all(&m_locked_partitions);
  DBUG_RETURN(error);
}
4104 
4105 
4106 /*
4107   Get the lock(s) for the table and perform conversion of locks if needed
4108 
4109   SYNOPSIS
4110     store_lock()
4111     thd                   Thread object
4112     to                    Lock object array
4113     lock_type             Table lock type
4114 
4115   RETURN VALUE
4116     >0                   Error code
4117     0                    Success
4118 
4119   DESCRIPTION
4120     The idea with handler::store_lock() is the following:
4121 
4122     The statement decided which locks we should need for the table
4123     for updates/deletes/inserts we get WRITE locks, for SELECT... we get
4124     read locks.
4125 
4126     Before adding the lock into the table lock handler (see thr_lock.c)
4127     mysqld calls store lock with the requested locks.  Store lock can now
4128     modify a write lock to a read lock (or some other lock), ignore the
4129     lock (if we don't want to use MySQL table locks at all) or add locks
4130     for many tables (like we do when we are using a MERGE handler).
4131 
    Berkeley DB, for example, changes all WRITE locks to TL_WRITE_ALLOW_WRITE
    (which signals that we are doing WRITES, but we are still allowing other
    readers and writers).
4135 
4136     When releasing locks, store_lock() is also called. In this case one
4137     usually doesn't have to do anything.
4138 
4139     store_lock is called when holding a global mutex to ensure that only
4140     one thread at a time changes the locking information of tables.
4141 
4142     In some exceptional cases MySQL may send a request for a TL_IGNORE;
4143     This means that we are requesting the same lock as last time and this
4144     should also be ignored. (This may happen when someone does a flush
4145     table when we have opened a part of the tables, in which case mysqld
4146     closes and reopens the tables and tries to get the same locks as last
4147     time).  In the future we will probably try to remove this.
4148 
4149     Called from lock.cc by get_lock_data().
4150 */
4151 
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)4152 THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
4153 					 THR_LOCK_DATA **to,
4154 					 enum thr_lock_type lock_type)
4155 {
4156   uint i;
4157   DBUG_ENTER("ha_partition::store_lock");
4158   DBUG_ASSERT(thd == current_thd);
4159 
4160   /*
4161     This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
4162     even when thd != table->in_use. In that case don't use partition pruning,
4163     but use all partitions instead to avoid using another threads structures.
4164   */
4165   if (thd != table->in_use)
4166   {
4167     for (i= 0; i < m_tot_parts; i++)
4168       to= m_file[i]->store_lock(thd, to, lock_type);
4169   }
4170   else
4171   {
4172     MY_BITMAP *used_partitions= lock_type == TL_UNLOCK ||
4173                                 lock_type == TL_IGNORE ?
4174                                 &m_locked_partitions :
4175                                 &m_part_info->lock_partitions;
4176 
4177     for (i= bitmap_get_first_set(used_partitions);
4178          i < m_tot_parts;
4179          i= bitmap_get_next_set(used_partitions, i))
4180     {
4181       DBUG_PRINT("info", ("store lock %u iteration", i));
4182       to= m_file[i]->store_lock(thd, to, lock_type);
4183     }
4184   }
4185   DBUG_RETURN(to);
4186 }
4187 
4188 /*
4189   Start a statement when table is locked
4190 
4191   SYNOPSIS
4192     start_stmt()
4193     thd                  Thread object
4194     lock_type            Type of external lock
4195 
4196   RETURN VALUE
4197     >0                   Error code
4198     0                    Success
4199 
4200   DESCRIPTION
4201     This method is called instead of external lock when the table is locked
4202     before the statement is executed.
4203 */
4204 
start_stmt(THD * thd,thr_lock_type lock_type)4205 int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
4206 {
4207   int error= 0;
4208   uint i;
4209   /* Assert that read_partitions is included in lock_partitions */
4210   DBUG_ASSERT(bitmap_is_subset(&m_part_info->read_partitions,
4211                                &m_part_info->lock_partitions));
4212   /*
4213     m_locked_partitions is set in previous external_lock/LOCK TABLES.
4214     Current statement's lock requests must not include any partitions
4215     not previously locked.
4216   */
4217   DBUG_ASSERT(bitmap_is_subset(&m_part_info->lock_partitions,
4218                                &m_locked_partitions));
4219   DBUG_ENTER("ha_partition::start_stmt");
4220 
4221   for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
4222        i < m_tot_parts;
4223        i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
4224   {
4225     if (unlikely((error= m_file[i]->start_stmt(thd, lock_type))))
4226       DBUG_RETURN(error);
4227     /* Add partition to be called in reset(). */
4228     bitmap_set_bit(&m_partitions_to_reset, i);
4229   }
4230   switch (lock_type)
4231   {
4232   case TL_WRITE_ALLOW_WRITE:
4233   case TL_WRITE_CONCURRENT_INSERT:
4234   case TL_WRITE_DELAYED:
4235   case TL_WRITE_DEFAULT:
4236   case TL_WRITE_LOW_PRIORITY:
4237   case TL_WRITE:
4238   case TL_WRITE_ONLY:
4239     if (m_part_info->part_expr)
4240       m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);
4241     if (m_part_info->part_type == VERSIONING_PARTITION &&
4242       // TODO: MDEV-20345 (see above)
4243       thd->lex->sql_command != SQLCOM_SELECT &&
4244       thd->lex->sql_command != SQLCOM_INSERT_SELECT)
4245       error= m_part_info->vers_set_hist_part(thd);
4246   default:;
4247   }
4248   DBUG_RETURN(error);
4249 }
4250 
4251 
4252 /**
4253   Get number of lock objects returned in store_lock
4254 
4255   @returns Number of locks returned in call to store_lock
4256 
4257   @desc
    Returns the maximum possible number of store locks needed in a call to
    store_lock().
4260 */
4261 
uint ha_partition::lock_count() const
{
  DBUG_ENTER("ha_partition::lock_count");
  /*
    m_num_locks is the upper bound computed in open():
    one partition's lock_count() times the total number of partitions.
  */
  DBUG_RETURN(m_num_locks);
}
4267 
4268 
4269 /*
4270   Unlock last accessed row
4271 
4272   SYNOPSIS
4273     unlock_row()
4274 
4275   RETURN VALUE
4276     NONE
4277 
4278   DESCRIPTION
4279     Record currently processed was not in the result set of the statement
4280     and is thus unlocked. Used for UPDATE and DELETE queries.
4281 */
4282 
void ha_partition::unlock_row()
{
  DBUG_ENTER("ha_partition::unlock_row");
  /* Forward to the partition that produced the last-read row. */
  m_file[m_last_part]->unlock_row();
  DBUG_VOID_RETURN;
}
4289 
4290 /**
4291   Check if semi consistent read was used
4292 
4293   SYNOPSIS
4294     was_semi_consistent_read()
4295 
4296   RETURN VALUE
4297     TRUE   Previous read was a semi consistent read
4298     FALSE  Previous read was not a semi consistent read
4299 
4300   DESCRIPTION
4301     See handler.h:
4302     In an UPDATE or DELETE, if the row under the cursor was locked by another
4303     transaction, and the engine used an optimistic read of the last
4304     committed row value under the cursor, then the engine returns 1 from this
4305     function. MySQL must NOT try to update this optimistic value. If the
4306     optimistic value does not match the WHERE condition, MySQL can decide to
4307     skip over this row. Currently only works for InnoDB. This can be used to
4308     avoid unnecessary lock waits.
4309 
4310     If this method returns nonzero, it will also signal the storage
4311     engine that the next read will be a locking re-read of the row.
4312 */
bool ha_partition::was_semi_consistent_read()
{
  DBUG_ENTER("ha_partition::was_semi_consistent_read");
  /* The last-read row must come from a valid, pruned-in partition. */
  DBUG_ASSERT(m_last_part < m_tot_parts);
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  /* Forward to the partition that produced the last-read row. */
  DBUG_RETURN(m_file[m_last_part]->was_semi_consistent_read());
}
4320 
4321 /**
4322   Use semi consistent read if possible
4323 
4324   SYNOPSIS
4325     try_semi_consistent_read()
4326     yes   Turn on semi consistent read
4327 
4328   RETURN VALUE
4329     NONE
4330 
4331   DESCRIPTION
4332     See handler.h:
4333     Tell the engine whether it should avoid unnecessary lock waits.
4334     If yes, in an UPDATE or DELETE, if the row under the cursor was locked
4335     by another transaction, the engine may try an optimistic read of
4336     the last committed row value under the cursor.
4337     Note: prune_partitions are already called before this call, so using
4338     pruning is OK.
4339 */
try_semi_consistent_read(bool yes)4340 void ha_partition::try_semi_consistent_read(bool yes)
4341 {
4342   uint i;
4343   DBUG_ENTER("ha_partition::try_semi_consistent_read");
4344 
4345   i= bitmap_get_first_set(&(m_part_info->read_partitions));
4346   DBUG_ASSERT(i != MY_BIT_NONE);
4347   for (;
4348        i < m_tot_parts;
4349        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4350   {
4351     m_file[i]->try_semi_consistent_read(yes);
4352   }
4353   DBUG_VOID_RETURN;
4354 }
4355 
4356 
4357 /****************************************************************************
4358                 MODULE change record
4359 ****************************************************************************/
4360 
4361 /*
4362   Insert a row to the table
4363 
4364   SYNOPSIS
4365     write_row()
4366     buf                        The row in MySQL Row Format
4367 
4368   RETURN VALUE
4369     >0                         Error code
4370     0                          Success
4371 
4372   DESCRIPTION
4373     write_row() inserts a row. buf() is a byte array of data, normally
4374     record[0].
4375 
4376     You can use the field information to extract the data from the native byte
4377     array type.
4378 
4379     Example of this would be:
4380     for (Field **field=table->field ; *field ; field++)
4381     {
4382       ...
4383     }
4384 
4385     See ha_tina.cc for a variant of extracting all of the data as strings.
4386     ha_berkeley.cc has a variant of how to store it intact by "packing" it
4387     for ha_berkeley's own native storage type.
4388 
4389     Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
4390     sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
4391 
4392     ADDITIONAL INFO:
4393 
4394     We have to set auto_increment fields, because those may be used in
4395     determining which partition the row should be written to.
4396 */
4397 
int ha_partition::write_row(const uchar * buf)
{
  uint32 part_id;
  int error;
  longlong func_value;
  /* Auto-increment handling only applies when inserting record[0]. */
  bool have_auto_increment= table->next_number_field && buf == table->record[0];
  MY_BITMAP *old_map;
  THD *thd= ha_thd();
  /* Sql_mode_save restores thd's sql_mode when this function returns. */
  Sql_mode_save sms(thd);
  bool saved_auto_inc_field_not_null= table->auto_increment_field_not_null;
  DBUG_ENTER("ha_partition::write_row");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /*
    If we have an auto_increment column and we are writing a changed row
    or a new row, then update the auto_increment value in the record.
  */
  if (have_auto_increment)
  {
    if (!table_share->next_number_keypart)
      update_next_auto_inc_val();
    error= update_auto_increment();

    /*
      If we have failed to set the auto-increment value for this row,
      it is highly likely that we will not be able to insert it into
      the correct partition. We must check and fail if necessary.
    */
    if (unlikely(error))
      goto exit;

    /*
      Don't allow generation of auto_increment value the partitions handler.
      If a partitions handler would change the value, then it might not
      match the partition any longer.
      This can occur if 'SET INSERT_ID = 0; INSERT (NULL)',
      So allow this by adding 'MODE_NO_AUTO_VALUE_ON_ZERO' to sql_mode.
      The partitions handler::next_insert_id must always be 0. Otherwise
      we need to forward release_auto_increment, or reset it for all
      partitions.
    */
    if (table->next_number_field->val_int() == 0)
    {
      table->auto_increment_field_not_null= TRUE;
      thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO;
    }
  }
  /*
    Compute the target partition.  All columns must be readable while the
    partition function is evaluated, hence the temporary read_set switch.
  */
  old_map= dbug_tmp_use_all_columns(table, &table->read_set);
  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
  dbug_tmp_restore_column_map(&table->read_set, old_map);
  if (unlikely(error))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  /* Refuse to write into a partition that was not locked for this stmt. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
  {
    DBUG_PRINT("info", ("Write to non-locked partition %u (func_value: %ld)",
                        part_id, (long) func_value));
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }
  m_last_part= part_id;
  DBUG_PRINT("info", ("Insert in partition %u", part_id));

  /* Lazily start bulk insert on this partition if needed. */
  start_part_bulk_insert(thd, part_id);

  DBUG_ASSERT(!m_file[part_id]->row_logging);
  error= m_file[part_id]->ha_write_row(buf);
  /* Track the highest auto_increment value actually written. */
  if (!error && have_auto_increment && !table->s->next_number_keypart)
    set_auto_increment_if_higher(table->next_number_field);

exit:
  /* Restore flag changed above; sql_mode is restored by 'sms'. */
  table->auto_increment_field_not_null= saved_auto_inc_field_not_null;
  DBUG_RETURN(error);
}
4474 
4475 
4476 /*
4477   Update an existing row
4478 
4479   SYNOPSIS
4480     update_row()
4481     old_data                 Old record in MySQL Row Format
4482     new_data                 New record in MySQL Row Format
4483 
4484   RETURN VALUE
4485     >0                         Error code
4486     0                          Success
4487 
4488   DESCRIPTION
4489     Yes, update_row() does what you expect, it updates a row. old_data will
4490     have the previous row record in it, while new_data will have the newest
4491     data in it.
4492     Keep in mind that the server can do updates based on ordering if an
4493     ORDER BY clause was used. Consecutive ordering is not guaranteed.
4494 
4495     Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
4496     new_data is always record[0]
4497     old_data is always record[1]
4498 */
4499 
int ha_partition::update_row(const uchar *old_data, const uchar *new_data)
{
  THD *thd= ha_thd();
  uint32 new_part_id, old_part_id= m_last_part;
  int error= 0;
  DBUG_ENTER("ha_partition::update_row");
  m_err_rec= NULL;

  // Need to read partition-related columns, to locate the row's partition:
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  /*
    The protocol for updating a row is:
    1) position the handler (cursor) on the row to be updated,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call update_row with both old and new full records as arguments.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of correcting m_last_part, to make the user aware of the
    problem!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.
  */
  error= get_part_for_buf(old_data, m_rec0, m_part_info, &old_part_id);
  DBUG_ASSERT(!error);
  DBUG_ASSERT(old_part_id == m_last_part);
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), old_part_id));
#endif

  /* Where must the new version of the row live? */
  if (unlikely((error= get_part_for_buf(new_data, m_rec0, m_part_info,
                                        &new_part_id))))
    goto exit;
  if (unlikely(!bitmap_is_set(&(m_part_info->lock_partitions), new_part_id)))
  {
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }


  m_last_part= new_part_id;
  start_part_bulk_insert(thd, new_part_id);
  DBUG_ASSERT(!m_file[new_part_id]->row_logging);
  if (new_part_id == old_part_id)
  {
    /* Row stays in its partition: a plain in-place update. */
    DBUG_PRINT("info", ("Update in partition %u", (uint) new_part_id));
    error= m_file[new_part_id]->ha_update_row(old_data, new_data);
    goto exit;
  }
  else
  {
    /*
      Row moves between partitions: insert into the new partition first,
      then delete from the old one.
    */
    Field *saved_next_number_field= table->next_number_field;
    /*
      Don't allow generation of auto_increment value for update.
      table->next_number_field is never set on UPDATE.
      But is set for INSERT ... ON DUPLICATE KEY UPDATE,
      and since update_row() does not generate or update an auto_inc value,
      we cannot have next_number_field set when moving a row
      to another partition with write_row(), since that could
      generate/update the auto_inc value.
      This gives the same behavior for partitioned vs non partitioned tables.
    */
    table->next_number_field= NULL;
    DBUG_PRINT("info", ("Update from partition %u to partition %u",
                        (uint) old_part_id, (uint) new_part_id));
    error= m_file[new_part_id]->ha_write_row((uchar*) new_data);
    table->next_number_field= saved_next_number_field;
    if (unlikely(error))
      goto exit;

    error= m_file[old_part_id]->ha_delete_row(old_data);
    if (unlikely(error))
      goto exit;
  }

exit:
  /*
    if updating an auto_increment column, update
    part_share->next_auto_inc_val if needed.
    (not to be used if auto_increment on secondary field in a multi-column
    index)
    mysql_update does not set table->next_number_field, so we use
    table->found_next_number_field instead.
    Also checking that the field is marked in the write set.
  */
  if (table->found_next_number_field &&
      new_data == table->record[0] &&
      !table->s->next_number_keypart &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index))
  {
    update_next_auto_inc_val();
    if (part_share->auto_inc_initialized)
      set_auto_increment_if_higher(table->found_next_number_field);
  }
  DBUG_RETURN(error);
}
4602 
4603 
4604 /*
4605   Remove an existing row
4606 
4607   SYNOPSIS
4608     delete_row
4609     buf                      Deleted row in MySQL Row Format
4610 
4611   RETURN VALUE
4612     >0                       Error Code
4613     0                        Success
4614 
4615   DESCRIPTION
4616     This will delete a row. buf will contain a copy of the row to be deleted.
4617     The server will call this right after the current row has been read
4618     (from either a previous rnd_xxx() or index_xxx() call).
4619     If you keep a pointer to the last row or can access a primary key it will
4620     make doing the deletion quite a bit easier.
4621     Keep in mind that the server does no guarantee consecutive deletions.
4622     ORDER BY clauses can be used.
4623 
4624     Called in sql_acl.cc and sql_udf.cc to manage internal table information.
4625     Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
4626     it is used for removing duplicates while in insert it is used for REPLACE
4627     calls.
4628 
4629     buf is either record[0] or record[1]
4630 */
4631 
int ha_partition::delete_row(const uchar *buf)
{
  int error;
  DBUG_ENTER("ha_partition::delete_row");
  m_err_rec= NULL;

  /* Partitioning columns must be readable to verify the row's partition. */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  THD* thd = ha_thd();
  /*
    The protocol for deleting a row is:
    1) position the handler (cursor) on the row to be deleted,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call delete_row with the full record as argument.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of forwarding the delete to the correct (m_last_part)
    partition!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.

    For partitions by system_time, get_part_for_buf() is always either current
    or last historical partition, but DELETE HISTORY can delete from any
    historical partition. So, skip the check in this case.
  */
  if (!thd->lex->vers_conditions.delete_history)
  {
    uint32 part_id;
    error= get_part_for_buf(buf, m_rec0, m_part_info, &part_id);
    DBUG_ASSERT(!error);
    DBUG_ASSERT(part_id == m_last_part);
  }
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), m_last_part));
#endif

  /* Release builds also refuse deletes from a partition we do not lock. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), m_last_part))
    DBUG_RETURN(HA_ERR_NOT_IN_LOCK_PARTITIONS);

  DBUG_ASSERT(!m_file[m_last_part]->row_logging);
  error= m_file[m_last_part]->ha_delete_row(buf);
  DBUG_RETURN(error);
}
4681 
4682 
4683 /*
4684   Delete all rows in a table
4685 
4686   SYNOPSIS
4687     delete_all_rows()
4688 
4689   RETURN VALUE
4690     >0                       Error Code
4691     0                        Success
4692 
4693   DESCRIPTION
4694     Used to delete all rows in a table. Both for cases of truncate and
4695     for cases where the optimizer realizes that all rows will be
4696     removed as a result of a SQL statement.
4697 
4698     Called from item_sum.cc by Item_func_group_concat::clear(),
4699     Item_sum_count::clear(), and Item_func_group_concat::clear().
4700     Called from sql_delete.cc by mysql_delete().
4701     Called from sql_select.cc by JOIN::reset().
4702     Called from sql_union.cc by st_select_lex_unit::exec().
4703 */
4704 
delete_all_rows()4705 int ha_partition::delete_all_rows()
4706 {
4707   int error;
4708   uint i;
4709   DBUG_ENTER("ha_partition::delete_all_rows");
4710 
4711   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4712        i < m_tot_parts;
4713        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4714   {
4715     /* Can be pruned, like DELETE FROM t PARTITION (pX) */
4716     if (unlikely((error= m_file[i]->ha_delete_all_rows())))
4717       DBUG_RETURN(error);
4718   }
4719   DBUG_RETURN(0);
4720 }
4721 
4722 
4723 /**
4724   Manually truncate the table.
4725 
4726   @retval  0    Success.
4727   @retval  > 0  Error code.
4728 */
4729 
truncate()4730 int ha_partition::truncate()
4731 {
4732   int error;
4733   handler **file;
4734   DBUG_ENTER("ha_partition::truncate");
4735 
4736   /*
4737     TRUNCATE also means resetting auto_increment. Hence, reset
4738     it so that it will be initialized again at the next use.
4739   */
4740   lock_auto_increment();
4741   part_share->next_auto_inc_val= 0;
4742   part_share->auto_inc_initialized= false;
4743   unlock_auto_increment();
4744 
4745   file= m_file;
4746   do
4747   {
4748     if (unlikely((error= (*file)->ha_truncate())))
4749       DBUG_RETURN(error);
4750   } while (*(++file));
4751   DBUG_RETURN(0);
4752 }
4753 
4754 
4755 /**
4756   Truncate a set of specific partitions.
4757 
4758   @remark Auto increment value will be truncated in that partition as well!
4759 
4760   ALTER TABLE t TRUNCATE PARTITION ...
4761 */
4762 
int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt)
{
  int error= 0;
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  DBUG_ENTER("ha_partition::truncate_partition");

  /* Only binlog when it starts any call to the partitions handlers */
  *binlog_stmt= false;

  /* Mark the partitions named in the statement with PART_ADMIN state. */
  if (set_part_state(alter_info, m_part_info, PART_ADMIN))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  /*
    TRUNCATE also means resetting auto_increment. Hence, reset
    it so that it will be initialized again at the next use.
  */
  lock_auto_increment();
  part_share->next_auto_inc_val= 0;
  part_share->auto_inc_initialized= FALSE;
  unlock_auto_increment();

  *binlog_stmt= true;

  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element>
                                    subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          /* Handlers are stored linearly: part i, subpart j. */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("truncate subpartition %u (%s)",
                              part, sub_elem->partition_name));
          if (unlikely((error= m_file[part]->ha_truncate())))
            break;
          sub_elem->part_state= PART_NORMAL;
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("truncate partition %u (%s)", i,
                            part_elem->partition_name));
        error= m_file[i]->ha_truncate();
      }
      /*
        NOTE(review): part_state is reset to PART_NORMAL even when
        ha_truncate() above failed — confirm this is intentional.
      */
      part_elem->part_state= PART_NORMAL;
    }
  } while (!error && (++i < num_parts));
  DBUG_RETURN(error);
}
4822 
4823 
4824 /*
4825   Start a large batch of insert rows
4826 
4827   SYNOPSIS
4828     start_bulk_insert()
4829     rows                  Number of rows to insert
4830     flags                 Flags to control index creation
4831 
4832   RETURN VALUE
4833     NONE
4834 
4835   DESCRIPTION
4836     rows == 0 means we will probably insert many rows
4837 */
start_bulk_insert(ha_rows rows,uint flags)4838 void ha_partition::start_bulk_insert(ha_rows rows, uint flags)
4839 {
4840   DBUG_ENTER("ha_partition::start_bulk_insert");
4841 
4842   m_bulk_inserted_rows= 0;
4843   bitmap_clear_all(&m_bulk_insert_started);
4844   /* use the last bit for marking if bulk_insert_started was called */
4845   bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
4846   DBUG_VOID_RETURN;
4847 }
4848 
4849 
4850 /*
4851   Check if start_bulk_insert has been called for this partition,
4852   if not, call it and mark it called
4853 */
start_part_bulk_insert(THD * thd,uint part_id)4854 void ha_partition::start_part_bulk_insert(THD *thd, uint part_id)
4855 {
4856   long old_buffer_size;
4857   if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
4858       bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
4859   {
4860     DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
4861     old_buffer_size= thd->variables.read_buff_size;
4862     /* Update read_buffer_size for this partition */
4863     thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
4864     m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
4865     bitmap_set_bit(&m_bulk_insert_started, part_id);
4866     thd->variables.read_buff_size= old_buffer_size;
4867   }
4868   m_bulk_inserted_rows++;
4869 }
4870 
4871 /*
4872   Estimate the read buffer size for each partition.
4873   SYNOPSIS
4874     ha_partition::estimate_read_buffer_size()
4875     original_size  read buffer size originally set for the server
4876   RETURN VALUE
4877     estimated buffer size.
4878   DESCRIPTION
4879     If the estimated number of rows to insert is less than 10 (but not 0)
4880     the new buffer size is same as original buffer size.
4881     In case of first partition of when partition function is monotonic
4882     new buffer size is same as the original buffer size.
4883     For rest of the partition total buffer of 10*original_size is divided
4884     equally if number of partition is more than 10 other wise each partition
4885     will be allowed to use original buffer size.
4886 */
estimate_read_buffer_size(long original_size)4887 long ha_partition::estimate_read_buffer_size(long original_size)
4888 {
4889   /*
4890     If number of rows to insert is less than 10, but not 0,
4891     return original buffer size.
4892   */
4893   if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
4894     return (original_size);
4895   /*
4896     If first insert/partition and monotonic partition function,
4897     allow using buffer size originally set.
4898    */
4899   if (!m_bulk_inserted_rows &&
4900       m_part_func_monotonicity_info != NON_MONOTONIC &&
4901       m_tot_parts > 1)
4902     return original_size;
4903   /*
4904     Allow total buffer used in all partition to go up to 10*read_buffer_size.
4905     11*read_buffer_size in case of monotonic partition function.
4906   */
4907 
4908   if (m_tot_parts < 10)
4909       return original_size;
4910   return (original_size * 10 / m_tot_parts);
4911 }
4912 
4913 /*
4914   Try to predict the number of inserts into this partition.
4915 
4916   If less than 10 rows (including 0 which means Unknown)
4917     just give that as a guess
4918   If monotonic partitioning function was used
4919     guess that 50 % of the inserts goes to the first partition
4920   For all other cases, guess on equal distribution between the partitions
4921 */
guess_bulk_insert_rows()4922 ha_rows ha_partition::guess_bulk_insert_rows()
4923 {
4924   DBUG_ENTER("guess_bulk_insert_rows");
4925 
4926   if (estimation_rows_to_insert < 10)
4927     DBUG_RETURN(estimation_rows_to_insert);
4928 
4929   /* If first insert/partition and monotonic partition function, guess 50%.  */
4930   if (!m_bulk_inserted_rows &&
4931       m_part_func_monotonicity_info != NON_MONOTONIC &&
4932       m_tot_parts > 1)
4933     DBUG_RETURN(estimation_rows_to_insert / 2);
4934 
4935   /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
4936   if (m_bulk_inserted_rows < estimation_rows_to_insert)
4937     DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
4938                 / m_tot_parts) + 1);
4939   /* The estimation was wrong, must say 'Unknown' */
4940   DBUG_RETURN(0);
4941 }
4942 
4943 
sum_copy_info(handler * file)4944 void ha_partition::sum_copy_info(handler *file)
4945 {
4946   copy_info.records+= file->copy_info.records;
4947   copy_info.touched+= file->copy_info.touched;
4948   copy_info.copied+=  file->copy_info.copied;
4949   copy_info.deleted+= file->copy_info.deleted;
4950   copy_info.updated+= file->copy_info.updated;
4951 }
4952 
4953 
sum_copy_infos()4954 void ha_partition::sum_copy_infos()
4955 {
4956   handler **file_array;
4957   bzero(&copy_info, sizeof(copy_info));
4958   file_array= m_file;
4959   do
4960   {
4961     if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
4962       sum_copy_info(*file_array);
4963   } while (*(++file_array));
4964 }
4965 
reset_copy_info()4966 void ha_partition::reset_copy_info()
4967 {
4968   handler **file_array;
4969   bzero(&copy_info, sizeof(copy_info));
4970   file_array= m_file;
4971   do
4972   {
4973     if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
4974       bzero(&(*file_array)->copy_info, sizeof(copy_info));
4975   } while (*(++file_array));
4976 }
4977 
4978 
4979 
4980 /*
4981   Finish a large batch of insert rows
4982 
4983   SYNOPSIS
4984     end_bulk_insert()
4985 
4986   RETURN VALUE
4987     >0                      Error code
4988     0                       Success
4989 
4990   Note: end_bulk_insert can be called without start_bulk_insert
4991         being called, see bug#44108.
4992 
4993 */
4994 
end_bulk_insert()4995 int ha_partition::end_bulk_insert()
4996 {
4997   int error= 0;
4998   uint i;
4999   DBUG_ENTER("ha_partition::end_bulk_insert");
5000 
5001   if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
5002     DBUG_RETURN(error);
5003 
5004   for (i= bitmap_get_first_set(&m_bulk_insert_started);
5005        i < m_tot_parts;
5006        i= bitmap_get_next_set(&m_bulk_insert_started, i))
5007   {
5008     int tmp;
5009     if ((tmp= m_file[i]->ha_end_bulk_insert()))
5010       error= tmp;
5011     sum_copy_info(m_file[i]);
5012   }
5013   bitmap_clear_all(&m_bulk_insert_started);
5014   DBUG_RETURN(error);
5015 }
5016 
5017 
5018 /****************************************************************************
5019                 MODULE full table scan
5020 ****************************************************************************/
5021 /*
5022   Initialize engine for random reads
5023 
5024   SYNOPSIS
5025     ha_partition::rnd_init()
5026     scan	0  Initialize for random reads through rnd_pos()
5027 		1  Initialize for random scan through rnd_next()
5028 
5029   RETURN VALUE
5030     >0          Error code
5031     0           Success
5032 
5033   DESCRIPTION
5034     rnd_init() is called when the server wants the storage engine to do a
5035     table scan or when the server wants to access data through rnd_pos.
5036 
5037     When scan is used we will scan one handler partition at a time.
5038     When preparing for rnd_pos we will init all handler partitions.
5039     No extra cache handling is needed when scanning is not performed.
5040 
5041     Before initialising we will call rnd_end to ensure that we clean up from
5042     any previous incarnation of a table scan.
5043     Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
5044     sql_table.cc, and sql_update.cc.
5045 */
5046 
int ha_partition::rnd_init(bool scan)
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::rnd_init");

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
    {
      DBUG_PRINT("info", ("partition set full bitmap"));
      bitmap_set_all(table->read_set);
    }
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      DBUG_PRINT("info", ("partition set part_field bitmap"));
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: %p",
                      m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part: %u", (uint) part_id));

  /* An empty (fully pruned) read set: succeed without initializing. */
  if (part_id == MY_BIT_NONE)
  {
    error= 0;
    goto err1;
  }

  /*
    We have a partition and we are scanning with rnd_next
    so we bump our cache
  */
  DBUG_PRINT("info", ("rnd_init on partition: %u", (uint) part_id));
  if (scan)
  {
    /*
      rnd_end() is needed for partitioning to reset internal data if scan
      is already in use
    */
    rnd_end();
    late_extra_cache(part_id);

    m_index_scan_type= partition_no_index_scan;
  }

  /* Initialize all partitions in the read set for the scan. */
  for (i= part_id;
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_rnd_init(scan))))
      goto err;
  }

  m_scan_value= scan;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  m_rnd_init_and_first= TRUE;
  DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value));
  DBUG_RETURN(0);

err:
  if (scan)
    late_extra_no_cache(part_id);

  /* Call rnd_end for all previously inited partitions. */
  for (;
       part_id < i;
       part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id))
  {
    m_file[part_id]->ha_rnd_end();
  }
err1:
  /* m_scan_value == 2 marks "not initialized / error"; see rnd_end(). */
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
5146 
5147 
5148 /*
5149   End of a table scan
5150 
5151   SYNOPSIS
5152     rnd_end()
5153 
5154   RETURN VALUE
5155     >0          Error code
5156     0           Success
5157 */
5158 
int ha_partition::rnd_end()
{
  DBUG_ENTER("ha_partition::rnd_end");
  /* m_scan_value: 2 = error/not initialized, 1 = table scan, 0 = rnd_pos. */
  switch (m_scan_value) {
  case 2:                                       // Error
    break;
  case 1:                                       // Table scan
    if (m_part_spec.start_part != NO_CURRENT_PART_ID)
      late_extra_no_cache(m_part_spec.start_part);
    /* fall through */
  case 0:
    /* End the scan in every partition of the read set. */
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      m_file[i]->ha_rnd_end();
    }
    break;
  }
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(0);
}
5183 
5184 
5185 /*
5186   read next row during full table scan (scan in random row order)
5187 
5188   SYNOPSIS
5189     rnd_next()
5190     buf		buffer that should be filled with data
5191 
5192   RETURN VALUE
5193     >0          Error code
5194     0           Success
5195 
5196   DESCRIPTION
5197     This is called for each row of the table scan. When you run out of records
5198     you should return HA_ERR_END_OF_FILE.
5199     The Field structure for the table is the key to getting data into buf
5200     in a manner that will allow the server to understand it.
5201 
5202     Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
5203     sql_table.cc, and sql_update.cc.
5204 */
5205 
int ha_partition::rnd_next(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE, error;
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::rnd_next");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /* upper level will increment this once again at end of call */
  decrement_statistics(&SSV::ha_read_rnd_next_count);

  if (part_id == NO_CURRENT_PART_ID)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    goto end;
  }

  DBUG_ASSERT(m_scan_value == 1);

  /* First call after rnd_init: run the pre-scan hook once. */
  if (m_rnd_init_and_first)
  {
    m_rnd_init_and_first= FALSE;
    error= handle_pre_scan(FALSE, check_parallel_search());
    if (m_pre_calling || error)
      DBUG_RETURN(error);
  }

  file= m_file[part_id];

  /* Scan the current partition; on EOF advance to the next one. */
  while (TRUE)
  {
    result= file->ha_rnd_next(buf);
    if (!result)
    {
      m_last_part= part_id;
      DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
      m_part_spec.start_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ha_rnd_next returned failure
    */
    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;         // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    /* Shift to next partition */
    part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id);
    if (part_id >= m_tot_parts)
    {
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_last_part= part_id;
    DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
    m_part_spec.start_part= part_id;
    file= m_file[part_id];
    late_extra_cache(part_id);
  }

end:
  DBUG_PRINT("exit", ("reset start_part"));
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  DBUG_RETURN(result);
}
5278 
5279 
5280 /*
5281   Save position of current row
5282 
5283   SYNOPSIS
5284     position()
5285     record             Current record in MySQL Row Format
5286 
5287   RETURN VALUE
5288     NONE
5289 
5290   DESCRIPTION
5291     position() is called after each call to rnd_next() if the data needs
5292     to be ordered. You can do something like the following to store
5293     the position:
5294     ha_store_ptr(ref, ref_length, current_position);
5295 
5296     The server uses ref to store data. ref_length in the above case is
5297     the size needed to store current_position. ref is just a byte array
5298     that the server will maintain. If you are using offsets to mark rows, then
5299     current_position should be the offset. If it is a primary key like in
5300     BDB, then it needs to be a primary key.
5301 
5302     Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
5303 */
5304 
void ha_partition::position(const uchar *record)
{
  handler *file= m_file[m_last_part];
  size_t pad_length;
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_ENTER("ha_partition::position");

  /*
    ref layout: [2-byte partition id][underlying handler's ref][zero pad].
    The pad is needed because m_ref_length is sized for the largest
    underlying ref, and comparisons of refs must not read garbage.
  */
  file->position(record);
  int2store(ref, m_last_part);
  memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, file->ref_length);
  pad_length= m_ref_length - PARTITION_BYTES_IN_POS - file->ref_length;
  if (pad_length)
    memset((ref + PARTITION_BYTES_IN_POS + file->ref_length), 0, pad_length);

  DBUG_VOID_RETURN;
}
5321 
5322 
5323 /*
5324   Read row using position
5325 
5326   SYNOPSIS
5327     rnd_pos()
5328     out:buf                     Row read in MySQL Row Format
5329     position                    Position of read row
5330 
5331   RETURN VALUE
5332     >0                          Error code
5333     0                           Success
5334 
5335   DESCRIPTION
5336     This is like rnd_next, but you are given a position to use
5337     to determine the row. The position will be of the type that you stored in
5338     ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
5339     or position you saved when position() was called.
5340     Called from filesort.cc records.cc sql_insert.cc sql_select.cc
5341     sql_update.cc.
5342 */
5343 
rnd_pos(uchar * buf,uchar * pos)5344 int ha_partition::rnd_pos(uchar * buf, uchar *pos)
5345 {
5346   uint part_id;
5347   handler *file;
5348   DBUG_ENTER("ha_partition::rnd_pos");
5349   decrement_statistics(&SSV::ha_read_rnd_count);
5350 
5351   part_id= uint2korr((const uchar *) pos);
5352   DBUG_ASSERT(part_id < m_tot_parts);
5353   file= m_file[part_id];
5354   DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
5355   m_last_part= part_id;
5356   DBUG_RETURN(file->ha_rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
5357 }
5358 
5359 
5360 /*
5361   Read row using position using given record to find
5362 
5363   SYNOPSIS
5364     rnd_pos_by_record()
5365     record             Current record in MySQL Row Format
5366 
5367   RETURN VALUE
5368     >0                 Error code
5369     0                  Success
5370 
5371   DESCRIPTION
5372     this works as position()+rnd_pos() functions, but does some extra work,
5373     calculating m_last_part - the partition to where the 'record'
5374     should go.
5375 
5376     called from replication (log_event.cc)
5377 */
5378 
rnd_pos_by_record(uchar * record)5379 int ha_partition::rnd_pos_by_record(uchar *record)
5380 {
5381   DBUG_ENTER("ha_partition::rnd_pos_by_record");
5382 
5383   if (unlikely(get_part_for_buf(record, m_rec0, m_part_info, &m_last_part)))
5384     DBUG_RETURN(1);
5385 
5386   int err= m_file[m_last_part]->rnd_pos_by_record(record);
5387   DBUG_RETURN(err);
5388 }
5389 
5390 
5391 /****************************************************************************
5392                 MODULE index scan
5393 ****************************************************************************/
5394 /*
5395   Positions an index cursor to the index specified in the handle. Fetches the
5396   row if available. If the key value is null, begin at the first key of the
5397   index.
5398 
5399   There are loads of optimisations possible here for the partition handler.
5400   The same optimisations can also be checked for full table scan although
5401   only through conditions and not from index ranges.
5402   Phase one optimisations:
5403     Check if the fields of the partition function are bound. If so only use
5404     the single partition it becomes bound to.
5405   Phase two optimisations:
5406     If it can be deducted through range or list partitioning that only a
5407     subset of the partitions are used, then only use those partitions.
5408 */
5409 
5410 
5411 /**
5412   Setup the ordered record buffer and the priority queue.
5413 */
5414 
/**
  Setup the ordered record buffer and the priority queue.

  @return true on allocation/initialization failure, false on success.
*/
bool ha_partition::init_record_priority_queue()
{
  DBUG_ENTER("ha_partition::init_record_priority_queue");
  DBUG_ASSERT(!m_ordered_rec_buffer);
  /*
    Initialize the ordered record buffer.
  */
  size_t alloc_len;
  uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);

  if (used_parts == 0) /* Do nothing since no records expected. */
    DBUG_RETURN(false);

  /* Allocate record buffer for each used partition. */
  m_priority_queue_rec_len= m_rec_length + ORDERED_REC_OFFSET;
  if (!m_using_extended_keys)
    /* Without extended keys the rowid is appended for tie-breaking. */
    m_priority_queue_rec_len+= get_open_file_sample()->ref_length;
  alloc_len= used_parts * m_priority_queue_rec_len;
  /* Allocate a key for temporary use when setting up the scan. */
  alloc_len+= table_share->max_key_length;
  /* One Ordered_blob_storage per blob field per used partition, plus the
     per-partition arrays of pointers to them — all in one allocation. */
  Ordered_blob_storage **blob_storage;
  Ordered_blob_storage *objs;
  const size_t n_all= used_parts * table->s->blob_fields;

  if (!my_multi_malloc(key_memory_partition_sort_buffer, MYF(MY_WME),
                       &m_ordered_rec_buffer, alloc_len,
                       &blob_storage, n_all * sizeof *blob_storage,
                       &objs, n_all * sizeof *objs, NULL))
    DBUG_RETURN(true);

  /*
    We set-up one record per partition and each record has 2 bytes in
    front where the partition id is written. This is used by ordered
    index_read.
    We also set-up a reference to the first record for temporary use in
    setting up the scan.
  */
  char *ptr= (char*) m_ordered_rec_buffer;
  uint i;
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    DBUG_PRINT("info", ("init rec-buf for part %u", i));
    if (table->s->blob_fields)
    {
      /* Placement-new the blob storage objects and store the pointer
         array at the head of this partition's slot. */
      for (uint j= 0; j < table->s->blob_fields; ++j, ++objs)
        blob_storage[j]= new (objs) Ordered_blob_storage;
      *((Ordered_blob_storage ***) ptr)= blob_storage;
      blob_storage+= table->s->blob_fields;
    }
    /* Partition id is written right after the blob-storage pointer slot. */
    int2store(ptr + sizeof(String **), i);
    ptr+= m_priority_queue_rec_len;
  }
  /* The trailing max_key_length bytes serve as scratch key space. */
  m_start_key.key= (const uchar*)ptr;

  /* Initialize priority queue, initialized to reading forward. */
  int (*cmp_func)(void *, uchar *, uchar *);
  void *cmp_arg= (void*) this;
  if (!m_using_extended_keys && !(table_flags() & HA_SLOW_CMP_REF))
    cmp_func= cmp_key_rowid_part_id;
  else
    cmp_func= cmp_key_part_id;
  DBUG_PRINT("info", ("partition queue_init(1) used_parts: %u", used_parts));
  if (init_queue(&m_queue, used_parts, ORDERED_PART_NUM_OFFSET,
                 0, cmp_func, cmp_arg, 0, 0))
  {
    my_free(m_ordered_rec_buffer);
    m_ordered_rec_buffer= NULL;
    DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}
5488 
5489 
5490 /**
5491   Destroy the ordered record buffer and the priority queue.
5492 */
5493 
/**
  Destroy the ordered record buffer and the priority queue.

  Frees any blob buffers held by the per-partition Ordered_blob_storage
  objects, then the queue and the single allocation backing the record
  buffer (which also contains the blob storage — see
  init_record_priority_queue()).  Safe to call when nothing is allocated.
*/
void ha_partition::destroy_record_priority_queue()
{
  DBUG_ENTER("ha_partition::destroy_record_priority_queue");
  if (m_ordered_rec_buffer)
  {
    if (table->s->blob_fields)
    {
      /* Walk the same per-partition slots laid out at init time; each
         slot starts with the pointer array to its blob storage. */
      char *ptr= (char *) m_ordered_rec_buffer;
      for (uint i= bitmap_get_first_set(&m_part_info->read_partitions);
            i < m_tot_parts;
            i= bitmap_get_next_set(&m_part_info->read_partitions, i))
      {
        Ordered_blob_storage **blob_storage= *((Ordered_blob_storage ***) ptr);
        for (uint b= 0; b < table->s->blob_fields; ++b)
          blob_storage[b]->blob.free();
        ptr+= m_priority_queue_rec_len;
      }
    }

    delete_queue(&m_queue);
    /* Single my_multi_malloc allocation: one free releases everything. */
    my_free(m_ordered_rec_buffer);
    m_ordered_rec_buffer= NULL;
  }
  DBUG_VOID_RETURN;
}
5519 
5520 
5521 /*
5522   Initialize handler before start of index scan
5523 
5524   SYNOPSIS
5525     index_init()
5526     inx                Index number
5527     sorted             Is rows to be returned in sorted order
5528 
5529   RETURN VALUE
5530     >0                 Error code
5531     0                  Success
5532 
5533   DESCRIPTION
5534     index_init is always called before starting index scans (except when
5535     starting through index_read_idx and using read_range variants).
5536 */
5537 
int ha_partition::index_init(uint inx, bool sorted)
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::index_init");
  DBUG_PRINT("enter", ("partition this: %p  inx: %u  sorted: %u", this, inx, sorted));

  active_index= inx;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_start_key.length= 0;
  m_ordered= sorted;
  m_ordered_scan_ongoing= FALSE;
  m_curr_key_info[0]= table->key_info+inx;
  if (pk_is_clustering_key(table->s->primary_key))
  {
    /*
      if PK is clustered, then the key cmp must use the pk to
      differentiate between equal key in given index.
    */
    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
    m_curr_key_info[1]= table->key_info+table->s->primary_key;
    m_curr_key_info[2]= NULL;
    m_using_extended_keys= TRUE;
  }
  else
  {
    m_curr_key_info[1]= NULL;
    m_using_extended_keys= FALSE;
  }

  if (init_record_priority_queue())
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);

  /*
    Some handlers only read fields as specified by the bitmap for the
    read set. For partitioned handlers we always require that the
    fields of the partition functions are read such that we can
    calculate the partition id to place updated and deleted records.
    But this is required for operations that may need to change data only.
  */
  if (get_lock_type() == F_WRLCK)
  {
    DBUG_PRINT("info", ("partition set part_field bitmap"));
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }
  if (sorted)
  {
    /*
      An ordered scan is requested. We must make sure all fields of the
      used index are in the read set, as partitioning requires them for
      sorting (see ha_partition::handle_ordered_index_scan).

      The SQL layer may request an ordered index scan without having index
      fields in the read set when
       - it needs to do an ordered scan over an index prefix.
       - it evaluates ORDER BY with SELECT COUNT(*) FROM t1.

      TODO: handle COUNT(*) queries via unordered scan.
    */
    KEY **key_info= m_curr_key_info;
    do
    {
      for (i= 0; i < (*key_info)->user_defined_key_parts; i++)
        (*key_info)->key_part[i].field->register_field_in_read_map();
    } while (*(++key_info));
  }
  /* Initialize the index in every partition marked for reading.
     Note: i is reused below as the exclusive bound for error unwind. */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_index_init(inx, sorted))))
      goto err;

    /* Debug-only injection point; i++ makes the unwind below include
       the partition initialized in this iteration. */
    DBUG_EXECUTE_IF("ha_partition_fail_index_init", {
      i++;
      error= HA_ERR_NO_PARTITION_FOUND;
      goto err;
    });
  }
err:
  if (unlikely(error))
  {
    /* End the previously initialized indexes. */
    uint j;
    for (j= bitmap_get_first_set(&m_part_info->read_partitions);
         j < i;
         j= bitmap_get_next_set(&m_part_info->read_partitions, j))
    {
      (void) m_file[j]->ha_index_end();
    }
    destroy_record_priority_queue();
  }
  DBUG_RETURN(error);
}
5632 
5633 
5634 /*
5635   End of index scan
5636 
5637   SYNOPSIS
5638     index_end()
5639 
5640   RETURN VALUE
5641     >0                 Error code
5642     0                  Success
5643 
5644   DESCRIPTION
5645     index_end is called at the end of an index scan to clean up any
5646     things needed to clean up.
5647 */
5648 
index_end()5649 int ha_partition::index_end()
5650 {
5651   int error= 0;
5652   handler **file;
5653   DBUG_ENTER("ha_partition::index_end");
5654 
5655   active_index= MAX_KEY;
5656   m_part_spec.start_part= NO_CURRENT_PART_ID;
5657   file= m_file;
5658   do
5659   {
5660     if ((*file)->inited == INDEX)
5661     {
5662       int tmp;
5663       if ((tmp= (*file)->ha_index_end()))
5664         error= tmp;
5665     }
5666     else if ((*file)->inited == RND)
5667     {
5668       // Possible due to MRR
5669       int tmp;
5670       if ((tmp= (*file)->ha_rnd_end()))
5671         error= tmp;
5672     }
5673   } while (*(++file));
5674   destroy_record_priority_queue();
5675   DBUG_RETURN(error);
5676 }
5677 
5678 
5679 /*
5680   Read one record in an index scan and start an index scan
5681 
5682   SYNOPSIS
5683     index_read_map()
5684     buf                    Read row in MySQL Row Format
5685     key                    Key parts in consecutive order
5686     keypart_map            Which part of key is used
5687     find_flag              What type of key condition is used
5688 
5689   RETURN VALUE
5690     >0                 Error code
5691     0                  Success
5692 
5693   DESCRIPTION
5694     index_read_map starts a new index scan using a start key. The MySQL Server
5695     will check the end key on its own. Thus to function properly the
5696     partitioned handler need to ensure that it delivers records in the sort
5697     order of the MySQL Server.
5698     index_read_map can be restarted without calling index_end on the previous
5699     index scan and without calling index_init. In this case the index_read_map
5700     is on the same index as the previous index_scan. This is particularly
5701     used in conjuntion with multi read ranges.
5702 */
5703 
index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)5704 int ha_partition::index_read_map(uchar *buf, const uchar *key,
5705                                  key_part_map keypart_map,
5706                                  enum ha_rkey_function find_flag)
5707 {
5708   DBUG_ENTER("ha_partition::index_read_map");
5709   decrement_statistics(&SSV::ha_read_key_count);
5710   end_range= 0;
5711   m_index_scan_type= partition_index_read;
5712   m_start_key.key= key;
5713   m_start_key.keypart_map= keypart_map;
5714   m_start_key.flag= find_flag;
5715   DBUG_RETURN(common_index_read(buf, TRUE));
5716 }
5717 
5718 
5719 /* Compare two part_no partition numbers */
cmp_part_ids(uchar * ref1,uchar * ref2)5720 static int cmp_part_ids(uchar *ref1, uchar *ref2)
5721 {
5722   uint32 diff2= uint2korr(ref2);
5723   uint32 diff1= uint2korr(ref1);
5724   if (diff2 > diff1)
5725     return -1;
5726   if (diff2 < diff1)
5727     return 1;
5728   return 0;
5729 }
5730 
5731 
5732 /*
5733   @brief
5734     Provide ordering by (key_value, part_no).
5735 */
5736 
cmp_key_part_id(void * ptr,uchar * ref1,uchar * ref2)5737 extern "C" int cmp_key_part_id(void *ptr, uchar *ref1, uchar *ref2)
5738 {
5739   ha_partition *file= (ha_partition*)ptr;
5740   if (int res= key_rec_cmp(file->m_curr_key_info,
5741                            ref1 + PARTITION_BYTES_IN_POS,
5742                            ref2 + PARTITION_BYTES_IN_POS))
5743     return res;
5744   return cmp_part_ids(ref1, ref2);
5745 }
5746 
5747 /*
5748   @brief
5749     Provide ordering by (key_value, underying_table_rowid, part_no).
5750 */
cmp_key_rowid_part_id(void * ptr,uchar * ref1,uchar * ref2)5751 extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2)
5752 {
5753   ha_partition *file= (ha_partition*)ptr;
5754   int res;
5755 
5756   if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS,
5757                         ref2 + PARTITION_BYTES_IN_POS)))
5758   {
5759     return res;
5760   }
5761   if ((res= file->get_open_file_sample()->cmp_ref(ref1 +
5762           PARTITION_BYTES_IN_POS + file->m_rec_length,
5763           ref2 + PARTITION_BYTES_IN_POS + file->m_rec_length)))
5764   {
5765     return res;
5766   }
5767   return cmp_part_ids(ref1, ref2);
5768 }
5769 
5770 
5771 /**
5772   Common routine for a number of index_read variants
5773 
5774   @param buf             Buffer where the record should be returned.
5775   @param have_start_key  TRUE <=> the left endpoint is available, i.e.
5776                          we're in index_read call or in read_range_first
5777                          call and the range has left endpoint.
5778                          FALSE <=> there is no left endpoint (we're in
5779                          read_range_first() call and the range has no left
5780                          endpoint).
5781 
5782   @return Operation status
5783     @retval 0      OK
5784     @retval HA_ERR_END_OF_FILE   Whole index scanned, without finding the record.
5785     @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
5786     @retval other  error code.
5787 
5788   @details
5789     Start scanning the range (when invoked from read_range_first()) or doing
5790     an index lookup (when invoked from index_read_XXX):
5791      - If possible, perform partition selection
5792      - Find the set of partitions we're going to use
5793      - Depending on whether we need ordering:
5794         NO:  Get the first record from first used partition (see
5795              handle_unordered_scan_next_partition)
5796         YES: Fill the priority queue and get the record that is the first in
5797              the ordering
5798 */
5799 
int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
  int error;
  uint UNINIT_VAR(key_len); /* used if have_start_key==TRUE */
  bool reverse_order= FALSE;
  DBUG_ENTER("ha_partition::common_index_read");

  DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u",
                      m_ordered, m_ordered_scan_ongoing));

  if (have_start_key)
  {
    /* Compute the used key length from the keypart map. */
    m_start_key.length= key_len= calculate_key_len(table, active_index,
                                                   m_start_key.key,
                                                   m_start_key.keypart_map);
    DBUG_PRINT("info", ("have_start_key map %lu find_flag %u len %u",
                        m_start_key.keypart_map, m_start_key.flag, key_len));
    DBUG_ASSERT(key_len);
  }
  /* Prune/prepare the partition set; may also decide m_ordered_scan_ongoing. */
  if (unlikely((error= partition_scan_set_up(buf, have_start_key))))
  {
    DBUG_RETURN(error);
  }

  /* These find flags search backwards, which forces an ordered scan. */
  if (have_start_key &&
      (m_start_key.flag == HA_READ_PREFIX_LAST ||
       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
       m_start_key.flag == HA_READ_BEFORE_KEY))
  {
    reverse_order= TRUE;
    m_ordered_scan_ongoing= TRUE;
  }
  DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
                      m_ordered, m_ordered_scan_ongoing, have_start_key));
  if (!m_ordered_scan_ongoing)
   {
    /*
      We use unordered index scan when read_range is used and flag
      is set to not use ordered.
      We also use an unordered index scan when the number of partitions to
      scan is only one.
      The unordered index scan will use the partition set created.
    */
    DBUG_PRINT("info", ("doing unordered scan"));
    error= handle_pre_scan(FALSE, FALSE);
    if (likely(!error))
      error= handle_unordered_scan_next_partition(buf);
  }
  else
  {
    /*
      In all other cases we will use the ordered index scan. This will use
      the partition set created by the get_partition_set method.
    */
    error= handle_ordered_index_scan(buf, reverse_order);
  }
  DBUG_RETURN(error);
}
5858 
5859 
5860 /*
5861   Start an index scan from leftmost record and return first record
5862 
5863   SYNOPSIS
5864     index_first()
5865     buf                 Read row in MySQL Row Format
5866 
5867   RETURN VALUE
5868     >0                  Error code
5869     0                   Success
5870 
5871   DESCRIPTION
5872     index_first() asks for the first key in the index.
5873     This is similar to index_read except that there is no start key since
5874     the scan starts from the leftmost entry and proceeds forward with
5875     index_next.
5876 
5877     Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5878     and sql_select.cc.
5879 */
5880 
index_first(uchar * buf)5881 int ha_partition::index_first(uchar * buf)
5882 {
5883   DBUG_ENTER("ha_partition::index_first");
5884   decrement_statistics(&SSV::ha_read_first_count);
5885 
5886   end_range= 0;
5887   m_index_scan_type= partition_index_first;
5888   DBUG_RETURN(common_first_last(buf));
5889 }
5890 
5891 
5892 /*
5893   Start an index scan from rightmost record and return first record
5894 
5895   SYNOPSIS
5896     index_last()
5897     buf                 Read row in MySQL Row Format
5898 
5899   RETURN VALUE
5900     >0                  Error code
5901     0                   Success
5902 
5903   DESCRIPTION
5904     index_last() asks for the last key in the index.
5905     This is similar to index_read except that there is no start key since
5906     the scan starts from the rightmost entry and proceeds forward with
5907     index_prev.
5908 
5909     Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5910     and sql_select.cc.
5911 */
5912 
index_last(uchar * buf)5913 int ha_partition::index_last(uchar * buf)
5914 {
5915   DBUG_ENTER("ha_partition::index_last");
5916   decrement_statistics(&SSV::ha_read_last_count);
5917 
5918   m_index_scan_type= partition_index_last;
5919   DBUG_RETURN(common_first_last(buf));
5920 }
5921 
5922 /*
5923   Common routine for index_first/index_last
5924 
5925   SYNOPSIS
5926     ha_partition::common_first_last()
5927 
5928   see index_first for rest
5929 */
5930 
common_first_last(uchar * buf)5931 int ha_partition::common_first_last(uchar *buf)
5932 {
5933   int error;
5934 
5935   if (unlikely((error= partition_scan_set_up(buf, FALSE))))
5936     return error;
5937   if (!m_ordered_scan_ongoing &&
5938       m_index_scan_type != partition_index_last)
5939   {
5940     if (unlikely((error= handle_pre_scan(FALSE, check_parallel_search()))))
5941       return error;
5942    return handle_unordered_scan_next_partition(buf);
5943   }
5944   return handle_ordered_index_scan(buf, FALSE);
5945 }
5946 
5947 
5948 /*
5949   Optimization of the default implementation to take advantage of dynamic
5950   partition pruning.
5951 */
/*
  Optimization of the default implementation to take advantage of dynamic
  partition pruning.
*/
int ha_partition::index_read_idx_map(uchar *buf, uint index,
                                     const uchar *key,
                                     key_part_map keypart_map,
                                     enum ha_rkey_function find_flag)
{
  int error= HA_ERR_KEY_NOT_FOUND;
  DBUG_ENTER("ha_partition::index_read_idx_map");
  decrement_statistics(&SSV::ha_read_key_count);

  if (find_flag == HA_READ_KEY_EXACT)
  {
    uint part;
    m_start_key.key= key;
    m_start_key.keypart_map= keypart_map;
    m_start_key.flag= find_flag;
    m_start_key.length= calculate_key_len(table, index, m_start_key.key,
                                          m_start_key.keypart_map);

    /* Prune to the partitions that can contain this exact key. */
    get_partition_set(table, buf, index, &m_start_key, &m_part_spec);

    /* The start part is must be marked as used. */
    DBUG_ASSERT(m_part_spec.start_part > m_part_spec.end_part ||
                bitmap_is_set(&(m_part_info->read_partitions),
                              m_part_spec.start_part));

    /* Probe each candidate partition; stop at the first hit or at a
       real error (not-found/EOF just means "try the next partition"). */
    for (part= m_part_spec.start_part;
         part <= m_part_spec.end_part;
         part= bitmap_get_next_set(&m_part_info->read_partitions, part))
    {
      error= m_file[part]->ha_index_read_idx_map(buf, index, key,
                                                 keypart_map, find_flag);
      if (likely(error != HA_ERR_KEY_NOT_FOUND &&
                 error != HA_ERR_END_OF_FILE))
        break;
    }
    /* Loop was left by break (found row or real error), not exhaustion. */
    if (part <= m_part_spec.end_part)
      m_last_part= part;
  }
  else
  {
    /*
      If not only used with READ_EXACT, we should investigate if possible
      to optimize for other find_flag's as well.
    */
    DBUG_ASSERT(0);
    /* fall back on the default implementation */
    error= handler::index_read_idx_map(buf, index, key, keypart_map, find_flag);
  }
  DBUG_RETURN(error);
}
6002 
6003 
6004 /*
6005   Read next record in a forward index scan
6006 
6007   SYNOPSIS
6008     index_next()
6009     buf                   Read row in MySQL Row Format
6010 
6011   RETURN VALUE
6012     >0                    Error code
6013     0                     Success
6014 
6015   DESCRIPTION
6016     Used to read forward through the index.
6017 */
6018 
index_next(uchar * buf)6019 int ha_partition::index_next(uchar * buf)
6020 {
6021   DBUG_ENTER("ha_partition::index_next");
6022   decrement_statistics(&SSV::ha_read_next_count);
6023 
6024   /*
6025     TODO(low priority):
6026     If we want partition to work with the HANDLER commands, we
6027     must be able to do index_last() -> index_prev() -> index_next()
6028     and if direction changes, we must step back those partitions in
6029     the record queue so we don't return a value from the wrong direction.
6030   */
6031   if (m_index_scan_type == partition_index_last)
6032     DBUG_RETURN(HA_ERR_WRONG_COMMAND);
6033   if (!m_ordered_scan_ongoing)
6034   {
6035     DBUG_RETURN(handle_unordered_next(buf, FALSE));
6036   }
6037   DBUG_RETURN(handle_ordered_next(buf, FALSE));
6038 }
6039 
6040 
6041 /*
6042   Read next record special
6043 
6044   SYNOPSIS
6045     index_next_same()
6046     buf                   Read row in MySQL Row Format
6047     key                   Key
6048     keylen                Length of key
6049 
6050   RETURN VALUE
6051     >0                    Error code
6052     0                     Success
6053 
6054   DESCRIPTION
6055     This routine is used to read the next but only if the key is the same
6056     as supplied in the call.
6057 */
6058 
index_next_same(uchar * buf,const uchar * key,uint keylen)6059 int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
6060 {
6061   DBUG_ENTER("ha_partition::index_next_same");
6062   decrement_statistics(&SSV::ha_read_next_count);
6063 
6064   DBUG_ASSERT(keylen == m_start_key.length);
6065   if (m_index_scan_type == partition_index_last)
6066     DBUG_RETURN(HA_ERR_WRONG_COMMAND);
6067   if (!m_ordered_scan_ongoing)
6068     DBUG_RETURN(handle_unordered_next(buf, TRUE));
6069   DBUG_RETURN(handle_ordered_next(buf, TRUE));
6070 }
6071 
6072 
index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)6073 int ha_partition::index_read_last_map(uchar *buf,
6074                                           const uchar *key,
6075                                           key_part_map keypart_map)
6076 {
6077   DBUG_ENTER("ha_partition::index_read_last_map");
6078 
6079   m_ordered= true;                              // Safety measure
6080   end_range= NULL;
6081   m_index_scan_type= partition_index_read_last;
6082   m_start_key.key= key;
6083   m_start_key.keypart_map= keypart_map;
6084   m_start_key.flag= HA_READ_PREFIX_LAST;
6085   DBUG_RETURN(common_index_read(buf, true));
6086 }
6087 
6088 
6089 /*
6090   Read next record when performing index scan backwards
6091 
6092   SYNOPSIS
6093     index_prev()
6094     buf                   Read row in MySQL Row Format
6095 
6096   RETURN VALUE
6097     >0                    Error code
6098     0                     Success
6099 
6100   DESCRIPTION
6101     Used to read backwards through the index.
6102 */
6103 
index_prev(uchar * buf)6104 int ha_partition::index_prev(uchar * buf)
6105 {
6106   DBUG_ENTER("ha_partition::index_prev");
6107   decrement_statistics(&SSV::ha_read_prev_count);
6108 
6109   /* TODO: read comment in index_next */
6110   if (m_index_scan_type == partition_index_first)
6111     DBUG_RETURN(HA_ERR_WRONG_COMMAND);
6112   DBUG_RETURN(handle_ordered_prev(buf));
6113 }
6114 
6115 
6116 /*
6117   Start a read of one range with start and end key
6118 
6119   SYNOPSIS
6120     read_range_first()
6121     start_key           Specification of start key
6122     end_key             Specification of end key
6123     eq_range_arg        Is it equal range
6124     sorted              Should records be returned in sorted order
6125 
6126   RETURN VALUE
6127     >0                    Error code
6128     0                     Success
6129 
6130   DESCRIPTION
6131     We reimplement read_range_first since we don't want the compare_key
6132     check at the end. This is already performed in the partition handler.
6133     read_range_next is very much different due to that we need to scan
6134     all underlying handlers.
6135 */
6136 
read_range_first(const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)6137 int ha_partition::read_range_first(const key_range *start_key,
6138 				   const key_range *end_key,
6139 				   bool eq_range_arg, bool sorted)
6140 {
6141   int error;
6142   DBUG_ENTER("ha_partition::read_range_first");
6143 
6144   m_ordered= sorted;
6145   eq_range= eq_range_arg;
6146   set_end_range(end_key);
6147 
6148   range_key_part= m_curr_key_info[0]->key_part;
6149   if (start_key)
6150     m_start_key= *start_key;
6151   else
6152     m_start_key.key= NULL;
6153 
6154   m_index_scan_type= partition_read_range;
6155   error= common_index_read(m_rec0, MY_TEST(start_key));
6156   DBUG_RETURN(error);
6157 }
6158 
6159 
6160 /*
6161   Read next record in read of a range with start and end key
6162 
6163   SYNOPSIS
6164     read_range_next()
6165 
6166   RETURN VALUE
6167     >0                    Error code
6168     0                     Success
6169 */
6170 
read_range_next()6171 int ha_partition::read_range_next()
6172 {
6173   DBUG_ENTER("ha_partition::read_range_next");
6174 
6175   if (m_ordered_scan_ongoing)
6176   {
6177     DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
6178   }
6179   DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
6180 }
6181 
6182 /**
6183    Create a copy of all keys used by multi_range_read()
6184 
6185    @retval 0 ok
6186    @retval HA_ERR_END_OF_FILE no keys in range
6187    @retval other value: error
6188 
6189    TODO to save memory:
6190    - If (mrr_mode & HA_MRR_MATERIALIZED_KEYS) is set then the keys data is
6191      stable and we don't have to copy the keys, only store a pointer to the
6192      key.
6193    - When allocating key data, store things in a MEM_ROOT buffer instead of
6194      a malloc() per key. This will simplify and speed up the current code
6195      and use less memory.
6196 */
6197 
int ha_partition::multi_range_key_create_key(RANGE_SEQ_IF *seq,
                                             range_seq_t seq_it)
{
  uint i, length;
  key_range *start_key, *end_key;
  KEY_MULTI_RANGE *range;
  DBUG_ENTER("ha_partition::multi_range_key_create_key");

  /* Reset per-call MRR state: no partitions used, no ranges counted yet. */
  bitmap_clear_all(&m_mrr_used_partitions);
  m_mrr_range_length= 0;
  bzero(m_part_mrr_range_length,
        sizeof(*m_part_mrr_range_length) * m_tot_parts);
  /*
    The global range list (and the per-partition lists below) is kept and
    reused across calls; the head node is allocated only the first time.
  */
  if (!m_mrr_range_first)
  {
    if (!(m_mrr_range_first= (PARTITION_KEY_MULTI_RANGE *)
          my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
                          &m_mrr_range_current, sizeof(PARTITION_KEY_MULTI_RANGE),
                          NullS)))
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);

    m_mrr_range_first->id= 1;
    m_mrr_range_first->key[0]= NULL;
    m_mrr_range_first->key[1]= NULL;
    m_mrr_range_first->next= NULL;
  }
  else
    m_mrr_range_current= m_mrr_range_first;     /* Rewind to list head */

  /* Lazily allocate (or rewind) each partition's private range list. */
  for (i= 0; i < m_tot_parts; i++)
  {
    if (!m_part_mrr_range_first[i])
    {
      if (!(m_part_mrr_range_first[i]= (PARTITION_PART_KEY_MULTI_RANGE *)
            my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_ZEROFILL),
                            &m_part_mrr_range_current[i], sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                            NullS)))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    else
    {
      m_part_mrr_range_current[i]= m_part_mrr_range_first[i];
      m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
    }
  }
  m_mrr_range_current->key_multi_range.start_key.key= NULL;
  m_mrr_range_current->key_multi_range.end_key.key= NULL;

  /* Drain the caller's sequence, taking a private copy of every range. */
  while (!seq->next(seq_it, &m_mrr_range_current->key_multi_range))
  {
    m_mrr_range_length++;
    range= &m_mrr_range_current->key_multi_range;

    /* Copy start key */
    start_key= &range->start_key;
    DBUG_PRINT("info",("partition range->range_flag: %u", range->range_flag));
    DBUG_PRINT("info",("partition start_key->key: %p", start_key->key));
    DBUG_PRINT("info",("partition start_key->length: %u", start_key->length));
    DBUG_PRINT("info",("partition start_key->keypart_map: %lu",
                       start_key->keypart_map));
    DBUG_PRINT("info",("partition start_key->flag: %u", start_key->flag));

    if (start_key->key)
    {
      length= start_key->length;
      /* Grow the cached key buffer only when the new key doesn't fit. */
      if (!m_mrr_range_current->key[0] ||
          m_mrr_range_current->length[0] < length)
      {
        if (m_mrr_range_current->key[0])
          my_free(m_mrr_range_current->key[0]);
        if (!(m_mrr_range_current->key[0]=
              (uchar *) my_malloc(PSI_INSTRUMENT_ME, length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[0]= length;
      }
      memcpy(m_mrr_range_current->key[0], start_key->key, length);
      start_key->key= m_mrr_range_current->key[0];
    }

    /* Copy end key */
    end_key= &range->end_key;
    DBUG_PRINT("info",("partition end_key->key: %p", end_key->key));
    DBUG_PRINT("info",("partition end_key->length: %u", end_key->length));
    DBUG_PRINT("info",("partition end_key->keypart_map: %lu",
                       end_key->keypart_map));
    DBUG_PRINT("info",("partition end_key->flag: %u", end_key->flag));
    if (end_key->key)
    {
      length= end_key->length;
      /* Same lazy buffer-growth scheme as for the start key above. */
      if (!m_mrr_range_current->key[1] ||
          m_mrr_range_current->length[1] < length)
      {
        if (m_mrr_range_current->key[1])
          my_free(m_mrr_range_current->key[1]);
        if (!(m_mrr_range_current->key[1]=
              (uchar *) my_malloc(PSI_INSTRUMENT_ME, length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[1]= length;
      }
      memcpy(m_mrr_range_current->key[1], end_key->key, length);
      end_key->key= m_mrr_range_current->key[1];
    }

    /*
      Stash the caller's range id in 'ptr' and make the copied range point
      back at this element, so multi_range_read_next() can recover the
      caller's id from whichever partition produced a row.
    */
    m_mrr_range_current->ptr= m_mrr_range_current->key_multi_range.ptr;
    m_mrr_range_current->key_multi_range.ptr= m_mrr_range_current;

    /* An exact-match start key allows pruning to a subset of partitions. */
    if (start_key->key && (start_key->flag & HA_READ_KEY_EXACT))
      get_partition_set(table, table->record[0], active_index,
                        start_key, &m_part_spec);
    else
    {
      m_part_spec.start_part= 0;
      m_part_spec.end_part= m_tot_parts - 1;
    }

    /* Copy key to those partitions that needs it */
    for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
    {
      if (bitmap_is_set(&(m_part_info->read_partitions), i))
      {
        bitmap_set_bit(&m_mrr_used_partitions, i);
        m_part_mrr_range_length[i]++;
        m_part_mrr_range_current[i]->partition_key_multi_range=
          m_mrr_range_current;

        /* Extend the partition's reusable list when its tail is reached. */
        if (!m_part_mrr_range_current[i]->next)
        {
          PARTITION_PART_KEY_MULTI_RANGE *tmp_part_mrr_range;
          if (!(tmp_part_mrr_range= (PARTITION_PART_KEY_MULTI_RANGE *)
                my_malloc(PSI_INSTRUMENT_ME, sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                          MYF(MY_WME | MY_ZEROFILL))))
            DBUG_RETURN(HA_ERR_OUT_OF_MEM);

          m_part_mrr_range_current[i]->next= tmp_part_mrr_range;
          m_part_mrr_range_current[i]= tmp_part_mrr_range;
        }
        else
        {
          m_part_mrr_range_current[i]= m_part_mrr_range_current[i]->next;
          m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
        }
      }
    }

    if (!m_mrr_range_current->next)
    {
      /* Add end of range sentinel */
      PARTITION_KEY_MULTI_RANGE *tmp_mrr_range;
      if (!(tmp_mrr_range= (PARTITION_KEY_MULTI_RANGE *)
            my_malloc(PSI_INSTRUMENT_ME, sizeof(PARTITION_KEY_MULTI_RANGE), MYF(MY_WME))))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);

      tmp_mrr_range->id= m_mrr_range_current->id + 1;
      tmp_mrr_range->key[0]= NULL;
      tmp_mrr_range->key[1]= NULL;
      tmp_mrr_range->next= NULL;
      m_mrr_range_current->next= tmp_mrr_range;
    }
    m_mrr_range_current= m_mrr_range_current->next;
  }

  if (!m_mrr_range_length)
  {
    DBUG_PRINT("Warning",("No keys to use for mrr"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  /* set start and end part */
  m_part_spec.start_part= bitmap_get_first_set(&m_mrr_used_partitions);

  /* Highest used partition becomes end_part. */
  for (i= m_tot_parts; i-- > 0;)
  {
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      m_part_spec.end_part= i;
      break;
    }
  }
  /* Prepare the per-partition holders consumed by m_part_seq_if callbacks. */
  for (i= 0; i < m_tot_parts; i++)
  {
    m_partition_part_key_multi_range_hld[i].partition= this;
    m_partition_part_key_multi_range_hld[i].part_id= i;
    m_partition_part_key_multi_range_hld[i].partition_part_key_multi_range=
      m_part_mrr_range_first[i];
  }
  DBUG_PRINT("return",("OK"));
  DBUG_RETURN(0);
}
6385 
6386 
partition_multi_range_key_get_key_info(void * init_params,uint * length,key_part_map * map)6387 static void partition_multi_range_key_get_key_info(void *init_params,
6388                                                    uint *length,
6389                                                    key_part_map *map)
6390 {
6391   PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6392     (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params;
6393   ha_partition *partition= hld->partition;
6394   key_range *start_key= (&partition->m_mrr_range_first->
6395                          key_multi_range.start_key);
6396   DBUG_ENTER("partition_multi_range_key_get_key_info");
6397   *length= start_key->length;
6398   *map= start_key->keypart_map;
6399   DBUG_VOID_RETURN;
6400 }
6401 
6402 
partition_multi_range_key_init(void * init_params,uint n_ranges,uint flags)6403 static range_seq_t partition_multi_range_key_init(void *init_params,
6404                                                   uint n_ranges,
6405                                                   uint flags)
6406 {
6407   PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6408     (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params;
6409   ha_partition *partition= hld->partition;
6410   uint i= hld->part_id;
6411   DBUG_ENTER("partition_multi_range_key_init");
6412   // not used: partition->m_mrr_range_init_flags= flags;
6413   hld->partition_part_key_multi_range= partition->m_part_mrr_range_first[i];
6414   DBUG_RETURN(init_params);
6415 }
6416 
6417 
partition_multi_range_key_next(range_seq_t seq,KEY_MULTI_RANGE * range)6418 static bool partition_multi_range_key_next(range_seq_t seq,
6419                                            KEY_MULTI_RANGE *range)
6420 {
6421   PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6422     (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6423   PARTITION_KEY_MULTI_RANGE *partition_key_multi_range=
6424     hld->partition_part_key_multi_range->partition_key_multi_range;
6425   DBUG_ENTER("partition_multi_range_key_next");
6426   if (!partition_key_multi_range)
6427     DBUG_RETURN(TRUE);
6428   *range= partition_key_multi_range->key_multi_range;
6429   hld->partition_part_key_multi_range=
6430     hld->partition_part_key_multi_range->next;
6431   DBUG_RETURN(FALSE);
6432 }
6433 
6434 
partition_multi_range_key_skip_record(range_seq_t seq,range_id_t range_info,uchar * rowid)6435 static bool partition_multi_range_key_skip_record(range_seq_t seq,
6436                                                   range_id_t range_info,
6437                                                   uchar *rowid)
6438 {
6439   PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6440     (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6441   PARTITION_KEY_MULTI_RANGE *pkmr= (PARTITION_KEY_MULTI_RANGE *)range_info;
6442   DBUG_ENTER("partition_multi_range_key_skip_record");
6443   DBUG_RETURN(hld->partition->m_seq_if->skip_record(hld->partition->m_seq,
6444                                                     pkmr->ptr, rowid));
6445 }
6446 
6447 
partition_multi_range_key_skip_index_tuple(range_seq_t seq,range_id_t range_info)6448 static bool partition_multi_range_key_skip_index_tuple(range_seq_t seq,
6449                                                        range_id_t range_info)
6450 {
6451   PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6452     (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6453   PARTITION_KEY_MULTI_RANGE *pkmr= (PARTITION_KEY_MULTI_RANGE *)range_info;
6454   DBUG_ENTER("partition_multi_range_key_skip_index_tuple");
6455   DBUG_RETURN(hld->partition->m_seq_if->skip_index_tuple(hld->partition->m_seq,
6456                                                          pkmr->ptr));
6457 }
6458 
/**
  Estimate rows and cost for an MRR scan over a known set of ranges.

  The caller's ranges are copied and split per partition by
  multi_range_key_create_key(); each used partition's handler is then
  asked for its own estimate through the wrapper sequence interface
  (m_part_seq_if), which iterates only that partition's ranges.

  @return Expected number of rows, 0 when there are no keys in range,
          or HA_POS_ERROR when MRR cannot be used (caller falls back to
          normal reads).
*/
ha_rows ha_partition::multi_range_read_info_const(uint keyno,
                                                  RANGE_SEQ_IF *seq,
                                                  void *seq_init_param,
                                                  uint n_ranges, uint *bufsz,
                                                  uint *mrr_mode,
                                                  Cost_estimate *cost)
{
  int error;
  uint i;
  handler **file;
  ha_rows rows= 0;
  uint ret_mrr_mode= 0;
  range_seq_t seq_it;
  part_id_range save_part_spec;
  Cost_estimate part_cost;
  DBUG_ENTER("ha_partition::multi_range_read_info_const");
  DBUG_PRINT("enter", ("partition this: %p", this));

  m_mrr_new_full_buffer_size= 0;
  /* multi_range_key_create_key() clobbers m_part_spec; restore on exit */
  save_part_spec= m_part_spec;

  cost->reset();

  seq_it= seq->init(seq_init_param, n_ranges, *mrr_mode);
  if (unlikely((error= multi_range_key_create_key(seq, seq_it))))
  {
    if (likely(error == HA_ERR_END_OF_FILE))    // No keys in range
    {
      rows= 0;
      goto end;
    }
    /*
      This error means that we can't do multi_range_read for the moment
      (probably running out of memory) and we need to fallback to
      normal reads
    */
    m_part_spec= save_part_spec;
    DBUG_RETURN(HA_POS_ERROR);
  }
  /*
    Install the wrapper sequence callbacks; optional members are only
    forwarded if the caller's sequence provides them.
  */
  m_part_seq_if.get_key_info=
    seq->get_key_info ? partition_multi_range_key_get_key_info : NULL;
  m_part_seq_if.init= partition_multi_range_key_init;
  m_part_seq_if.next= partition_multi_range_key_next;
  m_part_seq_if.skip_record= (seq->skip_record ?
                              partition_multi_range_key_skip_record : NULL);
  m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ?
                                   partition_multi_range_key_skip_index_tuple :
                                   NULL);
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    DBUG_PRINT("info",("partition part_id: %u", i));
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      ha_rows tmp_rows;
      uint tmp_mrr_mode;
      m_mrr_buffer_size[i]= 0;
      part_cost.reset();
      tmp_mrr_mode= *mrr_mode;
      tmp_rows= (*file)->
        multi_range_read_info_const(keyno, &m_part_seq_if,
                                    &m_partition_part_key_multi_range_hld[i],
                                    m_part_mrr_range_length[i],
                                    &m_mrr_buffer_size[i],
                                    &tmp_mrr_mode, &part_cost);
      /* Any partition failing to do MRR makes the whole scan impossible */
      if (tmp_rows == HA_POS_ERROR)
      {
        m_part_spec= save_part_spec;
        DBUG_RETURN(HA_POS_ERROR);
      }
      cost->add(&part_cost);
      rows+= tmp_rows;
      ret_mrr_mode|= tmp_mrr_mode;
      /* Total buffer need is consumed later by multi_range_read_init() */
      m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i];
    }
  } while (*(++file));
  *mrr_mode= ret_mrr_mode;

end:
  m_part_spec= save_part_spec;
  DBUG_RETURN(rows);
}
6542 
6543 
/**
  Estimate rows/cost for an MRR scan when only the number of ranges and
  keys is known (no concrete key values).  Sums the estimates of all
  read partitions and accumulates the total buffer size needed later by
  multi_range_read_init().

  NOTE(review): when an underlying handler returns non-zero (meaning it
  cannot do MRR), this returns the rows accumulated so far — which is 0,
  i.e. indistinguishable from success, if the first partition fails.
  Confirm against the handler::multi_range_read_info() contract.
*/
ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges,
                                            uint keys,
                                            uint key_parts, uint *bufsz,
                                            uint *mrr_mode,
                                            Cost_estimate *cost)
{
  uint i;
  handler **file;
  ha_rows rows= 0;
  Cost_estimate part_cost;
  DBUG_ENTER("ha_partition::multi_range_read_info");
  DBUG_PRINT("enter", ("partition this: %p", this));

  cost->reset();

  m_mrr_new_full_buffer_size= 0;
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    if (bitmap_is_set(&(m_part_info->read_partitions), (i)))
    {
      ha_rows tmp_rows;
      m_mrr_buffer_size[i]= 0;
      part_cost.reset();
      /* Non-zero from the child means it cannot do MRR; bail out */
      if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys,
                                                    key_parts,
                                                    &m_mrr_buffer_size[i],
                                                    mrr_mode, &part_cost)))
        DBUG_RETURN(rows);
      cost->add(&part_cost);
      rows+= tmp_rows;
      m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i];
    }
  } while (*(++file));

  /* 0 here signals "MRR is possible" */
  DBUG_RETURN(0);
}
6582 
6583 
/**
  Initialize an MRR scan: copy and split the caller's ranges per
  partition, carve the shared MRR buffer into per-partition slices, and
  initialize MRR on every used partition's handler.

  @return 0 on success (and, per the NOTE below, also when range
          creation fails), or an error code from buffer allocation or a
          child handler's init.
*/
int ha_partition::multi_range_read_init(RANGE_SEQ_IF *seq,
                                        void *seq_init_param,
                                        uint n_ranges, uint mrr_mode,
                                        HANDLER_BUFFER *buf)
{
  int error;
  uint i;
  handler **file;
  uchar *tmp_buffer;
  DBUG_ENTER("ha_partition::multi_range_read_init");
  DBUG_PRINT("enter", ("partition this: %p", this));

  eq_range= 0;
  m_seq_if= seq;
  m_seq= seq->init(seq_init_param, n_ranges, mrr_mode);
  if (unlikely((error= multi_range_key_create_key(seq, m_seq))))
    DBUG_RETURN(0); /* NOTE(review): the error (incl. OOM) is swallowed and
                       0 (success) returned — confirm this is intentional */

  /* Wrapper sequence so each child handler sees only its own ranges */
  m_part_seq_if.get_key_info= (seq->get_key_info ?
                               partition_multi_range_key_get_key_info :
                               NULL);
  m_part_seq_if.init= partition_multi_range_key_init;
  m_part_seq_if.next= partition_multi_range_key_next;
  m_part_seq_if.skip_record= (seq->skip_record ?
                              partition_multi_range_key_skip_record :
                              NULL);
  m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ?
                                   partition_multi_range_key_skip_index_tuple :
                                   NULL);

  /* m_mrr_new_full_buffer_size was calculated in multi_range_read_info */
  if (m_mrr_full_buffer_size < m_mrr_new_full_buffer_size)
  {
    /* Grow the shared buffer; the old one (if any) is replaced wholesale */
    if (m_mrr_full_buffer)
      my_free(m_mrr_full_buffer);
    if (!(m_mrr_full_buffer=
          (uchar *) my_malloc(PSI_INSTRUMENT_ME, m_mrr_new_full_buffer_size, MYF(MY_WME))))
    {
      m_mrr_full_buffer_size= 0;
      error= HA_ERR_OUT_OF_MEM;
      goto error;
    }
    m_mrr_full_buffer_size= m_mrr_new_full_buffer_size;
  }

  tmp_buffer= m_mrr_full_buffer;
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    DBUG_PRINT("info",("partition part_id: %u", i));
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      /*
        Hand each used partition its slice of the shared buffer; when no
        per-partition sizes were computed, pass the caller's buffer as-is.
      */
      if (m_mrr_new_full_buffer_size)
      {
        if (m_mrr_buffer_size[i])
        {
          m_mrr_buffer[i].buffer= tmp_buffer;
          m_mrr_buffer[i].end_of_used_area= tmp_buffer;
          tmp_buffer+= m_mrr_buffer_size[i];
          m_mrr_buffer[i].buffer_end= tmp_buffer;
        }
      }
      else
        m_mrr_buffer[i]= *buf;

      if (unlikely((error= (*file)->
                    multi_range_read_init(&m_part_seq_if,
                                          &m_partition_part_key_multi_range_hld[i],
                                          m_part_mrr_range_length[i],
                                          mrr_mode,
                                          &m_mrr_buffer[i]))))
        goto error;
      m_stock_range_seq[i]= 0;
    }
  } while (*(++file));

  /* Scan state consumed by multi_range_read_next() */
  m_multi_range_read_first= TRUE;
  m_mrr_range_current= m_mrr_range_first;
  m_index_scan_type= partition_read_multi_range;
  m_mrr_mode= mrr_mode;
  m_mrr_n_ranges= n_ranges;
  DBUG_RETURN(0);

error:
  DBUG_RETURN(error);
}
6671 
6672 
/**
  Fetch the next row of an MRR scan and report which of the caller's
  ranges it belongs to via *range_info.

  Sorted mode (HA_MRR_SORTED) runs an ordered merge scan over the
  partitions; otherwise partitions are visited one after another.

  @return 0 on success, otherwise the error from the underlying scan
          (e.g. HA_ERR_END_OF_FILE at end of data).
*/
int ha_partition::multi_range_read_next(range_id_t *range_info)
{
  int error;
  DBUG_ENTER("ha_partition::multi_range_read_next");
  DBUG_PRINT("enter", ("partition this: %p  partition m_mrr_mode: %u",
                       this, m_mrr_mode));

  if ((m_mrr_mode & HA_MRR_SORTED))
  {
    if (m_multi_range_read_first)
    {
      if (unlikely((error= handle_ordered_index_scan(table->record[0],
                                                     FALSE))))
        DBUG_RETURN(error);
      /* In pre-call (bulk access) mode, keep the first-call state */
      if (!m_pre_calling)
        m_multi_range_read_first= FALSE;
    }
    else if (unlikely((error= handle_ordered_next(table->record[0],
                                                  eq_range))))
      DBUG_RETURN(error);
    *range_info= m_mrr_range_current->ptr;
  }
  else
  {
    if (unlikely(m_multi_range_read_first))
    {
      if (unlikely((error=
                    handle_unordered_scan_next_partition(table->record[0]))))
        DBUG_RETURN(error);
      if (!m_pre_calling)
        m_multi_range_read_first= FALSE;
    }
    else if (unlikely((error= handle_unordered_next(table->record[0], FALSE))))
      DBUG_RETURN(error);

    if (!(m_mrr_mode & HA_MRR_NO_ASSOCIATION))
    {
      /* Recover the caller's id stored by multi_range_key_create_key() */
      *range_info=
        ((PARTITION_KEY_MULTI_RANGE *) m_range_info[m_last_part])->ptr;
    }
  }
  DBUG_RETURN(0);
}
6716 
6717 
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)6718 int ha_partition::multi_range_read_explain_info(uint mrr_mode, char *str,
6719                                                 size_t size)
6720 {
6721   DBUG_ENTER("ha_partition::multi_range_read_explain_info");
6722   DBUG_RETURN(get_open_file_sample()->
6723                 multi_range_read_explain_info(mrr_mode, str, size));
6724 }
6725 
6726 
6727 /**
6728   Find and retrieve the Full Text Search relevance ranking for a search string
6729   in a full text index.
6730 
6731   @param  handler           Full Text Search handler
6732   @param  record            Search string
6733   @param  length            Length of the search string
6734 
6735   @retval                   Relevance value
6736 */
6737 
partition_ft_find_relevance(FT_INFO * handler,uchar * record,uint length)6738 float partition_ft_find_relevance(FT_INFO *handler,
6739                                   uchar *record, uint length)
6740 {
6741   st_partition_ft_info *info= (st_partition_ft_info *)handler;
6742   uint m_last_part= ((ha_partition*) info->file)->last_part();
6743   FT_INFO *m_handler= info->part_ft_info[m_last_part];
6744   DBUG_ENTER("partition_ft_find_relevance");
6745   if (!m_handler)
6746     DBUG_RETURN((float)-1.0);
6747   DBUG_RETURN(m_handler->please->find_relevance(m_handler, record, length));
6748 }
6749 
6750 
6751 /**
6752   Retrieve the Full Text Search relevance ranking for the current
6753   full text search.
6754 
6755   @param  handler           Full Text Search handler
6756 
6757   @retval                   Relevance value
6758 */
6759 
partition_ft_get_relevance(FT_INFO * handler)6760 float partition_ft_get_relevance(FT_INFO *handler)
6761 {
6762   st_partition_ft_info *info= (st_partition_ft_info *)handler;
6763   uint m_last_part= ((ha_partition*) info->file)->last_part();
6764   FT_INFO *m_handler= info->part_ft_info[m_last_part];
6765   DBUG_ENTER("partition_ft_get_relevance");
6766   if (!m_handler)
6767     DBUG_RETURN((float)-1.0);
6768   DBUG_RETURN(m_handler->please->get_relevance(m_handler));
6769 }
6770 
6771 
6772 /**
6773   Free the memory for a full text search handler.
6774 
6775   @param  handler           Full Text Search handler
6776 */
6777 
partition_ft_close_search(FT_INFO * handler)6778 void partition_ft_close_search(FT_INFO *handler)
6779 {
6780   st_partition_ft_info *info= (st_partition_ft_info *)handler;
6781   info->file->ft_close_search(handler);
6782 }
6783 
6784 
6785 /**
6786   Free the memory for a full text search handler.
6787 
6788   @param  handler           Full Text Search handler
6789 */
6790 
ft_close_search(FT_INFO * handler)6791 void ha_partition::ft_close_search(FT_INFO *handler)
6792 {
6793   uint i;
6794   st_partition_ft_info *info= (st_partition_ft_info *)handler;
6795   DBUG_ENTER("ha_partition::ft_close_search");
6796 
6797   for (i= 0; i < m_tot_parts; i++)
6798   {
6799     FT_INFO *m_handler= info->part_ft_info[i];
6800     DBUG_ASSERT(!m_handler ||
6801                 (m_handler->please && m_handler->please->close_search));
6802     if (m_handler &&
6803         m_handler->please &&
6804         m_handler->please->close_search)
6805       m_handler->please->close_search(m_handler);
6806   }
6807   DBUG_VOID_RETURN;
6808 }
6809 
6810 
6811 /* Partition Full Text search function table */
_ft_vft partition_ft_vft =
{
  NULL, // partition_ft_read_next: not provided at the partition level
  partition_ft_find_relevance,
  partition_ft_close_search,
  partition_ft_get_relevance,
  NULL  // partition_ft_reinit_search: not provided at the partition level
};
6820 
6821 
6822 /**
6823   Initialize a full text search.
6824 */
6825 
/**
  Initialize a full text search on every partition marked for reading.

  @return 0 on success (also when no partitions are to be read),
          otherwise the error from a partition's ft_init()/pre_ft_init();
          on failure, already-initialized partitions are unwound.
*/
int ha_partition::ft_init()
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::ft_init");
  DBUG_PRINT("info", ("partition this: %p", this));

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
      bitmap_set_all(table->read_set);
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: %p",
             (void *) m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part %u", (uint) part_id));

  if (part_id == MY_BIT_NONE)
  {
    /* No partitions to read: report success but leave no scan active */
    error= 0;
    goto err1;
  }

  DBUG_PRINT("info", ("ft_init on partition %u", (uint) part_id));
  /*
    ft_end() is needed for partitioning to reset internal data if scan
    is already in use
  */
  if (m_pre_calling)
  {
    if (unlikely((error= pre_ft_end())))
      goto err1;
  }
  else
    ft_end();
  m_index_scan_type= partition_ft_read;
  for (i= part_id; i < m_tot_parts; i++)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i))
    {
      /* pre-call (bulk access) mode defers the real init to the engine */
      error= m_pre_calling ? m_file[i]->pre_ft_init() : m_file[i]->ft_init();
      if (unlikely(error))
        goto err2;
    }
  }
  m_scan_value= 1;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  m_ft_init_and_first= TRUE;
  DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value));
  DBUG_RETURN(0);

err2:
  /* Unwind: partition i failed, so end the scan on partitions [part_id, i) */
  late_extra_no_cache(part_id);
  while ((int)--i >= (int)part_id)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i))
    {
      if (m_pre_calling)
        m_file[i]->pre_ft_end();
      else
        m_file[i]->ft_end();
    }
  }
err1:
  /* m_scan_value == 2 marks "no FT scan active" for ft_end() */
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
6920 
6921 
6922 /**
6923   Initialize a full text search during a bulk access request.
6924 */
6925 
pre_ft_init()6926 int ha_partition::pre_ft_init()
6927 {
6928   bool save_m_pre_calling;
6929   int error;
6930   DBUG_ENTER("ha_partition::pre_ft_init");
6931   save_m_pre_calling= m_pre_calling;
6932   m_pre_calling= TRUE;
6933   error= ft_init();
6934   m_pre_calling= save_m_pre_calling;
6935   DBUG_RETURN(error);
6936 }
6937 
6938 
6939 /**
6940   Terminate a full text search.
6941 */
6942 
ft_end()6943 void ha_partition::ft_end()
6944 {
6945   handler **file;
6946   DBUG_ENTER("ha_partition::ft_end");
6947   DBUG_PRINT("info", ("partition this: %p", this));
6948 
6949   switch (m_scan_value) {
6950   case 2:                                       // Error
6951     break;
6952   case 1:                                       // Table scan
6953     if (NO_CURRENT_PART_ID != m_part_spec.start_part)
6954       late_extra_no_cache(m_part_spec.start_part);
6955     file= m_file;
6956     do
6957     {
6958       if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
6959       {
6960         if (m_pre_calling)
6961           (*file)->pre_ft_end();
6962         else
6963           (*file)->ft_end();
6964       }
6965     } while (*(++file));
6966     break;
6967   }
6968   m_scan_value= 2;
6969   m_part_spec.start_part= NO_CURRENT_PART_ID;
6970   ft_current= 0;
6971   DBUG_VOID_RETURN;
6972 }
6973 
6974 
6975 /**
6976   Terminate a full text search during a bulk access request.
6977 */
6978 
pre_ft_end()6979 int ha_partition::pre_ft_end()
6980 {
6981   bool save_m_pre_calling;
6982   DBUG_ENTER("ha_partition::pre_ft_end");
6983   save_m_pre_calling= m_pre_calling;
6984   m_pre_calling= TRUE;
6985   ft_end();
6986   m_pre_calling= save_m_pre_calling;
6987   DBUG_RETURN(0);
6988 }
6989 
6990 
/**
  Swap blob caches between a cached record buffer and the table's fields.

  Used by the ordered index scan machinery: records are buffered per
  partition (see handle_ordered_index_scan()), and blob values cached
  inside Field_blob must be protected from being overwritten when another
  partition reads into table->record[0].

  @param rec_buf  Record buffer the fields are temporarily mapped to while
                  swapping (a per-partition slot, not table->record[0]).
  @param storage  One Ordered_blob_storage per blob field in the share,
                  indexed in table->s->blob_field order.
  @param restore  FALSE: move current blob caches into storage;
                  TRUE:  move previously stashed caches back to the fields.
*/
void ha_partition::swap_blobs(uchar * rec_buf, Ordered_blob_storage ** storage, bool restore)
{
  uint *ptr, *end;
  uint blob_n= 0;
  /* Re-point the Field objects from table->record[0] to rec_buf. */
  table->move_fields(table->field, rec_buf, table->record[0]);
  for (ptr= table->s->blob_field, end= ptr + table->s->blob_fields;
       ptr != end; ++ptr, ++blob_n)
  {
    DBUG_ASSERT(*ptr < table->s->fields);
    Field_blob *blob= (Field_blob*) table->field[*ptr];
    DBUG_ASSERT(blob->flags & BLOB_FLAG);
    DBUG_ASSERT(blob->field_index == *ptr);
    /* Fields that are not read, or are NULL, have no cache to protect. */
    if (!bitmap_is_set(table->read_set, *ptr) || blob->is_null())
      continue;

    Ordered_blob_storage &s= *storage[blob_n];

    if (restore)
    {
      /*
        We protect only blob cache (value or read_value). If the cache was
        empty that doesn't mean the blob was empty. Blobs allocated by a
        storage engine should work just fine.
      */
      if (!s.blob.is_empty())
        blob->swap(s.blob, s.set_read_value);
    }
    else
    {
      bool set_read_value;
      String *cached= blob->cached(&set_read_value);
      if (cached)
      {
        cached->swap(s.blob);
        s.set_read_value= set_read_value;
      }
    }
  }
  /* Map the fields back to table->record[0]. */
  table->move_fields(table->field, table->record[0], rec_buf);
}
7031 
7032 
7033 /**
7034   Initialize a full text search using the extended API.
7035 
7036   @param  flags             Search flags
7037   @param  inx               Key number
7038   @param  key               Key value
7039 
7040   @return FT_INFO structure if successful
7041           NULL              otherwise
7042 */
7043 
ft_init_ext(uint flags,uint inx,String * key)7044 FT_INFO *ha_partition::ft_init_ext(uint flags, uint inx, String *key)
7045 {
7046   FT_INFO *ft_handler;
7047   handler **file;
7048   st_partition_ft_info *ft_target, **parent;
7049   DBUG_ENTER("ha_partition::ft_init_ext");
7050 
7051   if (ft_current)
7052     parent= &ft_current->next;
7053   else
7054     parent= &ft_first;
7055 
7056   if (!(ft_target= *parent))
7057   {
7058     FT_INFO **tmp_ft_info;
7059     if (!(ft_target= (st_partition_ft_info *)
7060           my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | MY_ZEROFILL),
7061                           &ft_target, sizeof(st_partition_ft_info),
7062                           &tmp_ft_info, sizeof(FT_INFO *) * m_tot_parts,
7063                           NullS)))
7064     {
7065       my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL));
7066       DBUG_RETURN(NULL);
7067     }
7068     ft_target->part_ft_info= tmp_ft_info;
7069     (*parent)= ft_target;
7070   }
7071 
7072   ft_current= ft_target;
7073   file= m_file;
7074   do
7075   {
7076     if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
7077     {
7078       if ((ft_handler= (*file)->ft_init_ext(flags, inx, key)))
7079         (*file)->ft_handler= ft_handler;
7080       else
7081         (*file)->ft_handler= NULL;
7082       ft_target->part_ft_info[file - m_file]= ft_handler;
7083     }
7084     else
7085     {
7086       (*file)->ft_handler= NULL;
7087       ft_target->part_ft_info[file - m_file]= NULL;
7088     }
7089   } while (*(++file));
7090 
7091   ft_target->please= &partition_ft_vft;
7092   ft_target->file= this;
7093   DBUG_RETURN((FT_INFO*)ft_target);
7094 }
7095 
7096 
7097 /**
7098   Return the next record from the FT result set during an ordered index
7099   pre-scan
7100 
7101   @param  use_parallel      Is it a parallel search
7102 
7103   @return >0                Error code
7104           0                 Success
7105 */
7106 
pre_ft_read(bool use_parallel)7107 int ha_partition::pre_ft_read(bool use_parallel)
7108 {
7109   bool save_m_pre_calling;
7110   int error;
7111   DBUG_ENTER("ha_partition::pre_ft_read");
7112   DBUG_PRINT("info", ("partition this: %p", this));
7113   save_m_pre_calling= m_pre_calling;
7114   m_pre_calling= TRUE;
7115   m_pre_call_use_parallel= use_parallel;
7116   error= ft_read(table->record[0]);
7117   m_pre_calling= save_m_pre_calling;
7118   DBUG_RETURN(error);
7119 }
7120 
7121 
7122 /**
7123   Return the first or next record in a full text search.
7124 
7125   @param  buf               Buffer where the record should be returned
7126 
7127   @return >0                Error code
7128           0                 Success
7129 */
7130 
ft_read(uchar * buf)7131 int ha_partition::ft_read(uchar *buf)
7132 {
7133   handler *file;
7134   int result= HA_ERR_END_OF_FILE, error;
7135   uint part_id= m_part_spec.start_part;
7136   DBUG_ENTER("ha_partition::ft_read");
7137   DBUG_PRINT("info", ("partition this: %p", this));
7138   DBUG_PRINT("info", ("part_id: %u", part_id));
7139 
7140   if (part_id == NO_CURRENT_PART_ID)
7141   {
7142     /*
7143       The original set of partitions to scan was empty and thus we report
7144       the result here.
7145     */
7146     DBUG_PRINT("info", ("NO_CURRENT_PART_ID"));
7147     goto end;
7148   }
7149 
7150   DBUG_ASSERT(m_scan_value == 1);
7151 
7152   if (m_ft_init_and_first)                      // First call to ft_read()
7153   {
7154     m_ft_init_and_first= FALSE;
7155     if (!bulk_access_executing)
7156     {
7157       error= handle_pre_scan(FALSE, check_parallel_search());
7158       if (m_pre_calling || error)
7159         DBUG_RETURN(error);
7160     }
7161     late_extra_cache(part_id);
7162   }
7163 
7164   file= m_file[part_id];
7165 
7166   while (TRUE)
7167   {
7168     if (!(result= file->ft_read(buf)))
7169     {
7170       /* Found row: remember position and return it. */
7171       m_part_spec.start_part= m_last_part= part_id;
7172       table->status= 0;
7173       DBUG_RETURN(0);
7174     }
7175 
7176     /*
7177       if we get here, then the current partition ft_next returned failure
7178     */
7179     if (result != HA_ERR_END_OF_FILE)
7180       goto end_dont_reset_start_part;         // Return error
7181 
7182     /* End current partition */
7183     late_extra_no_cache(part_id);
7184     DBUG_PRINT("info", ("stopping using partition %u", (uint) part_id));
7185 
7186     /* Shift to next partition */
7187     while (++part_id < m_tot_parts &&
7188            !bitmap_is_set(&(m_part_info->read_partitions), part_id))
7189       ;
7190     if (part_id >= m_tot_parts)
7191     {
7192       result= HA_ERR_END_OF_FILE;
7193       break;
7194     }
7195     m_part_spec.start_part= m_last_part= part_id;
7196     file= m_file[part_id];
7197     DBUG_PRINT("info", ("now using partition %u", (uint) part_id));
7198     late_extra_cache(part_id);
7199   }
7200 
7201 end:
7202   m_part_spec.start_part= NO_CURRENT_PART_ID;
7203 end_dont_reset_start_part:
7204   table->status= STATUS_NOT_FOUND;
7205   DBUG_RETURN(result);
7206 }
7207 
7208 
7209 /*
7210   Common routine to set up index scans
7211 
7212   SYNOPSIS
7213     ha_partition::partition_scan_set_up()
7214       buf            Buffer to later return record in (this function
7215                      needs it to calculcate partitioning function
7216                      values)
7217 
7218       idx_read_flag  TRUE <=> m_start_key has range start endpoint which
7219                      probably can be used to determine the set of partitions
7220                      to scan.
7221                      FALSE <=> there is no start endpoint.
7222 
7223   DESCRIPTION
7224     Find out which partitions we'll need to read when scanning the specified
7225     range.
7226 
7227     If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
7228     as we will not need to do merge ordering.
7229 
7230   RETURN VALUE
7231     >0                    Error code
7232     0                     Success
7233 */
7234 
partition_scan_set_up(uchar * buf,bool idx_read_flag)7235 int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
7236 {
7237   DBUG_ENTER("ha_partition::partition_scan_set_up");
7238 
7239   if (idx_read_flag)
7240     get_partition_set(table, buf, active_index, &m_start_key, &m_part_spec);
7241   else
7242   {
7243     m_part_spec.start_part= 0;
7244     m_part_spec.end_part= m_tot_parts - 1;
7245   }
7246   if (m_part_spec.start_part > m_part_spec.end_part)
7247   {
7248     /*
7249       We discovered a partition set but the set was empty so we report
7250       key not found.
7251     */
7252     DBUG_PRINT("info", ("scan with no partition to scan"));
7253     DBUG_RETURN(HA_ERR_END_OF_FILE);
7254   }
7255   if (m_part_spec.start_part == m_part_spec.end_part)
7256   {
7257     /*
7258       We discovered a single partition to scan, this never needs to be
7259       performed using the ordered index scan.
7260     */
7261     DBUG_PRINT("info", ("index scan using the single partition %u",
7262 			(uint) m_part_spec.start_part));
7263     m_ordered_scan_ongoing= FALSE;
7264   }
7265   else
7266   {
7267     /*
7268       Set m_ordered_scan_ongoing according how the scan should be done
7269       Only exact partitions are discovered atm by get_partition_set.
7270       Verify this, also bitmap must have at least one bit set otherwise
7271       the result from this table is the empty set.
7272     */
7273     uint start_part= bitmap_get_first_set(&(m_part_info->read_partitions));
7274     if (start_part == MY_BIT_NONE)
7275     {
7276       DBUG_PRINT("info", ("scan with no partition to scan"));
7277       DBUG_RETURN(HA_ERR_END_OF_FILE);
7278     }
7279     if (start_part > m_part_spec.start_part)
7280       m_part_spec.start_part= start_part;
7281     DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
7282     m_ordered_scan_ongoing= m_ordered;
7283   }
7284   DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
7285   DBUG_ASSERT(m_part_spec.end_part < m_tot_parts);
7286   DBUG_RETURN(0);
7287 }
7288 
7289 /**
7290   Check if we can search partitions in parallel
7291 
7292   @retval TRUE  yes
7293   @retval FALSE no
7294 */
7295 
check_parallel_search()7296 bool ha_partition::check_parallel_search()
7297 {
7298   TABLE_LIST *table_list= table->pos_in_table_list;
7299   st_select_lex *select_lex;
7300   JOIN *join;
7301   DBUG_ENTER("ha_partition::check_parallel_search");
7302   if (!table_list)
7303     goto not_parallel;
7304 
7305   while (table_list->parent_l)
7306     table_list= table_list->parent_l;
7307 
7308   select_lex= table_list->select_lex;
7309   DBUG_PRINT("info",("partition select_lex: %p", select_lex));
7310   if (!select_lex)
7311     goto not_parallel;
7312   if (!select_lex->explicit_limit)
7313   {
7314     DBUG_PRINT("info",("partition not using explicit_limit"));
7315     goto parallel;
7316   }
7317 
7318   join= select_lex->join;
7319   DBUG_PRINT("info",("partition join: %p", join));
7320   if (join && join->skip_sort_order)
7321   {
7322     DBUG_PRINT("info",("partition order_list.elements: %u",
7323                        select_lex->order_list.elements));
7324     if (select_lex->order_list.elements)
7325     {
7326       Item *item= *select_lex->order_list.first->item;
7327       DBUG_PRINT("info",("partition item: %p", item));
7328       DBUG_PRINT("info",("partition item->type(): %u", item->type()));
7329       DBUG_PRINT("info",("partition m_part_info->part_type: %u",
7330                          m_part_info->part_type));
7331       DBUG_PRINT("info",("partition m_is_sub_partitioned: %s",
7332                          m_is_sub_partitioned ? "TRUE" : "FALSE"));
7333       DBUG_PRINT("info",("partition m_part_info->part_expr: %p",
7334                          m_part_info->part_expr));
7335       if (item->type() == Item::FIELD_ITEM &&
7336           m_part_info->part_type == RANGE_PARTITION &&
7337           !m_is_sub_partitioned &&
7338           (!m_part_info->part_expr ||
7339            m_part_info->part_expr->type() == Item::FIELD_ITEM))
7340       {
7341         Field *order_field= ((Item_field *)item)->field;
7342         DBUG_PRINT("info",("partition order_field: %p", order_field));
7343         if (order_field && order_field->table == table_list->table)
7344         {
7345           Field *part_field= m_part_info->full_part_field_array[0];
7346           DBUG_PRINT("info",("partition order_field: %p", order_field));
7347           DBUG_PRINT("info",("partition part_field: %p", part_field));
7348           if (part_field == order_field)
7349           {
7350             /*
7351               We are using ORDER BY partition_field LIMIT #
7352               In this case, let's not do things in parallel as it's
7353               likely that the query can be satisfied from the first
7354               partition
7355             */
7356             DBUG_PRINT("info",("partition with ORDER on partition field"));
7357             goto not_parallel;
7358           }
7359         }
7360       }
7361       DBUG_PRINT("info",("partition have order"));
7362       goto parallel;
7363     }
7364 
7365     DBUG_PRINT("info",("partition group_list.elements: %u",
7366                        select_lex->group_list.elements));
7367     if (select_lex->group_list.elements)
7368     {
7369       Item *item= *select_lex->group_list.first->item;
7370       DBUG_PRINT("info",("partition item: %p", item));
7371       DBUG_PRINT("info",("partition item->type(): %u", item->type()));
7372       DBUG_PRINT("info",("partition m_part_info->part_type: %u",
7373                          m_part_info->part_type));
7374       DBUG_PRINT("info",("partition m_is_sub_partitioned: %s",
7375                          m_is_sub_partitioned ? "TRUE" : "FALSE"));
7376       DBUG_PRINT("info",("partition m_part_info->part_expr: %p",
7377                          m_part_info->part_expr));
7378       if (item->type() == Item::FIELD_ITEM &&
7379           m_part_info->part_type == RANGE_PARTITION &&
7380           !m_is_sub_partitioned &&
7381           (!m_part_info->part_expr ||
7382            m_part_info->part_expr->type() == Item::FIELD_ITEM))
7383       {
7384         Field *group_field= ((Item_field *)item)->field;
7385         DBUG_PRINT("info",("partition group_field: %p", group_field));
7386         if (group_field && group_field->table == table_list->table)
7387         {
7388           Field *part_field= m_part_info->full_part_field_array[0];
7389           DBUG_PRINT("info",("partition group_field: %p", group_field));
7390           DBUG_PRINT("info",("partition part_field: %p", part_field));
7391           if (part_field == group_field)
7392           {
7393             DBUG_PRINT("info",("partition with GROUP BY on partition field"));
7394             goto not_parallel;
7395           }
7396         }
7397       }
7398       DBUG_PRINT("info",("partition with GROUP BY"));
7399       goto parallel;
7400     }
7401   }
7402   else if (select_lex->order_list.elements ||
7403            select_lex->group_list.elements)
7404   {
7405     DBUG_PRINT("info",("partition is not skip_order"));
7406     DBUG_PRINT("info",("partition order_list.elements: %u",
7407                        select_lex->order_list.elements));
7408     DBUG_PRINT("info",("partition group_list.elements: %u",
7409                        select_lex->group_list.elements));
7410     goto parallel;
7411   }
7412   DBUG_PRINT("info",("partition is not skip_order"));
7413 
7414 not_parallel:
7415   DBUG_PRINT("return",("partition FALSE"));
7416   DBUG_RETURN(FALSE);
7417 
7418 parallel:
7419   DBUG_PRINT("return",("partition TRUE"));
7420   DBUG_RETURN(TRUE);
7421 }
7422 
7423 
/**
  Issue pre-scan requests to all partitions taking part in the scan.

  Dispatches on m_index_scan_type and calls the matching pre_* handler
  method on every partition in [start_part, end_part] that is marked in
  read_partitions (and, for multi-range scans, in m_mrr_used_partitions).

  @param reverse_order  Currently unused by this function; kept to match
                        the callers' signature.
  @param use_parallel   Passed through to the underlying pre_* calls.

  @return >0  Error code (HA_ERR_END_OF_FILE from a partition is not an
              error here and is mapped to success)
          0   Success
*/
int ha_partition::handle_pre_scan(bool reverse_order, bool use_parallel)
{
  uint i;
  DBUG_ENTER("ha_partition::handle_pre_scan");
  DBUG_PRINT("enter",
             ("m_part_spec.start_part: %u  m_part_spec.end_part: %u",
              (uint) m_part_spec.start_part, (uint) m_part_spec.end_part));

  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    if (!(bitmap_is_set(&(m_part_info->read_partitions), i)))
      continue;
    int error;
    handler *file= m_file[i];

    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->pre_index_read_map(m_start_key.key,
                                  m_start_key.keypart_map,
                                  m_start_key.flag,
                                  use_parallel);
      break;
    case partition_index_first:
      error= file->pre_index_first(use_parallel);
      break;
    case partition_index_last:
      error= file->pre_index_last(use_parallel);
      break;
    case partition_index_read_last:
      error= file->pre_index_read_last_map(m_start_key.key,
                                       m_start_key.keypart_map,
                                       use_parallel);
      break;
    case partition_read_range:
      error= file->pre_read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, TRUE, use_parallel);
      break;
    case partition_read_multi_range:
      /* MRR only touches partitions that actually have ranges. */
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      error= file->pre_multi_range_read_next(use_parallel);
      break;
    case partition_ft_read:
      error= file->pre_ft_read(use_parallel);
      break;
    case partition_no_index_scan:
      error= file->pre_rnd_next(use_parallel);
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(0);
    }
    /* EOF in one partition is fine; keep issuing pre-scans to the rest. */
    if (error == HA_ERR_END_OF_FILE)
      error= 0;
    if (unlikely(error))
      DBUG_RETURN(error);
  }
  table->status= 0;
  DBUG_RETURN(0);
}
7484 
7485 
7486 /****************************************************************************
7487   Unordered Index Scan Routines
7488 ****************************************************************************/
7489 /*
7490   Common routine to handle index_next with unordered results
7491 
7492   SYNOPSIS
7493     handle_unordered_next()
7494     out:buf                       Read row in MySQL Row Format
7495     next_same                     Called from index_next_same
7496 
7497   RETURN VALUE
7498     HA_ERR_END_OF_FILE            End of scan
7499     0                             Success
7500     other                         Error code
7501 
7502   DESCRIPTION
7503     These routines are used to scan partitions without considering order.
7504     This is performed in two situations.
7505     1) In read_multi_range this is the normal case
7506     2) When performing any type of index_read, index_first, index_last where
7507     all fields in the partition function is bound. In this case the index
7508     scan is performed on only one partition and thus it isn't necessary to
7509     perform any sort.
7510 */
7511 
handle_unordered_next(uchar * buf,bool is_next_same)7512 int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
7513 {
7514   handler *file;
7515   int error;
7516   DBUG_ENTER("ha_partition::handle_unordered_next");
7517 
7518   if (m_part_spec.start_part >= m_tot_parts)
7519   {
7520     /* Should never happen! */
7521     DBUG_ASSERT(0);
7522     DBUG_RETURN(HA_ERR_END_OF_FILE);
7523   }
7524   file= m_file[m_part_spec.start_part];
7525 
7526   /*
7527     We should consider if this should be split into three functions as
7528     partition_read_range is_next_same are always local constants
7529   */
7530 
7531   if (m_index_scan_type == partition_read_multi_range)
7532   {
7533     if (likely(!(error= file->
7534                  multi_range_read_next(&m_range_info[m_part_spec.start_part]))))
7535     {
7536       m_last_part= m_part_spec.start_part;
7537       DBUG_RETURN(0);
7538     }
7539   }
7540   else if (m_index_scan_type == partition_read_range)
7541   {
7542     if (likely(!(error= file->read_range_next())))
7543     {
7544       m_last_part= m_part_spec.start_part;
7545       DBUG_RETURN(0);
7546     }
7547   }
7548   else if (is_next_same)
7549   {
7550     if (likely(!(error= file->ha_index_next_same(buf, m_start_key.key,
7551                                                  m_start_key.length))))
7552     {
7553       m_last_part= m_part_spec.start_part;
7554       DBUG_RETURN(0);
7555     }
7556   }
7557   else
7558   {
7559     if (likely(!(error= file->ha_index_next(buf))))
7560     {
7561       m_last_part= m_part_spec.start_part;
7562       DBUG_RETURN(0);                           // Row was in range
7563     }
7564   }
7565 
7566     if (unlikely(error == HA_ERR_END_OF_FILE))
7567   {
7568     m_part_spec.start_part++;                    // Start using next part
7569     error= handle_unordered_scan_next_partition(buf);
7570   }
7571   DBUG_RETURN(error);
7572 }
7573 
7574 
7575 /*
7576   Handle index_next when changing to new partition
7577 
7578   SYNOPSIS
7579     handle_unordered_scan_next_partition()
7580     buf                       Read row in MariaDB Row Format
7581 
7582   RETURN VALUE
7583     HA_ERR_END_OF_FILE            End of scan
7584     0                             Success
7585     other                         Error code
7586 
7587   DESCRIPTION
7588     This routine is used to start the index scan on the next partition.
7589     Both initial start and after completing scan on one partition.
7590 */
7591 
handle_unordered_scan_next_partition(uchar * buf)7592 int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
7593 {
7594   uint i= m_part_spec.start_part;
7595   int saved_error= HA_ERR_END_OF_FILE;
7596   DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");
7597 
7598   /* Read next partition that includes start_part */
7599   if (i)
7600     i= bitmap_get_next_set(&m_part_info->read_partitions, i - 1);
7601   else
7602     i= bitmap_get_first_set(&m_part_info->read_partitions);
7603 
7604   for (;
7605        i <= m_part_spec.end_part;
7606        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7607   {
7608     int error;
7609     handler *file= m_file[i];
7610     m_part_spec.start_part= i;
7611 
7612     switch (m_index_scan_type) {
7613     case partition_read_multi_range:
7614       if (!bitmap_is_set(&m_mrr_used_partitions, i))
7615         continue;
7616       DBUG_PRINT("info", ("read_multi_range on partition %u", i));
7617       error= file->multi_range_read_next(&m_range_info[i]);
7618       break;
7619     case partition_read_range:
7620       DBUG_PRINT("info", ("read_range_first on partition %u", i));
7621       error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
7622                                     end_range, eq_range, FALSE);
7623       break;
7624     case partition_index_read:
7625       DBUG_PRINT("info", ("index_read on partition %u", i));
7626       error= file->ha_index_read_map(buf, m_start_key.key,
7627                                      m_start_key.keypart_map,
7628                                      m_start_key.flag);
7629       break;
7630     case partition_index_first:
7631       DBUG_PRINT("info", ("index_first on partition %u", i));
7632       error= file->ha_index_first(buf);
7633       break;
7634     default:
7635       DBUG_ASSERT(FALSE);
7636       DBUG_RETURN(1);
7637     }
7638     if (likely(!error))
7639     {
7640       m_last_part= i;
7641       DBUG_RETURN(0);
7642     }
7643     if (likely((error != HA_ERR_END_OF_FILE) &&
7644                (error != HA_ERR_KEY_NOT_FOUND)))
7645       DBUG_RETURN(error);
7646 
7647     /*
7648       If HA_ERR_KEY_NOT_FOUND, we must return that error instead of
7649       HA_ERR_END_OF_FILE, to be able to continue search.
7650     */
7651     if (saved_error != HA_ERR_KEY_NOT_FOUND)
7652       saved_error= error;
7653     DBUG_PRINT("info", ("END_OF_FILE/KEY_NOT_FOUND on partition %u", i));
7654   }
7655   if (saved_error == HA_ERR_END_OF_FILE)
7656     m_part_spec.start_part= NO_CURRENT_PART_ID;
7657   DBUG_RETURN(saved_error);
7658 }
7659 
7660 
7661 /**
7662   Common routine to start index scan with ordered results.
7663 
7664   @param[out] buf  Read row in MariaDB Row Format
7665 
7666   @return Operation status
7667     @retval HA_ERR_END_OF_FILE  End of scan
7668     @retval HA_ERR_KEY_NOT_FOUNE  End of scan
7669     @retval 0                   Success
7670     @retval other               Error code
7671 
7672   @details
7673     This part contains the logic to handle index scans that require ordered
7674     output. This includes all except those started by read_range_first with
7675     the flag ordered set to FALSE. Thus most direct index_read and all
7676     index_first and index_last.
7677 
7678     We implement ordering by keeping one record plus a key buffer for each
7679     partition. Every time a new entry is requested we will fetch a new
7680     entry from the partition that is currently not filled with an entry.
7681     Then the entry is put into its proper sort position.
7682 
7683     Returning a record is done by getting the top record, copying the
7684     record to the request buffer and setting the partition as empty on
7685     entries.
7686 */
7687 
handle_ordered_index_scan(uchar * buf,bool reverse_order)7688 int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
7689 {
7690   int error;
7691   uint i;
7692   uint j= queue_first_element(&m_queue);
7693   uint smallest_range_seq= 0;
7694   bool found= FALSE;
7695   uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
7696   int saved_error= HA_ERR_END_OF_FILE;
7697   DBUG_ENTER("ha_partition::handle_ordered_index_scan");
7698   DBUG_PRINT("enter", ("partition this: %p", this));
7699 
7700    if (m_pre_calling)
7701      error= handle_pre_scan(reverse_order, m_pre_call_use_parallel);
7702    else
7703      error= handle_pre_scan(reverse_order, check_parallel_search());
7704    if (unlikely(error))
7705     DBUG_RETURN(error);
7706 
7707   if (m_key_not_found)
7708   {
7709     /* m_key_not_found was set in the previous call to this function */
7710     m_key_not_found= false;
7711     bitmap_clear_all(&m_key_not_found_partitions);
7712   }
7713   m_top_entry= NO_CURRENT_PART_ID;
7714   DBUG_PRINT("info", ("partition queue_remove_all(1)"));
7715   queue_remove_all(&m_queue);
7716   DBUG_ASSERT(bitmap_is_set(&m_part_info->read_partitions,
7717                             m_part_spec.start_part));
7718 
7719   /*
7720     Position part_rec_buf_ptr to point to the first used partition >=
7721     start_part. There may be partitions marked by used_partitions,
7722     but is before start_part. These partitions has allocated record buffers
7723     but is dynamically pruned, so those buffers must be skipped.
7724   */
7725   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
7726        i < m_part_spec.start_part;
7727        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7728   {
7729     part_rec_buf_ptr+= m_priority_queue_rec_len;
7730   }
7731   DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
7732                       m_part_spec.start_part, i));
7733   for (/* continue from above */ ;
7734        i <= m_part_spec.end_part ;
7735        i= bitmap_get_next_set(&m_part_info->read_partitions, i),
7736        part_rec_buf_ptr+= m_priority_queue_rec_len)
7737   {
7738     DBUG_PRINT("info", ("reading from part %u (scan_type: %u)",
7739                         i, m_index_scan_type));
7740     DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr + ORDERED_PART_NUM_OFFSET));
7741     uchar *rec_buf_ptr= part_rec_buf_ptr + ORDERED_REC_OFFSET;
7742     handler *file= m_file[i];
7743 
7744     switch (m_index_scan_type) {
7745     case partition_index_read:
7746       error= file->ha_index_read_map(rec_buf_ptr,
7747                                      m_start_key.key,
7748                                      m_start_key.keypart_map,
7749                                      m_start_key.flag);
7750       /* Caller has specified reverse_order */
7751       break;
7752     case partition_index_first:
7753       error= file->ha_index_first(rec_buf_ptr);
7754       reverse_order= FALSE;
7755       break;
7756     case partition_index_last:
7757       error= file->ha_index_last(rec_buf_ptr);
7758       reverse_order= TRUE;
7759       break;
7760     case partition_read_range:
7761     {
7762       /*
7763         This can only read record to table->record[0], as it was set when
7764         the table was being opened. We have to memcpy data ourselves.
7765       */
7766       error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
7767                                     end_range, eq_range, TRUE);
7768       if (likely(!error))
7769         memcpy(rec_buf_ptr, table->record[0], m_rec_length);
7770       reverse_order= FALSE;
7771       break;
7772     }
7773     case partition_read_multi_range:
7774     {
7775       if (!bitmap_is_set(&m_mrr_used_partitions, i))
7776         continue;
7777       DBUG_PRINT("info", ("partition %u", i));
7778       error= file->multi_range_read_next(&m_range_info[i]);
7779       DBUG_PRINT("info", ("error: %d", error));
7780       if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
7781       {
7782         bitmap_clear_bit(&m_mrr_used_partitions, i);
7783         continue;
7784       }
7785       if (likely(!error))
7786       {
7787         memcpy(rec_buf_ptr, table->record[0], m_rec_length);
7788         reverse_order= FALSE;
7789         m_stock_range_seq[i]= (((PARTITION_KEY_MULTI_RANGE *)
7790                                 m_range_info[i])->id);
7791         /* Test if the key is in the first key range */
7792         if (m_stock_range_seq[i] != m_mrr_range_current->id)
7793         {
7794           /*
7795             smallest_range_seq contains the smallest key range we have seen
7796             so far
7797           */
7798           if (!smallest_range_seq || smallest_range_seq > m_stock_range_seq[i])
7799             smallest_range_seq= m_stock_range_seq[i];
7800           continue;
7801         }
7802       }
7803       break;
7804     }
7805     default:
7806       DBUG_ASSERT(FALSE);
7807       DBUG_RETURN(HA_ERR_END_OF_FILE);
7808     }
7809     if (likely(!error))
7810     {
7811       found= TRUE;
7812       if (!m_using_extended_keys)
7813       {
7814         file->position(rec_buf_ptr);
7815         memcpy(rec_buf_ptr + m_rec_length, file->ref, file->ref_length);
7816       }
7817       /*
7818         Initialize queue without order first, simply insert
7819       */
7820       queue_element(&m_queue, j++)= part_rec_buf_ptr;
7821       if (table->s->blob_fields)
7822       {
7823         Ordered_blob_storage **storage=
7824           *((Ordered_blob_storage ***) part_rec_buf_ptr);
7825         swap_blobs(rec_buf_ptr, storage, false);
7826       }
7827     }
7828     else if (error == HA_ERR_KEY_NOT_FOUND)
7829     {
7830       DBUG_PRINT("info", ("HA_ERR_KEY_NOT_FOUND from partition %u", i));
7831       bitmap_set_bit(&m_key_not_found_partitions, i);
7832       m_key_not_found= true;
7833       saved_error= error;
7834     }
7835     else if (error != HA_ERR_END_OF_FILE)
7836     {
7837       DBUG_RETURN(error);
7838     }
7839   }
7840 
7841   if (!found && smallest_range_seq)
7842   {
7843     /* We know that there is an existing row based on code above */
7844     found= TRUE;
7845     part_rec_buf_ptr= m_ordered_rec_buffer;
7846 
7847     /*
7848       No key found in the first key range
7849       Collect all partitions that has a key in smallest_range_seq
7850      */
7851     DBUG_PRINT("info", ("partition !found && smallest_range_seq"));
7852     for (i= bitmap_get_first_set(&m_part_info->read_partitions);
7853          i <= m_part_spec.end_part;
7854          i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7855     {
7856       DBUG_PRINT("info", ("partition current_part: %u", i));
7857       if (i < m_part_spec.start_part)
7858       {
7859         part_rec_buf_ptr+= m_priority_queue_rec_len;
7860         DBUG_PRINT("info", ("partition i < m_part_spec.start_part"));
7861         continue;
7862       }
7863       if (!bitmap_is_set(&m_mrr_used_partitions, i))
7864       {
7865         part_rec_buf_ptr+= m_priority_queue_rec_len;
7866         DBUG_PRINT("info", ("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
7867         continue;
7868       }
7869       DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr + ORDERED_PART_NUM_OFFSET));
7870       if (smallest_range_seq == m_stock_range_seq[i])
7871       {
7872         m_stock_range_seq[i]= 0;
7873         queue_element(&m_queue, j++)= (uchar *) part_rec_buf_ptr;
7874         DBUG_PRINT("info", ("partition smallest_range_seq == m_stock_range_seq[i]"));
7875       }
7876       part_rec_buf_ptr+= m_priority_queue_rec_len;
7877     }
7878 
7879     /* Update global m_mrr_range_current to the current range */
7880     while (m_mrr_range_current->id < smallest_range_seq)
7881       m_mrr_range_current= m_mrr_range_current->next;
7882   }
7883   if (found)
7884   {
7885     /*
7886       We found at least one partition with data, now sort all entries and
7887       after that read the first entry and copy it to the buffer to return in.
7888     */
7889     queue_set_max_at_top(&m_queue, reverse_order);
7890     queue_set_cmp_arg(&m_queue, (void*) this);
7891     m_queue.elements= j - queue_first_element(&m_queue);
7892     queue_fix(&m_queue);
7893     return_top_record(buf);
7894     DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
7895     DBUG_RETURN(0);
7896   }
7897   DBUG_RETURN(saved_error);
7898 }
7899 
7900 
7901 /*
7902   Return the top record in sort order
7903 
7904   SYNOPSIS
7905     return_top_record()
7906     out:buf                  Row returned in MySQL Row Format
7907 
7908   RETURN VALUE
7909     NONE
7910 */
7911 
return_top_record(uchar * buf)7912 void ha_partition::return_top_record(uchar *buf)
7913 {
7914   uint part_id;
7915   uchar *key_buffer= queue_top(&m_queue);
7916   uchar *rec_buffer= key_buffer + ORDERED_REC_OFFSET;
7917   DBUG_ENTER("ha_partition::return_top_record");
7918   DBUG_PRINT("enter", ("partition this: %p", this));
7919 
7920   part_id= uint2korr(key_buffer + ORDERED_PART_NUM_OFFSET);
7921   memcpy(buf, rec_buffer, m_rec_length);
7922   if (table->s->blob_fields)
7923   {
7924     Ordered_blob_storage **storage= *((Ordered_blob_storage ***) key_buffer);
7925     swap_blobs(buf, storage, true);
7926   }
7927   m_last_part= part_id;
7928   DBUG_PRINT("info", ("partition m_last_part: %u", m_last_part));
7929   m_top_entry= part_id;
7930   table->status= 0;                             // Found an existing row
7931   m_file[part_id]->return_record_by_parent();
7932   DBUG_VOID_RETURN;
7933 }
7934 
7935 /*
7936   This function is only used if the partitioned table has own partitions.
7937   This can happen if the partitioned VP engine is used (part of spider).
7938 */
7939 
void ha_partition::return_record_by_parent()
{
  /*
    Forward the call to the handler of the partition the last returned
    row came from (see the note above: only used when the partitioned
    VP engine, part of Spider, owns the partitions).
  */
  m_file[m_last_part]->return_record_by_parent();
  /*
    NOTE(review): reaching this point is treated as unexpected in debug
    builds — presumably the underlying engine is expected to take over
    the call entirely; confirm against the VP/Spider engine.
  */
  DBUG_ASSERT(0);
}
7945 
7946 
7947 /**
7948   Add index_next/prev from partitions without exact match.
7949 
  If there were any partitions that returned HA_ERR_KEY_NOT_FOUND when
7951   ha_index_read_map was done, those partitions must be included in the
7952   following index_next/prev call.
7953 */
7954 
handle_ordered_index_scan_key_not_found()7955 int ha_partition::handle_ordered_index_scan_key_not_found()
7956 {
7957   int error;
7958   uint i, old_elements= m_queue.elements;
7959   uchar *part_buf= m_ordered_rec_buffer;
7960   uchar *curr_rec_buf= NULL;
7961   DBUG_ENTER("ha_partition::handle_ordered_index_scan_key_not_found");
7962   DBUG_PRINT("enter", ("partition this: %p", this));
7963   DBUG_ASSERT(m_key_not_found);
7964   /*
7965     Loop over all used partitions to get the correct offset
7966     into m_ordered_rec_buffer.
7967   */
7968   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
7969        i < m_tot_parts;
7970        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
7971   {
7972     if (bitmap_is_set(&m_key_not_found_partitions, i))
7973     {
7974       /*
7975         This partition is used and did return HA_ERR_KEY_NOT_FOUND
7976         in index_read_map.
7977       */
7978       curr_rec_buf= part_buf + ORDERED_REC_OFFSET;
7979       error= m_file[i]->ha_index_next(curr_rec_buf);
7980       /* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
7981       DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
7982       if (likely(!error))
7983       {
7984         DBUG_PRINT("info", ("partition queue_insert(1)"));
7985         queue_insert(&m_queue, part_buf);
7986       }
7987       else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
7988         DBUG_RETURN(error);
7989     }
7990     part_buf += m_priority_queue_rec_len;
7991   }
7992   DBUG_ASSERT(curr_rec_buf);
7993   bitmap_clear_all(&m_key_not_found_partitions);
7994   m_key_not_found= false;
7995 
7996   if (m_queue.elements > old_elements)
7997   {
7998     /* Update m_top_entry, which may have changed. */
7999     uchar *key_buffer= queue_top(&m_queue);
8000     m_top_entry= uint2korr(key_buffer);
8001   }
8002   DBUG_RETURN(0);
8003 }
8004 
8005 
8006 /*
8007   Common routine to handle index_next with ordered results
8008 
8009   SYNOPSIS
8010     handle_ordered_next()
8011     out:buf                       Read row in MySQL Row Format
8012     next_same                     Called from index_next_same
8013 
8014   RETURN VALUE
8015     HA_ERR_END_OF_FILE            End of scan
8016     0                             Success
8017     other                         Error code
8018 */
8019 
int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
{
  int error;
  DBUG_ENTER("ha_partition::handle_ordered_next");

  /* No current top entry means the ordered scan is already exhausted. */
  if (m_top_entry == NO_CURRENT_PART_ID)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  /* The partition that produced the previously returned row. */
  uint part_id= m_top_entry;
  /* Queue element (header + cached row) belonging to that partition. */
  uchar *part_rec_buf_ptr= queue_top(&m_queue);
  uchar *rec_buf= part_rec_buf_ptr + ORDERED_REC_OFFSET;
  handler *file;

  if (m_key_not_found)
  {
    if (is_next_same)
    {
      /* Only rows which match the key. */
      m_key_not_found= false;
      bitmap_clear_all(&m_key_not_found_partitions);
    }
    else
    {
      /* There are partitions not included in the index record queue. */
      uint old_elements= m_queue.elements;
      if (unlikely((error= handle_ordered_index_scan_key_not_found())))
        DBUG_RETURN(error);
      /*
        If the queue top changed, i.e. one of the partitions that gave
        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
        return it.
        Otherwise replace the old with a call to index_next (fall through).
      */
      if (old_elements != m_queue.elements && part_id != m_top_entry)
      {
        return_top_record(buf);
        DBUG_RETURN(0);
      }
    }
  }
  if (part_id >= m_tot_parts)
  {
    /* This should never happen! */
    DBUG_ASSERT(0);
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  file= m_file[part_id];

  if (m_index_scan_type == partition_read_range)
  {
    error= file->read_range_next();
    if (likely(!error))
    {
      /* Cache the new row in this partition's queue slot. */
      memcpy(rec_buf, table->record[0], m_rec_length);
      if (table->s->blob_fields)
      {
        Ordered_blob_storage **storage=
          *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf, storage, false);
      }
    }
  }
  else if (m_index_scan_type == partition_read_multi_range)
  {
    DBUG_PRINT("info", ("partition_read_multi_range route"));
    DBUG_PRINT("info", ("part_id: %u", part_id));
    bool get_next= FALSE;
    error= file->multi_range_read_next(&m_range_info[part_id]);
    DBUG_PRINT("info", ("error: %d", error));
    if (unlikely(error == HA_ERR_KEY_NOT_FOUND))
      error= HA_ERR_END_OF_FILE;
    if (unlikely(error == HA_ERR_END_OF_FILE))
    {
      /* This partition has no more rows; drop it from the merge. */
      bitmap_clear_bit(&m_mrr_used_partitions, part_id);
      DBUG_PRINT("info", ("partition m_queue.elements: %u", m_queue.elements));
      if (m_queue.elements)
      {
        DBUG_PRINT("info", ("partition queue_remove_top(1)"));
        queue_remove_top(&m_queue);
        if (m_queue.elements)
        {
          return_top_record(buf);
          DBUG_PRINT("info", ("Record returned from partition %u (3)",
                              m_top_entry));
          DBUG_RETURN(0);
        }
      }
      get_next= TRUE;
    }
    else if (likely(!error))
    {
      DBUG_PRINT("info", ("m_range_info[%u])->id: %u", part_id,
                          ((PARTITION_KEY_MULTI_RANGE *)
                           m_range_info[part_id])->id));
      DBUG_PRINT("info", ("m_mrr_range_current->id: %u",
                          m_mrr_range_current->id));
      /* Cache the new row in this partition's queue slot. */
      memcpy(rec_buf, table->record[0], m_rec_length);
      if (table->s->blob_fields)
      {
        Ordered_blob_storage **storage= *((Ordered_blob_storage ***) part_rec_buf_ptr);
        swap_blobs(rec_buf, storage, false);
      }
      /*
        The row belongs to a later key range than the one currently being
        merged: stock it (remember its range id) and take the partition out
        of the queue until the merge reaches that range.
      */
      if (((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id !=
          m_mrr_range_current->id)
      {
        m_stock_range_seq[part_id]=
          ((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id;
        DBUG_PRINT("info", ("partition queue_remove_top(2)"));
        queue_remove_top(&m_queue);
        if (!m_queue.elements)
          get_next= TRUE;
      }
    }
    if (get_next)
    {
      /*
        The queue drained: find the smallest stocked range id among the
        remaining MRR partitions and rebuild the queue from the partitions
        stocked for that range.
      */
      DBUG_PRINT("info", ("get_next route"));
      uint i, j= 0, smallest_range_seq= UINT_MAX32;
      for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
      {
        if (!(bitmap_is_set(&(m_part_info->read_partitions), i)))
          continue;
        if (!bitmap_is_set(&m_mrr_used_partitions, i))
          continue;
        if (smallest_range_seq > m_stock_range_seq[i])
          smallest_range_seq= m_stock_range_seq[i];
      }

      DBUG_PRINT("info", ("smallest_range_seq: %u", smallest_range_seq));
      if (smallest_range_seq != UINT_MAX32)
      {
        uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
        DBUG_PRINT("info", ("partition queue_remove_all(2)"));
        queue_remove_all(&m_queue);
        DBUG_PRINT("info", ("m_part_spec.start_part: %u",
          m_part_spec.start_part));

        for (i= bitmap_get_first_set(&m_part_info->read_partitions);
             i <= m_part_spec.end_part;
             i= bitmap_get_next_set(&m_part_info->read_partitions, i),
               part_rec_buf_ptr+= m_priority_queue_rec_len)
        {
          DBUG_PRINT("info",("partition part_id: %u", i));
          if (i < m_part_spec.start_part)
          {
            DBUG_PRINT("info",("partition i < m_part_spec.start_part"));
            continue;
          }
          if (!bitmap_is_set(&m_mrr_used_partitions, i))
          {
            DBUG_PRINT("info",("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
            continue;
          }
          DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr +
                                     ORDERED_PART_NUM_OFFSET));
          DBUG_PRINT("info", ("partition m_stock_range_seq[%u]: %u",
                              i, m_stock_range_seq[i]));
          /* Re-queue every partition stocked for the new current range. */
          if (smallest_range_seq == m_stock_range_seq[i])
          {
            m_stock_range_seq[i]= 0;
            DBUG_PRINT("info", ("partition queue_insert(2)"));
            queue_insert(&m_queue, part_rec_buf_ptr);
            j++;
          }
        }
        /* Advance the global range pointer to the range now being merged. */
        while (m_mrr_range_current->id < smallest_range_seq)
          m_mrr_range_current= m_mrr_range_current->next;

        DBUG_PRINT("info",("partition m_mrr_range_current: %p",
                           m_mrr_range_current));
        DBUG_PRINT("info",("partition m_mrr_range_current->id: %u",
                           m_mrr_range_current ? m_mrr_range_current->id : 0));
        queue_set_max_at_top(&m_queue, FALSE);
        queue_set_cmp_arg(&m_queue, (void*) this);
        m_queue.elements= j;
        queue_fix(&m_queue);
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (4)",
                            m_top_entry));
        DBUG_RETURN(0);
      }
    }
  }
  else if (!is_next_same)
    error= file->ha_index_next(rec_buf);
  else
    error= file->ha_index_next_same(rec_buf, m_start_key.key,
                                    m_start_key.length);

  if (unlikely(error))
  {
    if (error == HA_ERR_END_OF_FILE && m_queue.elements)
    {
      /* Return next buffered row */
      DBUG_PRINT("info", ("partition queue_remove_top(3)"));
      queue_remove_top(&m_queue);
      if (m_queue.elements)
      {
         return_top_record(buf);
         DBUG_PRINT("info", ("Record returned from partition %u (2)",
                     m_top_entry));
         error= 0;
      }
    }
    DBUG_RETURN(error);
  }

  /* Store the row position next to the row when not using extended keys. */
  if (!m_using_extended_keys)
  {
    file->position(rec_buf);
    memcpy(rec_buf + m_rec_length, file->ref, file->ref_length);
  }

  queue_replace_top(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
  DBUG_RETURN(0);
}
8238 
8239 
8240 /*
8241   Common routine to handle index_prev with ordered results
8242 
8243   SYNOPSIS
8244     handle_ordered_prev()
8245     out:buf                       Read row in MySQL Row Format
8246 
8247   RETURN VALUE
8248     HA_ERR_END_OF_FILE            End of scan
8249     0                             Success
8250     other                         Error code
8251 */
8252 
handle_ordered_prev(uchar * buf)8253 int ha_partition::handle_ordered_prev(uchar *buf)
8254 {
8255   int error;
8256   DBUG_ENTER("ha_partition::handle_ordered_prev");
8257   DBUG_PRINT("enter", ("partition: %p", this));
8258 
8259   if (m_top_entry == NO_CURRENT_PART_ID)
8260     DBUG_RETURN(HA_ERR_END_OF_FILE);
8261 
8262   uint part_id= m_top_entry;
8263   uchar *rec_buf= queue_top(&m_queue) + ORDERED_REC_OFFSET;
8264   handler *file= m_file[part_id];
8265 
8266   if (unlikely((error= file->ha_index_prev(rec_buf))))
8267   {
8268     if (error == HA_ERR_END_OF_FILE && m_queue.elements)
8269     {
8270       DBUG_PRINT("info", ("partition queue_remove_top(4)"));
8271       queue_remove_top(&m_queue);
8272       if (m_queue.elements)
8273       {
8274 	return_top_record(buf);
8275 	DBUG_PRINT("info", ("Record returned from partition %u (2)",
8276 			    m_top_entry));
8277         error= 0;
8278       }
8279     }
8280     DBUG_RETURN(error);
8281   }
8282   queue_replace_top(&m_queue);
8283   return_top_record(buf);
8284   DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
8285   DBUG_RETURN(0);
8286 }
8287 
8288 
8289 /****************************************************************************
8290                 MODULE information calls
8291 ****************************************************************************/
8292 
8293 /*
8294   These are all first approximations of the extra, info, scan_time
8295   and read_time calls
8296 */
8297 
8298 /**
8299   Helper function for sorting according to number of rows in descending order.
8300 */
8301 
compare_number_of_records(ha_partition * me,const uint32 * a,const uint32 * b)8302 int ha_partition::compare_number_of_records(ha_partition *me,
8303                                             const uint32 *a,
8304                                             const uint32 *b)
8305 {
8306   handler **file= me->m_file;
8307   /* Note: sorting in descending order! */
8308   if (file[*a]->stats.records > file[*b]->stats.records)
8309     return -1;
8310   if (file[*a]->stats.records < file[*b]->stats.records)
8311     return 1;
8312   return 0;
8313 }
8314 
8315 
8316 /*
8317   General method to gather info from handler
8318 
8319   SYNOPSIS
8320     info()
8321     flag              Specifies what info is requested
8322 
8323   RETURN VALUE
8324     NONE
8325 
8326   DESCRIPTION
8327     ::info() is used to return information to the optimizer.
8328     Currently this table handler doesn't implement most of the fields
8329     really needed. SHOW also makes use of this data
8330     Another note, if your handler doesn't provide exact record count,
8331     you will probably want to have the following in your code:
8332     if (records < 2)
8333       records = 2;
8334     The reason is that the server will optimize for cases of only a single
8335     record. If in a table scan you don't know the number of records
8336     it will probably be better to set records to two so you can return
8337     as many records as you need.
8338 
8339     Along with records a few more variables you may wish to set are:
8340       records
8341       deleted
8342       data_file_length
8343       index_file_length
8344       delete_length
8345       check_time
8346     Take a look at the public variables in handler.h for more information.
8347 
8348     Called in:
8349       filesort.cc
8350       ha_heap.cc
8351       item_sum.cc
8352       opt_sum.cc
      sql_delete.cc
      sql_delete.cc
      sql_derived.cc
8356       sql_select.cc
8357       sql_select.cc
8358       sql_select.cc
8359       sql_select.cc
8360       sql_select.cc
8361       sql_show.cc
8362       sql_show.cc
8363       sql_show.cc
8364       sql_show.cc
8365       sql_table.cc
8366       sql_union.cc
8367       sql_update.cc
8368 
8369     Some flags that are not implemented
8370       HA_STATUS_POS:
8371         This parameter is never used from the MySQL Server. It is checked in a
8372         place in MyISAM so could potentially be used by MyISAM specific
8373         programs.
8374       HA_STATUS_NO_LOCK:
8375       This is declared and often used. It's only used by MyISAM.
8376       It means that MySQL doesn't need the absolute latest statistics
8377       information. This may save the handler from doing internal locks while
8378       retrieving statistics data.
8379 */
8380 
int ha_partition::info(uint flag)
{
  uint no_lock_flag= flag & HA_STATUS_NO_LOCK;
  uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA;
  DBUG_ENTER("ha_partition::info");

#ifndef DBUG_OFF
  if (bitmap_is_set_all(&(m_part_info->read_partitions)))
    DBUG_PRINT("info", ("All partitions are used"));
#endif /* DBUG_OFF */
  /* Auto-increment: report / lazily initialize the table-level counter. */
  if (flag & HA_STATUS_AUTO)
  {
    bool auto_inc_is_first_in_idx= (table_share->next_number_keypart == 0);
    bool all_parts_opened= true;
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (!table->found_next_number_field)
      stats.auto_increment_value= 0;
    else if (part_share->auto_inc_initialized)
    {
      lock_auto_increment();
      stats.auto_increment_value= part_share->next_auto_inc_val;
      unlock_auto_increment();
    }
    else
    {
      lock_auto_increment();
      /* to avoid two concurrent initializations, check again when locked */
      if (part_share->auto_inc_initialized)
        stats.auto_increment_value= part_share->next_auto_inc_val;
      else
      {
        /*
          The auto-inc mutex in the table_share is locked, so we do not need
          to have the handlers locked.
          HA_STATUS_NO_LOCK is not checked, since we cannot skip locking
          the mutex, because it is initialized.
        */
        handler *file, **file_array;
        ulonglong auto_increment_value= 0;
        file_array= m_file;
        DBUG_PRINT("info",
                   ("checking all partitions for auto_increment_value"));
        do
        {
          if (!bitmap_is_set(&m_opened_partitions, (uint)(file_array - m_file)))
          {
            /*
              Some partitions aren't opened.
              So we can't calculate the autoincrement.
            */
            all_parts_opened= false;
            break;
          }
          file= *file_array;
          file->info(HA_STATUS_AUTO | no_lock_flag);
          /* The table-level value is the max over all partitions. */
          set_if_bigger(auto_increment_value,
                        file->stats.auto_increment_value);
        } while (*(++file_array));

        DBUG_ASSERT(auto_increment_value);
        stats.auto_increment_value= auto_increment_value;
        if (all_parts_opened && auto_inc_is_first_in_idx)
        {
          set_if_bigger(part_share->next_auto_inc_val,
                        auto_increment_value);
          if (can_use_for_auto_inc_init())
            part_share->auto_inc_initialized= true;
          DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu",
                       (ulong) part_share->next_auto_inc_val));
        }
      }
      unlock_auto_increment();
    }
  }
  if (flag & HA_STATUS_VARIABLE)
  {
    uint i;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    /*
      Calculates statistical variables
      records:           Estimate of number records in table
      We report sum (always at least 2 if not empty)
      deleted:           Estimate of number holes in the table due to
      deletes
      We report sum
      data_file_length:  Length of data file, in principle bytes in table
      We report sum
      index_file_length: Length of index file, in principle bytes in
      indexes in the table
      We report sum
      delete_length: Length of free space easily used by new records in table
      We report sum
      mean_record_length:Mean record length in the table
      We calculate this
      check_time:        Time of last check (only applicable to MyISAM)
      We report last time of all underlying handlers
    */
    handler *file;
    stats.records= 0;
    stats.deleted= 0;
    stats.data_file_length= 0;
    stats.index_file_length= 0;
    stats.delete_length= 0;
    stats.check_time= 0;
    stats.checksum= 0;
    stats.checksum_null= TRUE;
    /* Accumulate over the partitions used by this query only. */
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      file= m_file[i];
      file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      stats.records+= file->stats.records;
      stats.deleted+= file->stats.deleted;
      stats.data_file_length+= file->stats.data_file_length;
      stats.index_file_length+= file->stats.index_file_length;
      stats.delete_length+= file->stats.delete_length;
      if (file->stats.check_time > stats.check_time)
        stats.check_time= file->stats.check_time;
      if (!file->stats.checksum_null)
      {
        stats.checksum+= file->stats.checksum;
        stats.checksum_null= FALSE;
      }
    }
    /*
      Report at least 2 rows for a non-empty table with inexact stats, so
      the optimizer does not assume a single-row table (see header comment).
    */
    if (stats.records && stats.records < 2 &&
        !(m_file[0]->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
      stats.records= 2;
    if (stats.records > 0)
      stats.mean_rec_length= (ulong) (stats.data_file_length / stats.records);
    else
      stats.mean_rec_length= 0;
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    /*
      Recalculate loads of constant variables. MyISAM also sets things
      directly on the table share object.

      Check whether this should be fixed since handlers should not
      change things directly on the table object.

      Monty comment: This should NOT be changed!  It's the handlers
      responsibility to correct table->s->keys_xxxx information if keys
      have been disabled.

      The most important parameters set here is records per key on
      all indexes. block_size and primary key ref_length.

      For each index there is an array of rec_per_key.
      As an example if we have an index with three attributes a,b and c
      we will have an array of 3 rec_per_key.
      rec_per_key[0] is an estimate of number of records divided by
      number of unique values of the field a.
      rec_per_key[1] is an estimate of the number of records divided
      by the number of unique combinations of the fields a and b.
      rec_per_key[2] is an estimate of the number of records divided
      by the number of unique combinations of the fields a,b and c.

      Many handlers only set the value of rec_per_key when all fields
      are bound (rec_per_key[2] in the example above).

      If the handler doesn't support statistics, it should set all of the
      above to 0.

      We first scan through all partitions to get the one holding most rows.
      We will then allow the handler with the most rows to set
      the rec_per_key and use this as an estimate on the total table.

      max_data_file_length:     Maximum data file length
      We ignore it, is only used in
      SHOW TABLE STATUS
      max_index_file_length:    Maximum index file length
      We ignore it since it is never used
      block_size:               Block size used
      We set it to the value of the first handler
      ref_length:               We set this to the value calculated
      and stored in local object
      create_time:              Creation time of table

      So we calculate these constants by using the variables from the
      handler with most rows.
    */
    handler *file, **file_array;
    ulonglong max_records= 0;
    uint32 i= 0;
    uint32 handler_instance= 0;
    bool handler_instance_set= 0;

    file_array= m_file;
    do
    {
      file= *file_array;
      if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
      {
        /* Get variables if not already done */
        if (!(flag & HA_STATUS_VARIABLE) ||
            !bitmap_is_set(&(m_part_info->read_partitions),
                           (uint) (file_array - m_file)))
          file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
        /* Track the opened partition holding the most rows. */
        if (file->stats.records > max_records || !handler_instance_set)
        {
          handler_instance_set= 1;
          max_records= file->stats.records;
          handler_instance= i;
        }
      }
      i++;
    } while (*(++file_array));
    /*
      Sort the array of part_ids by number of records
      in descending order.
    */
    my_qsort2((void*) m_part_ids_sorted_by_num_of_records,
              m_tot_parts,
              sizeof(uint32),
              (qsort2_cmp) compare_number_of_records,
              this);

    file= m_file[handler_instance];
    file->info(HA_STATUS_CONST | no_lock_flag);
    stats.block_size= file->stats.block_size;
    stats.create_time= file->stats.create_time;
    ref_length= m_ref_length;
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    handler *file= m_file[m_last_part];
    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
    /*
      This flag is used to get index number of the unique index that
      reported duplicate key
      We will report the errkey on the last handler used and ignore the rest
      Note: not all engines support HA_STATUS_ERRKEY, so set errkey first.
    */
    file->errkey= errkey;
    file->info(HA_STATUS_ERRKEY | no_lock_flag);
    errkey= file->errkey;
  }
  if (flag & HA_STATUS_TIME)
  {
    handler *file, **file_array;
    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
    /*
      This flag is used to set the latest update time of the table.
      Used by SHOW commands
      We will report the maximum of these times
    */
    stats.update_time= 0;
    file_array= m_file;
    do
    {
      file= *file_array;
      file->info(HA_STATUS_TIME | no_lock_flag);
      if (file->stats.update_time > stats.update_time)
	stats.update_time= file->stats.update_time;
    } while (*(++file_array));
  }
  DBUG_RETURN(0);
}
8642 
8643 
get_dynamic_partition_info(PARTITION_STATS * stat_info,uint part_id)8644 void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
8645                                               uint part_id)
8646 {
8647   handler *file= m_file[part_id];
8648   DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
8649   file->info(HA_STATUS_TIME | HA_STATUS_VARIABLE |
8650              HA_STATUS_VARIABLE_EXTRA | HA_STATUS_NO_LOCK);
8651 
8652   stat_info->records=              file->stats.records;
8653   stat_info->mean_rec_length=      file->stats.mean_rec_length;
8654   stat_info->data_file_length=     file->stats.data_file_length;
8655   stat_info->max_data_file_length= file->stats.max_data_file_length;
8656   stat_info->index_file_length=    file->stats.index_file_length;
8657   stat_info->max_index_file_length= file->stats.max_index_file_length;
8658   stat_info->delete_length=        file->stats.delete_length;
8659   stat_info->create_time=          file->stats.create_time;
8660   stat_info->update_time=          file->stats.update_time;
8661   stat_info->check_time=           file->stats.check_time;
8662   stat_info->check_sum=            file->stats.checksum;
8663   stat_info->check_sum_null=       file->stats.checksum_null;
8664 }
8665 
8666 
void ha_partition::set_partitions_to_open(List<String> *partition_names)
{
  /* Remember which partitions a later open should actually open. */
  m_partitions_to_open= partition_names;
}
8671 
8672 
/*
  Open every partition in read_partitions that is not yet open.
  name_buff/name_buff_size is scratch space for building partition file
  names.  Also picks m_file_sample: the first partition that should be open.
*/
int ha_partition::open_read_partitions(char *name_buff, size_t name_buff_size)
{
  handler **file;
  char *name_buffer_ptr;
  int error= 0;

  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  m_file_sample= NULL;
  do
  {
    int n_file= (int)(file-m_file);
    int is_open= bitmap_is_set(&m_opened_partitions, n_file);
    int should_be_open= bitmap_is_set(&m_part_info->read_partitions, n_file);

    /*
      TODO: we can close some opened partitions if they're not
      used in the query. It probably should be synchronized with the
      table_open_cache value.

      if (is_open && !should_be_open)
      {
        if (unlikely((error= (*file)->ha_close())))
          goto err_handler;
        bitmap_clear_bit(&m_opened_partitions, n_file);
      }
      else
    */
    if (!is_open && should_be_open)
    {
      LEX_CSTRING save_connect_string= table->s->connect_string;
      /* Build the partition's file name from the table's normalized path. */
      if (unlikely((error=
                    create_partition_name(name_buff, name_buff_size,
                                          table->s->normalized_path.str,
                                          name_buffer_ptr, NORMAL_PART_NAME,
                                          FALSE))))
        goto err_handler;
      /*
        Temporarily expose the per-partition connect string while opening,
        unless the engine can read it from the partition itself; restored
        right after ha_open() below.
      */
      if (!((*file)->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
        table->s->connect_string= m_connect_string[(uint)(file-m_file)];
      error= (*file)->ha_open(table, name_buff, m_mode,
                              m_open_test_lock | HA_OPEN_NO_PSI_CALL);
      table->s->connect_string= save_connect_string;
      if (error)
        goto err_handler;
      bitmap_set_bit(&m_opened_partitions, n_file);
      m_last_part= n_file;
    }
    if (!m_file_sample && should_be_open)
      m_file_sample= *file;
    /* Partition names are stored back-to-back, NUL-separated. */
    name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
  } while (*(++file));

err_handler:
  return error;
}
8728 
8729 
/**
  Change the set of partitions used by the current statement and open
  any that are required but still closed.

  @param partition_names  list from the PARTITION (...) clause, or NULL
                          meaning all partitions

  @return Operation status
    @retval 0   success (or nothing needed to be done)
    @retval !=0 error code from bitmap setup, .par read or partition open
*/

int ha_partition::change_partitions_to_open(List<String> *partition_names)
{
  char name_buff[FN_REFLEN+1];
  int error= 0;

  /* A clone shares the partitions of its donor; nothing to (re)open. */
  if (m_is_clone_of)
    return 0;

  m_partitions_to_open= partition_names;
  if (unlikely((error= m_part_info->set_partition_bitmaps(partition_names))))
    goto err_handler;

  if (m_lock_type != F_UNLCK)
  {
    /*
      That happens after the LOCK TABLE statement.
      Do nothing in this case.
    */
    return 0;
  }

  check_insert_autoincrement();
  /*
    NOTE(review): bitmap_cmp() here presumably reports that all bits of
    read_partitions are already present in m_opened_partitions, i.e. there
    is nothing left to open -- confirm against the my_bitmap API.
  */
  if (bitmap_cmp(&m_opened_partitions, &m_part_info->read_partitions) != 0)
    return 0;

  /* Re-read the .par file and open the still-closed partitions. */
  if (unlikely((error= read_par_file(table->s->normalized_path.str)) ||
               (error= open_read_partitions(name_buff, sizeof(name_buff)))))
    goto err_handler;

  clear_handler_file();

err_handler:
  return error;
}
8764 
8765 
extra_cb(handler * h,void * operation)8766 static int extra_cb(handler *h, void *operation)
8767 {
8768   return h->extra(*(enum ha_extra_function*)operation);
8769 }
8770 
8771 
start_keyread_cb(handler * h,void * p)8772 static int start_keyread_cb(handler* h, void *p)
8773 {
8774   return h->ha_start_keyread(*(uint*)p);
8775 }
8776 
8777 
end_keyread_cb(handler * h,void * unused)8778 static int end_keyread_cb(handler* h, void *unused)
8779 {
8780   return h->ha_end_keyread();
8781 }
8782 
8783 
8784 /**
8785   General function to prepare handler for certain behavior.
8786 
8787   @param[in]    operation       operation to execute
8788 
8789   @return       status
8790     @retval     0               success
8791     @retval     >0              error code
8792 
8793   @detail
8794 
8795   extra() is called whenever the server wishes to send a hint to
8796   the storage engine. The MyISAM engine implements the most hints.
8797 
8798   We divide the parameters into the following categories:
8799   1) Operations used by most handlers
8800   2) Operations used by some non-MyISAM handlers
8801   3) Operations used only by MyISAM
8802   4) Operations only used by temporary tables for query processing
8803   5) Operations only used by MyISAM internally
8804   6) Operations not used at all
8805   7) Operations only used by federated tables for query processing
8806   8) Operations only used by NDB
8807   9) Operations only used by MERGE
8808 
8809   The partition handler needs to handle categories 1), 2) and 3).
8810 
8811   1) Operations used by most handlers
8812   -----------------------------------
8813   HA_EXTRA_RESET:
8814     This option is used by most handlers and it resets the handler state
8815     to the same state as after an open call. This includes releasing
8816     any READ CACHE or WRITE CACHE or other internal buffer used.
8817 
8818     It is called from the reset method in the handler interface. There are
8819     three instances where this is called.
8820     1) After completing a INSERT ... SELECT ... query the handler for the
8821        table inserted into is reset
8822     2) It is called from close_thread_table which in turn is called from
8823        close_thread_tables except in the case where the tables are locked
8824        in which case ha_commit_stmt is called instead.
8825        It is only called from here if refresh_version hasn't changed and the
8826        table is not an old table when calling close_thread_table.
8827        close_thread_tables is called from many places as a general clean up
8828        function after completing a query.
8829     3) It is called when deleting the QUICK_RANGE_SELECT object if the
8830        QUICK_RANGE_SELECT object had its own handler object. It is called
8831        immediately before close of this local handler object.
8832   HA_EXTRA_KEYREAD:
8833   HA_EXTRA_NO_KEYREAD:
8834     These parameters are used to provide an optimisation hint to the handler.
8835     If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
8836     many handlers this means that the index-only scans can be used and it
8837     is not necessary to use the real records to satisfy this part of the
8838     query. Index-only scans is a very important optimisation for disk-based
8839     indexes. For main-memory indexes most indexes contain a reference to the
8840     record and thus KEYREAD only says that it is enough to read key fields.
8841     HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET
8842     will disable this option.
8843     The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
8844     feature is supported.
8845   HA_EXTRA_FLUSH:
8846     Indication to flush tables to disk, is supposed to be used to
8847     ensure disk based tables are flushed at end of query execution.
8848     Currently is never used.
8849 
8850   HA_EXTRA_FORCE_REOPEN:
8851     Only used by MyISAM and Archive, called when altering table,
8852     closing tables to enforce a reopen of the table files.
8853 
8854   2) Operations used by some non-MyISAM handlers
8855   ----------------------------------------------
8856   HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
8857     This is a strictly InnoDB feature that is more or less undocumented.
8858     When it is activated InnoDB copies field by field from its fetch
8859     cache instead of all fields in one memcpy. Have no idea what the
8860     purpose of this is.
8861     Cut from include/my_base.h:
8862     When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
8863     other fields intact. When this is off (by default) InnoDB will use memcpy
8864     to overwrite entire row.
8865   HA_EXTRA_IGNORE_DUP_KEY:
8866   HA_EXTRA_NO_IGNORE_DUP_KEY:
8867     Informs the handler that we will not stop the transaction if we get
8868     duplicate key errors during insert/update.
8869     Always called in pair, triggered by INSERT IGNORE and other similar
8870     SQL constructs.
8871     Not used by MyISAM.
8872 
8873   3) Operations used only by MyISAM
8874   ---------------------------------
8875   HA_EXTRA_NORMAL:
8876     Only used in MyISAM to reset quick mode, not implemented by any other
8877     handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
8878 
8879     It is called after completing a successful DELETE query if the QUICK
8880     option is set.
8881 
8882   HA_EXTRA_QUICK:
8883     When the user does DELETE QUICK FROM table where-clause; this extra
8884     option is called before the delete query is performed and
8885     HA_EXTRA_NORMAL is called after the delete query is completed.
8886     Temporary tables used internally in MySQL always set this option
8887 
8888     The meaning of quick mode is that when deleting in a B-tree no merging
8889     of leafs is performed. This is a common method and many large DBMS's
8890     actually only support this quick mode since it is very difficult to
8891     merge leaves in a tree used by many threads concurrently.
8892 
8893   HA_EXTRA_CACHE:
8894     This flag is usually set with extra_opt along with a cache size.
8895     The size of this buffer is set by the user variable
8896     record_buffer_size. The value of this cache size is the amount of
8897     data read from disk in each fetch when performing a table scan.
8898     This means that before scanning a table it is normal to call
8899     extra with HA_EXTRA_CACHE and when the scan is completed to call
8900     HA_EXTRA_NO_CACHE to release the cache memory.
8901 
8902     Some special care is taken when using this extra parameter since there
8903     could be a write ongoing on the table in the same statement. In this
8904     one has to take special care since there might be a WRITE CACHE as
8905     well. HA_EXTRA_CACHE specifies using a READ CACHE and using
8906     READ CACHE and WRITE CACHE at the same time is not possible.
8907 
8908     Only MyISAM currently use this option.
8909 
8910     It is set when doing full table scans using rr_sequential and
8911     reset when completing such a scan with end_read_record
8912     (resetting means calling extra with HA_EXTRA_NO_CACHE).
8913 
8914     It is set in filesort.cc for MyISAM internal tables and it is set in
8915     a multi-update where HA_EXTRA_CACHE is called on a temporary result
8916     table and after that ha_rnd_init(0) on table to be updated
8917     and immediately after that HA_EXTRA_NO_CACHE on table to be updated.
8918 
8919     Apart from that it is always used from init_read_record but not when
8920     used from UPDATE statements. It is not used from DELETE statements
8921     with ORDER BY and LIMIT but it is used in normal scan loop in DELETE
8922     statements. The reason here is that DELETEs in MyISAM don't move
8923     existing data rows.
8924 
8925     It is also set in copy_data_between_tables when scanning the old table
8926     to copy over to the new table.
8927     And it is set in join_init_read_record where quick objects are used
8928     to perform a scan on the table. In this case the full table scan can
8929     even be performed multiple times as part of the nested loop join.
8930 
8931     For purposes of the partition handler it is obviously necessary to have
8932     special treatment of this extra call. If we would simply pass this
8933     extra call down to each handler we would allocate
8934     cache size * no of partitions amount of memory and this is not
8935     necessary since we will only scan one partition at a time when doing
8936     full table scans.
8937 
8938     Thus we treat it by first checking whether we have MyISAM handlers in
8939     the table, if not we simply ignore the call and if we have we will
8940     record the call but will not call any underlying handler yet. Then
8941     when performing the sequential scan we will check this recorded value
8942     and call extra_opt whenever we start scanning a new partition.
8943 
8944   HA_EXTRA_NO_CACHE:
8945     When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
8946     flush method in the select_union class.
8947     It is used to some extent when insert delayed inserts.
8948     See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
8949 
8950     It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
8951     if they are MyISAM handlers. Other handlers we can ignore the call
8952     for. If no cache is in use they will quickly return after finding
8953     this out. And we also ensure that all caches are disabled and no one
8954     is left by mistake.
8955     In the future this call will probably be deleted and we will instead call
8956     ::reset();
8957 
8958   HA_EXTRA_WRITE_CACHE:
8959     See above, called from various places. It is mostly used when we
8960     do INSERT ... SELECT
8961     No special handling to save cache space is developed currently.
8962 
8963   HA_EXTRA_PREPARE_FOR_UPDATE:
8964     This is called as part of a multi-table update. When the table to be
8965     updated is also scanned then this informs MyISAM handler to drop any
8966     caches if dynamic records are used (fixed size records do not care
8967     about this call). We pass this along to the first partition to scan, and
8968     flag that it is to be called after HA_EXTRA_CACHE when moving to the next
8969     partition to scan.
8970 
8971   HA_EXTRA_PREPARE_FOR_DROP:
8972     Only used by MyISAM, called in preparation for a DROP TABLE.
8973     It's used mostly by Windows that cannot handle dropping an open file.
8974     On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
8975 
8976   HA_EXTRA_PREPARE_FOR_RENAME:
8977     Informs the handler we are about to attempt a rename of the table.
8978     For handlers that have share open files (MyISAM key-file and
8979     Archive writer) they must close the files before rename is possible
8980     on Windows.
8981 
8982   HA_EXTRA_READCHECK:
8983   HA_EXTRA_NO_READCHECK:
8984     Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
8985     this is not needed in SQL. The reason for this call is that MyISAM sets
8986     the READ_CHECK_USED in the open call so the call is needed for MyISAM
8987     to reset this feature.
8988     The idea with this parameter was to inform of doing/not doing a read
8989     check before applying an update. Since SQL always performs a read before
8990     applying the update No Read Check is needed in MyISAM as well.
8991 
8992     This is a cut from Docs/myisam.txt
8993      Sometimes you might want to force an update without checking whether
8994      another user has changed the record since you last read it. This is
8995      somewhat dangerous, so it should ideally not be used. That can be
8996      accomplished by wrapping the mi_update() call in two calls to mi_extra(),
8997      using these functions:
8998      HA_EXTRA_NO_READCHECK=5                 No readcheck on update
8999      HA_EXTRA_READCHECK=6                    Use readcheck (def)
9000 
9001   HA_EXTRA_REMEMBER_POS:
9002   HA_EXTRA_RESTORE_POS:
9003     System versioning needs this for MyISAM and Aria tables.
9004     On DELETE using PRIMARY KEY:
9005     1) handler::ha_index_read_map() saves rowid used for row delete/update
9006     2) handler::ha_update_row() can rewrite saved rowid
9007     3) handler::ha_delete_row()/handler::ha_update_row() expects saved but got
9008        different rowid and operation fails
9009     Using those flags prevents harmful side effect of 2)
9010 
9011   4) Operations only used by temporary tables for query processing
9012   ----------------------------------------------------------------
9013   HA_EXTRA_RESET_STATE:
9014     Same as reset() except that buffers are not released. If there is
9015     a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
9016     or to change type of cache between READ CACHE and WRITE CACHE.
9017 
9018     This extra function is always called immediately before calling
9019     delete_all_rows on the handler for temporary tables.
9020     There are cases however when HA_EXTRA_RESET_STATE isn't called in
9021     a similar case for a temporary table in sql_union.cc and in two other
9022     cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
9023     called afterwards.
9024     The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
9025     disable caching, delete all rows and enable WRITE CACHE. This is
9026     used for temporary tables containing distinct sums and a
9027     functional group.
9028 
9029     The only case that delete_all_rows is called on non-temporary tables
9030     is in sql_delete.cc when DELETE FROM table; is called by a user.
9031     In this case no special extra calls are performed before or after this
9032     call.
9033 
9034     The partition handler should not need to bother about this one. It
9035     should never be called.
9036 
9037   HA_EXTRA_NO_ROWS:
9038     Don't insert rows indication to HEAP and MyISAM, only used by temporary
9039     tables used in query processing.
9040     Not handled by partition handler.
9041 
9042   5) Operations only used by MyISAM internally
9043   --------------------------------------------
9044   HA_EXTRA_REINIT_CACHE:
9045     This call reinitializes the READ CACHE described above if there is one
9046     and otherwise the call is ignored.
9047 
9048     We can thus safely call it on all underlying handlers if they are
9049     MyISAM handlers. It is however never called so we don't handle it at all.
9050   HA_EXTRA_FLUSH_CACHE:
9051     Flush WRITE CACHE in MyISAM. It is only from one place in the code.
9052     This is in sql_insert.cc where it is called if the table_flags doesn't
9053     contain HA_DUPLICATE_POS. The only handler having the HA_DUPLICATE_POS
9054     set is the MyISAM handler and so the only handler not receiving this
9055     call is MyISAM.
9056     Thus in effect this call is called but never used. Could be removed
9057     from sql_insert.cc
9058   HA_EXTRA_NO_USER_CHANGE:
9059     Only used by MyISAM, never called.
9060     Simulates lock_type as locked.
9061   HA_EXTRA_WAIT_LOCK:
9062   HA_EXTRA_WAIT_NOLOCK:
9063     Only used by MyISAM, called from MyISAM handler but never from server
9064     code on top of the handler.
9065     Sets lock_wait on/off
9066   HA_EXTRA_NO_KEYS:
9067     Only used MyISAM, only used internally in MyISAM handler, never called
9068     from server level.
9069   HA_EXTRA_KEYREAD_CHANGE_POS:
9070   HA_EXTRA_PRELOAD_BUFFER_SIZE:
9071   HA_EXTRA_CHANGE_KEY_TO_DUP:
9072   HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
9073     Only used by MyISAM, never called.
9074 
9075   6) Operations not used at all
9076   -----------------------------
9077   HA_EXTRA_KEY_CACHE:
9078   HA_EXTRA_NO_KEY_CACHE:
9079     These parameters are no longer used and could be removed.
9080 
9081   7) Operations only used by federated tables for query processing
9082   ----------------------------------------------------------------
9083   HA_EXTRA_INSERT_WITH_UPDATE:
9084     Inform handler that an "INSERT...ON DUPLICATE KEY UPDATE" will be
9085     executed. This condition is unset by HA_EXTRA_NO_IGNORE_DUP_KEY.
9086 
9087   8) Operations only used by NDB
9088   ------------------------------
9089   HA_EXTRA_DELETE_CANNOT_BATCH:
9090   HA_EXTRA_UPDATE_CANNOT_BATCH:
9091     Inform handler that delete_row()/update_row() cannot batch deletes/updates
9092     and should perform them immediately. This may be needed when table has
9093     AFTER DELETE/UPDATE triggers which access to subject table.
9094     These flags are reset by the handler::extra(HA_EXTRA_RESET) call.
9095 
9096   9) Operations only used by MERGE
9097   ------------------------------
9098   HA_EXTRA_ADD_CHILDREN_LIST:
9099   HA_EXTRA_ATTACH_CHILDREN:
9100   HA_EXTRA_IS_ATTACHED_CHILDREN:
9101   HA_EXTRA_DETACH_CHILDREN:
9102     Special actions for MERGE tables. Ignore.
9103 */
9104 
extra(enum ha_extra_function operation)9105 int ha_partition::extra(enum ha_extra_function operation)
9106 {
9107   DBUG_ENTER("ha_partition:extra");
9108   DBUG_PRINT("enter", ("operation: %d", (int) operation));
9109 
9110   switch (operation) {
9111     /* Category 1), used by most handlers */
9112   case HA_EXTRA_NO_KEYREAD:
9113     DBUG_RETURN(loop_partitions(end_keyread_cb, NULL));
9114   case HA_EXTRA_KEYREAD:
9115   case HA_EXTRA_FLUSH:
9116   case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
9117     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9118   case HA_EXTRA_PREPARE_FOR_RENAME:
9119   case HA_EXTRA_FORCE_REOPEN:
9120     DBUG_RETURN(loop_extra_alter(operation));
9121     break;
9122 
9123     /* Category 2), used by non-MyISAM handlers */
9124   case HA_EXTRA_IGNORE_DUP_KEY:
9125   case HA_EXTRA_NO_IGNORE_DUP_KEY:
9126   case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
9127   {
9128     if (!m_myisam)
9129       DBUG_RETURN(loop_partitions(extra_cb, &operation));
9130   }
9131   break;
9132 
9133   /* Category 3), used by MyISAM handlers */
9134   case HA_EXTRA_PREPARE_FOR_UPDATE:
9135     /*
9136       Needs to be run on the first partition in the range now, and
9137       later in late_extra_cache, when switching to a new partition to scan.
9138     */
9139     m_extra_prepare_for_update= TRUE;
9140     if (m_part_spec.start_part != NO_CURRENT_PART_ID)
9141     {
9142       if (!m_extra_cache)
9143         m_extra_cache_part_id= m_part_spec.start_part;
9144       DBUG_ASSERT(m_extra_cache_part_id == m_part_spec.start_part);
9145       (void) m_file[m_part_spec.start_part]->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
9146     }
9147     break;
9148   case HA_EXTRA_NORMAL:
9149   case HA_EXTRA_QUICK:
9150   case HA_EXTRA_PREPARE_FOR_DROP:
9151   case HA_EXTRA_FLUSH_CACHE:
9152   case HA_EXTRA_PREPARE_FOR_ALTER_TABLE:
9153   case HA_EXTRA_REMEMBER_POS:
9154   case HA_EXTRA_RESTORE_POS:
9155   {
9156     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9157   }
9158   case HA_EXTRA_NO_READCHECK:
9159   {
9160     /*
9161       This is only done as a part of ha_open, which is also used in
9162       ha_partition::open, so no need to do anything.
9163     */
9164     break;
9165   }
9166   case HA_EXTRA_CACHE:
9167   {
9168     prepare_extra_cache(0);
9169     break;
9170   }
9171   case HA_EXTRA_NO_CACHE:
9172   {
9173     int ret= 0;
9174     if (m_extra_cache_part_id != NO_CURRENT_PART_ID)
9175       ret= m_file[m_extra_cache_part_id]->extra(HA_EXTRA_NO_CACHE);
9176     m_extra_cache= FALSE;
9177     m_extra_cache_size= 0;
9178     m_extra_prepare_for_update= FALSE;
9179     m_extra_cache_part_id= NO_CURRENT_PART_ID;
9180     DBUG_RETURN(ret);
9181   }
9182   case HA_EXTRA_WRITE_CACHE:
9183   {
9184     m_extra_cache= FALSE;
9185     m_extra_cache_size= 0;
9186     m_extra_prepare_for_update= FALSE;
9187     m_extra_cache_part_id= NO_CURRENT_PART_ID;
9188     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9189   }
9190   case HA_EXTRA_IGNORE_NO_KEY:
9191   case HA_EXTRA_NO_IGNORE_NO_KEY:
9192   {
9193     /*
9194       Ignore as these are specific to NDB for handling
9195       idempotency
9196      */
9197     break;
9198   }
9199   case HA_EXTRA_WRITE_CAN_REPLACE:
9200   case HA_EXTRA_WRITE_CANNOT_REPLACE:
9201   {
9202     /*
9203       Informs handler that write_row() can replace rows which conflict
9204       with row being inserted by PK/unique key without reporting error
9205       to the SQL-layer.
9206 
9207       At this time, this is safe by limitation of ha_partition
9208     */
9209     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9210   }
9211     /* Category 7), used by federated handlers */
9212   case HA_EXTRA_INSERT_WITH_UPDATE:
9213     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9214     /* Category 8) Operations only used by NDB */
9215   case HA_EXTRA_DELETE_CANNOT_BATCH:
9216   case HA_EXTRA_UPDATE_CANNOT_BATCH:
9217   {
9218     /* Currently only NDB use the *_CANNOT_BATCH */
9219     break;
9220   }
9221     /* Category 9) Operations only used by MERGE */
9222   case HA_EXTRA_ADD_CHILDREN_LIST:
9223     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9224   case HA_EXTRA_ATTACH_CHILDREN:
9225   {
9226     int result;
9227     uint num_locks;
9228     handler **file;
9229     if ((result= loop_partitions(extra_cb, &operation)))
9230       DBUG_RETURN(result);
9231 
9232     /* Recalculate lock count as each child may have different set of locks */
9233     num_locks= 0;
9234     file= m_file;
9235     do
9236     {
9237       num_locks+= (*file)->lock_count();
9238     } while (*(++file));
9239 
9240     m_num_locks= num_locks;
9241     break;
9242   }
9243   case HA_EXTRA_IS_ATTACHED_CHILDREN:
9244     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9245   case HA_EXTRA_DETACH_CHILDREN:
9246     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9247   case HA_EXTRA_MARK_AS_LOG_TABLE:
9248   /*
9249     http://dev.mysql.com/doc/refman/5.1/en/partitioning-limitations.html
9250     says we no longer support logging to partitioned tables, so we fail
9251     here.
9252   */
9253     DBUG_RETURN(ER_UNSUPORTED_LOG_ENGINE);
9254   case HA_EXTRA_STARTING_ORDERED_INDEX_SCAN:
9255   case HA_EXTRA_BEGIN_ALTER_COPY:
9256   case HA_EXTRA_END_ALTER_COPY:
9257     DBUG_RETURN(loop_partitions(extra_cb, &operation));
9258   default:
9259   {
9260     /* Temporary crash to discover what is wrong */
9261     DBUG_ASSERT(0);
9262     break;
9263   }
9264   }
9265   DBUG_RETURN(1);
9266 }
9267 
9268 
9269 /**
9270   Special extra call to reset extra parameters
9271 
9272   @return Operation status.
9273     @retval >0 Error code
9274     @retval 0  Success
9275 
9276   @note Called at end of each statement to reset buffers.
9277   To avoid excessive calls, the m_partitions_to_reset bitmap keeps track
9278   of which partitions have been used in extra(), external_lock() or
9279   start_stmt() and therefore need to be reset.
9280 */
9281 
reset(void)9282 int ha_partition::reset(void)
9283 {
9284   int result= 0;
9285   int tmp;
9286   uint i;
9287   DBUG_ENTER("ha_partition::reset");
9288 
9289   for (i= bitmap_get_first_set(&m_partitions_to_reset);
9290        i < m_tot_parts;
9291        i= bitmap_get_next_set(&m_partitions_to_reset, i))
9292   {
9293     if (bitmap_is_set(&m_opened_partitions, i) &&
9294         (tmp= m_file[i]->ha_reset()))
9295       result= tmp;
9296   }
9297   bitmap_clear_all(&m_partitions_to_reset);
9298   m_extra_prepare_for_update= FALSE;
9299   DBUG_RETURN(result);
9300 }
9301 
9302 /**
9303   Special extra method with additional parameter
9304   See @ref ha_partition::extra
9305 
9306   @param[in]    operation       operation to execute
9307   @param[in]    arg             extra argument
9308 
9309   @return       status
9310     @retval     0               success
9311     @retval     >0              error code
9312 
9313   @detail
9314     Operations supported by extra_opt:
9315     HA_EXTRA_KEYREAD:
9316       arg is interpreted as key index
9317     HA_EXTRA_CACHE:
9318       arg is interpreted as size of cache in full table scan
9319 
9320     For detailed description refer to @ref ha_partition::extra
9321 */
9322 
extra_opt(enum ha_extra_function operation,ulong arg)9323 int ha_partition::extra_opt(enum ha_extra_function operation, ulong arg)
9324 {
9325   DBUG_ENTER("ha_partition::extra_opt");
9326 
9327   switch (operation)
9328   {
9329     case HA_EXTRA_KEYREAD:
9330       DBUG_RETURN(loop_partitions(start_keyread_cb, &arg));
9331     case HA_EXTRA_CACHE:
9332       prepare_extra_cache(arg);
9333       DBUG_RETURN(0);
9334     default:
9335       DBUG_ASSERT(0);
9336   }
9337   DBUG_RETURN(1);
9338 }
9339 
9340 
9341 /*
9342   Call extra on handler with HA_EXTRA_CACHE and cachesize
9343 
9344   SYNOPSIS
9345     prepare_extra_cache()
9346     cachesize                Size of cache for full table scan
9347 
9348   RETURN VALUE
9349     NONE
9350 */
9351 
void ha_partition::prepare_extra_cache(uint cachesize)
{
  DBUG_ENTER("ha_partition::prepare_extra_cache");
  DBUG_PRINT("enter", ("cachesize %u", cachesize));

  /*
    Record the cache request; it is applied lazily per partition via
    late_extra_cache() as the scan moves from partition to partition.
  */
  m_extra_cache= TRUE;
  m_extra_cache_size= cachesize;
  if (m_part_spec.start_part != NO_CURRENT_PART_ID)
  {
    /*
      In debug builds the assert guarantees the bit is already set, making
      the bitmap_set_bit() below redundant; it is kept for release builds
      where DBUG_ASSERT compiles away.
    */
    DBUG_ASSERT(bitmap_is_set(&m_partitions_to_reset,
                              m_part_spec.start_part));
    bitmap_set_bit(&m_partitions_to_reset, m_part_spec.start_part);
    late_extra_cache(m_part_spec.start_part);
  }
  DBUG_VOID_RETURN;
}
9368 
9369 
9370 /**
9371   Prepares our new and reorged handlers for rename or delete.
9372 
9373   @param operation Operation to forward
9374 
9375   @return Operation status
9376     @retval 0  Success
9377     @retval !0 Error
9378 */
9379 
loop_extra_alter(enum ha_extra_function operation)9380 int ha_partition::loop_extra_alter(enum ha_extra_function operation)
9381 {
9382   int result= 0, tmp;
9383   handler **file;
9384   DBUG_ENTER("ha_partition::loop_extra_alter");
9385   DBUG_ASSERT(operation == HA_EXTRA_PREPARE_FOR_RENAME ||
9386               operation == HA_EXTRA_FORCE_REOPEN);
9387 
9388   if (m_new_file != NULL)
9389   {
9390     for (file= m_new_file; *file; file++)
9391       if ((tmp= (*file)->extra(operation)))
9392         result= tmp;
9393   }
9394   if (m_reorged_file != NULL)
9395   {
9396     for (file= m_reorged_file; *file; file++)
9397       if ((tmp= (*file)->extra(operation)))
9398         result= tmp;
9399   }
9400   if ((tmp= loop_partitions(extra_cb, &operation)))
9401     result= tmp;
9402   DBUG_RETURN(result);
9403 }
9404 
9405 
9406 /**
9407   Call callback(part, param) on all partitions
9408 
9409     @param callback                 a callback to call for each partition
9410     @param param                    a void*-parameter passed to callback
9411 
9412     @return Operation status
9413       @retval >0                    Error code
9414       @retval 0                     Success
9415 */
9416 
loop_partitions(handler_callback callback,void * param)9417 int ha_partition::loop_partitions(handler_callback callback, void *param)
9418 {
9419   int result= 0, tmp;
9420   uint i;
9421   DBUG_ENTER("ha_partition::loop_partitions");
9422 
9423   for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
9424        i < m_tot_parts;
9425        i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
9426   {
9427     /*
9428       This can be called after an error in ha_open.
9429       In this case calling 'extra' can crash.
9430     */
9431     if (bitmap_is_set(&m_opened_partitions, i) &&
9432         (tmp= callback(m_file[i], param)))
9433       result= tmp;
9434   }
9435   /* Add all used partitions to be called in reset(). */
9436   bitmap_union(&m_partitions_to_reset, &m_part_info->lock_partitions);
9437   DBUG_RETURN(result);
9438 }
9439 
9440 
9441 /*
9442   Call extra(HA_EXTRA_CACHE) on next partition_id
9443 
9444   SYNOPSIS
9445     late_extra_cache()
9446     partition_id               Partition id to call extra on
9447 
9448   RETURN VALUE
9449     NONE
9450 */
9451 
void ha_partition::late_extra_cache(uint partition_id)
{
  handler *file;
  DBUG_ENTER("ha_partition::late_extra_cache");
  DBUG_PRINT("enter", ("extra_cache %u prepare %u partid %u size %u",
                       m_extra_cache, m_extra_prepare_for_update,
                       partition_id, m_extra_cache_size));

  /* Nothing was recorded by extra()/extra_opt(); nothing to apply. */
  if (!m_extra_cache && !m_extra_prepare_for_update)
    DBUG_VOID_RETURN;
  file= m_file[partition_id];
  if (m_extra_cache)
  {
    /* Size 0 means "use default": plain extra(); otherwise extra_opt(). */
    if (m_extra_cache_size == 0)
      (void) file->extra(HA_EXTRA_CACHE);
    else
      (void) file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size);
  }
  /* Must follow HA_EXTRA_CACHE; see the HA_EXTRA_PREPARE_FOR_UPDATE notes. */
  if (m_extra_prepare_for_update)
  {
    (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
  }
  /* Remember which partition holds the cache so it can be released later. */
  m_extra_cache_part_id= partition_id;
  DBUG_VOID_RETURN;
}
9477 
9478 
9479 /*
9480   Call extra(HA_EXTRA_NO_CACHE) on next partition_id
9481 
9482   SYNOPSIS
9483     late_extra_no_cache()
9484     partition_id               Partition id to call extra on
9485 
9486   RETURN VALUE
9487     NONE
9488 */
9489 
late_extra_no_cache(uint partition_id)9490 void ha_partition::late_extra_no_cache(uint partition_id)
9491 {
9492   handler *file;
9493   DBUG_ENTER("ha_partition::late_extra_no_cache");
9494 
9495   if (!m_extra_cache && !m_extra_prepare_for_update)
9496     DBUG_VOID_RETURN;
9497   file= m_file[partition_id];
9498   (void) file->extra(HA_EXTRA_NO_CACHE);
9499   DBUG_ASSERT(partition_id == m_extra_cache_part_id);
9500   m_extra_cache_part_id= NO_CURRENT_PART_ID;
9501   DBUG_VOID_RETURN;
9502 }
9503 
9504 
9505 /****************************************************************************
9506                 MODULE optimiser support
9507 ****************************************************************************/
9508 
9509 /**
9510   Get keys to use for scanning.
9511 
9512   @return key_map of keys usable for scanning
9513 
9514   @note No need to use read_partitions here, since it does not depend on
9515   which partitions is used, only which storage engine used.
9516 */
9517 
keys_to_use_for_scanning()9518 const key_map *ha_partition::keys_to_use_for_scanning()
9519 {
9520   DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
9521   DBUG_RETURN(get_open_file_sample()->keys_to_use_for_scanning());
9522 }
9523 
9524 
9525 /**
9526   Minimum number of rows to base optimizer estimate on.
9527 */
9528 
min_rows_for_estimate()9529 ha_rows ha_partition::min_rows_for_estimate()
9530 {
9531   uint i, max_used_partitions, tot_used_partitions;
9532   DBUG_ENTER("ha_partition::min_rows_for_estimate");
9533 
9534   tot_used_partitions= bitmap_bits_set(&m_part_info->read_partitions);
9535 
9536   /*
9537     All partitions might have been left as unused during partition pruning
9538     due to, for example, an impossible WHERE condition. Nonetheless, the
9539     optimizer might still attempt to perform (e.g. range) analysis where an
9540     estimate of the the number of rows is calculated using records_in_range.
9541     Hence, to handle this and other possible cases, use zero as the minimum
9542     number of rows to base the estimate on if no partition is being used.
9543   */
9544   if (!tot_used_partitions)
9545     DBUG_RETURN(0);
9546 
9547   /*
9548     Allow O(log2(tot_partitions)) increase in number of used partitions.
9549     This gives O(tot_rows/log2(tot_partitions)) rows to base the estimate on.
9550     I.e when the total number of partitions doubles, allow one more
9551     partition to be checked.
9552   */
9553   i= 2;
9554   max_used_partitions= 1;
9555   while (i < m_tot_parts)
9556   {
9557     max_used_partitions++;
9558     i= i << 1;
9559   }
9560   if (max_used_partitions > tot_used_partitions)
9561     max_used_partitions= tot_used_partitions;
9562 
9563   /* stats.records is already updated by the info(HA_STATUS_VARIABLE) call. */
9564   DBUG_PRINT("info", ("max_used_partitions: %u tot_rows: %lu",
9565                       max_used_partitions,
9566                       (ulong) stats.records));
9567   DBUG_PRINT("info", ("tot_used_partitions: %u min_rows_to_check: %lu",
9568                       tot_used_partitions,
9569                       (ulong) stats.records * max_used_partitions
9570                               / tot_used_partitions));
9571   DBUG_RETURN(stats.records * max_used_partitions / tot_used_partitions);
9572 }
9573 
9574 
9575 /**
9576   Get the biggest used partition.
9577 
9578   Starting at the N:th biggest partition and skips all non used
9579   partitions, returning the biggest used partition found
9580 
9581   @param[in,out] part_index  Skip the *part_index biggest partitions
9582 
9583   @return The biggest used partition with index not lower than *part_index.
9584     @retval NO_CURRENT_PART_ID     No more partition used.
9585     @retval != NO_CURRENT_PART_ID  partition id of biggest used partition with
9586                                    index >= *part_index supplied. Note that
9587                                    *part_index will be updated to the next
9588                                    partition index to use.
9589 */
9590 
get_biggest_used_partition(uint * part_index)9591 uint ha_partition::get_biggest_used_partition(uint *part_index)
9592 {
9593   uint part_id;
9594   while ((*part_index) < m_tot_parts)
9595   {
9596     part_id= m_part_ids_sorted_by_num_of_records[(*part_index)++];
9597     if (bitmap_is_set(&m_part_info->read_partitions, part_id))
9598       return part_id;
9599   }
9600   return NO_CURRENT_PART_ID;
9601 }
9602 
9603 
9604 /*
9605   Return time for a scan of the table
9606 
9607   SYNOPSIS
9608     scan_time()
9609 
9610   RETURN VALUE
9611     time for scan
9612 */
9613 
scan_time()9614 double ha_partition::scan_time()
9615 {
9616   double scan_time= 0;
9617   uint i;
9618   DBUG_ENTER("ha_partition::scan_time");
9619 
9620   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9621        i < m_tot_parts;
9622        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9623     scan_time+= m_file[i]->scan_time();
9624   DBUG_RETURN(scan_time);
9625 }
9626 
9627 
9628 /**
9629   @brief
9630   Caculate time to scan the given index (index only scan)
9631 
9632   @param inx      Index number to scan
9633 
9634   @return time for scanning index inx
9635 */
9636 
key_scan_time(uint inx)9637 double ha_partition::key_scan_time(uint inx)
9638 {
9639   double scan_time= 0;
9640   uint i;
9641   DBUG_ENTER("ha_partition::key_scan_time");
9642   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9643        i < m_tot_parts;
9644        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9645     scan_time+= m_file[i]->key_scan_time(inx);
9646   DBUG_RETURN(scan_time);
9647 }
9648 
9649 
keyread_time(uint inx,uint ranges,ha_rows rows)9650 double ha_partition::keyread_time(uint inx, uint ranges, ha_rows rows)
9651 {
9652   double read_time= 0;
9653   uint i;
9654   DBUG_ENTER("ha_partition::keyread_time");
9655   if (!ranges)
9656     DBUG_RETURN(handler::keyread_time(inx, ranges, rows));
9657   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9658        i < m_tot_parts;
9659        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9660     read_time+= m_file[i]->keyread_time(inx, ranges, rows);
9661   DBUG_RETURN(read_time);
9662 }
9663 
9664 
/**
  Find number of records in a range.
  @param inx      Index number
  @param min_key  Start of range
  @param max_key  End of range
  @param pages    [out] Page range estimate; only filled in when exactly
                  one partition is active, otherwise redirected to a dummy.

  @return Number of rows in range.

  Given a starting key, and an ending key estimate the number of rows that
  will exist between the two. max_key may be empty which in case determine
  if start_key matches any rows.

  Estimation strategy: ask partitions in descending order of row count
  (via get_biggest_used_partition) until enough rows have been examined
  (min_rows_for_estimate), then extrapolate to the whole table.
*/

ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key,
				       const key_range *max_key,
                                       page_range *pages)
{
  ha_rows min_rows_to_check, rows, estimated_rows=0, checked_rows= 0;
  uint partition_index= 0, part_id;
  page_range ignore_pages;
  DBUG_ENTER("ha_partition::records_in_range");

  /* Don't calculate pages of more than one active partition */
  if (bitmap_bits_set(&m_part_info->read_partitions) != 1)
    pages= &ignore_pages;

  min_rows_to_check= min_rows_for_estimate();

  /* Visit used partitions biggest-first so the sample converges quickly. */
  while ((part_id= get_biggest_used_partition(&partition_index))
         != NO_CURRENT_PART_ID)
  {
    rows= m_file[part_id]->records_in_range(inx, min_key, max_key, pages);

    DBUG_PRINT("info", ("part %u match %lu rows of %lu", part_id, (ulong) rows,
                        (ulong) m_file[part_id]->stats.records));

    /* Propagate an engine error immediately. */
    if (rows == HA_POS_ERROR)
      DBUG_RETURN(HA_POS_ERROR);
    estimated_rows+= rows;
    checked_rows+= m_file[part_id]->stats.records;
    /*
      Returning 0 means no rows can be found, so we must continue
      this loop as long as we have estimated_rows == 0.
      Also many engines return 1 to indicate that there may exist
      a matching row, we do not normalize this by dividing by number of
      used partitions, but leave it to be returned as a sum, which will
      reflect that we will need to scan each partition's index.

      Note that this statistics may not always be correct, so we must
      continue even if the current partition has 0 rows, since we might have
      deleted rows from the current partition, or inserted to the next
      partition.
    */
    if (estimated_rows && checked_rows &&
        checked_rows >= min_rows_to_check)
    {
      /* Enough rows sampled: scale up by the sampled fraction of the table. */
      DBUG_PRINT("info",
                 ("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
                  inx,
                  (ulong) (estimated_rows * stats.records / checked_rows),
                  (ulong) estimated_rows,
                  (ulong) stats.records,
                  (ulong) checked_rows));
      DBUG_RETURN(estimated_rows * stats.records / checked_rows);
    }
  }
  /* All used partitions were asked; return the plain sum. */
  DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
                      inx,
                      (ulong) estimated_rows));
  DBUG_RETURN(estimated_rows);
}
9736 
9737 
9738 /**
9739   Estimate upper bound of number of rows.
9740 
9741   @return Number of rows.
9742 */
9743 
estimate_rows_upper_bound()9744 ha_rows ha_partition::estimate_rows_upper_bound()
9745 {
9746   ha_rows rows, tot_rows= 0;
9747   handler **file= m_file;
9748   DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
9749 
9750   do
9751   {
9752     if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
9753     {
9754       rows= (*file)->estimate_rows_upper_bound();
9755       if (rows == HA_POS_ERROR)
9756         DBUG_RETURN(HA_POS_ERROR);
9757       tot_rows+= rows;
9758     }
9759   } while (*(++file));
9760   DBUG_RETURN(tot_rows);
9761 }
9762 
9763 
9764 /*
9765   Get time to read
9766 
9767   SYNOPSIS
9768     read_time()
9769     index                Index number used
9770     ranges               Number of ranges
9771     rows                 Number of rows
9772 
9773   RETURN VALUE
9774     time for read
9775 
9776   DESCRIPTION
9777     This will be optimised later to include whether or not the index can
9778     be used with partitioning. To achieve we need to add another parameter
9779     that specifies how many of the index fields that are bound in the ranges.
9780     Possibly added as a new call to handlers.
9781 */
9782 
read_time(uint index,uint ranges,ha_rows rows)9783 double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
9784 {
9785   DBUG_ENTER("ha_partition::read_time");
9786 
9787   DBUG_RETURN(get_open_file_sample()->read_time(index, ranges, rows));
9788 }
9789 
9790 
9791 /**
9792   Number of rows in table. see handler.h
9793 
9794   @return Number of records in the table (after pruning!)
9795 */
9796 
records()9797 ha_rows ha_partition::records()
9798 {
9799   ha_rows tot_rows= 0;
9800   uint i;
9801   DBUG_ENTER("ha_partition::records");
9802 
9803   for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9804        i < m_tot_parts;
9805        i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9806   {
9807     if (unlikely(m_file[i]->pre_records()))
9808       DBUG_RETURN(HA_POS_ERROR);
9809     const ha_rows rows= m_file[i]->records();
9810     if (unlikely(rows == HA_POS_ERROR))
9811       DBUG_RETURN(HA_POS_ERROR);
9812     tot_rows+= rows;
9813   }
9814   DBUG_PRINT("exit", ("records: %lld", (longlong) tot_rows));
9815   DBUG_RETURN(tot_rows);
9816 }
9817 
9818 
9819 /*
9820   Is it ok to switch to a new engine for this table
9821 
9822   SYNOPSIS
9823     can_switch_engine()
9824 
9825   RETURN VALUE
9826     TRUE                  Ok
9827     FALSE                 Not ok
9828 
9829   DESCRIPTION
9830     Used to ensure that tables with foreign key constraints are not moved
9831     to engines without foreign key support.
9832 */
9833 
can_switch_engines()9834 bool ha_partition::can_switch_engines()
9835 {
9836   handler **file;
9837   DBUG_ENTER("ha_partition::can_switch_engines");
9838 
9839   file= m_file;
9840   do
9841   {
9842     if (!(*file)->can_switch_engines())
9843       DBUG_RETURN(FALSE);
9844   } while (*(++file));
9845   DBUG_RETURN(TRUE);
9846 }
9847 
9848 
9849 /*
9850   Is table cache supported
9851 
9852   SYNOPSIS
9853     table_cache_type()
9854 
9855 */
9856 
table_cache_type()9857 uint8 ha_partition::table_cache_type()
9858 {
9859   DBUG_ENTER("ha_partition::table_cache_type");
9860 
9861   DBUG_RETURN(get_open_file_sample()->table_cache_type());
9862 }
9863 
9864 
/**
  Calculate hash value for KEY partitioning using an array of fields.

  @param field_array   An array of the fields in KEY partitioning
                       (NULL-terminated)

  @return hash_value calculated

  @note Uses the hash function on the character set of the field.
  Integer and floating point fields use the binary character set by default.

  @note When the table was created with KEY ALGORITHM = 1 (5.1
  compatibility), specific field types are hashed with the charset that
  5.1 used (binary resp. latin1), so rows keep landing in the same
  partitions after upgrade. Changing any branch here changes row
  placement and silently corrupts existing KEY-partitioned tables.
*/

uint32 ha_partition::calculate_key_hash_value(Field **field_array)
{
  ulong nr1= 1;
  ulong nr2= 4;
  bool use_51_hash;
  /* True when the partitioning was defined with the 5.1 hash algorithm. */
  use_51_hash= MY_TEST((*field_array)->table->part_info->key_algorithm ==
                       partition_info::KEY_ALGORITHM_51);

  do
  {
    Field *field= *field_array;
    if (use_51_hash)
    {
      switch (field->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
        {
          /* NULL values fold into the hash the same way 5.1 did. */
          if (field->is_null())
          {
            nr1^= (nr1 << 1) | 1;
            continue;
          }
          /* Force this to my_hash_sort_bin, which was used in 5.1! */
          uint len= field->pack_length();
          my_charset_bin.hash_sort(field->ptr, len, &nr1, &nr2);
          /* Done with this field, continue with next one. */
          continue;
        }
      case MYSQL_TYPE_STRING:
      case MYSQL_TYPE_VARCHAR:
      case MYSQL_TYPE_BIT:
        /* Not affected, same in 5.1 and 5.5 */
        break;
      /*
        ENUM/SET uses my_hash_sort_simple in 5.1 (i.e. my_charset_latin1)
        and my_hash_sort_bin in 5.5!
      */
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          if (field->is_null())
          {
            nr1^= (nr1 << 1) | 1;
            continue;
          }
          /* Force this to my_hash_sort_bin, which was used in 5.1! */
          uint len= field->pack_length();
          my_charset_latin1.hash_sort(field->ptr, len, &nr1, &nr2);
          continue;
        }
      /* New types in mysql-5.6. */
      case MYSQL_TYPE_DATETIME2:
      case MYSQL_TYPE_TIME2:
      case MYSQL_TYPE_TIMESTAMP2:
        /* Not affected, 5.6+ only! */
        break;

      /* These types should not be allowed for partitioning! */
      case MYSQL_TYPE_NULL:
      case MYSQL_TYPE_DECIMAL:
      case MYSQL_TYPE_DATE:
      case MYSQL_TYPE_TINY_BLOB:
      case MYSQL_TYPE_MEDIUM_BLOB:
      case MYSQL_TYPE_LONG_BLOB:
      case MYSQL_TYPE_BLOB:
      case MYSQL_TYPE_VAR_STRING:
      case MYSQL_TYPE_GEOMETRY:
        /* fall through */
      default:
        DBUG_ASSERT(0);                    // New type?
        /* Fall through for default hashing (5.5). */
      }
      /* fall through, use collation based hashing. */
    }
    /* 5.5+ behaviour: hash with the field's own collation. */
    field->hash(&nr1, &nr2);
  } while (*(++field_array));
  return (uint32) nr1;
}
9965 
9966 
9967 /****************************************************************************
9968                 MODULE print messages
9969 ****************************************************************************/
9970 
index_type(uint inx)9971 const char *ha_partition::index_type(uint inx)
9972 {
9973   uint first_used_partition;
9974   DBUG_ENTER("ha_partition::index_type");
9975 
9976   first_used_partition= bitmap_get_first_set(&(m_part_info->read_partitions));
9977 
9978   if (first_used_partition == MY_BIT_NONE)
9979   {
9980     DBUG_ASSERT(0);                             // How can this happen?
9981     DBUG_RETURN(handler::index_type(inx));
9982   }
9983 
9984   DBUG_RETURN(m_file[first_used_partition]->index_type(inx));
9985 }
9986 
9987 
get_row_type() const9988 enum row_type ha_partition::get_row_type() const
9989 {
9990   uint i;
9991   enum row_type type;
9992   DBUG_ENTER("ha_partition::get_row_type");
9993 
9994   i= bitmap_get_first_set(&m_part_info->read_partitions);
9995   DBUG_ASSERT(i < m_tot_parts);
9996   if (i >= m_tot_parts)
9997     DBUG_RETURN(ROW_TYPE_NOT_USED);
9998 
9999   type= m_file[i]->get_row_type();
10000   DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
10001 
10002   for (i= bitmap_get_next_set(&m_part_info->lock_partitions, i);
10003        i < m_tot_parts;
10004        i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
10005   {
10006     enum row_type part_type= m_file[i]->get_row_type();
10007     DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
10008     if (part_type != type)
10009       DBUG_RETURN(ROW_TYPE_NOT_USED);
10010   }
10011 
10012   DBUG_RETURN(type);
10013 }
10014 
10015 
/**
  Append a human-readable dump of the row in m_err_rec (or record[0]) to
  'str', used when reporting a row found in the wrong partition.

  If the table has a primary key only the PK columns are printed,
  otherwise all columns in the full partition field array are printed.
  When the row to print is not record[0], the field pointers are
  temporarily remapped onto that buffer and restored afterwards.
*/
void ha_partition::append_row_to_str(String &str)
{
  const uchar *rec;
  /* Use record[0] directly when no separate error record is stored. */
  bool is_rec0= !m_err_rec || m_err_rec == table->record[0];
  if (is_rec0)
    rec= table->record[0];
  else
    rec= m_err_rec;
  // If PK, use full PK instead of full part field array!
  if (table->s->primary_key != MAX_KEY)
  {
    KEY *key= table->key_info + table->s->primary_key;
    KEY_PART_INFO *key_part=     key->key_part;
    KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
    /* Point the key fields at the error record instead of record[0]. */
    if (!is_rec0)
      set_key_field_ptr(key, rec, table->record[0]);
    for (; key_part != key_part_end; key_part++)
    {
      Field *field= key_part->field;
      str.append(" ");
      str.append(&field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    /* Restore the field pointers back to record[0]. */
    if (!is_rec0)
      set_key_field_ptr(key, table->record[0], rec);
  }
  else
  {
    Field **field_ptr;
    /* Point the partition fields at the error record instead of record[0]. */
    if (!is_rec0)
      table->move_fields(m_part_info->full_part_field_array, rec,
                    table->record[0]);
    /* No primary key, use full partition field array. */
    for (field_ptr= m_part_info->full_part_field_array;
         *field_ptr;
         field_ptr++)
    {
      Field *field= *field_ptr;
      str.append(" ");
      str.append(&field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    /* Restore the field pointers back to record[0]. */
    if (!is_rec0)
      table->move_fields(m_part_info->full_part_field_array, table->record[0],
                    rec);
  }
}
10065 
10066 
/**
  Report an error to the client, handling partition-specific errors
  (no matching partition; row found in the wrong partition) before
  delegating to the failing partition's handler or to the base handler.

  @param error    Error code
  @param errflag  Error flags (myf)
*/
void ha_partition::print_error(int error, myf errflag)
{
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::print_error");
  DBUG_PRINT("enter", ("error: %d", error));

  /* Should probably look for my own errors first */
  if ((error == HA_ERR_NO_PARTITION_FOUND) &&
      ! (thd->lex->alter_info.partition_flags & ALTER_PARTITION_TRUNCATE))
  {
    m_part_info->print_no_partition_found(table, errflag);
    DBUG_VOID_RETURN;
  }
  else if (error == HA_ERR_ROW_IN_WRONG_PARTITION)
  {
    /* Should only happen on DELETE or UPDATE! */
    DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_DELETE ||
                thd_sql_command(thd) == SQLCOM_DELETE_MULTI ||
                thd_sql_command(thd) == SQLCOM_UPDATE ||
                thd_sql_command(thd) == SQLCOM_UPDATE_MULTI);
    DBUG_ASSERT(m_err_rec);
    if (m_err_rec)
    {
      uint max_length;
      char buf[MAX_KEY_LENGTH];
      String str(buf,sizeof(buf),system_charset_info);
      uint32 part_id;
      /* Build "(found_part != expected_part) <row columns>". */
      str.length(0);
      str.append("(");
      str.append_ulonglong(m_last_part);
      str.append(" != ");
      if (get_part_for_buf(m_err_rec, m_rec0, m_part_info, &part_id))
        str.append("?");
      else
        str.append_ulonglong(part_id);
      str.append(")");
      append_row_to_str(str);

      /* Log this error, so the DBA can notice it and fix it! */
      /* NOTE(review): format seems to lack a separator between "%s" and
         "Please"; the logged line runs the row dump into the sentence —
         verify intended. */
      sql_print_error("Table '%-192s' corrupted: row in wrong partition: %s"
                      "Please REPAIR the table!",
                      table->s->table_name.str,
                      str.c_ptr_safe());

      /* Truncate the row dump so the client message fits MYSQL_ERRMSG_SIZE. */
      max_length= (MYSQL_ERRMSG_SIZE -
                   (uint) strlen(ER_THD(thd, ER_ROW_IN_WRONG_PARTITION)));
      if (str.length() >= max_length)
      {
        str.length(max_length-4);
        str.append(STRING_WITH_LEN("..."));
      }
      my_error(ER_ROW_IN_WRONG_PARTITION, MYF(0), str.c_ptr_safe());
      m_err_rec= NULL;
      DBUG_VOID_RETURN;
    }
    /* fall through to generic error handling. */
  }

  /*
    We choose a main handler's print_error if:
    * m_file has not been initialized, like in bug#42438
    * lookup_errkey is set, which means that an error has occured in the
      main handler, not in individual partitions
  */
  if (m_file && lookup_errkey == (uint)-1)
  {
    if (m_last_part >= m_tot_parts)
    {
      DBUG_ASSERT(0);
      m_last_part= 0;
    }
    m_file[m_last_part]->print_error(error, errflag);
  }
  else
    handler::print_error(error, errflag);
  DBUG_VOID_RETURN;
}
10144 
10145 
get_error_message(int error,String * buf)10146 bool ha_partition::get_error_message(int error, String *buf)
10147 {
10148   DBUG_ENTER("ha_partition::get_error_message");
10149 
10150   /* Should probably look for my own errors first */
10151 
10152   /* In case m_file has not been initialized, like in bug#42438 */
10153   if (m_file)
10154     DBUG_RETURN(m_file[m_last_part]->get_error_message(error, buf));
10155   DBUG_RETURN(handler::get_error_message(error, buf));
10156 
10157 }
10158 
10159 
10160 /****************************************************************************
10161                 MODULE in-place ALTER
10162 ****************************************************************************/
10163 /**
10164   Get table flags.
10165 */
10166 
table_flags() const10167 handler::Table_flags ha_partition::table_flags() const
10168 {
10169   uint first_used_partition= 0;
10170   DBUG_ENTER("ha_partition::table_flags");
10171   if (m_handler_status < handler_initialized ||
10172       m_handler_status >= handler_closed)
10173     DBUG_RETURN(PARTITION_ENABLED_TABLE_FLAGS);
10174 
10175   if (get_lock_type() != F_UNLCK)
10176   {
10177     /*
10178       The flags are cached after external_lock, and may depend on isolation
10179       level. So we should use a locked partition to get the correct flags.
10180     */
10181     first_used_partition= bitmap_get_first_set(&m_part_info->lock_partitions);
10182     if (first_used_partition == MY_BIT_NONE)
10183       first_used_partition= 0;
10184   }
10185   DBUG_RETURN((m_file[first_used_partition]->ha_table_flags() &
10186                  ~(PARTITION_DISABLED_TABLE_FLAGS)) |
10187                  (PARTITION_ENABLED_TABLE_FLAGS));
10188 }
10189 
10190 
10191 /**
10192   alter_table_flags must be on handler/table level, not on hton level
10193   due to the ha_partition hton does not know what the underlying hton is.
10194 */
10195 
alter_table_flags(alter_table_operations flags)10196 alter_table_operations ha_partition::alter_table_flags(alter_table_operations flags)
10197 {
10198   alter_table_operations flags_to_return;
10199   DBUG_ENTER("ha_partition::alter_table_flags");
10200 
10201   flags_to_return= ht->alter_table_flags(flags);
10202   flags_to_return|= m_file[0]->alter_table_flags(flags);
10203 
10204   DBUG_RETURN(flags_to_return);
10205 }
10206 
10207 
10208 /**
10209   check if copy of data is needed in alter table.
10210 */
check_if_incompatible_data(HA_CREATE_INFO * create_info,uint table_changes)10211 bool ha_partition::check_if_incompatible_data(HA_CREATE_INFO *create_info,
10212                                               uint table_changes)
10213 {
10214   /*
10215     The check for any partitioning related changes have already been done
10216     in mysql_alter_table (by fix_partition_func), so it is only up to
10217     the underlying handlers.
10218   */
10219   List_iterator<partition_element> part_it(m_part_info->partitions);
10220   HA_CREATE_INFO dummy_info= *create_info;
10221   uint i=0;
10222   while (partition_element *part_elem= part_it++)
10223   {
10224     if (m_is_sub_partitioned)
10225     {
10226       List_iterator<partition_element> subpart_it(part_elem->subpartitions);
10227       while (partition_element *sub_elem= subpart_it++)
10228       {
10229         dummy_info.data_file_name= sub_elem->data_file_name;
10230         dummy_info.index_file_name= sub_elem->index_file_name;
10231         if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes))
10232           return COMPATIBLE_DATA_NO;
10233       }
10234     }
10235     else
10236     {
10237       dummy_info.data_file_name= part_elem->data_file_name;
10238       dummy_info.index_file_name= part_elem->index_file_name;
10239       if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes))
10240         return COMPATIBLE_DATA_NO;
10241     }
10242   }
10243   return COMPATIBLE_DATA_YES;
10244 }
10245 
10246 
10247 /**
10248   Support of in-place alter table.
10249 */
10250 
10251 /**
10252   Helper class for in-place alter, see handler.h
10253 */
10254 
10255 class ha_partition_inplace_ctx : public inplace_alter_handler_ctx
10256 {
10257 public:
10258   inplace_alter_handler_ctx **handler_ctx_array;
10259 private:
10260   uint m_tot_parts;
10261 
10262 public:
ha_partition_inplace_ctx(THD * thd,uint tot_parts)10263   ha_partition_inplace_ctx(THD *thd, uint tot_parts)
10264     : inplace_alter_handler_ctx(),
10265       handler_ctx_array(NULL),
10266       m_tot_parts(tot_parts)
10267   {}
10268 
~ha_partition_inplace_ctx()10269   ~ha_partition_inplace_ctx()
10270   {
10271     if (handler_ctx_array)
10272     {
10273       for (uint index= 0; index < m_tot_parts; index++)
10274         delete handler_ctx_array[index];
10275     }
10276   }
10277 };
10278 
10279 
/**
  Ask every partition handler whether the alter can be done in place and
  combine the answers (the most restrictive wins).

  @param altered_table  Table definition after the alter
  @param ha_alter_info  In/out description of the alter operations

  @return Most restrictive enum_alter_inplace_result over all partitions,
          or HA_ALTER_ERROR on allocation failure / inconsistent contexts.
*/
enum_alter_inplace_result
ha_partition::check_if_supported_inplace_alter(TABLE *altered_table,
                                               Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  enum_alter_inplace_result result;
  alter_table_operations orig_ops;
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool first_is_set= false;
  THD *thd= ha_thd();

  DBUG_ENTER("ha_partition::check_if_supported_inplace_alter");
  /*
    Support inplace change of KEY () -> KEY ALGORITHM = N ().
    Any other change would set partition_changed in
    prep_alter_part_table() in mysql_alter_table().
  */
  if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
  {
    DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
    DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
  }

  /* Context (and its pointer array) are allocated on the THD mem_root. */
  part_inplace_ctx=
    new (thd->mem_root) ha_partition_inplace_ctx(thd, m_tot_parts);
  if (!part_inplace_ctx)
    DBUG_RETURN(HA_ALTER_ERROR);

  part_inplace_ctx->handler_ctx_array= (inplace_alter_handler_ctx **)
    thd->alloc(sizeof(inplace_alter_handler_ctx *) * (m_tot_parts + 1));
  if (!part_inplace_ctx->handler_ctx_array)
    DBUG_RETURN(HA_ALTER_ERROR);

  /*
    A partition handler may add bits to handler_flags; when that happens,
    re-run the whole round so every partition sees the final flag set.
  */
  do {
    result= HA_ALTER_INPLACE_NO_LOCK;
    /* Set all to NULL, including the terminating one. */
    for (index= 0; index <= m_tot_parts; index++)
       part_inplace_ctx->handler_ctx_array[index]= NULL;

    ha_alter_info->handler_flags |= ALTER_PARTITIONED;
    orig_ops= ha_alter_info->handler_flags;
    for (index= 0; index < m_tot_parts; index++)
    {
      enum_alter_inplace_result p_result=
        m_file[index]->check_if_supported_inplace_alter(altered_table,
                                                        ha_alter_info);
      part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;

      if (index == 0)
        first_is_set= (ha_alter_info->handler_ctx != NULL);
      else if (first_is_set != (ha_alter_info->handler_ctx != NULL))
      {
        /* Either none or all partitions must set handler_ctx! */
        DBUG_ASSERT(0);
        DBUG_RETURN(HA_ALTER_ERROR);
      }
      /* Keep the most restrictive (numerically smallest) verdict. */
      if (p_result < result)
        result= p_result;
      if (result == HA_ALTER_ERROR)
        break;
    }
  } while (orig_ops != ha_alter_info->handler_flags);

  ha_alter_info->handler_ctx= part_inplace_ctx;
  /*
    To indicate for future inplace calls that there are several
    partitions/handlers that need to be committed together,
    we set group_commit_ctx to the NULL terminated array of
    the partitions handlers.
  */
  ha_alter_info->group_commit_ctx= part_inplace_ctx->handler_ctx_array;

  DBUG_RETURN(result);
}
10354 
10355 
/**
  Prepare all underlying partition handlers for an in-place ALTER TABLE.

  Forwards ha_prepare_inplace_alter_table() to every partition, swapping
  the per-partition handler_ctx into ha_alter_info around each call so
  that each engine only sees its own context.

  @param altered_table  TABLE object for the new version of the table.
  @param ha_alter_info  Structure describing the changes to be done.

  @return false on success, true on error.
*/
bool ha_partition::prepare_inplace_alter_table(TABLE *altered_table,
                                               Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  bool error= false;
  ha_partition_inplace_ctx *part_inplace_ctx;

  DBUG_ENTER("ha_partition::prepare_inplace_alter_table");

  /*
    Changing to similar partitioning, only update metadata.
    Non allowed changes would be caught in prep_alter_part_table().
  */
  if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
  {
    DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
    DBUG_RETURN(false);
  }

  /* Context created earlier (holds one handler_ctx slot per partition). */
  part_inplace_ctx=
    static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);

  for (index= 0; index < m_tot_parts && !error; index++)
  {
    /* Expose this partition's private context to its handler. */
    ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
    if (m_file[index]->ha_prepare_inplace_alter_table(altered_table,
                                                      ha_alter_info))
      error= true;
    /* The handler may have replaced its context; store it back. */
    part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
  }
  /* Restore the partitioning-level context for subsequent inplace calls. */
  ha_alter_info->handler_ctx= part_inplace_ctx;

  DBUG_RETURN(error);
}
10390 
10391 
/**
  Perform the in-place ALTER TABLE in all underlying partitions.

  Each partition handler runs with its own saved context; from the second
  partition on, the previous partition's context is handed over via
  set_shared_data() so the engine can share state between the partition
  handlers of the same ALTER.

  @param altered_table  TABLE object for the new version of the table.
  @param ha_alter_info  Structure describing the changes to be done.

  @return false on success, true on error.
*/
bool ha_partition::inplace_alter_table(TABLE *altered_table,
                                       Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  bool error= false;
  ha_partition_inplace_ctx *part_inplace_ctx;

  DBUG_ENTER("ha_partition::inplace_alter_table");

  /*
    Changing to similar partitioning, only update metadata.
    Non allowed changes would be caught in prep_alter_part_table().
  */
  if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
  {
    DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
    DBUG_RETURN(false);
  }

  part_inplace_ctx=
    static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);

  for (index= 0; index < m_tot_parts && !error; index++)
  {
    /* Chain the previous partition's context into this one (if any). */
    if ((ha_alter_info->handler_ctx=
	 part_inplace_ctx->handler_ctx_array[index]) != NULL
	&& index != 0)
      ha_alter_info->handler_ctx->set_shared_data
	(*part_inplace_ctx->handler_ctx_array[index - 1]);

    if (m_file[index]->ha_inplace_alter_table(altered_table,
                                              ha_alter_info))
      error= true;
    /* The handler may have replaced its context; store it back. */
    part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
  }
  /* Restore the partitioning-level context for subsequent inplace calls. */
  ha_alter_info->handler_ctx= part_inplace_ctx;

  DBUG_RETURN(error);
}
10431 
10432 
10433 /*
10434   Note that this function will try rollback failed ADD INDEX by
10435   executing DROP INDEX for the indexes that were committed (if any)
10436   before the error occurred. This means that the underlying storage
10437   engine must be able to drop index in-place with X-lock held.
10438   (As X-lock will be held here if new indexes are to be committed)
10439 */
commit_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info,bool commit)10440 bool ha_partition::commit_inplace_alter_table(TABLE *altered_table,
10441                                               Alter_inplace_info *ha_alter_info,
10442                                               bool commit)
10443 {
10444   ha_partition_inplace_ctx *part_inplace_ctx;
10445   bool error= false;
10446 
10447   DBUG_ENTER("ha_partition::commit_inplace_alter_table");
10448 
10449   /*
10450     Changing to similar partitioning, only update metadata.
10451     Non allowed changes would be caught in prep_alter_part_table().
10452   */
10453   if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
10454   {
10455     DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
10456     DBUG_RETURN(false);
10457   }
10458 
10459   part_inplace_ctx=
10460     static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
10461 
10462   if (commit)
10463   {
10464     DBUG_ASSERT(ha_alter_info->group_commit_ctx ==
10465                 part_inplace_ctx->handler_ctx_array);
10466     ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[0];
10467     error= m_file[0]->ha_commit_inplace_alter_table(altered_table,
10468                                                     ha_alter_info, commit);
10469     if (unlikely(error))
10470       goto end;
10471     if (ha_alter_info->group_commit_ctx)
10472     {
10473       /*
10474         If ha_alter_info->group_commit_ctx is not set to NULL,
10475         then the engine did only commit the first partition!
10476         The engine is probably new, since both innodb and the default
10477         implementation of handler::commit_inplace_alter_table sets it to NULL
10478         and simply return false, since it allows metadata changes only.
10479         Loop over all other partitions as to follow the protocol!
10480       */
10481       uint i;
10482       DBUG_ASSERT(0);
10483       for (i= 1; i < m_tot_parts; i++)
10484       {
10485         ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
10486         error|= m_file[i]->ha_commit_inplace_alter_table(altered_table,
10487                                                          ha_alter_info,
10488                                                          true);
10489       }
10490   }
10491     }
10492   else
10493   {
10494     uint i;
10495     for (i= 0; i < m_tot_parts; i++)
10496     {
10497       /* Rollback, commit == false,  is done for each partition! */
10498       ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
10499       if (m_file[i]->ha_commit_inplace_alter_table(altered_table,
10500                                                    ha_alter_info, false))
10501         error= true;
10502       }
10503     }
10504 end:
10505   ha_alter_info->handler_ctx= part_inplace_ctx;
10506 
10507   DBUG_RETURN(error);
10508 }
10509 
10510 
min_of_the_max_uint(uint (handler::* operator_func)(void)const) const10511 uint ha_partition::min_of_the_max_uint(
10512                        uint (handler::*operator_func)(void) const) const
10513 {
10514   handler **file;
10515   uint min_of_the_max= ((*m_file)->*operator_func)();
10516 
10517   for (file= m_file+1; *file; file++)
10518   {
10519     uint tmp= ((*file)->*operator_func)();
10520     set_if_smaller(min_of_the_max, tmp);
10521   }
10522   return min_of_the_max;
10523 }
10524 
10525 
max_supported_key_parts() const10526 uint ha_partition::max_supported_key_parts() const
10527 {
10528   return min_of_the_max_uint(&handler::max_supported_key_parts);
10529 }
10530 
10531 
max_supported_key_length() const10532 uint ha_partition::max_supported_key_length() const
10533 {
10534   return min_of_the_max_uint(&handler::max_supported_key_length);
10535 }
10536 
10537 
max_supported_key_part_length() const10538 uint ha_partition::max_supported_key_part_length() const
10539 {
10540   return min_of_the_max_uint(&handler::max_supported_key_part_length);
10541 }
10542 
10543 
max_supported_record_length() const10544 uint ha_partition::max_supported_record_length() const
10545 {
10546   return min_of_the_max_uint(&handler::max_supported_record_length);
10547 }
10548 
10549 
max_supported_keys() const10550 uint ha_partition::max_supported_keys() const
10551 {
10552   return min_of_the_max_uint(&handler::max_supported_keys);
10553 }
10554 
10555 
min_record_length(uint options) const10556 uint ha_partition::min_record_length(uint options) const
10557 {
10558   handler **file;
10559   uint max= (*m_file)->min_record_length(options);
10560 
10561   for (file= m_file, file++; *file; file++)
10562     if (max < (*file)->min_record_length(options))
10563       max= (*file)->min_record_length(options);
10564   return max;
10565 }
10566 
10567 /****************************************************************************
10568                 MODULE compare records
10569 ****************************************************************************/
10570 /*
10571   Compare two positions
10572 
10573   SYNOPSIS
10574     cmp_ref()
10575     ref1                   First position
10576     ref2                   Second position
10577 
10578   RETURN VALUE
10579     <0                     ref1 < ref2
10580     0                      Equal
10581     >0                     ref1 > ref2
10582 
10583   DESCRIPTION
10584     We get two references and need to check if those records are the same.
10585     If they belong to different partitions we decide that they are not
10586     the same record. Otherwise we use the particular handler to decide if
10587     they are the same. Sort in partition id order if not equal.
10588 
  MariaDB note:
    Please don't merge the MySQL implementation of this function over
    this one: the MySQL version is incorrect, and MariaDB has an
    alternative fix for the same problem.
10598 */
10599 
int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
{
  int cmp;
  uint32 diff1, diff2;
  DBUG_ENTER("ha_partition::cmp_ref");

  /*
    A ref is PARTITION_BYTES_IN_POS bytes of partition id followed by
    the underlying handler's own reference. Compare the handler part
    first; only fall back to partition id order when those are equal.
  */
  cmp= get_open_file_sample()->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
                                       (ref2 + PARTITION_BYTES_IN_POS));
  if (cmp)
    DBUG_RETURN(cmp);

  /* Read the 2-byte partition ids. */
  diff2= uint2korr(ref2);
  diff1= uint2korr(ref1);

  if (diff1 == diff2)
  {
   /* This means that the references are same and are in same partition.*/
    DBUG_RETURN(0);
  }

  /*
    In Innodb we compare with either primary key value or global DB_ROW_ID so
    it is not possible that the two references are equal and are in different
    partitions, but in myisam it is possible since we are comparing offsets.
    Remove this assert if DB_ROW_ID is changed to be per partition.
  */
  DBUG_ASSERT(!m_innodb);
  DBUG_RETURN(diff2 > diff1 ? -1 : 1);
}
10629 
10630 
10631 /****************************************************************************
10632                 MODULE auto increment
10633 ****************************************************************************/
10634 
10635 
10636 /**
   Retrieve new values for part_share->next_auto_inc_val if needed
10638 
10639    This is needed if the value has not been initialized or if one of
10640    the underlying partitions require that the value should be re-calculated
10641 */
10642 
update_next_auto_inc_val()10643 void ha_partition::update_next_auto_inc_val()
10644 {
10645   if (!part_share->auto_inc_initialized ||
10646       need_info_for_auto_inc())
10647     info(HA_STATUS_AUTO);
10648 }
10649 
10650 
10651 /**
10652   Determine whether a partition needs auto-increment initialization.
10653 
10654   @return
10655     TRUE                    A  partition needs auto-increment initialization
10656     FALSE                   No partition needs auto-increment initialization
10657 
10658   Resets part_share->auto_inc_initialized if next auto_increment needs to be
10659   recalculated.
10660 */
10661 
need_info_for_auto_inc()10662 bool ha_partition::need_info_for_auto_inc()
10663 {
10664   handler **file= m_file;
10665   DBUG_ENTER("ha_partition::need_info_for_auto_inc");
10666 
10667   do
10668   {
10669     if ((*file)->need_info_for_auto_inc())
10670     {
10671       /* We have to get new auto_increment values from handler */
10672       part_share->auto_inc_initialized= FALSE;
10673       DBUG_RETURN(TRUE);
10674     }
10675   } while (*(++file));
10676   DBUG_RETURN(FALSE);
10677 }
10678 
10679 
10680 /**
10681   Determine if all partitions can use the current auto-increment value for
10682   auto-increment initialization.
10683 
10684   @return
10685     TRUE                    All partitions can use the current auto-increment
10686                             value for auto-increment initialization
10687     FALSE                   All partitions cannot use the current
10688                             auto-increment value for auto-increment
10689                             initialization
10690 
10691   Notes
10692     This function is only called for ::info(HA_STATUS_AUTO) and is
10693     mainly used by the Spider engine, which returns false
10694     except in the case of DROP TABLE or ALTER TABLE when it returns TRUE.
10695     Other engines always returns TRUE for this call.
10696 */
10697 
can_use_for_auto_inc_init()10698 bool ha_partition::can_use_for_auto_inc_init()
10699 {
10700   handler **file= m_file;
10701   DBUG_ENTER("ha_partition::can_use_for_auto_inc_init");
10702 
10703   do
10704   {
10705     if (!(*file)->can_use_for_auto_inc_init())
10706       DBUG_RETURN(FALSE);
10707   } while (*(++file));
10708   DBUG_RETURN(TRUE);
10709 }
10710 
10711 
reset_auto_increment(ulonglong value)10712 int ha_partition::reset_auto_increment(ulonglong value)
10713 {
10714   handler **file= m_file;
10715   int res;
10716   DBUG_ENTER("ha_partition::reset_auto_increment");
10717   lock_auto_increment();
10718   part_share->auto_inc_initialized= false;
10719   part_share->next_auto_inc_val= 0;
10720   do
10721   {
10722     if ((res= (*file)->ha_reset_auto_increment(value)) != 0)
10723       break;
10724   } while (*(++file));
10725   unlock_auto_increment();
10726   DBUG_RETURN(res);
10727 }
10728 
10729 
10730 /**
10731   This method is called by update_auto_increment which in turn is called
10732   by the individual handlers as part of write_row. We use the
10733   part_share->next_auto_inc_val, or search all
10734   partitions for the highest auto_increment_value if not initialized or
10735   if auto_increment field is a secondary part of a key, we must search
10736   every partition when holding a mutex to be sure of correctness.
10737 */
10738 
void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment,
                                      ulonglong nb_desired_values,
                                      ulonglong *first_value,
                                      ulonglong *nb_reserved_values)
{
  DBUG_ENTER("ha_partition::get_auto_increment");
  DBUG_PRINT("enter", ("offset: %lu  inc: %lu  desired_values: %lu  "
                       "first_value: %lu", (ulong) offset, (ulong) increment,
                      (ulong) nb_desired_values, (ulong) *first_value));
  DBUG_ASSERT(increment);
  DBUG_ASSERT(nb_desired_values);
  *first_value= 0;
  if (table->s->next_number_keypart)
  {
    /*
      next_number_keypart is != 0 if the auto_increment column is a secondary
      column in the index (it is allowed in MyISAM)
    */
    DBUG_PRINT("info", ("next_number_keypart != 0"));
    ulonglong nb_reserved_values_part;
    ulonglong first_value_part, max_first_value;
    handler **file= m_file;
    first_value_part= max_first_value= *first_value;
    /* Must find highest value among all partitions. */
    do
    {
      /* Only nb_desired_values = 1 makes sense */
      (*file)->get_auto_increment(offset, increment, 1,
                                 &first_value_part, &nb_reserved_values_part);
      if (unlikely(first_value_part == ULONGLONG_MAX)) // error in one partition
      {
        *first_value= first_value_part;
        /* log that the error was between table/partition handler */
        sql_print_error("Partition failed to reserve auto_increment value");
        DBUG_VOID_RETURN;
      }
      DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part));
      set_if_bigger(max_first_value, first_value_part);
    } while (*(++file));
    *first_value= max_first_value;
    /* Only a single value is reserved in this mode. */
    *nb_reserved_values= 1;
  }
  else
  {
    THD *thd= ha_thd();
    /*
      This is initialized in the beginning of the first write_row call.
    */
    DBUG_ASSERT(part_share->auto_inc_initialized);
    /*
      Get a lock for handling the auto_increment in part_share
      for avoiding two concurrent statements getting the same number.
    */

    lock_auto_increment();

    /*
      In a multi-row insert statement like INSERT SELECT and LOAD DATA
      where the number of candidate rows to insert is not known in advance
      we must hold a lock/mutex for the whole statement if we have statement
      based replication. Because the statement-based binary log contains
      only the first generated value used by the statement, and slaves assumes
      all other generated values used by this statement were consecutive to
      this first one, we must exclusively lock the generator until the
      statement is done.
    */
    if (!auto_increment_safe_stmt_log_lock &&
        thd->lex->sql_command != SQLCOM_INSERT &&
        mysql_bin_log.is_open() &&
        !thd->is_current_stmt_binlog_format_row() &&
        (thd->variables.option_bits & OPTION_BIN_LOG))
    {
      DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock"));
      auto_increment_safe_stmt_log_lock= TRUE;
    }

    /* this gets corrected (for offset/increment) in update_auto_increment */
    *first_value= part_share->next_auto_inc_val;
    part_share->next_auto_inc_val+= nb_desired_values * increment;

    unlock_auto_increment();
    DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value));
    *nb_reserved_values= nb_desired_values;
  }
  DBUG_VOID_RETURN;
}
10825 
/**
  Release auto_increment values reserved but not used.

  When the auto_increment column is a secondary key part, simply forward
  the release to every locked partition. Otherwise the shared
  next_auto_inc_val may be lowered again when this thread reserved the
  top of the range and did not consume it, and the statement-wide
  binlog lock taken in get_auto_increment() is released.
*/
void ha_partition::release_auto_increment()
{
  DBUG_ENTER("ha_partition::release_auto_increment");

  if (table->s->next_number_keypart)
  {
    /* Forward to each partition that takes part in the statement. */
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
    {
      m_file[i]->ha_release_auto_increment();
    }
  }
  else
  {
    lock_auto_increment();
    if (next_insert_id)
    {
      ulonglong next_auto_inc_val= part_share->next_auto_inc_val;
      /*
        If the current auto_increment values is lower than the reserved
        value, and the reserved value was reserved by this thread,
        we can lower the reserved value.
      */
      if (next_insert_id < next_auto_inc_val &&
          auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val)
      {
        THD *thd= ha_thd();
        /*
          Check that we do not lower the value because of a failed insert
          with SET INSERT_ID, i.e. forced/non generated values.
        */
        if (thd->auto_inc_intervals_forced.maximum() < next_insert_id)
          part_share->next_auto_inc_val= next_insert_id;
      }
      DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu",
                          (ulong) part_share->next_auto_inc_val));
    }
    /*
      Unlock the multi-row statement lock taken in get_auto_increment.
      These actions must be performed even if the next_insert_id field
      contains zero, otherwise if the update_auto_increment fails then
      an unnecessary lock will remain:
    */
    if (auto_increment_safe_stmt_log_lock)
    {
      auto_increment_safe_stmt_log_lock= FALSE;
      DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock"));
    }

    unlock_auto_increment();
  }
  DBUG_VOID_RETURN;
}
10881 
10882 /****************************************************************************
10883                 MODULE initialize handler for HANDLER call
10884 ****************************************************************************/
10885 
init_table_handle_for_HANDLER()10886 void ha_partition::init_table_handle_for_HANDLER()
10887 {
10888   return;
10889 }
10890 
10891 
10892 /**
10893   Calculate the checksum of the table (all partitions)
10894 */
10895 
pre_calculate_checksum()10896 int ha_partition::pre_calculate_checksum()
10897 {
10898   int error;
10899   DBUG_ENTER("ha_partition::pre_calculate_checksum");
10900   m_pre_calling= TRUE;
10901   if ((table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM)))
10902   {
10903     handler **file= m_file;
10904     do
10905     {
10906       if ((error= (*file)->pre_calculate_checksum()))
10907       {
10908         DBUG_RETURN(error);
10909       }
10910     } while (*(++file));
10911   }
10912   DBUG_RETURN(0);
10913 }
10914 
10915 
calculate_checksum()10916 int ha_partition::calculate_checksum()
10917 {
10918   int error;
10919   stats.checksum= 0;
10920   stats.checksum_null= TRUE;
10921 
10922   DBUG_ENTER("ha_partition::calculate_checksum");
10923   if (!m_pre_calling)
10924   {
10925     if ((error= pre_calculate_checksum()))
10926     {
10927       m_pre_calling= FALSE;
10928       DBUG_RETURN(error);
10929     }
10930   }
10931   m_pre_calling= FALSE;
10932 
10933   handler **file= m_file;
10934   do
10935   {
10936     if ((error= (*file)->calculate_checksum()))
10937     {
10938       DBUG_RETURN(error);
10939     }
10940     if (!(*file)->stats.checksum_null)
10941     {
10942       stats.checksum+= (*file)->stats.checksum;
10943       stats.checksum_null= FALSE;
10944     }
10945   } while (*(++file));
10946   DBUG_RETURN(0);
10947 }
10948 
10949 
10950 /****************************************************************************
10951                 MODULE enable/disable indexes
10952 ****************************************************************************/
10953 
10954 /*
10955   Disable indexes for a while
10956   SYNOPSIS
10957     disable_indexes()
10958     mode                      Mode
10959   RETURN VALUES
10960     0                         Success
10961     != 0                      Error
10962 */
10963 
disable_indexes(uint mode)10964 int ha_partition::disable_indexes(uint mode)
10965 {
10966   handler **file;
10967   int error= 0;
10968 
10969   DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
10970   for (file= m_file; *file; file++)
10971   {
10972     if (unlikely((error= (*file)->ha_disable_indexes(mode))))
10973       break;
10974   }
10975   return error;
10976 }
10977 
10978 
10979 /*
10980   Enable indexes again
10981   SYNOPSIS
10982     enable_indexes()
10983     mode                      Mode
10984   RETURN VALUES
10985     0                         Success
10986     != 0                      Error
10987 */
10988 
enable_indexes(uint mode)10989 int ha_partition::enable_indexes(uint mode)
10990 {
10991   handler **file;
10992   int error= 0;
10993 
10994   DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
10995   for (file= m_file; *file; file++)
10996   {
10997     if (unlikely((error= (*file)->ha_enable_indexes(mode))))
10998       break;
10999   }
11000   return error;
11001 }
11002 
11003 
11004 /*
11005   Check if indexes are disabled
11006   SYNOPSIS
11007     indexes_are_disabled()
11008 
11009   RETURN VALUES
11010     0                      Indexes are enabled
11011     != 0                   Indexes are disabled
11012 */
11013 
indexes_are_disabled(void)11014 int ha_partition::indexes_are_disabled(void)
11015 {
11016   handler **file;
11017   int error= 0;
11018 
11019   DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
11020   for (file= m_file; *file; file++)
11021   {
11022     if (unlikely((error= (*file)->indexes_are_disabled())))
11023       break;
11024   }
11025   return error;
11026 }
11027 
11028 
11029 /**
11030   Check/fix misplaced rows.
11031 
11032   @param read_part_id  Partition to check/fix.
11033   @param repair        If true, move misplaced rows to correct partition.
11034 
11035   @return Operation status.
11036     @retval HA_ADMIN_OK     Success
11037     @retval != HA_ADMIN_OK  Error
11038 */
11039 
int ha_partition::check_misplaced_rows(uint read_part_id, bool do_repair)
{
  int result= 0;
  uint32 correct_part_id;
  longlong func_value;
  longlong num_misplaced_rows= 0;

  DBUG_ENTER("ha_partition::check_misplaced_rows");

  DBUG_ASSERT(m_file);

  if (m_part_info->vers_info &&
      read_part_id != m_part_info->vers_info->now_part->id &&
      !m_part_info->vers_info->interval.is_set())
  {
    /* Skip this check as it is not supported for non-INTERVAL history partitions. */
    DBUG_RETURN(HA_ADMIN_OK);
  }

  if (do_repair)
  {
    /* We must read the full row, if we need to move it! */
    bitmap_set_all(table->read_set);
    bitmap_set_all(table->write_set);
  }
  else
  {
    /* Only need to read the partitioning fields. */
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }

  /* Full scan of the partition being checked. */
  if ((result= m_file[read_part_id]->ha_rnd_init(1)))
    DBUG_RETURN(result);

  while (true)
  {
    if ((result= m_file[read_part_id]->ha_rnd_next(m_rec0)))
    {
      if (result != HA_ERR_END_OF_FILE)
        break;

      if (num_misplaced_rows > 0)
      {
        print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "warning",
                        table_share->db.str, table->alias,
                        opt_op_name[REPAIR_PARTS],
                        "Moved %lld misplaced rows",
                        num_misplaced_rows);
      }
      /* End-of-file reached, all rows are now OK, reset result and break. */
      result= 0;
      break;
    }

    /* Which partition should this row be in, given its column values? */
    result= m_part_info->get_partition_id(m_part_info, &correct_part_id,
                                          &func_value);
    if (result)
      break;

    if (correct_part_id != read_part_id)
    {
      num_misplaced_rows++;
      if (!do_repair)
      {
        /* Check. */
        print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "error",
                        table_share->db.str, table->alias,
                        opt_op_name[CHECK_PARTS],
                        "Found a misplaced row");
        /* Break on first misplaced row! */
        result= HA_ADMIN_NEEDS_UPGRADE;
        break;
      }
      else
      {
        DBUG_PRINT("info", ("Moving row from partition %u to %u",
                            (uint) read_part_id, (uint) correct_part_id));

        /*
          Insert row into correct partition. Notice that there are no commit
          for every N row, so the repair will be one large transaction!
        */
        if ((result= m_file[correct_part_id]->ha_write_row(m_rec0)))
        {
          /*
            We have failed to insert a row, it might have been a duplicate!
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          if (result == HA_ERR_FOUND_DUPP_KEY)
          {
            str.append("Duplicate key found, "
                       "please update or delete the record:\n");
            result= HA_ADMIN_CORRUPT;
          }
          m_err_rec= NULL;
          append_row_to_str(str);

          /*
            If the engine supports transactions, the failure will be
            rolled back
          */
          if (!m_file[correct_part_id]->has_transactions_and_rollback())
          {
            /* Log this error, so the DBA can notice it and fix it! */
            sql_print_error("Table '%-192s' failed to move/insert a row"
                            " from part %u into part %u:\n%s",
                            table->s->table_name.str,
                            (uint) read_part_id,
                            (uint) correct_part_id,
                            str.c_ptr_safe());
          }
          print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "error",
                          table_share->db.str, table->alias,
                          opt_op_name[REPAIR_PARTS],
                          "Failed to move/insert a row"
                          " from part %u into part %u:\n%s",
                          (uint) read_part_id,
                          (uint) correct_part_id,
                          str.c_ptr_safe());
          break;
        }

        /* Delete row from wrong partition. */
        if ((result= m_file[read_part_id]->ha_delete_row(m_rec0)))
        {
          /* A transactional engine will roll the insert back too. */
          if (m_file[correct_part_id]->has_transactions_and_rollback())
            break;
          /*
            We have introduced a duplicate, since we failed to remove it
            from the wrong partition.
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          m_err_rec= NULL;
          append_row_to_str(str);

          /* Log this error, so the DBA can notice it and fix it! */
          sql_print_error("Table '%-192s': Delete from part %u failed with"
                          " error %d. But it was already inserted into"
                          " part %u, when moving the misplaced row!"
                          "\nPlease manually fix the duplicate row:\n%s",
                          table->s->table_name.str,
                          (uint) read_part_id,
                          result,
                          (uint) correct_part_id,
                          str.c_ptr_safe());
          break;
        }
      }
    }
  }

  /* End the scan; report its error only if nothing failed before. */
  int tmp_result= m_file[read_part_id]->ha_rnd_end();
  DBUG_RETURN(result ? result : tmp_result);
}
11198 
11199 
11200 #define KEY_PARTITIONING_CHANGED_STR \
11201   "KEY () partitioning changed, please run:\n" \
11202   "ALTER TABLE %s.%s ALGORITHM = INPLACE %s"
11203 
/**
  Check whether the table needs upgrading.

  Called even without FOR UPGRADE when the .frm version is older than the
  current server version.  Detects tables created before 5.5.3 that use
  KEY (sub)partitioning on field types whose hash calculation changed
  after 5.1 (see bug#14521864) and prints an admin message with the
  ALTER TABLE needed to fix the metadata.

  @param check_opt  Check options; only TT_FOR_UPGRADE in sql_flags is
                    inspected here.

  @return HA_ADMIN_NEEDS_CHECK when not called FOR UPGRADE or no affected
          field is found (partitions are checked in their own ha_check),
          HA_ADMIN_FAILED when KEY partitioning changed (message printed).
*/
int ha_partition::check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  int error= HA_ADMIN_NEEDS_CHECK;
  DBUG_ENTER("ha_partition::check_for_upgrade");

  /*
    This is called even without FOR UPGRADE,
    if the .frm version is lower than the current version.
    In that case return that it needs checking!
  */
  if (!(check_opt->sql_flags & TT_FOR_UPGRADE))
    DBUG_RETURN(error);

  /*
    Partitions will be checked for during their ha_check!

    Check if KEY (sub)partitioning was used and any field's hash calculation
    differs from 5.1, see bug#14521864.
  */
  if (table->s->mysql_version < 50503 &&              // 5.1 table (<5.5.3)
      ((m_part_info->part_type == HASH_PARTITION &&   // KEY partitioned
        m_part_info->list_of_part_fields) ||
       (m_is_sub_partitioned &&                       // KEY subpartitioned
        m_part_info->list_of_subpart_fields)))
  {
    Field **field;
    /* Only the fields actually hashed by KEY()/subpartitioning matter. */
    if (m_is_sub_partitioned)
    {
      field= m_part_info->subpart_field_array;
    }
    else
    {
      field= m_part_info->part_field_array;
    }
    for (; *field; field++)
    {
      /* Types listed here hash differently in 5.5+ than they did in 5.1. */
      switch ((*field)->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          THD *thd= ha_thd();
          char *part_buf;
          String db_name, table_name;
          uint part_buf_len;
          bool skip_generation= false;
          partition_info::enum_key_algorithm old_algorithm;
          old_algorithm= m_part_info->key_algorithm;
          error= HA_ADMIN_FAILED;
          append_identifier(ha_thd(), &db_name, &table_share->db);
          append_identifier(ha_thd(), &table_name, &table_share->table_name);
          if (m_part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE)
          {
            /*
              Only possible when someone tampered with .frm files,
              like during tests :)
            */
            skip_generation= true;
          }
          /* Temporarily mark as 5.1 hashing so the generated syntax carries
             ALGORITHM = 1; restored below before returning. */
          m_part_info->key_algorithm= partition_info::KEY_ALGORITHM_51;
          if (skip_generation ||
              !(part_buf= generate_partition_syntax_for_frm(thd, m_part_info,
                                                            &part_buf_len,
                                                            NULL, NULL)) ||
	      print_admin_msg(thd, SQL_ADMIN_MSG_TEXT_SIZE + 1, "error",
	                      table_share->db.str,
	                      table->alias,
                              opt_op_name[CHECK_PARTS],
                              KEY_PARTITIONING_CHANGED_STR,
                              db_name.c_ptr_safe(),
                              table_name.c_ptr_safe(),
                              part_buf))
	  {
	    /* Error creating admin message (too long string?). */
	    print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[CHECK_PARTS],
                            KEY_PARTITIONING_CHANGED_STR,
                            db_name.c_ptr_safe(), table_name.c_ptr_safe(),
                            "<old partition clause>, but add ALGORITHM = 1"
                            " between 'KEY' and '(' to change the metadata"
                            " without the need of a full table rebuild.");
          }
          m_part_info->key_algorithm= old_algorithm;
          /* One affected field is enough; report failure immediately. */
          DBUG_RETURN(error);
        }
      default:
        /* Not affected! */
        ;
      }
    }
  }

  DBUG_RETURN(error);
}
11311 
11312 
get_next_global_for_child()11313 TABLE_LIST *ha_partition::get_next_global_for_child()
11314 {
11315   handler **file;
11316   DBUG_ENTER("ha_partition::get_next_global_for_child");
11317   for (file= m_file; *file; file++)
11318   {
11319     TABLE_LIST *table_list;
11320     if ((table_list= (*file)->get_next_global_for_child()))
11321       DBUG_RETURN(table_list);
11322   }
11323   DBUG_RETURN(0);
11324 }
11325 
11326 
11327 /**
11328   Push an engine condition to the condition stack of the storage engine
11329   for each partition.
11330 
11331   @param  cond              Pointer to the engine condition to be pushed.
11332 
11333   @return NULL              Underlying engine will not return rows that
11334                             do not match the passed condition.
11335           <> NULL           'Remainder' condition that the caller must use
11336                             to filter out records.
11337 */
11338 
const COND *ha_partition::cond_push(const COND *cond)
{
  COND *remainder= NULL;
  DBUG_ENTER("ha_partition::cond_push");

  /* Push the condition to every opened partition that has not got it yet. */
  for (uint part= bitmap_get_first_set(&m_partitions_to_reset);
       part < m_tot_parts;
       part= bitmap_get_next_set(&m_partitions_to_reset, part))
  {
    if (!bitmap_is_set(&m_opened_partitions, part))
      continue;
    handler *file= m_file[part];
    if (file->pushed_cond == cond)
      continue;                          /* Same condition already pushed */
    if (file->cond_push(cond))
      remainder= (COND *) cond;          /* Engine refused; caller filters */
    else
      file->pushed_cond= cond;
  }
  DBUG_RETURN(remainder);
}
11362 
11363 
11364 /**
11365   Pop the top condition from the condition stack of the storage engine
11366   for each partition.
11367 */
11368 
cond_pop()11369 void ha_partition::cond_pop()
11370 {
11371   uint i;
11372   DBUG_ENTER("ha_partition::cond_pop");
11373 
11374   for (i= bitmap_get_first_set(&m_partitions_to_reset);
11375        i < m_tot_parts;
11376        i= bitmap_get_next_set(&m_partitions_to_reset, i))
11377   {
11378     if (bitmap_is_set(&m_opened_partitions, i))
11379     {
11380       m_file[i]->cond_pop();
11381     }
11382   }
11383   DBUG_VOID_RETURN;
11384 }
11385 
11386 
11387 /**
11388   Perform bulk update preparation on each partition.
11389 
11390   SYNOPSIS
11391     start_bulk_update()
11392 
11393   RETURN VALUE
11394     TRUE                      Error
11395     FALSE                     Success
11396 */
11397 
start_bulk_update()11398 bool ha_partition::start_bulk_update()
11399 {
11400   handler **file= m_file;
11401   DBUG_ENTER("ha_partition::start_bulk_update");
11402 
11403   if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
11404                             table->write_set))
11405     DBUG_RETURN(TRUE);
11406 
11407   do
11408   {
11409     bzero(&(*file)->copy_info, sizeof((*file)->copy_info));
11410     if ((*file)->start_bulk_update())
11411       DBUG_RETURN(TRUE);
11412   } while (*(++file));
11413   DBUG_RETURN(FALSE);
11414 }
11415 
11416 
11417 /**
11418   Perform bulk update execution on each partition.  A bulk update allows
11419   a handler to batch the updated rows instead of performing the updates
11420   one row at a time.
11421 
11422   SYNOPSIS
11423     exec_bulk_update()
11424 
11425   RETURN VALUE
11426     TRUE                      Error
11427     FALSE                     Success
11428 */
11429 
exec_bulk_update(ha_rows * dup_key_found)11430 int ha_partition::exec_bulk_update(ha_rows *dup_key_found)
11431 {
11432   int error;
11433   handler **file= m_file;
11434   DBUG_ENTER("ha_partition::exec_bulk_update");
11435 
11436   do
11437   {
11438     if (unlikely((error= (*file)->exec_bulk_update(dup_key_found))))
11439       DBUG_RETURN(error);
11440   } while (*(++file));
11441   DBUG_RETURN(0);
11442 }
11443 
11444 
11445 /**
11446   Perform bulk update cleanup on each partition.
11447 
11448   SYNOPSIS
11449     end_bulk_update()
11450 
11451   RETURN VALUE
11452     NONE
11453 */
11454 
end_bulk_update()11455 int ha_partition::end_bulk_update()
11456 {
11457   int error= 0;
11458   handler **file= m_file;
11459   DBUG_ENTER("ha_partition::end_bulk_update");
11460 
11461   do
11462   {
11463     int tmp;
11464     if ((tmp= (*file)->end_bulk_update()))
11465       error= tmp;
11466   } while (*(++file));
11467   sum_copy_infos();
11468   DBUG_RETURN(error);
11469 }
11470 
11471 
11472 /**
11473   Add the row to the bulk update on the partition on which the row is stored.
11474   A bulk update allows a handler to batch the updated rows instead of
11475   performing the updates one row at a time.
11476 
11477   SYNOPSIS
11478     bulk_update_row()
11479     old_data                  Old record
11480     new_data                  New record
11481     dup_key_found             Number of duplicate keys found
11482 
11483   RETURN VALUE
11484     >1                        Error
11485     1                         Bulk update not used, normal operation used
11486     0                         Bulk update used by handler
11487 */
11488 
bulk_update_row(const uchar * old_data,const uchar * new_data,ha_rows * dup_key_found)11489 int ha_partition::bulk_update_row(const uchar *old_data, const uchar *new_data,
11490                                   ha_rows *dup_key_found)
11491 {
11492   int error= 0;
11493   uint32 part_id;
11494   longlong func_value;
11495   DBUG_ENTER("ha_partition::bulk_update_row");
11496 
11497   MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->read_set);
11498   error= m_part_info->get_partition_id(m_part_info, &part_id,
11499                                        &func_value);
11500   dbug_tmp_restore_column_map(&table->read_set, old_map);
11501   if (unlikely(error))
11502   {
11503     m_part_info->err_value= func_value;
11504     goto end;
11505   }
11506 
11507   error= m_file[part_id]->ha_bulk_update_row(old_data, new_data,
11508                                              dup_key_found);
11509 
11510 end:
11511   DBUG_RETURN(error);
11512 }
11513 
11514 
11515 /**
11516   Perform bulk delete preparation on each partition.
11517 
11518   SYNOPSIS
11519     start_bulk_delete()
11520 
11521   RETURN VALUE
11522     TRUE                      Error
11523     FALSE                     Success
11524 */
11525 
start_bulk_delete()11526 bool ha_partition::start_bulk_delete()
11527 {
11528   handler **file= m_file;
11529   DBUG_ENTER("ha_partition::start_bulk_delete");
11530 
11531   do
11532   {
11533     if ((*file)->start_bulk_delete())
11534       DBUG_RETURN(TRUE);
11535   } while (*(++file));
11536   DBUG_RETURN(FALSE);
11537 }
11538 
11539 
11540 /**
11541   Perform bulk delete cleanup on each partition.
11542 
11543   SYNOPSIS
11544     end_bulk_delete()
11545 
11546   RETURN VALUE
11547     >0                        Error
11548     0                         Success
11549 */
11550 
end_bulk_delete()11551 int ha_partition::end_bulk_delete()
11552 {
11553   int error= 0;
11554   handler **file= m_file;
11555   DBUG_ENTER("ha_partition::end_bulk_delete");
11556 
11557   do
11558   {
11559     int tmp;
11560     if ((tmp= (*file)->end_bulk_delete()))
11561       error= tmp;
11562   } while (*(++file));
11563   sum_copy_infos();
11564   DBUG_RETURN(error);
11565 }
11566 
11567 
check_if_updates_are_ignored(const char * op) const11568 bool ha_partition::check_if_updates_are_ignored(const char *op) const
11569 {
11570   return (handler::check_if_updates_are_ignored(op) ||
11571           ha_check_if_updates_are_ignored(table->in_use, partition_ht(), op));
11572 }
11573 
11574 /**
11575   Perform initialization for a direct update request.
11576 
11577   SYNOPSIS
11578     direct_update_rows_init()
11579     update fields             Pointer to the list of fields to update
11580 
11581   RETURN VALUE
11582     >0                        Error
11583     0                         Success
11584 */
11585 
int ha_partition::direct_update_rows_init(List<Item> *update_fields)
{
  int error;
  uint i, found;
  handler *file;
  DBUG_ENTER("ha_partition::direct_update_rows_init");

  /*
    Direct update cannot be used when the update changes partitioning
    columns, since rows could then have to move between partitions.
  */
  if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                            table->write_set))
  {
    DBUG_PRINT("info", ("partition FALSE by updating part_key"));
    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
  }

  /* Consider the full partition range; remembered for direct_update_rows(). */
  m_part_spec.start_part= 0;
  m_part_spec.end_part= m_tot_parts - 1;
  m_direct_update_part_spec= m_part_spec;

  /* Ask each partition that is both read and locked whether it supports
     the pushdown; count how many partitions take part. */
  found= 0;
  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
        bitmap_is_set(&(m_part_info->lock_partitions), i))
    {
      file= m_file[i];
      if (unlikely((error= (m_pre_calling ?
                            file->pre_direct_update_rows_init(update_fields) :
                            file->direct_update_rows_init(update_fields)))))
      {
        DBUG_PRINT("info", ("partition FALSE by storage engine"));
        DBUG_RETURN(error);
      }
      found++;
    }
  }

  /*
    With an explicit LIMIT and more than one participating partition the
    limit cannot be enforced across partitions, so refuse the pushdown.
  */
  TABLE_LIST *table_list= table->pos_in_table_list;
  if (found != 1 && table_list)
  {
    /* Walk up to the outermost table list to reach the query's select_lex. */
    while (table_list->parent_l)
      table_list= table_list->parent_l;
    st_select_lex *select_lex= table_list->select_lex;
    DBUG_PRINT("info", ("partition select_lex: %p", select_lex));
    if (select_lex && select_lex->explicit_limit)
    {
      DBUG_PRINT("info", ("partition explicit_limit=TRUE"));
      DBUG_PRINT("info", ("partition offset_limit: %p",
                          select_lex->offset_limit));
      DBUG_PRINT("info", ("partition select_limit: %p",
                          select_lex->select_limit));
      DBUG_PRINT("info", ("partition FALSE by select_lex"));
      DBUG_RETURN(HA_ERR_WRONG_COMMAND);
    }
  }
  DBUG_PRINT("info", ("partition OK"));
  DBUG_RETURN(0);
}
11643 
11644 
11645 /**
11646   Do initialization for performing parallel direct update
11647   for a handlersocket update request.
11648 
11649   SYNOPSIS
11650     pre_direct_update_rows_init()
11651     update fields             Pointer to the list of fields to update
11652 
11653   RETURN VALUE
11654     >0                        Error
11655     0                         Success
11656 */
11657 
pre_direct_update_rows_init(List<Item> * update_fields)11658 int ha_partition::pre_direct_update_rows_init(List<Item> *update_fields)
11659 {
11660   bool save_m_pre_calling;
11661   int error;
11662   DBUG_ENTER("ha_partition::pre_direct_update_rows_init");
11663   save_m_pre_calling= m_pre_calling;
11664   m_pre_calling= TRUE;
11665   error= direct_update_rows_init(update_fields);
11666   m_pre_calling= save_m_pre_calling;
11667   DBUG_RETURN(error);
11668 }
11669 
11670 
11671 /**
11672   Execute a direct update request.  A direct update request updates all
11673   qualified rows in a single operation, rather than one row at a time.
11674   The direct update operation is pushed down to each individual
11675   partition.
11676 
11677   SYNOPSIS
11678     direct_update_rows()
11679     update_rows               Number of updated rows
11680 
11681   RETURN VALUE
11682     >0                        Error
11683     0                         Success
11684 */
11685 
int ha_partition::direct_update_rows(ha_rows *update_rows_result,
                                     ha_rows *found_rows_result)
{
  int error;
  bool rnd_seq= FALSE;
  ha_rows update_rows= 0;
  ha_rows found_rows= 0;
  uint32 i;
  DBUG_ENTER("ha_partition::direct_update_rows");

  /* If first call to direct_update_rows with RND scan */
  /* NOTE(review): m_scan_value==1 appears to mark a not-yet-started
     sequential scan; it is bumped to 2 so partitions are rnd-initialized
     here only once — confirm against rnd_init(). */
  if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1)
  {
    rnd_seq= TRUE;
    m_scan_value= 2;
  }

  *update_rows_result= 0;
  *found_rows_result= 0;
  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    handler *file= m_file[i];
    /* Only partitions that are both read and locked participate. */
    if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
        bitmap_is_set(&(m_part_info->lock_partitions), i))
    {
      /* Lazily start a rnd scan on partitions not yet initialized. */
      if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE)
      {
        if (unlikely((error= (m_pre_calling ?
                              file->ha_pre_rnd_init(TRUE) :
                              file->ha_rnd_init(TRUE)))))
          DBUG_RETURN(error);
      }
      if (unlikely((error= (m_pre_calling ?
                            (file)->pre_direct_update_rows() :
                            (file)->ha_direct_update_rows(&update_rows,
                                                          &found_rows)))))
      {
        /* End the scan we started above before bailing out. */
        if (rnd_seq)
        {
          if (m_pre_calling)
            file->ha_pre_rnd_end();
          else
            file->ha_rnd_end();
        }
        DBUG_RETURN(error);
      }
      /* Accumulate per-partition counts into the caller's totals. */
      *update_rows_result+= update_rows;
      *found_rows_result+= found_rows;
    }
    /* NOTE(review): this end-of-scan call is outside the bitmap check,
       so it runs for every partition in the range — confirm that is
       intended for partitions that did not participate. */
    if (rnd_seq)
    {
      if (unlikely((error= (m_pre_calling ?
                            file->ha_pre_index_or_rnd_end() :
                            file->ha_index_or_rnd_end()))))
        DBUG_RETURN(error);
    }
  }
  DBUG_RETURN(0);
}
11745 
11746 
11747 /**
11748   Start parallel execution of a direct update for a handlersocket update
11749   request.  A direct update request updates all qualified rows in a single
11750   operation, rather than one row at a time.  The direct update operation
11751   is pushed down to each individual partition.
11752 
11753   SYNOPSIS
11754     pre_direct_update_rows()
11755 
11756   RETURN VALUE
11757     >0                        Error
11758     0                         Success
11759 */
11760 
pre_direct_update_rows()11761 int ha_partition::pre_direct_update_rows()
11762 {
11763   bool save_m_pre_calling;
11764   int error;
11765   ha_rows not_used= 0;
11766   DBUG_ENTER("ha_partition::pre_direct_update_rows");
11767   save_m_pre_calling= m_pre_calling;
11768   m_pre_calling= TRUE;
11769   error= direct_update_rows(&not_used, &not_used);
11770   m_pre_calling= save_m_pre_calling;
11771   DBUG_RETURN(error);
11772 }
11773 
11774 
11775 /**
11776   Perform initialization for a direct delete request.
11777 
11778   SYNOPSIS
11779     direct_delete_rows_init()
11780 
11781   RETURN VALUE
11782     >0                        Error
11783     0                         Success
11784 */
11785 
int ha_partition::direct_delete_rows_init()
{
  int error;
  uint i, found;
  DBUG_ENTER("ha_partition::direct_delete_rows_init");

  /* Consider the full partition range; remembered for direct_delete_rows(). */
  m_part_spec.start_part= 0;
  m_part_spec.end_part= m_tot_parts - 1;
  m_direct_update_part_spec= m_part_spec;

  /* Ask each partition that is both read and locked whether it supports
     the pushdown; count how many partitions take part. */
  found= 0;
  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
        bitmap_is_set(&(m_part_info->lock_partitions), i))
    {
      handler *file= m_file[i];
      if (unlikely((error= (m_pre_calling ?
                            file->pre_direct_delete_rows_init() :
                            file->direct_delete_rows_init()))))
      {
        DBUG_PRINT("exit", ("error in direct_delete_rows_init"));
        DBUG_RETURN(error);
      }
      found++;
    }
  }

  /*
    With an explicit LIMIT and more than one participating partition the
    limit cannot be enforced across partitions, so refuse the pushdown.
  */
  TABLE_LIST *table_list= table->pos_in_table_list;
  if (found != 1 && table_list)
  {
    /* Walk up to the outermost table list to reach the query's select_lex. */
    while (table_list->parent_l)
      table_list= table_list->parent_l;
    st_select_lex *select_lex= table_list->select_lex;
    DBUG_PRINT("info", ("partition select_lex: %p", select_lex));
    if (select_lex && select_lex->explicit_limit)
    {
      DBUG_PRINT("info", ("partition explicit_limit: TRUE"));
      DBUG_PRINT("info", ("partition offset_limit: %p",
                          select_lex->offset_limit));
      DBUG_PRINT("info", ("partition select_limit: %p",
                          select_lex->select_limit));
      DBUG_PRINT("info", ("partition FALSE by select_lex"));
      DBUG_RETURN(HA_ERR_WRONG_COMMAND);
    }
  }
  DBUG_PRINT("exit", ("OK"));
  DBUG_RETURN(0);
}
11835 
11836 
11837 /**
11838   Do initialization for performing parallel direct delete
11839   for a handlersocket delete request.
11840 
11841   SYNOPSIS
11842     pre_direct_delete_rows_init()
11843 
11844   RETURN VALUE
11845     >0                        Error
11846     0                         Success
11847 */
11848 
pre_direct_delete_rows_init()11849 int ha_partition::pre_direct_delete_rows_init()
11850 {
11851   bool save_m_pre_calling;
11852   int error;
11853   DBUG_ENTER("ha_partition::pre_direct_delete_rows_init");
11854   save_m_pre_calling= m_pre_calling;
11855   m_pre_calling= TRUE;
11856   error= direct_delete_rows_init();
11857   m_pre_calling= save_m_pre_calling;
11858   DBUG_RETURN(error);
11859 }
11860 
11861 
11862 /**
11863   Execute a direct delete request.  A direct delete request deletes all
11864   qualified rows in a single operation, rather than one row at a time.
11865   The direct delete operation is pushed down to each individual
11866   partition.
11867 
11868   SYNOPSIS
11869     direct_delete_rows()
11870     delete_rows               Number of deleted rows
11871 
11872   RETURN VALUE
11873     >0                        Error
11874     0                         Success
11875 */
11876 
direct_delete_rows(ha_rows * delete_rows_result)11877 int ha_partition::direct_delete_rows(ha_rows *delete_rows_result)
11878 {
11879   int error;
11880   bool rnd_seq= FALSE;
11881   ha_rows delete_rows= 0;
11882   uint32 i;
11883   handler *file;
11884   DBUG_ENTER("ha_partition::direct_delete_rows");
11885 
11886   if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1)
11887   {
11888     rnd_seq= TRUE;
11889     m_scan_value= 2;
11890   }
11891 
11892   *delete_rows_result= 0;
11893   m_part_spec= m_direct_update_part_spec;
11894   for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
11895   {
11896     file= m_file[i];
11897     if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
11898         bitmap_is_set(&(m_part_info->lock_partitions), i))
11899     {
11900       if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE)
11901       {
11902         if (unlikely((error= (m_pre_calling ?
11903                               file->ha_pre_rnd_init(TRUE) :
11904                               file->ha_rnd_init(TRUE)))))
11905           DBUG_RETURN(error);
11906       }
11907       if ((error= (m_pre_calling ?
11908                    file->pre_direct_delete_rows() :
11909                    file->ha_direct_delete_rows(&delete_rows))))
11910       {
11911         if (m_pre_calling)
11912           file->ha_pre_rnd_end();
11913         else
11914           file->ha_rnd_end();
11915         DBUG_RETURN(error);
11916       }
11917       delete_rows_result+= delete_rows;
11918     }
11919     if (rnd_seq)
11920     {
11921       if (unlikely((error= (m_pre_calling ?
11922                             file->ha_pre_index_or_rnd_end() :
11923                             file->ha_index_or_rnd_end()))))
11924         DBUG_RETURN(error);
11925     }
11926   }
11927   DBUG_RETURN(0);
11928 }
11929 
11930 
11931 /**
11932   Start parallel execution of a direct delete for a handlersocket delete
11933   request.  A direct delete request deletes all qualified rows in a single
11934   operation, rather than one row at a time.  The direct delete operation
11935   is pushed down to each individual partition.
11936 
11937   SYNOPSIS
11938     pre_direct_delete_rows()
11939 
11940   RETURN VALUE
11941     >0                        Error
11942     0                         Success
11943 */
11944 
pre_direct_delete_rows()11945 int ha_partition::pre_direct_delete_rows()
11946 {
11947   bool save_m_pre_calling;
11948   int error;
11949   ha_rows not_used;
11950   DBUG_ENTER("ha_partition::pre_direct_delete_rows");
11951   save_m_pre_calling= m_pre_calling;
11952   m_pre_calling= TRUE;
11953   error= direct_delete_rows(&not_used);
11954   m_pre_calling= save_m_pre_calling;
11955   DBUG_RETURN(error);
11956 }
11957 
11958 /**
11959   Push metadata for the current operation down to each partition.
11960 
11961   SYNOPSIS
11962     info_push()
11963 
11964   RETURN VALUE
11965     >0                        Error
11966     0                         Success
11967 */
11968 
info_push(uint info_type,void * info)11969 int ha_partition::info_push(uint info_type, void *info)
11970 {
11971   int error= 0, tmp;
11972   uint i;
11973   DBUG_ENTER("ha_partition::info_push");
11974 
11975   for (i= bitmap_get_first_set(&m_partitions_to_reset);
11976        i < m_tot_parts;
11977        i= bitmap_get_next_set(&m_partitions_to_reset, i))
11978   {
11979     if (bitmap_is_set(&m_opened_partitions, i))
11980     {
11981       if ((tmp= m_file[i]->info_push(info_type, info)))
11982       {
11983         error= tmp;
11984       }
11985     }
11986   }
11987   DBUG_RETURN(error);
11988 }
11989 
11990 
11991 bool
can_convert_string(const Field_string * field,const Column_definition & new_type) const11992 ha_partition::can_convert_string(const Field_string* field,
11993 		                 const Column_definition& new_type) const
11994 {
11995   for (uint index= 0; index < m_tot_parts; index++)
11996   {
11997     if (!m_file[index]->can_convert_string(field, new_type))
11998       return false;
11999   }
12000   return true;
12001 }
12002 
12003 bool
can_convert_varstring(const Field_varstring * field,const Column_definition & new_type) const12004 ha_partition::can_convert_varstring(const Field_varstring* field,
12005 		                    const Column_definition& new_type) const{
12006   for (uint index= 0; index < m_tot_parts; index++)
12007   {
12008     if (!m_file[index]->can_convert_varstring(field, new_type))
12009       return false;
12010   }
12011   return true;
12012 }
12013 
12014 bool
can_convert_blob(const Field_blob * field,const Column_definition & new_type) const12015 ha_partition::can_convert_blob(const Field_blob* field,
12016 		               const Column_definition& new_type) const
12017 {
12018   for (uint index= 0; index < m_tot_parts; index++)
12019   {
12020     if (!m_file[index]->can_convert_blob(field, new_type))
12021       return false;
12022   }
12023   return true;
12024 }
12025 
/* Storage-engine descriptor handed to the plugin declaration below. */
struct st_mysql_storage_engine partition_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
12028 
/*
  Plugin registration for the partition storage-engine helper,
  expanded by the maria_declare_plugin macro family.
*/
maria_declare_plugin(partition)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &partition_storage_engine,
  "partition",
  "Mikael Ronstrom, MySQL AB",
  "Partition Storage Engine Helper",
  PLUGIN_LICENSE_GPL,
  partition_initialize, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100, /* 1.0 */
  NULL,                       /* status variables                */
  NULL,                       /* system variables                */
  "1.0",                      /* string version                  */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity                     */
}
maria_declare_plugin_end;
12046 
12047 #endif
12048