1 /*
2    Copyright (c) 2005, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 /*
26   This handler was developed by Mikael Ronstrom for version 5.1 of MySQL.
27   It is an abstraction layer on top of other handlers such as MyISAM,
28   InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
29   be handled by a storage engine. The current example of this is NDB
  Cluster that has internally handled partitioning. This has benefits in
31   that many loops needed in the partition handler can be avoided.
32 
33   Partitioning has an inherent feature which in some cases is positive and
34   in some cases is negative. It splits the data into chunks. This makes
35   the data more manageable, queries can easily be parallelised towards the
  parts and indexes are split such that there are fewer levels in the
37   index trees. The inherent disadvantage is that to use a split index
38   one has to scan all index parts which is ok for large queries but for
39   small queries it can be a disadvantage.
40 
41   Partitioning lays the foundation for more manageable databases that are
42   extremely large. It does also lay the foundation for more parallelism
43   in the execution of queries. This functionality will grow with later
44   versions of MySQL.
45 
  You can enable it in your build by doing the following during your build
47   process:
48   ./configure --with-partition
49 
  The partition handler is set up to use table locks. It implements a partition "SHARE"
51   that is inserted into a hash by table name. You can use this to store
52   information of state that any partition handler object will be able to see
53   if it is using the same table.
54 
55   Please read the object definition in ha_partition.h before reading the rest
  of this file.
57 */
58 
59 /*
  This engine needs server classes (like THD etc.) which are only defined if
61   MYSQL_SERVER define is set!
62 */
63 #define MYSQL_SERVER 1
64 #include "sql_parse.h"                          // append_file_to_dir
65 #include "partition_info.h"                  // partition_info
66 #include "ha_partition.h"
67 #include "sql_table.h"                        // tablename_to_filename
68 #include "key.h"                             // key_rec_cmp, field_unpack
69 #include "sql_show.h"                        // append_identifier
70 #include "sql_admin.h"                       // SQL_ADMIN_MSG_TEXT_SIZE
71 #include "myisam.h"                          // TT_FOR_UPGRADE
72 #include "sql_plugin.h"                      // plugin_unlock_list
73 #include "log.h"                             // sql_print_error
74 
75 #include "debug_sync.h"
76 #ifndef NDEBUG
77 #include "sql_test.h"                        // print_where
78 #endif
79 
80 #include "pfs_file_provider.h"
81 #include "mysql/psi/mysql_file.h"
82 
83 using std::min;
84 using std::max;
85 
86 
87 /* First 4 bytes in the .par file is the number of 32-bit words in the file */
88 #define PAR_WORD_SIZE 4
89 /* offset to the .par file checksum */
90 #define PAR_CHECKSUM_OFFSET 4
91 /* offset to the total number of partitions */
92 #define PAR_NUM_PARTS_OFFSET 8
93 /* offset to the engines array */
94 #define PAR_ENGINES_OFFSET 12
95 #define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | \
96                                        HA_REC_NOT_IN_SEQ | \
97                                        HA_CAN_REPAIR)
98 #define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
99                                         HA_CAN_FULLTEXT | \
100                                         HA_DUPLICATE_POS | \
101                                         HA_READ_BEFORE_WRITE_REMOVAL)
/** File extension used by the partition handler's metadata (.par) file. */
static const char *ha_par_ext= ".par";
/** operation names for the enum_part_operation. */
static const char *opt_op_name[]= {"optimize", "analyze", "check", "repair",
                                   "assign_to_keycache", "preload_keys"};
106 
107 /****************************************************************************
108                 MODULE create/delete handler object
109 ****************************************************************************/
110 
111 static handler *partition_create_handler(handlerton *hton,
112                                          TABLE_SHARE *share,
113                                          MEM_ROOT *mem_root);
114 static uint partition_flags();
115 
116 
117 static PSI_memory_key key_memory_ha_partition_file;
118 static PSI_memory_key key_memory_ha_partition_engine_array;
119 static PSI_memory_key key_memory_ha_partition_part_ids;
120 #ifdef HAVE_PSI_INTERFACE
121 static PSI_memory_info all_partition_memory[]=
122 { { &key_memory_ha_partition_file, "ha_partition::file", 0},
123   { &key_memory_ha_partition_engine_array, "ha_partition::engine_array", 0},
124   { &key_memory_ha_partition_part_ids, "ha_partition::part_ids", 0} };
125 
126 PSI_file_key key_file_ha_partition_par;
127 static PSI_file_info all_partition_file[]=
128 { { &key_file_ha_partition_par, "ha_partition::parfile", 0} };
129 
init_partition_psi_keys(void)130 static void init_partition_psi_keys(void)
131 {
132   const char* category= "partition";
133   int count;
134 
135   count= array_elements(all_partition_memory);
136   mysql_memory_register(category, all_partition_memory, count);
137   count= array_elements(all_partition_file);
138   mysql_file_register(category, all_partition_file, count);
139 }
140 #endif /* HAVE_PSI_INTERFACE */
141 
partition_initialize(void * p)142 static int partition_initialize(void *p)
143 {
144 
145   handlerton *partition_hton;
146   partition_hton= (handlerton *)p;
147 
148   partition_hton->state= SHOW_OPTION_YES;
149   partition_hton->db_type= DB_TYPE_PARTITION_DB;
150   partition_hton->create= partition_create_handler;
151   partition_hton->partition_flags= partition_flags;
152   partition_hton->flags= HTON_NOT_USER_SELECTABLE |
153                          HTON_HIDDEN |
154                          HTON_TEMPORARY_NOT_SUPPORTED |
155                          HTON_SUPPORTS_CLUSTERED_KEYS |
156                          HTON_SUPPORTS_COMPRESSED_COLUMNS;
157 #ifdef HAVE_PSI_INTERFACE
158   init_partition_psi_keys();
159 #endif
160   return 0;
161 }
162 
/** Construct an empty Parts_share_refs; storage is allocated later by init(). */
Parts_share_refs::Parts_share_refs()
  : num_parts(0), ha_shares(NULL)
{}
166 
167 
~Parts_share_refs()168 Parts_share_refs::~Parts_share_refs()
169 {
170   uint i;
171   if (ha_shares)
172   {
173     for (i= 0; i < num_parts; i++)
174       if (ha_shares[i])
175         delete ha_shares[i];
176     delete [] ha_shares;
177   }
178 }
179 
init(uint arg_num_parts)180 bool Parts_share_refs::init(uint arg_num_parts)
181 {
182   assert(!num_parts && !ha_shares);
183   num_parts= arg_num_parts;
184   /* Allocate an array of Handler_share pointers */
185   ha_shares= new Handler_share *[num_parts];
186   if (!ha_shares)
187   {
188     num_parts= 0;
189     return true;
190   }
191   memset(ha_shares, 0, sizeof(Handler_share*) * num_parts);
192   return false;
193 }
194 
195 
/** Construct an empty share; partitions_share_refs is allocated by init(). */
Ha_partition_share::Ha_partition_share()
  : Partition_share(), partitions_share_refs(NULL)
{}
199 
200 
~Ha_partition_share()201 Ha_partition_share::~Ha_partition_share()
202 {
203   if (partitions_share_refs)
204     delete partitions_share_refs;
205 }
206 
207 
208 /**
209   Initialize and allocate space for partitions shares.
210 
211   @param num_parts  Number of partitions to allocate storage for.
212 
213   @return Operation status.
214     @retval true  Failure (out of memory).
215     @retval false Success.
216 */
217 
init(uint num_parts)218 bool Ha_partition_share::init(uint num_parts)
219 {
220   DBUG_ENTER("Ha_partition_share::init");
221   partitions_share_refs= new Parts_share_refs;
222   if (!partitions_share_refs)
223     DBUG_RETURN(true);
224   if (partitions_share_refs->init(num_parts))
225   {
226     delete partitions_share_refs;
227     DBUG_RETURN(true);
228   }
229   DBUG_RETURN(false);
230 }
231 
232 /*
233   Create new partition handler
234 
235   SYNOPSIS
236     partition_create_handler()
237     table                       Table object
238 
239   RETURN VALUE
240     New partition object
241 */
242 
partition_create_handler(handlerton * hton,TABLE_SHARE * share,MEM_ROOT * mem_root)243 static handler *partition_create_handler(handlerton *hton,
244                                          TABLE_SHARE *share,
245                                          MEM_ROOT *mem_root)
246 {
247   ha_partition *file= new (mem_root) ha_partition(hton, share);
248   if (file && file->initialize_partition(mem_root))
249   {
250     delete file;
251     file= 0;
252   }
253   return file;
254 }
255 
256 /*
257   HA_CAN_UPDATE_PARTITION_KEY:
258   Set if the handler can update fields that are part of the partition
259   function.
260 
261   HA_CAN_PARTITION_UNIQUE:
262   Set if the handler can handle unique indexes where the fields of the
263   unique key are not part of the fields of the partition function. Thus
264   a unique key can be set on all fields.
265 
266   HA_USE_AUTO_PARTITION
267   Set if the handler sets all tables to be partitioned by default.
268 
269   HA_CAN_EXCHANGE_PARTITION:
270   Set if the handler can exchange a partition with a non-partitioned table
271   of the same handlerton/engine.
272 
273   HA_CANNOT_PARTITION_FK:
274   Set if the handler does not support foreign keys on partitioned tables.
275 */
276 
partition_flags()277 static uint partition_flags()
278 {
279   return HA_CAN_EXCHANGE_PARTITION | HA_CANNOT_PARTITION_FK;
280 }
281 
/* Sentinel partition id meaning "no partition is currently active". */
const uint32 ha_partition::NO_CURRENT_PART_ID= NOT_A_PARTITION_ID;
283 
284 /*
285   Constructor method
286 
287   SYNOPSIS
288     ha_partition()
289     table                       Table object
290 
291   RETURN VALUE
292     NONE
293 */
294 
/* Default constructor: only resets member variables; the underlying
   partition handlers are created later by initialize_partition(). */
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
  : handler(hton, share),
  Partition_helper(this)
{
  DBUG_ENTER("ha_partition::ha_partition(table)");
  init_handler_variables();
  DBUG_VOID_RETURN;
}
303 
304 
305 /**
306   ha_partition constructor method used by ha_partition::clone()
307 
308   @param hton               Handlerton (partition_hton)
309   @param share              Table share object
310   @param part_info_arg      partition_info to use
311   @param clone_arg          ha_partition to clone
  @param clone_mem_root_arg  MEM_ROOT to use
313 
314   @return New partition handler
315 */
316 
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share,
                           partition_info *part_info_arg,
                           ha_partition *clone_arg,
                           MEM_ROOT *clone_mem_root_arg)
  : handler(hton, share),
  Partition_helper(this)
{
  DBUG_ENTER("ha_partition::ha_partition(clone)");
  init_handler_variables();
  /* Copy partitioning state from the handler being cloned. */
  m_part_info= part_info_arg;
  m_is_sub_partitioned= m_part_info->is_sub_partitioned();
  m_is_clone_of= clone_arg;
  m_clone_mem_root= clone_mem_root_arg;
  /* Share the same Ha_partition_share as the original handler. */
  part_share= clone_arg->part_share;
  m_tot_parts= clone_arg->m_tot_parts;
  m_pkey_is_clustered= clone_arg->primary_key_is_clustered();
  DBUG_VOID_RETURN;
}
335 
336 /*
337   Initialize handler object
338 
339   SYNOPSIS
340     init_handler_variables()
341 
342   RETURN VALUE
343     NONE
344 */
345 
void ha_partition::init_handler_variables()
{
  /* Index/scan state. */
  active_index= MAX_KEY;
  m_mode= 0;
  m_open_test_lock= 0;
  /* .par file buffer and per-partition handler bookkeeping. */
  m_file_buffer= NULL;
  m_name_buffer_ptr= NULL;
  m_engine_array= NULL;
  m_file= NULL;
  m_file_tot_parts= 0;
  m_tot_parts= 0;
  m_pkey_is_clustered= 0;
  m_myisam= FALSE;
  m_innodb= FALSE;
  /* Extra-cache state (see ::extra handling). */
  m_extra_cache= FALSE;
  m_extra_cache_size= 0;
  m_extra_prepare_for_update= FALSE;
  m_extra_cache_part_id= NO_CURRENT_PART_ID;
  m_handler_status= handler_not_initialized;
  m_low_byte_first= 1;
  m_part_func_monotonicity_info= NON_MONOTONIC;
  /*
    this allows blackhole to work properly
  */
  m_num_locks= 0;
  /* Clone state (set by the clone constructor when applicable). */
  m_is_clone_of= NULL;
  m_clone_mem_root= NULL;
  part_share= NULL;
  m_new_partitions_share_refs.empty();
  m_part_ids_sorted_by_num_of_records= NULL;
  /* ALTER ... ADD/REORGANIZE PARTITION state. */
  m_new_file= NULL;
  m_num_new_partitions= 0;
  m_indexes_are_disabled= false;
}
380 
381 
/** @return Name of the underlying storage engine (from the first partition). */
const char *ha_partition::table_type() const
{
  // we can do this since we only support a single engine type
  return m_file[0]->table_type();
}
387 
388 
389 /*
390   Destructor method
391 
392   SYNOPSIS
393     ~ha_partition()
394 
395   RETURN VALUE
396     NONE
397 */
398 
~ha_partition()399 ha_partition::~ha_partition()
400 {
401   DBUG_ENTER("ha_partition::~ha_partition()");
402   if (m_new_partitions_share_refs.elements)
403     m_new_partitions_share_refs.delete_elements();
404   if (m_file != NULL)
405   {
406     uint i;
407     for (i= 0; i < m_tot_parts; i++)
408       delete m_file[i];
409   }
410   my_free(m_part_ids_sorted_by_num_of_records);
411 
412   clear_handler_file();
413   DBUG_VOID_RETURN;
414 }
415 
init_with_fields()416 bool ha_partition::init_with_fields()
417 {
418   /* Pass the call to each partition */
419   for (uint i= 0; i < m_tot_parts; i++)
420   {
421     if (m_file[i]->init_with_fields())
422       return true;
423   }
424   /* Re-read table flags in case init_with_fields caused it to change */
425   cached_table_flags= (m_file[0]->ha_table_flags() &
426                        ~(PARTITION_DISABLED_TABLE_FLAGS)) |
427                       PARTITION_ENABLED_TABLE_FLAGS;
428   return false;
429 }
430 
431 
432 /*
433   Initialize partition handler object
434 
435   SYNOPSIS
436     initialize_partition()
437     mem_root                    Allocate memory through this
438 
439   RETURN VALUE
440     1                         Error
441     0                         Success
442 
443   DESCRIPTION
444 
445   The partition handler is only a layer on top of other engines. Thus it
446   can't really perform anything without the underlying handlers. Thus we
447   add this method as part of the allocation of a handler object.
448 
449   1) Allocation of underlying handlers
450      If we have access to the partition info we will allocate one handler
451      instance for each partition.
452   2) Allocation without partition info
453      The cases where we don't have access to this information is when called
454      in preparation for delete_table and rename_table and in that case we
455      only need to set HA_FILE_BASED. In that case we will use the .par file
456      that contains information about the partitions and their engines and
457      the names of each partition.
458   3) Table flags initialisation
459      We need also to set table flags for the partition handler. This is not
460      static since it depends on what storage engines are used as underlying
461      handlers.
462      The table flags is set in this routine to simulate the behaviour of a
463      normal storage engine
464      The flag HA_FILE_BASED will be set independent of the underlying handlers
465   4) Index flags initialisation
466      When knowledge exists on the indexes it is also possible to initialize the
467      index flags. Again the index flags must be initialized by using the under-
468      lying handlers since this is storage engine dependent.
469      The flag HA_READ_ORDER will be reset for the time being to indicate no
470      ordered output is available from partition handler indexes. Later a merge
471      sort will be performed using the underlying handlers.
472   5) primary_key_is_clustered, has_transactions and low_byte_first is
473      calculated here.
474 
475 */
476 
bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
{
  handler **file_array, *file;
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::initialize_partition");

  if (Partition_helper::init_partitioning(mem_root))
  {
    DBUG_RETURN(true);
  }
  if (m_part_info)
  {
    /* Partition info available: build one handler per partition from it. */
    assert(m_tot_parts > 0);
    if (new_handlers_from_part_info(mem_root))
      DBUG_RETURN(true);
  }
  else if (!table_share || !table_share->normalized_path.str)
  {
    /*
      Called with dummy table share (delete, rename and alter table).
      Don't need to set-up anything.
    */
    DBUG_RETURN(false);
  }
  else if (get_from_handler_file(table_share->normalized_path.str,
                                 mem_root, false))
  {
    /* No partition info: recover partition layout from the .par file. */
    my_error(ER_FAILED_READ_FROM_PAR_FILE, MYF(0));
    DBUG_RETURN(true);
  }
  /*
    We create all underlying table handlers here. We do it in this special
    method to be able to report allocation errors.

    Set up low_byte_first, primary_key_is_clustered and
    has_transactions since they are called often in all kinds of places,
    other parameters are calculated on demand.
    Verify that all partitions have the same table_flags.
  */
  check_table_flags= m_file[0]->ha_table_flags();
  m_low_byte_first= m_file[0]->low_byte_first();
  m_pkey_is_clustered= TRUE;
  file_array= m_file;
  /* m_file is a NULL-terminated array; iterate until the terminator. */
  do
  {
    file= *file_array;
    if (m_low_byte_first != file->low_byte_first())
    {
      // Cannot have handlers with different endian
      my_error(ER_MIX_HANDLER_ERROR, MYF(0));
      DBUG_RETURN(true);
    }
    /* Clustered PK only if every partition's engine has a clustered PK. */
    if (!file->primary_key_is_clustered())
      m_pkey_is_clustered= FALSE;
    if (check_table_flags != file->ha_table_flags())
    {
      my_error(ER_MIX_HANDLER_ERROR, MYF(0));
      DBUG_RETURN(true);
    }
  } while (*(++file_array));
  m_handler_status= handler_initialized;
  DBUG_RETURN(false);
}
540 
541 /****************************************************************************
542                 MODULE meta data changes
543 ****************************************************************************/
544 /*
545   Delete a table
546 
547   SYNOPSIS
548     delete_table()
549     name                    Full path of table name
550 
551   RETURN VALUE
552     >0                        Error
553     0                         Success
554 
555   DESCRIPTION
556     Used to delete a table. By the time delete_table() has been called all
557     opened references to this table will have been closed (and your globally
558     shared references released. The variable name will just be the name of
559     the table. You will need to remove any files you have created at this
560     point.
561 
562     If you do not implement this, the default delete_table() is called from
563     handler.cc and it will delete all files with the file extentions returned
564     by bas_ext().
565 
566     Called from handler.cc by delete_table and  ha_create_table(). Only used
567     during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
568     the storage engine.
569 */
570 
delete_table(const char * name)571 int ha_partition::delete_table(const char *name)
572 {
573   DBUG_ENTER("ha_partition::delete_table");
574 
575   DBUG_RETURN(del_ren_table(name, NULL));
576 }
577 
578 
579 /*
580   Rename a table
581 
582   SYNOPSIS
583     rename_table()
584     from                      Full path of old table name
585     to                        Full path of new table name
586 
587   RETURN VALUE
588     >0                        Error
589     0                         Success
590 
591   DESCRIPTION
592     Renames a table from one name to another from alter table call.
593 
594     If you do not implement this, the default rename_table() is called from
595     handler.cc and it will rename all files with the file extentions returned
596     by bas_ext().
597 
598     Called from sql_table.cc by mysql_rename_table().
599 */
600 
rename_table(const char * from,const char * to)601 int ha_partition::rename_table(const char *from, const char *to)
602 {
603   DBUG_ENTER("ha_partition::rename_table");
604 
605   DBUG_RETURN(del_ren_table(from, to));
606 }
607 
608 
609 /*
610   Create the handler file (.par-file)
611 
612   SYNOPSIS
613     create_handler_files()
614     name                              Full path of table name
615     create_info                       Create info generated for CREATE TABLE
616 
617   RETURN VALUE
618     >0                        Error
619     0                         Success
620 
621   DESCRIPTION
622     create_handler_files is called to create any handler specific files
623     before opening the file with openfrm to later call ::create on the
624     file object.
625     In the partition handler this is used to store the names of partitions
626     and types of engines in the partitions.
627 */
628 
create_handler_files(const char * path,const char * old_path,int action_flag,HA_CREATE_INFO * create_info)629 int ha_partition::create_handler_files(const char *path,
630                                        const char *old_path,
631                                        int action_flag,
632                                        HA_CREATE_INFO *create_info)
633 {
634   DBUG_ENTER("ha_partition::create_handler_files()");
635 
636   /*
637     We need to update total number of parts since we might write the handler
638     file as part of a partition management command
639   */
640   if (action_flag == CHF_DELETE_FLAG ||
641       action_flag == CHF_RENAME_FLAG)
642   {
643     char name[FN_REFLEN];
644     char old_name[FN_REFLEN];
645 
646     strxmov(name, path, ha_par_ext, NullS);
647     strxmov(old_name, old_path, ha_par_ext, NullS);
648     if ((action_flag == CHF_DELETE_FLAG &&
649          mysql_file_delete(key_file_ha_partition_par, name, MYF(MY_WME))) ||
650         (action_flag == CHF_RENAME_FLAG &&
651          mysql_file_rename(key_file_ha_partition_par,
652                            old_name,
653                            name,
654                            MYF(MY_WME))))
655     {
656       DBUG_RETURN(TRUE);
657     }
658   }
659   else if (action_flag == CHF_CREATE_FLAG)
660   {
661     if (create_handler_file(path))
662     {
663       my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
664       DBUG_RETURN(1);
665     }
666   }
667   DBUG_RETURN(0);
668 }
669 
670 
671 /*
672   Create a partitioned table
673 
674   SYNOPSIS
675     create()
676     name                              Full path of table name
677     table_arg                         Table object
678     create_info                       Create info generated for CREATE TABLE
679 
680   RETURN VALUE
681     >0                        Error
682     0                         Success
683 
684   DESCRIPTION
685     create() is called to create a table. The variable name will have the name
686     of the table. When create() is called you do not need to worry about
687     opening the table. Also, the FRM file will have already been created so
688     adjusting create_info will not do you any good. You can overwrite the frm
689     file at this point if you wish to change the table definition, but there
690     are no methods currently provided for doing that.
691 
692     Called from handler.cc by ha_create_table().
693 */
694 
int ha_partition::create(const char *name, TABLE *table_arg,
                         HA_CREATE_INFO *create_info)
{
  int error;
  char name_buff[FN_REFLEN], name_lc_buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *path;
  uint i;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem;
  partition_element table_level_options;
  handler **file, **abort_file;
  THD *thd= ha_thd();
  TABLE_SHARE *share= table_arg->s;
  DBUG_ENTER("ha_partition::create");

  /* The name must not carry a file extension at this point. */
  assert(*fn_rext((char*)name) == '\0');

  /* Not allowed to create temporary partitioned tables */
  if (create_info && create_info->options & HA_LEX_CREATE_TMP_TABLE)
  {
    my_error(ER_PARTITION_NO_TEMPORARY, MYF(0));
    DBUG_RETURN(TRUE);
  }

  /* Load partition names/engines; fills m_file and m_name_buffer_ptr. */
  if (get_from_handler_file(name, ha_thd()->mem_root, false))
    DBUG_RETURN(TRUE);
  assert(m_file_buffer);
  DBUG_PRINT("enter", ("name: (%s)", name));
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  path= get_canonical_filename(*file, name, name_lc_buff);
  table_level_options.set_from_info(create_info);

  /*
    Walk all (sub)partitions in parallel with the name buffer (one
    NUL-terminated name per partition) and the m_file handler array,
    creating each underlying table.
  */
  for (i= 0; i < m_part_info->num_parts; i++)
  {
    part_elem= part_it++;
    if (m_is_sub_partitioned)
    {
      uint j;
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        part_elem= sub_it++;
        create_partition_name(name_buff, path, name_buffer_ptr,
                              NORMAL_PART_NAME, FALSE);
        if ((error= set_up_table_before_create(thd, share, name_buff,
                                               create_info, part_elem)) ||
            ((error= (*file)->ha_create(name_buff, table_arg, create_info))))
          goto create_error;

        /* Restore table-level options possibly overridden per partition. */
        table_level_options.put_to_info(create_info);
        name_buffer_ptr= strend(name_buffer_ptr) + 1;
        file++;
      }
    }
    else
    {
      create_partition_name(name_buff, path, name_buffer_ptr,
                            NORMAL_PART_NAME, FALSE);
      if ((error= set_up_table_before_create(thd, share, name_buff,
                                             create_info, part_elem)) ||
          ((error= (*file)->ha_create(name_buff, table_arg, create_info))))
        goto create_error;

      table_level_options.put_to_info(create_info);
      name_buffer_ptr= strend(name_buffer_ptr) + 1;
      file++;
    }
  }
  DBUG_RETURN(0);

create_error:
  /* Roll back: drop every partition created before the failure. */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    create_partition_name(name_buff, path, name_buffer_ptr, NORMAL_PART_NAME,
                          FALSE);
    (void) (*file)->ha_delete_table((const char*) name_buff);
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  /* Also remove the .par file created for this table. */
  handler::delete_table(name);
  DBUG_RETURN(error);
}
786 
787 /** This function reads zip dict-related info from partition handlers.
788 It may do nothing if individual handlers do not support COMPRESSED_COLUMNS.
789 
790 @param    thd          Thread handler.
791 @param    part_name    Must be always NULL.
792 */
void ha_partition::update_field_defs_with_zip_dict_info(THD* thd,
                                                        const char* part_name)
{
  DBUG_ENTER("ha_partition::update_field_defs_with_zip_dict_info");
  /* Callers must pass NULL; the first partition's name is derived here. */
  assert(part_name == NULL);
  char full_name[FN_REFLEN];
  /* Build the on-disk name of the first partition from the name buffer. */
  create_partition_name(full_name, table_share->path.str, m_name_buffer_ptr,
                        NORMAL_PART_NAME, FALSE);

  /*
  As table structure is the same for all partitions,
  we can use the first partition for this function.
  */
  assert(m_file);
  assert(m_file[0]);

  m_file[0]->update_field_defs_with_zip_dict_info(thd, full_name);

  DBUG_VOID_RETURN;
}
813 
814 
815 /*
816   Optimize table
817 
818   SYNOPSIS
819     optimize()
820     thd               Thread object
821     check_opt         Check/analyze/repair/optimize options
822 
823   RETURN VALUES
824     >0                Error
825     0                 Success
826 */
827 
optimize(THD * thd,HA_CHECK_OPT * check_opt)828 int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt)
829 {
830   DBUG_ENTER("ha_partition::optimize");
831 
832   DBUG_RETURN(handle_opt_partitions(thd, check_opt, OPTIMIZE_PARTS));
833 }
834 
835 
836 /*
837   Analyze table
838 
839   SYNOPSIS
840     analyze()
841     thd               Thread object
842     check_opt         Check/analyze/repair/optimize options
843 
844   RETURN VALUES
845     >0                Error
846     0                 Success
847 */
848 
analyze(THD * thd,HA_CHECK_OPT * check_opt)849 int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt)
850 {
851   DBUG_ENTER("ha_partition::analyze");
852 
853   int result= handle_opt_partitions(thd, check_opt, ANALYZE_PARTS);
854 
855   if ((result == 0) && m_file[0]
856       && (m_file[0]->ha_table_flags() & HA_ONLINE_ANALYZE))
857   {
858     /* If this is ANALYZE TABLE that will not force table definition cache
859        eviction, update statistics for the partition handler. */
860     this->info(HA_STATUS_CONST | HA_STATUS_NO_LOCK);
861   }
862 
863   DBUG_RETURN(result);
864 }
865 
866 
867 /*
868   Check table
869 
870   SYNOPSIS
871     check()
872     thd               Thread object
873     check_opt         Check/analyze/repair/optimize options
874 
875   RETURN VALUES
876     >0                Error
877     0                 Success
878 */
879 
check(THD * thd,HA_CHECK_OPT * check_opt)880 int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt)
881 {
882   DBUG_ENTER("ha_partition::check");
883 
884   DBUG_RETURN(handle_opt_partitions(thd, check_opt, CHECK_PARTS));
885 }
886 
887 
888 /*
889   Repair table
890 
891   SYNOPSIS
892     repair()
893     thd               Thread object
894     check_opt         Check/analyze/repair/optimize options
895 
896   RETURN VALUES
897     >0                Error
898     0                 Success
899 */
900 
repair(THD * thd,HA_CHECK_OPT * check_opt)901 int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt)
902 {
903   DBUG_ENTER("ha_partition::repair");
904 
905   DBUG_RETURN(handle_opt_partitions(thd, check_opt, REPAIR_PARTS));
906 }
907 
908 /**
909   Assign to keycache
910 
911   @param thd          Thread object
912   @param check_opt    Check/analyze/repair/optimize options
913 
914   @return
915     @retval >0        Error
916     @retval 0         Success
917 */
918 
assign_to_keycache(THD * thd,HA_CHECK_OPT * check_opt)919 int ha_partition::assign_to_keycache(THD *thd, HA_CHECK_OPT *check_opt)
920 {
921   DBUG_ENTER("ha_partition::assign_to_keycache");
922 
923   DBUG_RETURN(handle_opt_partitions(thd, check_opt, ASSIGN_KEYCACHE_PARTS));
924 }
925 
926 
927 /**
928   Preload to keycache
929 
930   @param thd          Thread object
931   @param check_opt    Check/analyze/repair/optimize options
932 
933   @return
934     @retval >0        Error
935     @retval 0         Success
936 */
937 
preload_keys(THD * thd,HA_CHECK_OPT * check_opt)938 int ha_partition::preload_keys(THD *thd, HA_CHECK_OPT *check_opt)
939 {
940   DBUG_ENTER("ha_partition::preload_keys");
941 
942   DBUG_RETURN(handle_opt_partitions(thd, check_opt, PRELOAD_KEYS_PARTS));
943 }
944 
945 
946 /*
947   Handle optimize/analyze/check/repair of one partition
948 
949   SYNOPSIS
950     handle_opt_part()
951     thd                      Thread object
952     check_opt                Options
953     file                     Handler object of partition
954     flag                     Optimize/Analyze/Check/Repair flag
955 
956   RETURN VALUE
957     >0                        Failure
958     0                         Success
959 */
960 
handle_opt_part(THD * thd,HA_CHECK_OPT * check_opt,uint part_id,enum_part_operation operation)961 int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt,
962                                   uint part_id, enum_part_operation operation)
963 {
964   int error;
965   handler *file= m_file[part_id];
966   DBUG_ENTER("handle_opt_part");
967   DBUG_PRINT("enter", ("operation = %u", operation));
968 
969   if (operation == OPTIMIZE_PARTS)
970     error= file->ha_optimize(thd, check_opt);
971   else if (operation == ANALYZE_PARTS)
972     error= file->ha_analyze(thd, check_opt);
973   else if (operation == CHECK_PARTS)
974   {
975     error= file->ha_check(thd, check_opt);
976     if (!error ||
977         error == HA_ADMIN_ALREADY_DONE ||
978         error == HA_ADMIN_NOT_IMPLEMENTED)
979     {
980       if (check_opt->flags & (T_MEDIUM | T_EXTEND))
981         error= Partition_helper::check_misplaced_rows(part_id, false);
982     }
983   }
984   else if (operation == REPAIR_PARTS)
985   {
986     error= file->ha_repair(thd, check_opt);
987     if (!error ||
988         error == HA_ADMIN_ALREADY_DONE ||
989         error == HA_ADMIN_NOT_IMPLEMENTED)
990     {
991       if (check_opt->flags & (T_MEDIUM | T_EXTEND))
992         error= Partition_helper::check_misplaced_rows(part_id, true);
993     }
994   }
995   else if (operation == ASSIGN_KEYCACHE_PARTS)
996     error= file->assign_to_keycache(thd, check_opt);
997   else if (operation == PRELOAD_KEYS_PARTS)
998     error= file->preload_keys(thd, check_opt);
999   else
1000   {
1001     assert(FALSE);
1002     error= 1;
1003   }
1004   if (error == HA_ADMIN_ALREADY_DONE)
1005     error= 0;
1006   DBUG_RETURN(error);
1007 }
1008 
1009 
1010 /*
1011   Handle optimize/analyze/check/repair of partitions
1012 
1013   SYNOPSIS
1014     handle_opt_partitions()
1015     thd                      Thread object
1016     check_opt                Options
1017     operation                     Optimize/Analyze/Check/Repair flag
1018 
1019   RETURN VALUE
1020     >0                        Failure
1021     0                         Success
1022 */
1023 
int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
                                        enum_part_operation operation)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  /*
    ALTER TABLE ... ANALYZE/CHECK/... PARTITION names specific partitions
    (ALTER_ADMIN_PARTITION set); a plain admin statement covers them all.
  */
  bool use_all_parts= !(thd->lex->alter_info.flags &
                          Alter_info::ALTER_ADMIN_PARTITION);
  int error;
  DBUG_ENTER("ha_partition::handle_opt_partitions");
  DBUG_PRINT("enter", ("operation= %u", operation));

  do
  {
    partition_element *part_elem= part_it++;
    /*
      when ALTER TABLE <CMD> PARTITION ...
      it should only do named [sub]partitions, otherwise all partitions
    */
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      partition_element *sub_elem;
      uint j= 0, part;
      do
      {
        sub_elem= subpart_it++;
        /* Process the subpartition if all are selected or it is marked. */
        if (use_all_parts ||
            part_elem->part_state == PART_ADMIN ||
            sub_elem->part_state == PART_ADMIN)
        {
          /* Linear subpartition id: partitions are laid out row-major. */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("Optimize subpartition %u (%s)",
                     part, sub_elem->partition_name));
          if ((error= handle_opt_part(thd, check_opt, part, operation)))
          {
            /* print a line which partition the error belongs to */
            if (error != HA_ADMIN_NOT_IMPLEMENTED &&
                error != HA_ADMIN_ALREADY_DONE &&
                error != HA_ADMIN_TRY_ALTER)
            {
              print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
                              table_share->db.str, table->alias,
                              opt_op_name[operation],
                              "Subpartition %s returned error",
                              sub_elem->partition_name);
            }
            /* reset part_state for the remaining partitions */
            do
            {
              if (sub_elem->part_state == PART_ADMIN)
                sub_elem->part_state= PART_NORMAL;
            } while ((sub_elem= subpart_it++));
            if (part_elem->part_state == PART_ADMIN)
              part_elem->part_state= PART_NORMAL;

            /* Clear PART_ADMIN on all partitions not yet visited. */
            while ((part_elem= part_it++))
            {
              List_iterator<partition_element> s_it(part_elem->subpartitions);
              while ((sub_elem= s_it++))
              {
                if (sub_elem->part_state == PART_ADMIN)
                  sub_elem->part_state= PART_NORMAL;
              }
              if (part_elem->part_state == PART_ADMIN)
                part_elem->part_state= PART_NORMAL;
            }
            DBUG_RETURN(error);
          }
          /* Subpartition handled successfully; drop its admin mark. */
          sub_elem->part_state= PART_NORMAL;
        }
      } while (++j < num_subparts);
      part_elem->part_state= PART_NORMAL;
    }
    else
    {
      if (use_all_parts ||
          part_elem->part_state == PART_ADMIN)
      {
        DBUG_PRINT("info", ("Optimize partition %u (%s)", i,
                            part_elem->partition_name));
        if ((error= handle_opt_part(thd, check_opt, i, operation)))
        {
          /* print a line which partition the error belongs to */
          if (error != HA_ADMIN_NOT_IMPLEMENTED &&
              error != HA_ADMIN_ALREADY_DONE &&
              error != HA_ADMIN_TRY_ALTER)
          {
            print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[operation], "Partition %s returned error",
                            part_elem->partition_name);
          }
          /* reset part_state for the remaining partitions */
          do
          {
            if (part_elem->part_state == PART_ADMIN)
              part_elem->part_state= PART_NORMAL;
          } while ((part_elem= part_it++));
          DBUG_RETURN(error);
        }
      }
      /* Partition handled (or skipped); ensure admin mark is cleared. */
      part_elem->part_state= PART_NORMAL;
    }
  } while (++i < num_parts);
  DBUG_RETURN(FALSE);
}
1132 
1133 
1134 /**
  @brief Check and repair the table if necessary
1136 
1137   @param thd    Thread object
1138 
1139   @retval TRUE  Error/Not supported
1140   @retval FALSE Success
1141 
1142   @note Called if open_table_from_share fails and ::is_crashed().
1143 */
1144 
check_and_repair(THD * thd)1145 bool ha_partition::check_and_repair(THD *thd)
1146 {
1147   handler **file= m_file;
1148   DBUG_ENTER("ha_partition::check_and_repair");
1149 
1150   do
1151   {
1152     if ((*file)->ha_check_and_repair(thd))
1153       DBUG_RETURN(TRUE);
1154   } while (*(++file));
1155   DBUG_RETURN(FALSE);
1156 }
1157 
1158 
1159 /**
  @brief Check if the table can be automatically repaired
1161 
1162   @retval TRUE  Can be auto repaired
1163   @retval FALSE Cannot be auto repaired
1164 */
1165 
auto_repair() const1166 bool ha_partition::auto_repair() const
1167 {
1168   DBUG_ENTER("ha_partition::auto_repair");
1169 
1170   /*
1171     As long as we only support one storage engine per table,
1172     we can use the first partition for this function.
1173   */
1174   DBUG_RETURN(m_file[0]->auto_repair());
1175 }
1176 
1177 
1178 /**
  @brief Check if the table is crashed
1180 
1181   @retval TRUE  Crashed
1182   @retval FALSE Not crashed
1183 */
1184 
is_crashed() const1185 bool ha_partition::is_crashed() const
1186 {
1187   handler **file= m_file;
1188   DBUG_ENTER("ha_partition::is_crashed");
1189 
1190   do
1191   {
1192     if ((*file)->is_crashed())
1193       DBUG_RETURN(TRUE);
1194   } while (*(++file));
1195   DBUG_RETURN(FALSE);
1196 }
1197 
1198 
1199 /**
1200   Prepare for creating new partitions during ALTER TABLE ... PARTITION.
1201 
1202   @param  num_partitions            Number of new partitions to be created.
1203   @param  only_create               True if only creating the partition
1204                                     (no open/lock is needed).
  @note This overload takes no disable_non_uniq_indexes parameter; the
        disabled-index state is captured internally via indexes_are_disabled().
1206 
1207   @return Operation status.
1208     @retval    0  Success.
1209     @retval != 0  Error code.
1210 */
1211 
prepare_for_new_partitions(uint num_partitions,bool only_create)1212 int ha_partition::prepare_for_new_partitions(uint num_partitions,
1213                                              bool only_create)
1214 {
1215   size_t alloc_size = num_partitions * sizeof(handler*);
1216   DBUG_ENTER("ha_partition::prepare_for_new_partition");
1217   m_new_file= static_cast<handler**>(
1218                 my_malloc(key_memory_ha_partition_engine_array,
1219                           alloc_size,
1220                           MYF(MY_WME)));
1221   if (!m_new_file)
1222   {
1223     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1224   }
1225   memset(m_new_file, 0, alloc_size);
1226   m_num_new_partitions= num_partitions;
1227   m_indexes_are_disabled= indexes_are_disabled();
1228   DBUG_RETURN(0);
1229 }
1230 
1231 
1232 /**
1233   Creating a new partition
1234 
1235   Used during fast_alter_part_table (ALTER TABLE ... ADD/DROP... PARTITION).
1236 
1237   @param  table        Table object.
1238   @param  create_info  Create info from CREATE TABLE.
1239   @param  part_name    Partition name.
1240   @param  new_part_id  Partition id in m_new_file array.
1241   @param  p_elem       Partition element.
1242 
1243   @return Operation status
1244     @retval  0  Success
1245     @retval >0  Error
1246 */
1247 
create_new_partition(TABLE * tbl,HA_CREATE_INFO * create_info,const char * part_name,uint new_part_id,partition_element * p_elem)1248 int ha_partition::create_new_partition(TABLE *tbl,
1249                                        HA_CREATE_INFO *create_info,
1250                                        const char *part_name,
1251                                        uint new_part_id,
1252                                        partition_element *p_elem)
1253 {
1254   int error;
1255   THD *thd= ha_thd();
1256   TABLE_SHARE *share= tbl->s;
1257   handler *file;
1258   Parts_share_refs *p_share_refs;
1259   DBUG_ENTER("ha_partition::create_new_partition");
1260 
1261   file= get_new_handler(share, thd->mem_root, p_elem->engine_type);
1262   if (!file)
1263   {
1264     mem_alloc_error(sizeof(ha_partition));
1265     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1266   }
1267 
1268   /*
1269     The Handler_shares for each partition's handler can be allocated
1270     within this handler, since there will not be any more instances of the
1271     new partitions, until the table is reopened after the ALTER succeeded.
1272   */
1273   p_share_refs= new Parts_share_refs;
1274   if (!p_share_refs)
1275   {
1276     mem_alloc_error(sizeof(Parts_share_refs));
1277     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1278   }
1279   if (p_share_refs->init(1))
1280   {
1281     mem_alloc_error(sizeof(Parts_share_refs));
1282     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1283   }
1284   if (m_new_partitions_share_refs.push_back(p_share_refs))
1285   {
1286     mem_alloc_error(sizeof(Parts_share_refs));
1287     DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1288   }
1289   if (file->set_ha_share_ref(&p_share_refs->ha_shares[0]))
1290   {
1291     DBUG_RETURN(HA_ERR_INITIALIZATION);
1292   }
1293 
1294   if ((error= file->ha_create(part_name, tbl, create_info)))
1295   {
1296     /*
1297       Added for safety, InnoDB reports HA_ERR_FOUND_DUPP_KEY
1298       if the table/partition already exists.
1299       If we return that error code, then print_error would try to
1300       get_dup_key on a non-existing partition.
1301       So return a more reasonable error code.
1302     */
1303     if (error == HA_ERR_FOUND_DUPP_KEY)
1304       error= HA_ERR_TABLE_EXIST;
1305     goto error_create;
1306   }
1307   DBUG_PRINT("info", ("partition %s created", part_name));
1308   if ((error= file->ha_open(tbl, part_name, m_mode,
1309                             m_open_test_lock | HA_OPEN_NO_PSI_CALL)))
1310   {
1311     goto error_open;
1312   }
1313   DBUG_PRINT("info", ("partition %s opened", part_name));
1314 
1315   /*
1316     Note: if you plan to add another call that may return failure,
1317     better to do it before external_lock() as close_new_partitions()
1318     assumes that external_lock() is last call that may fail here.
1319     Otherwise see description for close_new_partitions().
1320   */
1321   if ((error= file->ha_external_lock(ha_thd(), F_WRLCK)))
1322   {
1323     goto error_external_lock;
1324   }
1325   DBUG_PRINT("info", ("partition %s external locked", part_name));
1326 
1327   if (m_indexes_are_disabled)
1328   {
1329     file->ha_disable_indexes(m_indexes_are_disabled);
1330   }
1331 
1332   m_new_file[new_part_id]= file;
1333   DBUG_RETURN(0);
1334 error_external_lock:
1335   (void) file->ha_close();
1336 error_open:
1337   (void) file->ha_delete_table(part_name);
1338 error_create:
1339   DBUG_RETURN(error);
1340 }
1341 
1342 
1343 /** Insert a row to the new partition.
1344   @param part_id  Partition to insert into.
1345 
1346   @return Operation status.
1347     @retval 0    Success
1348     @retval != 0 Error code
1349 */
write_row_in_new_part(uint part_id)1350 int ha_partition::write_row_in_new_part(uint part_id)
1351 {
1352   int error;
1353   THD *thd= ha_thd();
1354   DBUG_ENTER("ha_partition::write_row_in_new_part");
1355   m_last_part= part_id;
1356 
1357   if (!m_new_file[part_id])
1358   {
1359     /* Altered partition contains misplaced row. */
1360     m_err_rec= table->record[0];
1361     DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
1362   }
1363 
1364   tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
1365   error= m_new_file[part_id]->ha_write_row(table->record[0]);
1366   reenable_binlog(thd);
1367   DBUG_RETURN(error);
1368 }
1369 
1370 
1371 /*
1372   Close and unlock all created partitions.
1373 
1374   So they can be renamed and included in the altered table
1375   or deleted by the ddl-log in case of failure.
1376 */
1377 
close_new_partitions()1378 void ha_partition::close_new_partitions()
1379 {
1380   DBUG_ENTER("ha_partition::close_new_partitions");
1381 
1382   THD *thd;
1383   if (m_new_file)
1384   {
1385     thd= ha_thd();
1386     handler **file= &m_new_file[m_num_new_partitions - 1];
1387     for (; m_new_file <= file; file--)
1388     {
1389       if (*file == NULL)
1390       {
1391         /* Not a new partition, skip it. */
1392         continue;
1393       }
1394       (*file)->ha_external_lock(thd, F_UNLCK);
1395       (*file)->ha_close();
1396       delete *file;
1397     }
1398     my_free(m_new_file);
1399     m_new_file= NULL;
1400   }
1401   DBUG_VOID_RETURN;
1402 }
1403 
1404 
1405 /*
1406   Update create info as part of ALTER TABLE
1407 
1408   SYNOPSIS
1409     update_create_info()
1410     create_info                   Create info from ALTER TABLE
1411 
1412   RETURN VALUE
1413     NONE
1414 
1415   DESCRIPTION
1416   Forward this handler call to the storage engine foreach
1417   partition handler.  The data_file_name for each partition may
1418   need to be reset if the tablespace was moved.  Use a dummy
1419   HA_CREATE_INFO structure and transfer necessary data.
1420 */
1421 
void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_partition::update_create_info");

  /*
    Fix for bug#38751, some engines needs info-calls in ALTER.
    Archive need this since it flushes in ::info.
    HA_STATUS_AUTO is optimized so it will not always be forwarded
    to all partitions, but HA_STATUS_VARIABLE will.
  */
  info(HA_STATUS_VARIABLE);

  info(HA_STATUS_AUTO);

  /* Only report auto_increment if the statement did not set it itself. */
  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
    create_info->auto_increment_value= stats.auto_increment_value;

  /*
    DATA DIRECTORY and INDEX DIRECTORY are never applied to the whole
    partitioned table, only its parts.
  */
  /* (const char*) -1 is the sentinel marking a call from ALTER TABLE. */
  my_bool from_alter = (create_info->data_file_name == (const char*) -1);
  create_info->data_file_name= create_info->index_file_name = NULL;

  /*
  We do not need to update the individual partition DATA DIRECTORY settings
  since they can be changed by ALTER TABLE ... REORGANIZE PARTITIONS.
  */
  if (from_alter)
    DBUG_VOID_RETURN;

  /*
    send Handler::update_create_info() to the storage engine for each
    partition that currently has a handler object.  Using a dummy
    HA_CREATE_INFO structure to collect DATA and INDEX DIRECTORYs.
  */

  List_iterator<partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem, *sub_elem;
  uint num_subparts= m_part_info->num_subparts;
  uint num_parts = num_subparts ? m_file_tot_parts / num_subparts
                                : m_file_tot_parts;
  HA_CREATE_INFO dummy_info;

  /*
  Since update_create_info() can be called from mysql_prepare_alter_table()
  when not all handlers are set up, we look for that condition first.
  If all handlers are not available, do not call update_create_info for any.
  */
  /* Pass 1: verify every [sub]partition element and handler exists. */
  uint i, j, part;
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (!part_elem)
      DBUG_VOID_RETURN;
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        if (!sub_elem)
          DBUG_VOID_RETURN;
        part= i * num_subparts + j;
        if (part >= m_file_tot_parts || !m_file[part])
          DBUG_VOID_RETURN;
      }
    }
    else
    {
      if (!m_file[i])
        DBUG_VOID_RETURN;
    }
  }
  part_it.rewind();

  /* Pass 2: collect DATA/INDEX DIRECTORY from each InnoDB partition. */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    assert(part_elem);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        assert(sub_elem);
        part= i * num_subparts + j;
        assert(part < m_file_tot_parts && m_file[part]);
        if (ha_legacy_type(m_file[part]->ht) == DB_TYPE_INNODB)
        {
          /* Reset the dummy struct so stale paths do not leak across parts. */
          dummy_info.data_file_name= dummy_info.index_file_name = NULL;
          m_file[part]->update_create_info(&dummy_info);

          if (dummy_info.data_file_name || sub_elem->data_file_name)
          {
            sub_elem->data_file_name = (char*) dummy_info.data_file_name;
          }
          if (dummy_info.index_file_name || sub_elem->index_file_name)
          {
            sub_elem->index_file_name = (char*) dummy_info.index_file_name;
          }
        }
      }
    }
    else
    {
      assert(m_file[i]);
      if (ha_legacy_type(m_file[i]->ht) == DB_TYPE_INNODB)
      {
        dummy_info.data_file_name= dummy_info.index_file_name= NULL;
        m_file[i]->update_create_info(&dummy_info);
        if (dummy_info.data_file_name || part_elem->data_file_name)
        {
          part_elem->data_file_name = (char*) dummy_info.data_file_name;
        }
        if (dummy_info.index_file_name || part_elem->index_file_name)
        {
          part_elem->index_file_name = (char*) dummy_info.index_file_name;
        }
      }
    }
  }
  DBUG_VOID_RETURN;
}
1547 
1548 
1549 /**
1550   Change the internal TABLE_SHARE pointer
1551 
1552   @param table_arg    TABLE object
1553   @param share        New share to use
1554 
1555   @note Is used in error handling in ha_delete_table.
1556   All handlers should exist (lock_partitions should not be used)
1557 */
1558 
change_table_ptr(TABLE * table_arg,TABLE_SHARE * share)1559 void ha_partition::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
1560 {
1561   handler **file_array;
1562   table= table_arg;
1563   table_share= share;
1564   /*
1565     m_file can be NULL when using an old cached table in DROP TABLE, when the
1566     table just has REMOVED PARTITIONING, see Bug#42438
1567   */
1568   if (m_file)
1569   {
1570     file_array= m_file;
1571     assert(*file_array);
1572     do
1573     {
1574       (*file_array)->change_table_ptr(table_arg, share);
1575     } while (*(++file_array));
1576   }
1577 }
1578 
1579 /**
1580   Handle delete and rename table
1581 
1582     @param from         Full path of old table
1583     @param to           Full path of new table
1584 
1585   @return Operation status
1586     @retval >0  Error
1587     @retval 0   Success
1588 
1589   @note  Common routine to handle delete_table and rename_table.
1590   The routine uses the partition handler file to get the
1591   names of the partition instances. Both these routines
1592   are called after creating the handler without table
1593   object and thus the file is needed to discover the
1594   names of the partitions and the underlying storage engines.
1595 */
1596 
int ha_partition::del_ren_table(const char *from, const char *to)
{
  int save_error= 0;
  int error= HA_ERR_INTERNAL_ERROR;
  char from_buff[FN_REFLEN], to_buff[FN_REFLEN], from_lc_buff[FN_REFLEN],
       to_lc_buff[FN_REFLEN], buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *from_path;
  const char *to_path= NULL;
  uint i;
  handler **file, **abort_file;
  DBUG_ENTER("ha_partition::del_ren_table");

  fn_format(buff,from, "", ha_par_ext, MY_APPEND_EXT);
  /* Check if the  par file exists */
  if (my_access(buff,F_OK))
  {
    /*
      If the .par file does not exist, return HA_ERR_NO_SUCH_TABLE,
      This will signal to the caller that it can remove the .frm
      file.
    */
    error= HA_ERR_NO_SUCH_TABLE;
    DBUG_RETURN(error);
  }

  /* Load partition names and engines from the .par file. */
  if (get_from_handler_file(from, ha_thd()->mem_root, false))
    DBUG_RETURN(error);
  assert(m_file_buffer);
  DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)"));
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  from_path= get_canonical_filename(*file, from, from_lc_buff);
  if (to != NULL)
    to_path= get_canonical_filename(*file, to, to_lc_buff);
  i= 0;
  /*
    Walk all partition handlers in parallel with the name buffer, which
    stores the partition names as consecutive NUL-terminated strings.
  */
  do
  {
    create_partition_name(from_buff, from_path, name_buffer_ptr,
                          NORMAL_PART_NAME, FALSE);

    if (to != NULL)
    {                                           // Rename branch
      create_partition_name(to_buff, to_path, name_buffer_ptr,
                            NORMAL_PART_NAME, FALSE);
      error= (*file)->ha_rename_table(from_buff, to_buff);
      if (error)
        goto rename_error;
    }
    else                                        // delete branch
    {
      error= (*file)->ha_delete_table(from_buff);
    }
    /* Advance past this partition's name to the next one. */
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
    if (error)
      save_error= error;
    i++;
  } while (*(++file));

  if (to == NULL)
  {
    DBUG_EXECUTE_IF("crash_before_deleting_par_file", DBUG_SUICIDE(););

    /* Delete the .par file. If error, break.*/
    if ((error= handler::delete_table(from)))
      DBUG_RETURN(error);

    DBUG_EXECUTE_IF("crash_after_deleting_par_file", DBUG_SUICIDE(););
  }

  if (to != NULL)
  {
    if ((error= handler::rename_table(from, to)))
    {
      /* Try to revert everything, ignore errors */
      (void) handler::rename_table(to, from);
      goto rename_error;
    }
  }
  DBUG_RETURN(save_error);
rename_error:
  /* Roll back: undo the renames already done for preceding partitions. */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    /* Revert the rename, back from 'to' to the original 'from' */
    create_partition_name(from_buff, from_path, name_buffer_ptr,
                          NORMAL_PART_NAME, FALSE);
    create_partition_name(to_buff, to_path, name_buffer_ptr,
                          NORMAL_PART_NAME, FALSE);
    /* Ignore error here */
    (void) (*file)->ha_rename_table(to_buff, from_buff);
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  DBUG_RETURN(error);
}
1699 
1700 
1701 /*
1702   Add two names together
1703 
1704   SYNOPSIS
1705     name_add()
1706     out:dest                          Destination string
1707     first_name                        First name
1708     sec_name                          Second name
1709 
1710   RETURN VALUE
1711     >0                                Error
1712     0                                 Success
1713 
1714   DESCRIPTION
1715     Routine used to add two names with '_' in between then. Service routine
1716     to create_handler_file
1717     Include the NULL in the count of characters since it is needed as separator
1718     between the partition names.
1719 */
1720 
name_add(char * dest,const char * first_name,const char * sec_name)1721 static uint name_add(char *dest, const char *first_name, const char *sec_name)
1722 {
1723   return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1;
1724 }
1725 
1726 
1727 /**
1728   Create the special .par file
1729 
1730   @param name  Full path of table name
1731 
1732   @return Operation status
    @retval TRUE   Failure (e.g. out of memory)
    @retval FALSE  Success
1735 
1736   @note
1737     Method used to create handler file with names of partitions, their
1738     engine types and the number of partitions.
1739 */
1740 
create_handler_file(const char * name)1741 bool ha_partition::create_handler_file(const char *name)
1742 {
1743   partition_element *part_elem, *subpart_elem;
1744   uint i, j;
1745   size_t part_name_len, subpart_name_len, tot_name_len;
1746   uint tot_partition_words, num_parts;
1747   uint tot_parts= 0;
1748   uint tot_len_words, tot_len_byte, chksum, tot_name_words;
1749   char *name_buffer_ptr;
1750   uchar *file_buffer, *engine_array;
1751   bool result= TRUE;
1752   char file_name[FN_REFLEN];
1753   char part_name[FN_REFLEN];
1754   char subpart_name[FN_REFLEN];
1755   File file;
1756   List_iterator_fast <partition_element> part_it(m_part_info->partitions);
1757   DBUG_ENTER("create_handler_file");
1758 
1759   num_parts= m_part_info->partitions.elements;
1760   DBUG_PRINT("info", ("table name = %s, num_parts = %u", name,
1761                       num_parts));
1762   tot_name_len= 0;
1763   for (i= 0; i < num_parts; i++)
1764   {
1765     part_elem= part_it++;
1766     if (part_elem->part_state != PART_NORMAL &&
1767         part_elem->part_state != PART_TO_BE_ADDED &&
1768         part_elem->part_state != PART_CHANGED)
1769       continue;
1770     tablename_to_filename(part_elem->partition_name, part_name,
1771                           FN_REFLEN);
1772     part_name_len= strlen(part_name);
1773     if (!m_is_sub_partitioned)
1774     {
1775       tot_name_len+= part_name_len + 1;
1776       tot_parts++;
1777     }
1778     else
1779     {
1780       List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
1781       for (j= 0; j < m_part_info->num_subparts; j++)
1782       {
1783         subpart_elem= sub_it++;
1784         tablename_to_filename(subpart_elem->partition_name,
1785                               subpart_name,
1786                               FN_REFLEN);
1787         subpart_name_len= strlen(subpart_name);
1788         tot_name_len+= part_name_len + subpart_name_len + 5;
1789         tot_parts++;
1790       }
1791     }
1792   }
1793   /*
1794      File format:
1795      Length in words              4 byte
1796      Checksum                     4 byte
1797      Total number of partitions   4 byte
1798      Array of engine types        n * 4 bytes where
1799      n = (m_tot_parts + 3)/4
1800      Length of name part in bytes 4 bytes
1801      (Names in filename format)
1802      Name part                    m * 4 bytes where
1803      m = ((length_name_part + 3)/4)*4
1804 
1805      All padding bytes are zeroed
1806   */
1807   tot_partition_words= (tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
1808   tot_name_words= (tot_name_len + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
1809   /* 4 static words (tot words, checksum, tot partitions, name length) */
1810   tot_len_words= 4 + tot_partition_words + tot_name_words;
1811   tot_len_byte= PAR_WORD_SIZE * tot_len_words;
1812   if (!(file_buffer= (uchar *) my_malloc(key_memory_ha_partition_file,
1813                                          tot_len_byte, MYF(MY_ZEROFILL))))
1814     DBUG_RETURN(TRUE);
1815   engine_array= (file_buffer + PAR_ENGINES_OFFSET);
1816   name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE
1817                             + PAR_WORD_SIZE);
1818   part_it.rewind();
1819   for (i= 0; i < num_parts; i++)
1820   {
1821     part_elem= part_it++;
1822     if (part_elem->part_state != PART_NORMAL &&
1823         part_elem->part_state != PART_TO_BE_ADDED &&
1824         part_elem->part_state != PART_CHANGED)
1825       continue;
1826     if (!m_is_sub_partitioned)
1827     {
1828       tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN);
1829       name_buffer_ptr= my_stpcpy(name_buffer_ptr, part_name)+1;
1830       *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
1831       DBUG_PRINT("info", ("engine: %u", *engine_array));
1832       engine_array++;
1833     }
1834     else
1835     {
1836       List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
1837       for (j= 0; j < m_part_info->num_subparts; j++)
1838       {
1839         subpart_elem= sub_it++;
1840         tablename_to_filename(part_elem->partition_name, part_name,
1841                               FN_REFLEN);
1842         tablename_to_filename(subpart_elem->partition_name, subpart_name,
1843                               FN_REFLEN);
1844         name_buffer_ptr+= name_add(name_buffer_ptr,
1845                                    part_name,
1846                                    subpart_name);
1847         *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type);
1848         DBUG_PRINT("info", ("engine: %u", *engine_array));
1849         engine_array++;
1850       }
1851     }
1852   }
1853   chksum= 0;
1854   int4store(file_buffer, tot_len_words);
1855   int4store(file_buffer + PAR_NUM_PARTS_OFFSET, tot_parts);
1856   int4store(file_buffer + PAR_ENGINES_OFFSET +
1857             (tot_partition_words * PAR_WORD_SIZE),
1858             static_cast<uint32>(tot_name_len));
1859   for (i= 0; i < tot_len_words; i++)
1860     chksum^= uint4korr(file_buffer + PAR_WORD_SIZE * i);
1861   int4store(file_buffer + PAR_CHECKSUM_OFFSET, chksum);
1862   /*
1863     Add .par extension to the file name.
1864     Create and write and close file
1865     to be used at open, delete_table and rename_table
1866   */
1867   fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT);
1868   if ((file= mysql_file_create(key_file_ha_partition_par,
1869                                file_name, CREATE_MODE, O_RDWR | O_TRUNC,
1870                                MYF(MY_WME))) >= 0)
1871   {
1872     result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte,
1873                              MYF(MY_WME | MY_NABP)) != 0;
1874     (void) mysql_file_close(file, MYF(0));
1875   }
1876   else
1877     result= TRUE;
1878   my_free(file_buffer);
1879   DBUG_RETURN(result);
1880 }
1881 
1882 
1883 /**
1884   Clear handler variables and free some memory
1885 */
1886 
clear_handler_file()1887 void ha_partition::clear_handler_file()
1888 {
1889   if (m_engine_array)
1890   {
1891     plugin_unlock_list(NULL, m_engine_array, m_tot_parts);
1892     my_free(m_engine_array);
1893     m_engine_array= NULL;
1894   }
1895   if (m_file_buffer)
1896   {
1897     my_free(m_file_buffer);
1898     m_file_buffer= NULL;
1899   }
1900 }
1901 
1902 
1903 /**
1904   Create underlying handler objects
1905 
1906   @param mem_root  Allocate memory through this
1907 
1908   @return Operation status
1909     @retval TRUE   Error
1910     @retval FALSE  Success
1911 */
1912 
create_handlers(MEM_ROOT * mem_root)1913 bool ha_partition::create_handlers(MEM_ROOT *mem_root)
1914 {
1915   uint i;
1916   uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
1917   handlerton *hton0;
1918   DBUG_ENTER("create_handlers");
1919 
1920   if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
1921     DBUG_RETURN(TRUE);
1922   m_file_tot_parts= m_tot_parts;
1923   memset(m_file, 0, alloc_len);
1924   for (i= 0; i < m_tot_parts; i++)
1925   {
1926     handlerton *hton= plugin_data<handlerton*>(m_engine_array[i]);
1927     if (!(m_file[i]= get_new_handler(table_share, mem_root, hton)))
1928       DBUG_RETURN(TRUE);
1929     DBUG_PRINT("info", ("engine_type: %u", hton->db_type));
1930   }
1931   /* For the moment we only support partition over the same table engine */
1932   hton0= plugin_data<handlerton*>(m_engine_array[0]);
1933   if (ha_legacy_type(hton0) == DB_TYPE_MYISAM)
1934   {
1935     DBUG_PRINT("info", ("MyISAM"));
1936     m_myisam= TRUE;
1937   }
1938   /* INNODB may not be compiled in... */
1939   else if (ha_legacy_type(hton0) == DB_TYPE_INNODB)
1940   {
1941     DBUG_PRINT("info", ("InnoDB"));
1942     m_innodb= TRUE;
1943   }
1944   DBUG_RETURN(FALSE);
1945 }
1946 
1947 
1948 /*
1949   Create underlying handler objects from partition info
1950 
1951   SYNOPSIS
1952     new_handlers_from_part_info()
1953     mem_root            Allocate memory through this
1954 
1955   RETURN VALUE
1956     TRUE                  Error
1957     FALSE                 Success
1958 */
1959 
new_handlers_from_part_info(MEM_ROOT * mem_root)1960 bool ha_partition::new_handlers_from_part_info(MEM_ROOT *mem_root)
1961 {
1962   uint i, j, part_count;
1963   partition_element *part_elem;
1964   uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
1965   List_iterator_fast <partition_element> part_it(m_part_info->partitions);
1966   DBUG_ENTER("ha_partition::new_handlers_from_part_info");
1967 
1968   if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
1969   {
1970     mem_alloc_error(alloc_len);
1971     goto error_end;
1972   }
1973   m_file_tot_parts= m_tot_parts;
1974   memset(m_file, 0, alloc_len);
1975   assert(m_part_info->num_parts > 0);
1976   assert(m_part_info->num_parts == m_part_info->partitions.elements);
1977 
1978   i= 0;
1979   part_count= 0;
1980   /*
1981     Don't know the size of the underlying storage engine, invent a number of
1982     bytes allocated for error message if allocation fails
1983   */
1984   do
1985   {
1986     part_elem= part_it++;
1987     if (m_is_sub_partitioned)
1988     {
1989       for (j= 0; j < m_part_info->num_subparts; j++)
1990       {
1991         if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
1992                                                     part_elem->engine_type)))
1993           goto error;
1994         DBUG_PRINT("info", ("engine_type: %u",
1995                    (uint) ha_legacy_type(part_elem->engine_type)));
1996       }
1997     }
1998     else
1999     {
2000       if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
2001                                                   part_elem->engine_type)))
2002         goto error;
2003       DBUG_PRINT("info", ("engine_type: %u",
2004                  (uint) ha_legacy_type(part_elem->engine_type)));
2005     }
2006   } while (++i < m_part_info->num_parts);
2007   if (ha_legacy_type(part_elem->engine_type) == DB_TYPE_MYISAM)
2008   {
2009     DBUG_PRINT("info", ("MyISAM"));
2010     m_myisam= TRUE;
2011   }
2012   DBUG_RETURN(FALSE);
2013 error:
2014   mem_alloc_error(sizeof(handler));
2015 error_end:
2016   DBUG_RETURN(TRUE);
2017 }
2018 
2019 
2020 /**
2021   Read the .par file to get the partitions engines and names
2022 
  @param name  Name of table file (without extension)
2024 
2025   @return Operation status
2026     @retval true   Failure
2027     @retval false  Success
2028 
2029   @note On success, m_file_buffer is allocated and must be
  freed by the caller. m_name_buffer_ptr and m_tot_parts are also set.
2031 */
2032 
bool ha_partition::read_par_file(const char *name)
{
  char buff[FN_REFLEN], *tot_name_len_offset, *buff_p= buff;
  File file;
  char *file_buffer;
  uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
  DBUG_ENTER("ha_partition::read_par_file");
  DBUG_PRINT("enter", ("table name: '%s'", name));

  /* Contents already read and cached from a previous call; nothing to do. */
  if (m_file_buffer)
    DBUG_RETURN(false);
  fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT);

  /* Following could be done with mysql_file_stat to read in whole file */
  if ((file= mysql_file_open(key_file_ha_partition_par,
                             buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
    DBUG_RETURN(TRUE);
  /* The first word of the file holds its total length in 4-byte words. */
  if (mysql_file_read(file, (uchar *) &buff[0], PAR_WORD_SIZE, MYF(MY_NABP)))
    goto err1;
  len_words= uint4korr(buff_p);
  len_bytes= PAR_WORD_SIZE * len_words;
  /* Rewind and read the whole file, length word included, in one call. */
  if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
    goto err1;
  if (!(file_buffer= (char*) my_malloc(key_memory_ha_partition_file,
                                       len_bytes, MYF(0))))
    goto err1;
  if (mysql_file_read(file, (uchar *) file_buffer, len_bytes, MYF(MY_NABP)))
    goto err2;

  /*
    The writer stores a checksum word such that XOR-ing every word of the
    file (checksum included) yields zero; a non-zero result means the file
    is corrupt.
  */
  chksum= 0;
  for (i= 0; i < len_words; i++)
    chksum ^= uint4korr((file_buffer) + PAR_WORD_SIZE * i);
  if (chksum)
    goto err2;
  m_tot_parts= uint4korr((file_buffer) + PAR_NUM_PARTS_OFFSET);
  DBUG_PRINT("info", ("No of parts = %u", m_tot_parts));
  assert(!m_file_tot_parts || m_file_tot_parts == m_tot_parts);
  /* One engine-type byte per partition, rounded up to whole words. */
  tot_partition_words= (m_tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;

  /* The name-length word follows the engine array. */
  tot_name_len_offset= file_buffer + PAR_ENGINES_OFFSET +
                       PAR_WORD_SIZE * tot_partition_words;
  tot_name_words= (uint4korr(tot_name_len_offset) + PAR_WORD_SIZE - 1) /
                  PAR_WORD_SIZE;
  /*
    Verify the total length = tot size word, checksum word, num parts word +
    engines array + name length word + name array.
  */
  if (len_words != (tot_partition_words + tot_name_words + 4))
    goto err2;
  (void) mysql_file_close(file, MYF(0));
  m_file_buffer= file_buffer;          // Will be freed in clear_handler_file()
  /* Partition names start right after the name-length word. */
  m_name_buffer_ptr= tot_name_len_offset + PAR_WORD_SIZE;

  DBUG_RETURN(false);

err2:
  my_free(file_buffer);
err1:
  (void) mysql_file_close(file, MYF(0));
  DBUG_RETURN(true);
}
2094 
2095 
2096 /**
2097   Setup m_engine_array
2098 
2099   @param mem_root  MEM_ROOT to use for allocating new handlers
2100 
2101   @return Operation status
2102     @retval false  Success
2103     @retval true   Failure
2104 */
2105 
setup_engine_array(MEM_ROOT * mem_root)2106 bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
2107 {
2108   uint i;
2109   uchar *buff;
2110   handlerton *first_engine;
2111   enum legacy_db_type db_type, first_db_type;
2112 
2113   assert(!m_file);
2114   assert(!m_engine_array);
2115   DBUG_ENTER("ha_partition::setup_engine_array");
2116 
2117   buff= (uchar *) (m_file_buffer + PAR_ENGINES_OFFSET);
2118   first_db_type= (enum legacy_db_type) buff[0];
2119   first_engine= ha_resolve_by_legacy_type(ha_thd(), first_db_type);
2120   if (!first_engine)
2121     goto err;
2122 
2123   if (!(m_engine_array= (plugin_ref*)
2124                 my_malloc(key_memory_ha_partition_engine_array,
2125                           m_tot_parts * sizeof(plugin_ref), MYF(MY_WME))))
2126     goto err;
2127 
2128   for (i= 0; i < m_tot_parts; i++)
2129   {
2130     db_type= (enum legacy_db_type) buff[i];
2131     if (db_type != first_db_type)
2132     {
2133       DBUG_PRINT("error", ("partition %u engine %d is not same as "
2134                            "first partition %d", i, db_type,
2135                            (int) first_db_type));
2136       assert(0);
2137       clear_handler_file();
2138       goto err;
2139     }
2140     m_engine_array[i]= ha_lock_engine(NULL, first_engine);
2141     if (!m_engine_array[i])
2142     {
2143       clear_handler_file();
2144       goto err;
2145     }
2146   }
2147 
2148   if (create_handlers(mem_root))
2149   {
2150     clear_handler_file();
2151     DBUG_RETURN(true);
2152   }
2153 
2154   DBUG_RETURN(false);
2155 
2156 err:
2157   DBUG_RETURN(true);
2158 }
2159 
2160 
2161 /**
2162   Get info about partition engines and their names from the .par file
2163 
2164   @param name      Full path of table name
2165   @param mem_root  Allocate memory through this
2166   @param is_clone  If it is a clone, don't create new handlers
2167 
2168   @return Operation status
2169     @retval true   Error
2170     @retval false  Success
2171 
2172   @note Open handler file to get partition names, engine types and number of
2173   partitions.
2174 */
2175 
get_from_handler_file(const char * name,MEM_ROOT * mem_root,bool is_clone)2176 bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root,
2177                                          bool is_clone)
2178 {
2179   DBUG_ENTER("ha_partition::get_from_handler_file");
2180   DBUG_PRINT("enter", ("table name: '%s'", name));
2181 
2182   if (m_file_buffer)
2183     DBUG_RETURN(false);
2184 
2185   if (read_par_file(name))
2186     DBUG_RETURN(true);
2187 
2188   if (!is_clone && setup_engine_array(mem_root))
2189     DBUG_RETURN(true);
2190 
2191   DBUG_RETURN(false);
2192 }
2193 
2194 
2195 /****************************************************************************
2196                 MODULE open/close object
2197 ****************************************************************************/
2198 
2199 /**
2200   Set Handler_share pointer and allocate Handler_share pointers
2201   for each partition and set those.
2202 
2203   @param ha_share_arg  Where to store/retrieve the Partitioning_share pointer
2204                        to be shared by all instances of the same table.
2205 
2206   @return Operation status
2207     @retval true  Failure
    @retval false Success
2209 */
2210 
set_ha_share_ref(Handler_share ** ha_share_arg)2211 bool ha_partition::set_ha_share_ref(Handler_share **ha_share_arg)
2212 {
2213   Handler_share **ha_shares;
2214   uint i;
2215   DBUG_ENTER("ha_partition::set_ha_share_ref");
2216 
2217   assert(!part_share);
2218   assert(table_share);
2219   assert(!m_is_clone_of);
2220   assert(m_tot_parts);
2221   if (handler::set_ha_share_ref(ha_share_arg))
2222     DBUG_RETURN(true);
2223   if (!(part_share= get_share()))
2224     DBUG_RETURN(true);
2225   assert(part_share->partitions_share_refs);
2226   assert(part_share->partitions_share_refs->num_parts >= m_tot_parts);
2227   ha_shares= part_share->partitions_share_refs->ha_shares;
2228   for (i= 0; i < m_tot_parts; i++)
2229   {
2230     if (m_file[i]->set_ha_share_ref(&ha_shares[i]))
2231       DBUG_RETURN(true);
2232   }
2233   DBUG_RETURN(false);
2234 }
2235 
2236 
2237 /**
2238   Get the PARTITION_SHARE for the table.
2239 
2240   @return Operation status
2241     @retval true   Error
2242     @retval false  Success
2243 
2244   @note Gets or initializes the Ha_partition_share object used by partitioning.
2245   The Ha_partition_share is used for handling the auto_increment etc.
2246 */
2247 
get_share()2248 Ha_partition_share *ha_partition::get_share()
2249 {
2250   Ha_partition_share *tmp_share;
2251   DBUG_ENTER("ha_partition::get_share");
2252   assert(table_share);
2253 
2254   lock_shared_ha_data();
2255   if (!(tmp_share= static_cast<Ha_partition_share*>(get_ha_share_ptr())))
2256   {
2257     tmp_share= new Ha_partition_share;
2258     if (!tmp_share)
2259       goto err;
2260     if (tmp_share->init(m_tot_parts))
2261     {
2262       delete tmp_share;
2263       tmp_share= NULL;
2264       goto err;
2265     }
2266     if (table && table->found_next_number_field &&
2267         tmp_share->init_auto_inc_mutex(table_share))
2268     {
2269       delete tmp_share;
2270       tmp_share= NULL;
2271       goto err;
2272     }
2273 
2274     set_ha_share_ptr(static_cast<Handler_share*>(tmp_share));
2275   }
2276 err:
2277   unlock_shared_ha_data();
2278   DBUG_RETURN(tmp_share);
2279 }
2280 
2281 
2282 
2283 /**
2284   Helper function for freeing all internal bitmaps.
2285 */
2286 
free_partition_bitmaps()2287 void ha_partition::free_partition_bitmaps()
2288 {
2289   /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
2290   bitmap_free(&m_bulk_insert_started);
2291   bitmap_free(&m_locked_partitions);
2292   bitmap_free(&m_partitions_to_reset);
2293 }
2294 
2295 
2296 /**
2297   Helper function for initializing all internal bitmaps.
2298 */
2299 
bool ha_partition::init_partition_bitmaps()
{
  DBUG_ENTER("ha_partition::init_partition_bitmaps");
  /*
    Initialize the bitmap we use to minimize ha_start_bulk_insert calls.
    One extra bit beyond m_tot_parts, matching its use elsewhere.
  */
  if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
    DBUG_RETURN(true);
  bitmap_clear_all(&m_bulk_insert_started);

  /* Initialize the bitmap we use to keep track of locked partitions */
  if (bitmap_init(&m_locked_partitions, NULL, m_tot_parts, FALSE))
  {
    /* Unwind only the bitmaps initialized so far. */
    bitmap_free(&m_bulk_insert_started);
    DBUG_RETURN(true);
  }
  bitmap_clear_all(&m_locked_partitions);

  /*
    Initialize the bitmap we use to keep track of partitions which may have
    something to reset in ha_reset().
  */
  if (bitmap_init(&m_partitions_to_reset, NULL, m_tot_parts, FALSE))
  {
    bitmap_free(&m_bulk_insert_started);
    bitmap_free(&m_locked_partitions);
    DBUG_RETURN(true);
  }
  bitmap_clear_all(&m_partitions_to_reset);

  /* Initialize the bitmap for read/lock_partitions */
  if (!m_is_clone_of)
  {
    /* Clones share read/lock_partitions with their original handler. */
    assert(!m_clone_mem_root);
    if (m_part_info->set_partition_bitmaps(NULL))
    {
      /* All three bitmaps are live here, so the helper can free them. */
      free_partition_bitmaps();
      DBUG_RETURN(true);
    }
  }
  DBUG_RETURN(false);
}
2340 
2341 
2342 /*
2343   Open handler object
2344 
2345   SYNOPSIS
2346     open()
2347     name                  Full path of table name
2348     mode                  Open mode flags
2349     test_if_locked        ?
2350 
2351   RETURN VALUE
2352     >0                    Error
2353     0                     Success
2354 
2355   DESCRIPTION
2356     Used for opening tables. The name will be the name of the file.
2357     A table is opened when it needs to be opened. For instance
2358     when a request comes in for a select on the table (tables are not
2359     open and closed for each request, they are cached).
2360 
2361     Called from handler.cc by handler::ha_open(). The server opens all tables
2362     by calling ha_open() which then calls the handler specific open().
2363 */
2364 
open(const char * name,int mode,uint test_if_locked)2365 int ha_partition::open(const char *name, int mode, uint test_if_locked)
2366 {
2367   char *name_buffer_ptr;
2368   int error= HA_ERR_INITIALIZATION;
2369   handler **file;
2370   char name_buff[FN_REFLEN];
2371   ulonglong check_table_flags;
2372   DBUG_ENTER("ha_partition::open");
2373 
2374   assert(table->s == table_share);
2375   assert(m_part_info);
2376   ref_length= 0;
2377   m_mode= mode;
2378   m_open_test_lock= test_if_locked;
2379   if (get_from_handler_file(name, &table->mem_root, MY_TEST(m_is_clone_of)))
2380     DBUG_RETURN(error);
2381   name_buffer_ptr= m_name_buffer_ptr;
2382 
2383   /* Check/update the partition share. */
2384   lock_shared_ha_data();
2385   if (part_share->populate_partition_name_hash(m_part_info))
2386   {
2387     unlock_shared_ha_data();
2388     DBUG_RETURN(HA_ERR_INITIALIZATION);
2389   }
2390   if (!part_share->auto_inc_mutex && table->found_next_number_field)
2391   {
2392     if (part_share->init_auto_inc_mutex(table_share))
2393     {
2394       unlock_shared_ha_data();
2395       DBUG_RETURN(HA_ERR_INITIALIZATION);
2396     }
2397   }
2398   unlock_shared_ha_data();
2399 
2400   if (open_partitioning(part_share))
2401   {
2402     goto err;
2403   }
2404   assert(!m_file_tot_parts || m_file_tot_parts == m_tot_parts);
2405   if (!m_part_ids_sorted_by_num_of_records)
2406   {
2407     if (!(m_part_ids_sorted_by_num_of_records=
2408             (uint32*) my_malloc(key_memory_ha_partition_part_ids,
2409                                 m_tot_parts * sizeof(uint32), MYF(MY_WME))))
2410     {
2411       goto err;
2412     }
2413     uint32 i;
2414     /* Initialize it with all partition ids. */
2415     for (i= 0; i < m_tot_parts; i++)
2416       m_part_ids_sorted_by_num_of_records[i]= i;
2417   }
2418 
2419   if (init_partition_bitmaps())
2420   {
2421     goto err;
2422   }
2423 
2424   assert(m_part_info);
2425 
2426   if (m_is_clone_of)
2427   {
2428     uint i, alloc_len;
2429     assert(m_clone_mem_root);
2430     /* Allocate an array of handler pointers for the partitions handlers. */
2431     alloc_len= (m_tot_parts + 1) * sizeof(handler*);
2432     if (!(m_file= (handler **) alloc_root(m_clone_mem_root, alloc_len)))
2433     {
2434       error= HA_ERR_INITIALIZATION;
2435       goto err_alloc;
2436     }
2437     memset(m_file, 0, alloc_len);
2438     /*
2439       Populate them by cloning the original partitions. This also opens them.
2440       Note that file->ref is allocated too.
2441     */
2442     file= m_is_clone_of->m_file;
2443     for (i= 0; i < m_tot_parts; i++)
2444     {
2445       create_partition_name(name_buff, name, name_buffer_ptr, NORMAL_PART_NAME,
2446                             FALSE);
2447       /* ::clone() will also set ha_share from the original. */
2448       if (!(m_file[i]= file[i]->clone(name_buff, m_clone_mem_root)))
2449       {
2450         error= HA_ERR_INITIALIZATION;
2451         file= &m_file[i];
2452         goto err_handler;
2453       }
2454       name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
2455     }
2456   }
2457   else
2458   {
2459    file= m_file;
2460    do
2461    {
2462       create_partition_name(name_buff, name, name_buffer_ptr, NORMAL_PART_NAME,
2463                             FALSE);
2464       if ((error= (*file)->ha_open(table, name_buff, mode,
2465                                    test_if_locked | HA_OPEN_NO_PSI_CALL)))
2466         goto err_handler;
2467       if (m_file == file)
2468         m_num_locks= (*file)->lock_count();
2469       assert(m_num_locks == (*file)->lock_count());
2470       name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
2471     } while (*(++file));
2472   }
2473 
2474   file= m_file;
2475   ref_length= (*file)->ref_length;
2476   check_table_flags= (((*file)->ha_table_flags() &
2477                        ~(PARTITION_DISABLED_TABLE_FLAGS)) |
2478                       (PARTITION_ENABLED_TABLE_FLAGS));
2479   while (*(++file))
2480   {
2481     /* MyISAM can have smaller ref_length for partitions with MAX_ROWS set */
2482     set_if_bigger(ref_length, ((*file)->ref_length));
2483     /*
2484       Verify that all partitions have the same set of table flags.
2485       Mask all flags that partitioning enables/disables.
2486     */
2487     if (check_table_flags != (((*file)->ha_table_flags() &
2488                                ~(PARTITION_DISABLED_TABLE_FLAGS)) |
2489                               (PARTITION_ENABLED_TABLE_FLAGS)))
2490     {
2491       error= HA_ERR_INITIALIZATION;
2492       /* set file to last handler, so all of them are closed */
2493       file = &m_file[m_tot_parts - 1];
2494       goto err_handler;
2495     }
2496   }
2497   key_used_on_scan= m_file[0]->key_used_on_scan;
2498   implicit_emptied= m_file[0]->implicit_emptied;
2499   /*
2500     Add 2 bytes for partition id in position ref length.
2501     ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
2502   */
2503   ref_length+= PARTITION_BYTES_IN_POS;
2504 
2505   /*
2506     Release buffer read from .par file. It will not be reused again after
2507     being opened once.
2508   */
2509   clear_handler_file();
2510 
2511   /*
2512     Some handlers update statistics as part of the open call. This will in
2513     some cases corrupt the statistics of the partition handler and thus
2514     to ensure we have correct statistics we call info from open after
2515     calling open on all individual handlers.
2516   */
2517   m_handler_status= handler_opened;
2518   if (m_part_info->part_expr)
2519     m_part_func_monotonicity_info=
2520                             m_part_info->part_expr->get_monotonicity_info();
2521   else if (m_part_info->list_of_part_fields)
2522     m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
2523   info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
2524   DBUG_RETURN(0);
2525 
2526 err_handler:
2527   DEBUG_SYNC(ha_thd(), "partition_open_error");
2528   while (file-- != m_file)
2529     (*file)->ha_close();
2530 err_alloc:
2531   free_partition_bitmaps();
2532 err:
2533   close_partitioning();
2534 
2535   DBUG_RETURN(error);
2536 }
2537 
2538 
2539 /*
  Disabled since it is not possible to prune yet.
  Without pruning, it would need to rebind/unbind every partition in every
  statement which uses a table from the table cache. It would also use
  as many PSI_tables as there are partitions.
2544 */
2545 #ifdef HAVE_M_PSI_PER_PARTITION
unbind_psi()2546 void ha_partition::unbind_psi()
2547 {
2548   uint i;
2549 
2550   DBUG_ENTER("ha_partition::unbind_psi");
2551   handler::unbind_psi();
2552   for (i= 0; i < m_tot_parts; i++)
2553   {
2554     assert(m_file[i] != NULL);
2555     m_file[i]->unbind_psi();
2556   }
2557   DBUG_VOID_RETURN;
2558 }
2559 
rebind_psi()2560 void ha_partition::rebind_psi()
2561 {
2562   uint i;
2563 
2564   DBUG_ENTER("ha_partition::rebind_psi");
2565   handler::rebind_psi();
2566   for (i= 0; i < m_tot_parts; i++)
2567   {
2568     assert(m_file[i] != NULL);
2569     m_file[i]->rebind_psi();
2570   }
2571   DBUG_VOID_RETURN;
2572 }
2573 #endif /* HAVE_M_PSI_PER_PARTITION */
2574 
2575 
2576 /**
2577   Clone the open and locked partitioning handler.
2578 
2579   @param  mem_root  MEM_ROOT to use.
2580 
2581   @return Pointer to the successfully created clone or NULL
2582 
2583   @details
2584   This function creates a new ha_partition handler as a clone/copy. The
2585   original (this) must already be opened and locked. The clone will use
2586   the originals m_part_info.
2587   It also allocates memory for ref + ref_dup.
2588   In ha_partition::open() it will clone its original handlers partitions
2589   which will allocate then on the correct MEM_ROOT and also open them.
2590 */
2591 
clone(const char * name,MEM_ROOT * mem_root)2592 handler *ha_partition::clone(const char *name, MEM_ROOT *mem_root)
2593 {
2594   ha_partition *new_handler;
2595 
2596   DBUG_ENTER("ha_partition::clone");
2597 
2598   /* If this->table == NULL, then the current handler has been created but not
2599   opened. Prohibit cloning such handler. */
2600   if (!table)
2601     DBUG_RETURN(NULL);
2602 
2603   new_handler= new (mem_root) ha_partition(ht, table_share, m_part_info,
2604                                            this, mem_root);
2605   if (!new_handler)
2606     DBUG_RETURN(NULL);
2607 
2608   /*
2609     We will not clone each partition's handler here, it will be done in
2610     ha_partition::open() for clones. Also set_ha_share_ref is not needed
2611     here, since 1) ha_share is copied in the constructor used above
2612     2) each partition's cloned handler will set it from its original.
2613   */
2614 
2615   /*
2616     Allocate new_handler->ref here because otherwise ha_open will allocate it
2617     on this->table->mem_root and we will not be able to reclaim that memory
2618     when the clone handler object is destroyed.
2619   */
2620   if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
2621                                               ALIGN_SIZE(ref_length)*2)))
2622     goto err;
2623 
2624   if (new_handler->ha_open(table, name,
2625                            table->db_stat,
2626                            HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL))
2627     goto err;
2628 
2629   DBUG_RETURN((handler*) new_handler);
2630 
2631 err:
2632   delete new_handler;
2633   DBUG_RETURN(NULL);
2634 }
2635 
2636 
2637 /*
2638   Close handler object
2639 
2640   SYNOPSIS
2641     close()
2642 
2643   RETURN VALUE
2644     >0                   Error code
2645     0                    Success
2646 
2647   DESCRIPTION
2648     Called from sql_base.cc, sql_select.cc, and table.cc.
2649     In sql_select.cc it is only used to close up temporary tables or during
2650     the process where a temporary table is converted over to being a
2651     myisam table.
2652     For sql_base.cc look at close_data_tables().
2653 */
2654 
close(void)2655 int ha_partition::close(void)
2656 {
2657   handler **file;
2658   DBUG_ENTER("ha_partition::close");
2659 
2660   assert(table->s == table_share);
2661   close_partitioning();
2662   free_partition_bitmaps();
2663   assert(m_part_info);
2664   file= m_file;
2665 
2666   do
2667   {
2668     (*file)->ha_close();
2669   } while (*(++file));
2670 
2671   m_handler_status= handler_closed;
2672   DBUG_RETURN(0);
2673 }
2674 
2675 /****************************************************************************
2676                 MODULE start/end statement
2677 ****************************************************************************/
2678 /*
2679   A number of methods to define various constants for the handler. In
2680   the case of the partition handler we need to use some max and min
2681   of the underlying handlers in most cases.
2682 */
2683 
2684 /*
2685   Set external locks on table
2686 
2687   SYNOPSIS
2688     external_lock()
2689     thd                    Thread object
2690     lock_type              Type of external lock
2691 
2692   RETURN VALUE
2693     >0                   Error code
2694     0                    Success
2695 
2696   DESCRIPTION
2697     First you should go read the section "locking functions for mysql" in
2698     lock.cc to understand this.
2699     This create a lock on the table. If you are implementing a storage engine
2700     that can handle transactions look at ha_berkeley.cc to see how you will
2701     want to go about doing this. Otherwise you should consider calling
2702     flock() here.
2703     Originally this method was used to set locks on file level to enable
2704     several MySQL Servers to work on the same data. For transactional
2705     engines it has been "abused" to also mean start and end of statements
2706     to enable proper rollback of statements and transactions. When LOCK
2707     TABLES has been issued the start_stmt method takes over the role of
2708     indicating start of statement but in this case there is no end of
2709     statement indicator(?).
2710 
2711     Called from lock.cc by lock_external() and unlock_external(). Also called
2712     from sql_table.cc by copy_data_between_tables().
2713 */
2714 
int ha_partition::external_lock(THD *thd, int lock_type)
{
  uint error;
  uint i, first_used_partition;
  MY_BITMAP *used_partitions;
  DBUG_ENTER("ha_partition::external_lock");

  assert(!m_auto_increment_lock && !m_auto_increment_safe_stmt_log_lock);

  /*
    On unlock, visit only the partitions we actually locked earlier;
    on lock, the statement's lock_partitions bitmap drives the loop.
  */
  if (lock_type == F_UNLCK)
    used_partitions= &m_locked_partitions;
  else
    used_partitions= &(m_part_info->lock_partitions);

  first_used_partition= bitmap_get_first_set(used_partitions);

  for (i= first_used_partition;
       i < m_tot_parts;
       i= bitmap_get_next_set(used_partitions, i))
  {
    DBUG_PRINT("info", ("external_lock(thd, %d) part %d", lock_type, i));
    if ((error= m_file[i]->ha_external_lock(thd, lock_type)))
    {
      /* Unlock errors are ignored; lock errors undo the locks taken so far. */
      if (lock_type != F_UNLCK)
        goto err_handler;
    }
    DBUG_PRINT("info", ("external_lock part %u lock %d", i, lock_type));
    /* Remember each successfully locked partition for later unlock. */
    if (lock_type != F_UNLCK)
      bitmap_set_bit(&m_locked_partitions, i);
  }
  if (lock_type == F_UNLCK)
  {
    bitmap_clear_all(used_partitions);
  }
  else
  {
    /* Add touched partitions to be included in reset(). */
    bitmap_union(&m_partitions_to_reset, used_partitions);
  }

  DBUG_RETURN(0);

err_handler:
  /* Roll back: unlock the partitions locked before the failing one. */
  uint j;
  for (j= first_used_partition;
       j < i;
       j= bitmap_get_next_set(&m_locked_partitions, j))
  {
    (void) m_file[j]->ha_external_lock(thd, F_UNLCK);
  }
  bitmap_clear_all(&m_locked_partitions);
  DBUG_RETURN(error);
}
2768 
2769 
2770 /*
2771   Get the lock(s) for the table and perform conversion of locks if needed
2772 
2773   SYNOPSIS
2774     store_lock()
2775     thd                   Thread object
2776     to                    Lock object array
2777     lock_type             Table lock type
2778 
2779   RETURN VALUE
2780     >0                   Error code
2781     0                    Success
2782 
2783   DESCRIPTION
2784     The idea with handler::store_lock() is the following:
2785 
2786     The statement decided which locks we should need for the table
2787     for updates/deletes/inserts we get WRITE locks, for SELECT... we get
2788     read locks.
2789 
2790     Before adding the lock into the table lock handler (see thr_lock.c)
2791     mysqld calls store lock with the requested locks.  Store lock can now
2792     modify a write lock to a read lock (or some other lock), ignore the
2793     lock (if we don't want to use MySQL table locks at all) or add locks
2794     for many tables (like we do when we are using a MERGE handler).
2795 
2796     Berkeley DB for partition changes all WRITE locks to TL_WRITE_ALLOW_WRITE
2797     (which signals that we are doing WRITES, but we are still allowing other
2798     readers and writers).
2799 
2800     When releasing locks, store_lock() is also called. In this case one
2801     usually doesn't have to do anything.
2802 
2803     store_lock is called when holding a global mutex to ensure that only
2804     one thread at a time changes the locking information of tables.
2805 
2806     In some exceptional cases MySQL may send a request for a TL_IGNORE;
2807     This means that we are requesting the same lock as last time and this
2808     should also be ignored. (This may happen when someone does a flush
2809     table when we have opened a part of the tables, in which case mysqld
2810     closes and reopens the tables and tries to get the same locks as last
2811     time).  In the future we will probably try to remove this.
2812 
2813     Called from lock.cc by get_lock_data().
2814 */
2815 
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)2816 THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
2817                                          THR_LOCK_DATA **to,
2818                                          enum thr_lock_type lock_type)
2819 {
2820   uint i;
2821   DBUG_ENTER("ha_partition::store_lock");
2822   assert(thd == current_thd);
2823 
2824   /*
2825     This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
2826     even when thd != table->in_use. In that case don't use partition pruning,
2827     but use all partitions instead to avoid using another threads structures.
2828   */
2829   if (thd != table->in_use)
2830   {
2831     for (i= 0; i < m_tot_parts; i++)
2832       to= m_file[i]->store_lock(thd, to, lock_type);
2833   }
2834   else
2835   {
2836     for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
2837          i < m_tot_parts;
2838          i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
2839     {
2840       DBUG_PRINT("info", ("store lock %d iteration", i));
2841       to= m_file[i]->store_lock(thd, to, lock_type);
2842     }
2843   }
2844   DBUG_RETURN(to);
2845 }
2846 
2847 /*
2848   Start a statement when table is locked
2849 
2850   SYNOPSIS
2851     start_stmt()
2852     thd                  Thread object
2853     lock_type            Type of external lock
2854 
2855   RETURN VALUE
2856     >0                   Error code
2857     0                    Success
2858 
2859   DESCRIPTION
2860     This method is called instead of external lock when the table is locked
2861     before the statement is executed.
2862 */
2863 
start_stmt(THD * thd,thr_lock_type lock_type)2864 int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
2865 {
2866   int error= 0;
2867   uint i;
2868   /* Assert that read_partitions is included in lock_partitions */
2869   assert(bitmap_is_subset(&m_part_info->read_partitions,
2870                           &m_part_info->lock_partitions));
2871   /*
2872     m_locked_partitions is set in previous external_lock/LOCK TABLES.
2873     Current statement's lock requests must not include any partitions
2874     not previously locked.
2875   */
2876   assert(bitmap_is_subset(&m_part_info->lock_partitions,
2877                           &m_locked_partitions));
2878   DBUG_ENTER("ha_partition::start_stmt");
2879 
2880   for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
2881        i < m_tot_parts;
2882        i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
2883   {
2884     if ((error= m_file[i]->start_stmt(thd, lock_type)))
2885       break;
2886     /* Add partition to be called in reset(). */
2887     bitmap_set_bit(&m_partitions_to_reset, i);
2888   }
2889   DBUG_RETURN(error);
2890 }
2891 
2892 
2893 /**
2894   Get number of lock objects returned in store_lock
2895 
2896   @returns Number of locks returned in call to store_lock
2897 
2898   @desc
2899     Returns the number of store locks needed in call to store lock.
2900     We return number of partitions we will lock multiplied with number of
2901     locks needed by each partition. Assists the above functions in allocating
2902     sufficient space for lock structures.
2903 */
2904 
lock_count() const2905 uint ha_partition::lock_count() const
2906 {
2907   DBUG_ENTER("ha_partition::lock_count");
2908   /*
2909     The caller want to know the upper bound, to allocate enough memory.
2910     There is no performance lost if we simply return maximum number locks
2911     needed, only some minor over allocation of memory in get_lock_data().
2912 
2913     Also notice that this may be called for another thread != table->in_use,
2914     when mysql_lock_abort_for_thread() is called. So this is more safe, then
2915     using number of partitions after pruning.
2916   */
2917   DBUG_RETURN(m_tot_parts * m_num_locks);
2918 }
2919 
2920 
2921 /*
2922   Unlock last accessed row
2923 
2924   SYNOPSIS
2925     unlock_row()
2926 
2927   RETURN VALUE
2928     NONE
2929 
2930   DESCRIPTION
2931     Record currently processed was not in the result set of the statement
2932     and is thus unlocked. Used for UPDATE and DELETE queries.
2933 */
2934 
unlock_row()2935 void ha_partition::unlock_row()
2936 {
2937   DBUG_ENTER("ha_partition::unlock_row");
2938   m_file[m_last_part]->unlock_row();
2939   DBUG_VOID_RETURN;
2940 }
2941 
2942 /**
2943   Check if semi consistent read was used
2944 
2945   SYNOPSIS
2946     was_semi_consistent_read()
2947 
2948   RETURN VALUE
2949     TRUE   Previous read was a semi consistent read
2950     FALSE  Previous read was not a semi consistent read
2951 
2952   DESCRIPTION
2953     See handler.h:
2954     In an UPDATE or DELETE, if the row under the cursor was locked by another
2955     transaction, and the engine used an optimistic read of the last
2956     committed row value under the cursor, then the engine returns 1 from this
2957     function. MySQL must NOT try to update this optimistic value. If the
2958     optimistic value does not match the WHERE condition, MySQL can decide to
2959     skip over this row. Currently only works for InnoDB. This can be used to
2960     avoid unnecessary lock waits.
2961 
2962     If this method returns nonzero, it will also signal the storage
2963     engine that the next read will be a locking re-read of the row.
2964 */
was_semi_consistent_read()2965 bool ha_partition::was_semi_consistent_read()
2966 {
2967   DBUG_ENTER("ha_partition::was_semi_consistent_read");
2968   assert(m_last_part < m_tot_parts &&
2969          m_part_info->is_partition_used(m_last_part));
2970   DBUG_RETURN(m_file[m_last_part]->was_semi_consistent_read());
2971 }
2972 
2973 /**
2974   Use semi consistent read if possible
2975 
2976   SYNOPSIS
2977     try_semi_consistent_read()
2978     yes   Turn on semi consistent read
2979 
2980   RETURN VALUE
2981     NONE
2982 
2983   DESCRIPTION
2984     See handler.h:
2985     Tell the engine whether it should avoid unnecessary lock waits.
2986     If yes, in an UPDATE or DELETE, if the row under the cursor was locked
2987     by another transaction, the engine may try an optimistic read of
2988     the last committed row value under the cursor.
2989     Note: prune_partitions are already called before this call, so using
2990     pruning is OK.
2991 */
try_semi_consistent_read(bool yes)2992 void ha_partition::try_semi_consistent_read(bool yes)
2993 {
2994   uint i;
2995   DBUG_ENTER("ha_partition::try_semi_consistent_read");
2996 
2997   i= m_part_info->get_first_used_partition();
2998   assert(i != MY_BIT_NONE);
2999   for (;
3000        i < m_tot_parts;
3001        i= m_part_info->get_next_used_partition(i))
3002   {
3003     m_file[i]->try_semi_consistent_read(yes);
3004   }
3005   DBUG_VOID_RETURN;
3006 }
3007 
3008 
3009 /****************************************************************************
3010                 MODULE change record
3011 ****************************************************************************/
3012 
3013 /** Insert a row to the partition.
3014   @param part_id  Partition to insert into.
3015   @param buf      The row in MySQL Row Format.
3016 
3017   @return Operation status.
3018     @retval 0    Success
3019     @retval != 0 Error code
3020 */
write_row_in_part(uint part_id,uchar * buf)3021 int ha_partition::write_row_in_part(uint part_id, uchar *buf)
3022 {
3023   int error;
3024   THD *thd= ha_thd();
3025   DBUG_ENTER("ha_partition::write_row_in_part");
3026   m_last_part= part_id;
3027   start_part_bulk_insert(thd, part_id);
3028 
3029   tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
3030   error= m_file[part_id]->ha_write_row(buf);
3031   reenable_binlog(thd);
3032   DBUG_RETURN(error);
3033 }
3034 
3035 
update_row_in_part(uint part_id,const uchar * old_data,uchar * new_data)3036 int ha_partition::update_row_in_part(uint part_id,
3037                                      const uchar *old_data,
3038                                      uchar *new_data)
3039 {
3040   int error;
3041   THD *thd= ha_thd();
3042   DBUG_ENTER("ha_partition::update_row_in_part");
3043   start_part_bulk_insert(thd, part_id);
3044   tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
3045   error= m_file[part_id]->ha_update_row(old_data, new_data);
3046   reenable_binlog(thd);
3047   DBUG_RETURN(error);
3048 }
3049 
3050 
3051 /**
3052   Delete an existing row in the partition.
3053 
3054   This will delete a row. buf will contain a copy of the row to be deleted.
3055   The server will call this right after the current row has been read
3056   (from either a previous rnd_xxx() or index_xxx() call).
3057   If you keep a pointer to the last row or can access a primary key it will
3058   make doing the deletion quite a bit easier.
3059   Keep in mind that the server does no guarantee consecutive deletions.
3060   ORDER BY clauses can be used.
3061 
3062   buf is either record[0] or record[1]
3063 
3064   @param part_id  The partition to delete the row from.
3065   @param buf      The record in MySQL Row Format.
3066 
3067   @return Operation status.
3068     @retval 0    Success
3069     @retval != 0 Error code
3070 */
3071 
delete_row_in_part(uint part_id,const uchar * buf)3072 int ha_partition::delete_row_in_part(uint part_id, const uchar *buf)
3073 {
3074   int error;
3075   THD *thd= ha_thd();
3076   DBUG_ENTER("ha_partition::delete_row_in_part");
3077 
3078   m_last_part= part_id;
3079   /* Do not replicate low level changes, already registered in ha_* wrapper. */
3080   tmp_disable_binlog(thd);
3081   error= m_file[part_id]->ha_delete_row(buf);
3082   reenable_binlog(thd);
3083   DBUG_RETURN(error);
3084 }
3085 
3086 
3087 /*
3088   Delete all rows in a table
3089 
3090   SYNOPSIS
3091     delete_all_rows()
3092 
3093   RETURN VALUE
3094     >0                       Error Code
3095     0                        Success
3096 
3097   DESCRIPTION
3098     Used to delete all rows in a table. Both for cases of truncate and
3099     for cases where the optimizer realizes that all rows will be
3100     removed as a result of a SQL statement.
3101 
3102     Called from item_sum.cc by Item_func_group_concat::clear(),
3103     Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
3104     Called from sql_delete.cc by mysql_delete().
3105     Called from sql_select.cc by JOIN::reset().
3106     Called from sql_union.cc by st_select_lex_unit::exec().
3107 */
3108 
delete_all_rows()3109 int ha_partition::delete_all_rows()
3110 {
3111   int error;
3112   uint i;
3113   DBUG_ENTER("ha_partition::delete_all_rows");
3114 
3115   for (i= m_part_info->get_first_used_partition();
3116        i < m_tot_parts;
3117        i= m_part_info->get_next_used_partition(i))
3118   {
3119     /* Can be pruned, like DELETE FROM t PARTITION (pX) */
3120     if ((error= m_file[i]->ha_delete_all_rows()))
3121       DBUG_RETURN(error);
3122   }
3123   DBUG_RETURN(0);
3124 }
3125 
3126 
3127 /**
3128   Manually truncate the table.
3129 
3130   @retval  0    Success.
3131   @retval  > 0  Error code.
3132 */
3133 
truncate()3134 int ha_partition::truncate()
3135 {
3136   int error;
3137   handler **file;
3138   DBUG_ENTER("ha_partition::truncate");
3139 
3140   /*
3141     TRUNCATE also means resetting auto_increment. Hence, reset
3142     it so that it will be initialized again at the next use.
3143   */
3144   if (table->found_next_number_field)
3145   {
3146     // TODO: Create Partition_helper::reset_auto_inc().
3147     lock_auto_increment();
3148     part_share->next_auto_inc_val= 0;
3149     part_share->auto_inc_initialized= false;
3150     unlock_auto_increment();
3151   }
3152 
3153   file= m_file;
3154   do
3155   {
3156     if ((error= (*file)->ha_truncate()))
3157       DBUG_RETURN(error);
3158   } while (*(++file));
3159   DBUG_RETURN(0);
3160 }
3161 
3162 
3163 /**
3164   Truncate a set of specific partitions.
3165 
3166   @remark Auto increment value will be truncated in that partition as well!
3167 
3168   ALTER TABLE t TRUNCATE PARTITION ...
3169 */
3170 
truncate_partition_low()3171 int ha_partition::truncate_partition_low()
3172 {
3173   int error= 0;
3174   List_iterator<partition_element> part_it(m_part_info->partitions);
3175   uint i= 0;
3176   DBUG_ENTER("ha_partition::truncate_partition");
3177 
3178   /*
3179     TRUNCATE also means resetting auto_increment. Hence, reset
3180     it so that it will be initialized again at the next use.
3181   */
3182   if (table->found_next_number_field)
3183   {
3184     lock_auto_increment();
3185     part_share->next_auto_inc_val= 0;
3186     part_share->auto_inc_initialized= false;
3187     unlock_auto_increment();
3188   }
3189 
3190   for (i= m_part_info->get_first_used_partition();
3191        i < m_tot_parts;
3192        i= m_part_info->get_next_used_partition(i))
3193   {
3194     DBUG_PRINT("info", ("truncate partition %u", i));
3195     if ((error= m_file[i]->ha_truncate()))
3196       break;
3197   }
3198   if (error)
3199   {
3200     /* Reset to PART_NORMAL. */
3201     set_all_part_state(m_part_info, PART_NORMAL);
3202   }
3203   DBUG_RETURN(error);
3204 }
3205 
3206 
3207 /*
3208   Start a large batch of insert rows
3209 
3210   SYNOPSIS
3211     start_bulk_insert()
3212     rows                  Number of rows to insert
3213 
3214   RETURN VALUE
3215     NONE
3216 
3217   DESCRIPTION
3218     rows == 0 means we will probably insert many rows
3219 */
start_bulk_insert(ha_rows rows)3220 void ha_partition::start_bulk_insert(ha_rows rows)
3221 {
3222   DBUG_ENTER("ha_partition::start_bulk_insert");
3223 
3224   m_bulk_inserted_rows= 0;
3225   bitmap_clear_all(&m_bulk_insert_started);
3226   /* use the last bit for marking if bulk_insert_started was called */
3227   bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
3228   DBUG_VOID_RETURN;
3229 }
3230 
3231 
3232 /*
3233   Check if start_bulk_insert has been called for this partition,
3234   if not, call it and mark it called
3235 */
start_part_bulk_insert(THD * thd,uint part_id)3236 void ha_partition::start_part_bulk_insert(THD *thd, uint part_id)
3237 {
3238   long old_buffer_size;
3239   if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
3240       bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
3241   {
3242     assert(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
3243     old_buffer_size= thd->variables.read_buff_size;
3244     /* Update read_buffer_size for this partition */
3245     thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
3246     m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
3247     bitmap_set_bit(&m_bulk_insert_started, part_id);
3248     thd->variables.read_buff_size= old_buffer_size;
3249   }
3250   m_bulk_inserted_rows++;
3251 }
3252 
3253 /*
3254   Estimate the read buffer size for each partition.
3255   SYNOPSIS
3256     ha_partition::estimate_read_buffer_size()
3257     original_size  read buffer size originally set for the server
3258   RETURN VALUE
3259     estimated buffer size.
3260   DESCRIPTION
3261     If the estimated number of rows to insert is less than 10 (but not 0)
3262     the new buffer size is same as original buffer size.
3263     In case of first partition of when partition function is monotonic
3264     new buffer size is same as the original buffer size.
3265     For rest of the partition total buffer of 10*original_size is divided
3266     equally if number of partition is more than 10 other wise each partition
3267     will be allowed to use original buffer size.
3268 */
estimate_read_buffer_size(long original_size)3269 long ha_partition::estimate_read_buffer_size(long original_size)
3270 {
3271   /*
3272     If number of rows to insert is less than 10, but not 0,
3273     return original buffer size.
3274   */
3275   if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
3276     return (original_size);
3277   /*
3278     If first insert/partition and monotonic partition function,
3279     allow using buffer size originally set.
3280    */
3281   if (!m_bulk_inserted_rows &&
3282       m_part_func_monotonicity_info != NON_MONOTONIC &&
3283       m_tot_parts > 1)
3284     return original_size;
3285   /*
3286     Allow total buffer used in all partition to go up to 10*read_buffer_size.
3287     11*read_buffer_size in case of monotonic partition function.
3288   */
3289 
3290   if (m_tot_parts < 10)
3291       return original_size;
3292   return (original_size * 10 / m_tot_parts);
3293 }
3294 
3295 /*
3296   Try to predict the number of inserts into this partition.
3297 
3298   If less than 10 rows (including 0 which means Unknown)
3299     just give that as a guess
3300   If monotonic partitioning function was used
3301     guess that 50 % of the inserts goes to the first partition
3302   For all other cases, guess on equal distribution between the partitions
3303 */
guess_bulk_insert_rows()3304 ha_rows ha_partition::guess_bulk_insert_rows()
3305 {
3306   DBUG_ENTER("guess_bulk_insert_rows");
3307 
3308   if (estimation_rows_to_insert < 10)
3309     DBUG_RETURN(estimation_rows_to_insert);
3310 
3311   /* If first insert/partition and monotonic partition function, guess 50%.  */
3312   if (!m_bulk_inserted_rows &&
3313       m_part_func_monotonicity_info != NON_MONOTONIC &&
3314       m_tot_parts > 1)
3315     DBUG_RETURN(estimation_rows_to_insert / 2);
3316 
3317   /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
3318   if (m_bulk_inserted_rows < estimation_rows_to_insert)
3319     DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
3320                 / m_tot_parts) + 1);
3321   /* The estimation was wrong, must say 'Unknown' */
3322   DBUG_RETURN(0);
3323 }
3324 
3325 
3326 /**
3327   Finish a large batch of insert rows.
3328 
3329   @return Operation status.
3330     @retval     0 Success
3331     @retval  != 0 Error code
3332 */
3333 
end_bulk_insert()3334 int ha_partition::end_bulk_insert()
3335 {
3336   int error= 0;
3337   uint i;
3338   DBUG_ENTER("ha_partition::end_bulk_insert");
3339 
3340   if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
3341   {
3342     assert(0);
3343     DBUG_RETURN(error);
3344   }
3345 
3346   for (i= bitmap_get_first_set(&m_bulk_insert_started);
3347        i < m_tot_parts;
3348        i= bitmap_get_next_set(&m_bulk_insert_started, i))
3349   {
3350     int tmp;
3351     if ((tmp= m_file[i]->ha_end_bulk_insert()))
3352       error= tmp;
3353   }
3354   bitmap_clear_all(&m_bulk_insert_started);
3355   DBUG_EXECUTE_IF("ha_partition_end_bulk_insert_fail", { error= 1; set_my_errno(EPERM); } );
3356   DBUG_RETURN(error);
3357 }
3358 
3359 
3360 /****************************************************************************
3361                 MODULE full table scan
3362 ****************************************************************************/
3363 
3364 /**
3365   Initialize partition for random reads.
3366 
3367   rnd_init() is called when the server wants the storage engine to do a
3368   table scan or when the server wants to access data through rnd_pos.
3369 
3370   When scan is used we will scan one handler partition at a time.
3371   When preparing for rnd_pos we will initialize all handler partitions.
3372   No extra cache handling is needed when scanning is not performed.
3373 
3374   Before initializing we will call rnd_end to ensure that we clean up from
3375   any previous incarnation of a table scan.
3376 
3377   @param part_id  partition to initialize.
3378   @param scan     false for initialize for random reads through rnd_pos()
3379                   true for initialize for random scan through rnd_next().
3380 
3381   @return Operation status.
3382     @retval    0  Success
3383     @retval != 0  Error code
3384 */
3385 
rnd_init_in_part(uint part_id,bool scan)3386 int ha_partition::rnd_init_in_part(uint part_id, bool scan)
3387 {
3388   if (scan)
3389   {
3390     /*
3391       We have a partition and we are scanning with rnd_next
3392       so we bump our cache.
3393     */
3394     late_extra_cache(part_id);
3395   }
3396   return m_file[part_id]->ha_rnd_init(scan);
3397 }
3398 
3399 
3400 /**
3401   End of a partition scan.
3402 
3403   @return Operation status.
3404     @retval    0  Success
3405     @retval != 0  Error code
3406 */
3407 
rnd_end_in_part(uint part_id,bool scan)3408 int ha_partition::rnd_end_in_part(uint part_id, bool scan)
3409 {
3410   if (scan && m_extra_cache_part_id != NO_CURRENT_PART_ID)
3411   {
3412     late_extra_no_cache(part_id);
3413   }
3414   return m_file[part_id]->ha_rnd_end();
3415 }
3416 
3417 
3418 /**
3419   Read next row during full partition scan (scan in random row order).
3420 
3421   This is called for each row of the table scan. When you run out of records
3422   you should return HA_ERR_END_OF_FILE.
3423   The Field structure for the table is the key to getting data into buf
3424   in a manner that will allow the server to understand it.
3425 
3426   @param[in]     part_id  Partition to read from.
3427   @param[in,out] buf      buffer that should be filled with data.
3428 
3429   @return Operation status.
3430     @retval    0  Success
3431     @retval != 0  Error code
3432 */
3433 
rnd_next_in_part(uint part_id,uchar * buf)3434 int ha_partition::rnd_next_in_part(uint part_id, uchar *buf)
3435 {
3436   return m_file[part_id]->ha_rnd_next(buf);
3437 }
3438 
3439 
3440 /**
3441   Save position of current row.
3442 
3443   position() is called after each call to rnd_next() if the data needs
3444   to be ordered.
3445 
3446   The server uses ref to store data. ref_length in the above case is
3447   the size needed to store current_position. ref is just a byte array
3448   that the server will maintain. If you are using offsets to mark rows, then
3449   current_position should be the offset. If it is a primary key like in
3450   InnoDB, then it needs to be a primary key.
3451 
3452   @param record  Current record in MySQL Row Format.
3453 
3454   @note m_last_part must be set (normally done by
3455   Partition_helper::return_top_record()).
3456 */
3457 
position_in_last_part(uchar * ref,const uchar * record)3458 void ha_partition::position_in_last_part(uchar *ref, const uchar *record)
3459 {
3460   handler *file= m_file[m_last_part];
3461   file->position(record);
3462   memcpy(ref, file->ref, file->ref_length);
3463   /* MyISAM partitions can have different ref_length depending on MAX_ROWS! */
3464   uint pad_length= ref_length - PARTITION_BYTES_IN_POS - file->ref_length;
3465   if (pad_length)
3466     memset((ref + PARTITION_BYTES_IN_POS + file->ref_length), 0, pad_length);
3467 }
3468 
3469 
3470 /**
3471   Read row from partition using position.
3472 
3473   This is like rnd_next, but you are given a position to use to determine
3474   the row. The position will be pointing to data of length handler::ref_length
3475   that handler::ref was set by position(record). Tables clustered on primary
3476   key usually use the full primary key as reference (like InnoDB). Heap based
3477   tables usually returns offset in heap file (like MyISAM).
3478 
3479   @param[in]     part_id  Partition to read from.
3480   @param[in,out] buf      Buffer to fill with record in MySQL format.
3481   @param[in]     pos      Position (data pointed to from ::ref) from position().
3482 
3483   @return Operation status.
3484     @retval    0  Success
3485     @retval != 0  Error code
3486 */
3487 
rnd_pos_in_part(uint part_id,uchar * buf,uchar * pos)3488 int ha_partition::rnd_pos_in_part(uint part_id, uchar *buf, uchar *pos)
3489 {
3490   return m_file[part_id]->ha_rnd_pos(buf, pos);
3491 }
3492 
3493 
3494 /****************************************************************************
3495                 MODULE index scan
3496 ****************************************************************************/
3497 /*
3498   Positions an index cursor to the index specified in the handle. Fetches the
3499   row if available. If the key value is null, begin at the first key of the
3500   index.
3501 */
3502 
3503 /** Compare key and rowid.
3504   Helper function for sorting records in the priority queue.
3505   a/b points to table->record[0] rows which must have the
3506   key fields set. The bytes before a and b store the handler::ref.
3507   This is used for comparing/sorting rows first according to
3508   KEY and if same KEY, by handler::ref (rowid).
3509 
3510   @param key_info  Null terminated array of index information
3511   @param a         Pointer to record+ref in first record
3512   @param b         Pointer to record+ref in second record
3513 
3514   @return Return value is SIGN(first_rec - second_rec)
3515     @retval  0                  Keys are equal
3516     @retval -1                  second_rec is greater than first_rec
3517     @retval +1                  first_rec is greater than second_rec
3518 */
3519 
key_and_ref_cmp(KEY ** key_info,uchar * a,uchar * b)3520 static int key_and_ref_cmp(KEY** key_info, uchar *a, uchar *b)
3521 {
3522   int cmp= key_rec_cmp(key_info, a, b);
3523   if (cmp)
3524     return cmp;
3525   /*
3526     We must compare by handler::ref, which is added before the record,
3527     in the priority queue.
3528   */
3529   KEY **key = key_info;
3530   uint ref_length= (*key)->table->file->ref_length;
3531   return (*key)->table->file->cmp_ref(a - ref_length, b - ref_length);
3532 }
3533 
3534 
3535 /**
3536   Initialize partition before start of index scan.
3537 
3538   @param part    Partition to initialize the index in.
3539   @param inx     Index number.
3540   @param sorted  Is rows to be returned in sorted order.
3541 
3542   @return Operation status
3543     @retval    0  Success
3544     @retval != 0  Error code
3545 */
3546 
index_init_in_part(uint part,uint keynr,bool sorted)3547 int ha_partition::index_init_in_part(uint part, uint keynr, bool sorted)
3548 {
3549   return m_file[part]->ha_index_init(keynr, sorted);
3550 }
3551 
3552 
3553 /**
3554   End of index scan in a partition.
3555 
3556   index_end_in_part is called at the end of an index scan to clean up any
3557   things needed to clean up.
3558 
3559   @return Operation status.
3560     @retval    0  Success
3561     @retval != 0  Error code
3562 */
3563 
index_end_in_part(uint part)3564 int ha_partition::index_end_in_part(uint part)
3565 {
3566   return m_file[part]->ha_index_end();
3567 }
3568 
3569 
3570 /**
3571   Read one record in an index scan and start an index scan in one partition.
3572 
3573   index_read_map_in_part starts a new index scan using a start key.
3574   index_read_map_in_part can be restarted without calling index_end on
3575   the previous index scan and without calling index_init.
3576   In this case the index_read_map_in_part is on the same index as the previous
3577   index_scan. This is particularly used in conjunction with multi read ranges.
3578 
3579   @param[in]     part         Partition to read from.
3580   @param[in,out] buf          Read row in MySQL Row Format
3581   @param[in]     key          Key parts in consecutive order
3582   @param[in]     keypart_map  Which part of key is used
3583   @param[in]     find_flag    What type of key condition is used
3584 
3585   @return Operation status.
3586     @retval    0  Success
3587     @retval != 0  Error code
3588 */
3589 
index_read_map_in_part(uint part,uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3590 int ha_partition::index_read_map_in_part(uint part,
3591                                          uchar *buf,
3592                                          const uchar *key,
3593                                          key_part_map keypart_map,
3594                                          enum ha_rkey_function find_flag)
3595 {
3596   return m_file[part]->ha_index_read_map(buf, key, keypart_map, find_flag);
3597 }
3598 
3599 
3600 /**
3601   Start an index scan from leftmost record and return first record.
3602 
3603   index_first() asks for the first key in the index.
3604   This is similar to index_read except that there is no start key since
3605   the scan starts from the leftmost entry and proceeds forward with
3606   index_next.
3607 
3608   @param[in]     part  Partition to read from.
3609   @param[in,out] buf   Read row in MySQL Row Format.
3610 
3611   @return Operation status.
3612     @retval    0  Success
3613     @retval != 0  Error code
3614 */
3615 
index_first_in_part(uint part,uchar * buf)3616 int ha_partition::index_first_in_part(uint part, uchar* buf)
3617 {
3618   return m_file[part]->ha_index_first(buf);
3619 }
3620 
3621 
3622 /**
3623   Start an index scan from rightmost record and return first record.
3624 
3625   index_last() asks for the last key in the index.
3626   This is similar to index_read except that there is no start key since
3627   the scan starts from the rightmost entry and proceeds forward with
3628   index_prev.
3629 
3630   @param[in]     part  Partition to read from.
3631   @param[in,out] buf   Read row in MySQL Row Format.
3632 
3633   @return Operation status.
3634     @retval    0  Success
3635     @retval != 0  Error code
3636 */
3637 
index_last_in_part(uint part,uchar * buf)3638 int ha_partition::index_last_in_part(uint part, uchar *buf)
3639 {
3640   return m_file[part]->ha_index_last(buf);
3641 }
3642 
3643 
3644 /**
3645   Read last using key.
3646 
3647   This is used in join_read_last_key to optimize away an ORDER BY.
3648   Can only be used on indexes supporting HA_READ_ORDER.
3649 
3650   @param[in,out] buf          Read row in MySQL Row Format
3651   @param[in]     key          Key
3652   @param[in]     keypart_map  Which part of key is used
3653 
3654   @return Operation status.
3655     @retval    0  Success
3656     @retval != 0  Error code
3657 */
3658 
index_read_last_map_in_part(uint part,uchar * buf,const uchar * key,key_part_map keypart_map)3659 int ha_partition::index_read_last_map_in_part(uint part,
3660                                               uchar *buf,
3661                                               const uchar *key,
3662                                               key_part_map keypart_map)
3663 {
3664   return m_file[part]->ha_index_read_last_map(buf, key, keypart_map);
3665 }
3666 
3667 
3668 /**
3669   Read index by key and keymap in a partition.
3670 
  @param[in]     part         Partition to read from
3672   @param[in,out] buf          Read row in MySQL Row Format
3673   @param[in]     index        Index to read from
3674   @param[in]     key          Key
3675   @param[in]     keypart_map  Which part of key is used
3676   @param[in]     find_flag    Direction/how to search.
3677 
3678   @return Operation status.
3679     @retval    0  Success
3680     @retval != 0  Error code
3681 */
3682 
index_read_idx_map_in_part(uint part,uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)3683 int ha_partition::index_read_idx_map_in_part(uint part,
3684                                              uchar *buf,
3685                                              uint index,
3686                                              const uchar *key,
3687                                              key_part_map keypart_map,
3688                                              enum ha_rkey_function find_flag)
3689 {
3690   return m_file[part]->ha_index_read_idx_map(buf,
3691                                              index,
3692                                              key,
3693                                              keypart_map,
3694                                              find_flag);
3695 }
3696 
3697 
3698 /**
3699   Read next record in a forward index scan.
3700 
3701   Used to read forward through the index (left to right, low to high).
3702 
3703   @param[in]     part  Partition to read from.
3704   @param[in,out] buf   Read row in MySQL Row Format.
3705 
3706   @return Operation status.
3707     @retval    0  Success
3708     @retval != 0  Error code
3709 */
3710 
index_next_in_part(uint part,uchar * buf)3711 int ha_partition::index_next_in_part(uint part, uchar *buf)
3712 {
3713   return m_file[part]->ha_index_next(buf);
3714 }
3715 
3716 
3717 /**
3718   Read next same record in partition.
3719 
3720   This routine is used to read the next but only if the key is the same
3721   as supplied in the call.
3722 
3723   @param[in]     part    Partition to read from.
3724   @param[in,out] buf     Read row in MySQL Row Format.
3725   @param[in]     key     Key.
3726   @param[in]     keylen  Length of key.
3727 
3728   @return Operation status.
3729     @retval    0  Success
3730     @retval != 0  Error code
3731 */
3732 
index_next_same_in_part(uint part,uchar * buf,const uchar * key,uint length)3733 int ha_partition::index_next_same_in_part(uint part,
3734                                           uchar *buf,
3735                                           const uchar *key,
3736                                           uint length)
3737 {
3738   return m_file[part]->ha_index_next_same(buf, key, length);
3739 }
3740 
3741 
3742 /**
3743   Read next record when performing index scan backwards.
3744 
3745   Used to read backwards through the index (right to left, high to low).
3746 
  @param[in]     part  Partition to read from.
  @param[in,out] buf   Read row in MySQL Row Format.
3748 
3749   @return Operation status.
3750     @retval    0  Success
3751     @retval != 0  Error code
3752 */
3753 
index_prev_in_part(uint part,uchar * buf)3754 int ha_partition::index_prev_in_part(uint part, uchar *buf)
3755 {
3756   return m_file[part]->ha_index_prev(buf);
3757 }
3758 
3759 
3760 /**
3761   Start a read of one range with start and end key.
3762 
  @param part_id       Partition to start in.
  @param buf           If non-NULL, the found row is also copied into buf.
  @param start_key     Specification of start key.
  @param end_key       Specification of end key.
  @param eq_range_arg  Is it equal range.
  @param sorted        Should records be returned in sorted order.
3768 
3769   @return Operation status.
3770     @retval    0  Success
3771     @retval != 0  Error code
3772 */
3773 
read_range_first_in_part(uint part_id,uchar * buf,const key_range * start_key,const key_range * end_key,bool eq_range_arg,bool sorted)3774 int ha_partition::read_range_first_in_part(uint part_id,
3775                                            uchar *buf,
3776                                            const key_range *start_key,
3777                                            const key_range *end_key,
3778                                            bool eq_range_arg,
3779                                            bool sorted)
3780 {
3781   int error;
3782   error= m_file[part_id]->read_range_first(start_key,
3783                                            end_key,
3784                                            eq_range_arg,
3785                                            sorted);
3786   if (!error && buf != NULL)
3787   {
3788     memcpy(buf, table->record[0], m_rec_length);
3789   }
3790   return error;
3791 }
3792 
3793 
3794 /**
3795   Read next record in read of a range with start and end key in partition.
3796 
  @param part  Partition to read from.
  @param buf   If non-NULL, the found row is also copied into buf.
3798 
3799   @return Operation status.
3800     @retval    0  Success
3801     @retval != 0  Error code
3802 */
3803 
read_range_next_in_part(uint part,uchar * buf)3804 int ha_partition::read_range_next_in_part(uint part, uchar *buf)
3805 {
3806   int error;
3807   error= m_file[part]->read_range_next();
3808   if (!error && buf != NULL)
3809   {
3810     memcpy(buf, table->record[0], m_rec_length);
3811   }
3812   return error;
3813 }
3814 
has_gap_locks() const3815 bool ha_partition::has_gap_locks() const
3816 {
3817   /* Pass the call to each partition */
3818   for (uint i= 0; i < m_tot_parts; i++)
3819   {
3820     if (!m_file[i]->has_gap_locks())
3821       return false;
3822   }
3823   return true;
3824 }
3825 
3826 /****************************************************************************
3827                 MODULE information calls
3828 ****************************************************************************/
3829 
3830 /*
3831   These are all first approximations of the extra, info, scan_time
3832   and read_time calls
3833 */
3834 
3835 /**
3836   Helper function for sorting according to number of rows in descending order.
3837 */
3838 
compare_number_of_records(ha_partition * me,const uint32 * a,const uint32 * b)3839 int ha_partition::compare_number_of_records(ha_partition *me,
3840                                             const uint32 *a,
3841                                             const uint32 *b)
3842 {
3843   handler **file= me->m_file;
3844   /* Note: sorting in descending order! */
3845   if (file[*a]->stats.records > file[*b]->stats.records)
3846     return -1;
3847   if (file[*a]->stats.records < file[*b]->stats.records)
3848     return 1;
3849   return 0;
3850 }
3851 
3852 
3853 /*
3854   General method to gather info from handler
3855 
3856   SYNOPSIS
3857     info()
3858     flag              Specifies what info is requested
3859 
3860   RETURN VALUE
3861     NONE
3862 
3863   DESCRIPTION
3864     ::info() is used to return information to the optimizer.
3865     Currently this table handler doesn't implement most of the fields
3866     really needed. SHOW also makes use of this data
    Another note, if your handler doesn't provide an exact record count,
    you will probably want to have the following in your code:
3869     if (records < 2)
3870       records = 2;
3871     The reason is that the server will optimize for cases of only a single
3872     record. If in a table scan you don't know the number of records
3873     it will probably be better to set records to two so you can return
3874     as many records as you need.
3875 
3876     Along with records a few more variables you may wish to set are:
3877       records
3878       deleted
3879       data_file_length
3880       index_file_length
3881       delete_length
3882       check_time
3883     Take a look at the public variables in handler.h for more information.
3884 
3885     Called in:
3886       filesort.cc
3887       ha_heap.cc
3888       item_sum.cc
3889       opt_sum.cc
3890       sql_delete.cc
3891      sql_delete.cc
3892      sql_derived.cc
3893       sql_select.cc
3894       sql_select.cc
3895       sql_select.cc
3896       sql_select.cc
3897       sql_select.cc
3898       sql_show.cc
3899       sql_show.cc
3900       sql_show.cc
3901       sql_show.cc
3902       sql_table.cc
3903       sql_union.cc
3904       sql_update.cc
3905 
3906     Some flags that are not implemented
3907       HA_STATUS_POS:
3908         This parameter is never used from the MySQL Server. It is checked in a
3909         place in MyISAM so could potentially be used by MyISAM specific
3910         programs.
3911       HA_STATUS_NO_LOCK:
3912       This is declared and often used. It's only used by MyISAM.
3913       It means that MySQL doesn't need the absolute latest statistics
3914       information. This may save the handler from doing internal locks while
3915       retrieving statistics data.
3916 */
3917 
int ha_partition::info(uint flag)
{
  uint no_lock_flag= flag & HA_STATUS_NO_LOCK;
  uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA;
  /* error keeps the first failure seen; we still visit all partitions. */
  int res, error= 0;
  DBUG_ENTER("ha_partition::info");

#ifndef NDEBUG
  if (bitmap_is_set_all(&(m_part_info->read_partitions)))
    DBUG_PRINT("info", ("All partitions are used"));
#endif /* NDEBUG */
  if (flag & HA_STATUS_AUTO)
  {
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (!table->found_next_number_field)
    {
      /* Table has no auto_increment column. */
      stats.auto_increment_value= 0;
    }
    else
    {
      /* Must lock to avoid two concurrent initializations. */
      lock_auto_increment();
      if (part_share->auto_inc_initialized)
      {
        stats.auto_increment_value= part_share->next_auto_inc_val;
      }
      else
      {
        error= initialize_auto_increment(no_lock_flag != 0);
      }
      unlock_auto_increment();
    }
  }
  if (flag & HA_STATUS_VARIABLE)
  {
    uint i;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    /*
      Calculates statistical variables
      records:           Estimate of number records in table
      We report sum (always at least 2 if not empty)
      deleted:           Estimate of number holes in the table due to
      deletes
      We report sum
      data_file_length:  Length of data file, in principle bytes in table
      We report sum
      index_file_length: Length of index file, in principle bytes in
      indexes in the table
      We report sum
      delete_length: Length of free space easily used by new records in table
      We report sum
      mean_record_length:Mean record length in the table
      We calculate this
      check_time:        Time of last check (only applicable to MyISAM)
      We report last time of all underlying handlers
    */
    handler *file;
    stats.records= 0;
    stats.deleted= 0;
    stats.data_file_length= 0;
    stats.index_file_length= 0;
    stats.check_time= 0;
    stats.delete_length= 0;
    /* Aggregate statistics over all used (read) partitions. */
    for (i= m_part_info->get_first_used_partition();
         i < m_tot_parts;
         i= m_part_info->get_next_used_partition(i))
    {
      file= m_file[i];
      res= file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      if (res && !error)
      {
        error= res;   /* Remember the first error but keep aggregating. */
      }
      stats.records+= file->stats.records;
      stats.deleted+= file->stats.deleted;
      stats.data_file_length+= file->stats.data_file_length;
      stats.index_file_length+= file->stats.index_file_length;
      stats.delete_length+= file->stats.delete_length;
      if (file->stats.check_time > stats.check_time)
        stats.check_time= file->stats.check_time;
    }
    /*
      The server optimizes for the single-row case, so never report exactly
      one row unless the engine guarantees an exact row count.
    */
    if (stats.records && stats.records < 2 &&
        !(m_file[0]->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
      stats.records= 2;
    if (stats.records > 0)
      stats.mean_rec_length= (ulong) (stats.data_file_length / stats.records);
    else
      stats.mean_rec_length= 0;
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    /*
      Recalculate loads of constant variables. MyISAM also sets things
      directly on the table share object.

      Check whether this should be fixed since handlers should not
      change things directly on the table object.

      Monty comment: This should NOT be changed!  It's the handlers
      responsibility to correct table->s->keys_xxxx information if keys
      have been disabled.

      The most important parameters set here is records per key on
      all indexes. block_size and primary key ref_length.

      For each index there is an array of rec_per_key.
      As an example if we have an index with three attributes a,b and c
      we will have an array of 3 rec_per_key.
      rec_per_key[0] is an estimate of number of records divided by
      number of unique values of the field a.
      rec_per_key[1] is an estimate of the number of records divided
      by the number of unique combinations of the fields a and b.
      rec_per_key[2] is an estimate of the number of records divided
      by the number of unique combinations of the fields a,b and c.

      Many handlers only set the value of rec_per_key when all fields
      are bound (rec_per_key[2] in the example above).

      If the handler doesn't support statistics, it should set all of the
      above to 0.

      We first scan through all partitions to get the one holding most rows.
      We will then allow the handler with the most rows to set
      the rec_per_key and use this as an estimate on the total table.

      max_data_file_length:     Maximum data file length
      We ignore it, is only used in
      SHOW TABLE STATUS
      max_index_file_length:    Maximum index file length
      We ignore it since it is never used
      block_size:               Block size used
      We set it to the value of the first handler
      ref_length:               We set this to the value calculated
      and stored in local object
      create_time:              Creation time of table

      So we calculate these constants by using the variables from the
      handler with most rows.
    */
    handler *file, **file_array;
    ulonglong max_records= 0;
    uint32 i= 0;
    uint32 handler_instance= 0;

    /* Walk the NULL-terminated handler array to find the largest partition. */
    file_array= m_file;
    do
    {
      file= *file_array;
      /* Get variables if not already done */
      if (!(flag & HA_STATUS_VARIABLE) ||
          !m_part_info->is_partition_used(file_array - m_file))
      {
        res= file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
        if (res && !error)
        {
          error= res;
        }
      }
      if (file->stats.records > max_records)
      {
        max_records= file->stats.records;
        handler_instance= i;
      }
      i++;
    } while (*(++file_array));
    /*
      Sort the array of part_ids by number of records
      in descending order.
    */
    my_qsort2((void*) m_part_ids_sorted_by_num_of_records,
              m_tot_parts,
              sizeof(uint32),
              (qsort2_cmp) compare_number_of_records,
              this);

    /* Let the partition with the most rows supply the constant statistics. */
    file= m_file[handler_instance];
    res= file->info(HA_STATUS_CONST | no_lock_flag);
    if (res && !error)
    {
      error= res;
    }
    stats.block_size= file->stats.block_size;
    stats.create_time= file->stats.create_time;
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    handler *file= m_file[m_last_part];
    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
    /*
      This flag is used to get index number of the unique index that
      reported duplicate key
      We will report the errkey on the last handler used and ignore the rest
      Note: not all engines support HA_STATUS_ERRKEY, so pre-set errkey.
    */
    file->errkey= errkey;
    res= file->info(HA_STATUS_ERRKEY | no_lock_flag);
    if (res && !error)
    {
      error= res;
    }
    errkey= file->errkey;
  }
  if (flag & HA_STATUS_TIME)
  {
    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
    /*
      This flag is used to set the latest update time of the table.
      Used by SHOW commands
      We will report the maximum of these times
    */
    stats.update_time= 0;
    for (uint i= m_part_info->get_first_used_partition();
         i < m_tot_parts;
         i= m_part_info->get_next_used_partition(i))
    {
      handler *file= m_file[i];
      res= file->info(HA_STATUS_TIME | no_lock_flag);
      if (res && !error)
      {
        error= res;
      }
      if (file->stats.update_time > stats.update_time)
        stats.update_time= file->stats.update_time;
    }
  }
  DBUG_RETURN(error);
}
4146 
4147 
get_dynamic_partition_info(ha_statistics * stat_info,ha_checksum * check_sum,uint part_id)4148 void ha_partition::get_dynamic_partition_info(ha_statistics *stat_info,
4149                                               ha_checksum *check_sum,
4150                                               uint part_id)
4151 {
4152   handler *file= m_file[part_id];
4153   assert(bitmap_is_set(&(m_part_info->read_partitions), part_id));
4154   file->info(HA_STATUS_TIME | HA_STATUS_VARIABLE |
4155              HA_STATUS_VARIABLE_EXTRA | HA_STATUS_NO_LOCK);
4156 
4157   stat_info->records=              file->stats.records;
4158   stat_info->mean_rec_length=      file->stats.mean_rec_length;
4159   stat_info->data_file_length=     file->stats.data_file_length;
4160   stat_info->max_data_file_length= file->stats.max_data_file_length;
4161   stat_info->index_file_length=    file->stats.index_file_length;
4162   stat_info->delete_length=        file->stats.delete_length;
4163   stat_info->create_time=          static_cast<ulong>(file->stats.create_time);
4164   stat_info->update_time=          file->stats.update_time;
4165   stat_info->check_time=           file->stats.check_time;
4166   *check_sum= 0;
4167   if (file->ha_table_flags() & HA_HAS_CHECKSUM)
4168     *check_sum= file->checksum();
4169   return;
4170 }
4171 
4172 
4173 /**
4174   General function to prepare handler for certain behavior.
4175 
4176   @param[in]    operation       operation to execute
4177 
4178   @return       status
4179     @retval     0               success
4180     @retval     >0              error code
4181 
4182   @detail
4183 
4184   extra() is called whenever the server wishes to send a hint to
4185   the storage engine. The MyISAM engine implements the most hints.
4186 
4187   We divide the parameters into the following categories:
4188   1) Operations used by most handlers
4189   2) Operations used by some non-MyISAM handlers
4190   3) Operations used only by MyISAM
4191   4) Operations only used by temporary tables for query processing
4192   5) Operations only used by MyISAM internally
4193   6) Operations not used at all
4194   7) Operations only used by federated tables for query processing
4195   8) Operations only used by NDB
4196   9) Operations only used by MERGE
4197   10) Operations only used by InnoDB
4198   11) Operations only used by partitioning
4199 
4200   The partition handler need to handle category 1), 2), 3), 10) and 11).
4201 
4202   1) Operations used by most handlers
4203   -----------------------------------
4204   HA_EXTRA_RESET:
4205     This option is used by most handlers and it resets the handler state
4206     to the same state as after an open call. This includes releasing
4207     any READ CACHE or WRITE CACHE or other internal buffer used.
4208 
4209     It is called from the reset method in the handler interface. There are
4210     three instances where this is called.
4211     1) After completing a INSERT ... SELECT ... query the handler for the
4212        table inserted into is reset
4213     2) It is called from close_thread_table which in turn is called from
4214        close_thread_tables except in the case where the tables are locked
4215        in which case ha_commit_stmt is called instead.
4216        It is only called from here if refresh_version hasn't changed and the
4217        table is not an old table when calling close_thread_table.
4218        close_thread_tables is called from many places as a general clean up
4219        function after completing a query.
4220     3) It is called when deleting the QUICK_RANGE_SELECT object if the
4221        QUICK_RANGE_SELECT object had its own handler object. It is called
       immediately before close of this local handler object.
4223   HA_EXTRA_KEYREAD:
4224   HA_EXTRA_NO_KEYREAD:
4225     These parameters are used to provide an optimisation hint to the handler.
4226     If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
4227     many handlers this means that the index-only scans can be used and it
4228     is not necessary to use the real records to satisfy this part of the
4229     query. Index-only scans is a very important optimisation for disk-based
4230     indexes. For main-memory indexes most indexes contain a reference to the
4231     record and thus KEYREAD only says that it is enough to read key fields.
4232     HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET
4233     will disable this option.
4234     The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
4235     feature is supported.
4236   HA_EXTRA_FLUSH:
4237     Indication to flush tables to disk, is supposed to be used to
4238     ensure disk based tables are flushed at end of query execution.
4239     Currently is never used.
4240   HA_EXTRA_PREPARE_FOR_RENAME:
4241     Informs the handler we are about to attempt a rename of the table.
4242     For handlers that have share open files (MyISAM key-file and
4243     Archive writer) they must close the files before rename is possible
4244     on Windows. This handler will only forward this call, since during
4245     ALTER TABLE ... ADD/DROP/REORGANIZE/COALESCE/... PARTITION we will
4246     close and remove all instances before rename/drop and does not need
4247     special treatment for this flag.
4248   HA_EXTRA_FORCE_REOPEN:
4249     Only used by MyISAM and Archive, called when altering table,
4250     closing tables to enforce a reopen of the table files.
4251     This handler will only forward this call, since during
4252     ALTER TABLE ... ADD/DROP/REORGANIZE/COALESCE/... PARTITION we will
4253     close and remove all instances before rename/drop and does not need
4254     special treatment for this flag.
4255 
4256   2) Operations used by some non-MyISAM handlers
4257   ----------------------------------------------
4258   HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
4259     This is a strictly InnoDB feature that is more or less undocumented.
4260     When it is activated InnoDB copies field by field from its fetch
4261     cache instead of all fields in one memcpy. Have no idea what the
4262     purpose of this is.
4263     Cut from include/my_base.h:
4264     When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
4265     other fields intact. When this is off (by default) InnoDB will use memcpy
4266     to overwrite entire row.
4267   HA_EXTRA_IGNORE_DUP_KEY:
4268   HA_EXTRA_NO_IGNORE_DUP_KEY:
    Informs the handler that we will not stop the transaction if we get
    duplicate key errors during insert/update.
4271     Always called in pair, triggered by INSERT IGNORE and other similar
4272     SQL constructs.
4273     Not used by MyISAM.
4274 
4275   3) Operations used only by MyISAM
4276   ---------------------------------
4277   HA_EXTRA_NORMAL:
4278     Only used in MyISAM to reset quick mode, not implemented by any other
4279     handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
4280 
4281     It is called after completing a successful DELETE query if the QUICK
4282     option is set.
4283 
4284   HA_EXTRA_QUICK:
4285     When the user does DELETE QUICK FROM table where-clause; this extra
4286     option is called before the delete query is performed and
4287     HA_EXTRA_NORMAL is called after the delete query is completed.
4288     Temporary tables used internally in MySQL always set this option
4289 
4290     The meaning of quick mode is that when deleting in a B-tree no merging
4291     of leafs is performed. This is a common method and many large DBMS's
4292     actually only support this quick mode since it is very difficult to
4293     merge leaves in a tree used by many threads concurrently.
4294 
4295   HA_EXTRA_CACHE:
4296     This flag is usually set with extra_opt along with a cache size.
4297     The size of this buffer is set by the user variable
4298     record_buffer_size. The value of this cache size is the amount of
4299     data read from disk in each fetch when performing a table scan.
4300     This means that before scanning a table it is normal to call
4301     extra with HA_EXTRA_CACHE and when the scan is completed to call
4302     HA_EXTRA_NO_CACHE to release the cache memory.
4303 
4304     Some special care is taken when using this extra parameter since there
4305     could be a write ongoing on the table in the same statement. In this
4306     one has to take special care since there might be a WRITE CACHE as
4307     well. HA_EXTRA_CACHE specifies using a READ CACHE and using
4308     READ CACHE and WRITE CACHE at the same time is not possible.
4309 
4310     Only MyISAM currently use this option.
4311 
4312     It is set when doing full table scans using rr_sequential and
4313     reset when completing such a scan with end_read_record
4314     (resetting means calling extra with HA_EXTRA_NO_CACHE).
4315 
4316     It is set in filesort.cc for MyISAM internal tables and it is set in
4317     a multi-update where HA_EXTRA_CACHE is called on a temporary result
4318     table and after that ha_rnd_init(0) on table to be updated
4319     and immediately after that HA_EXTRA_NO_CACHE on table to be updated.
4320 
4321     Apart from that it is always used from init_read_record but not when
4322     used from UPDATE statements. It is not used from DELETE statements
4323     with ORDER BY and LIMIT but it is used in normal scan loop in DELETE
4324     statements. The reason here is that DELETE's in MyISAM doesn't move
    existing data rows.
4326 
4327     It is also set in copy_data_between_tables when scanning the old table
4328     to copy over to the new table.
4329     And it is set in join_init_read_record where quick objects are used
4330     to perform a scan on the table. In this case the full table scan can
4331     even be performed multiple times as part of the nested loop join.
4332 
4333     For purposes of the partition handler it is obviously necessary to have
4334     special treatment of this extra call. If we would simply pass this
4335     extra call down to each handler we would allocate
4336     cache size * no of partitions amount of memory and this is not
4337     necessary since we will only scan one partition at a time when doing
4338     full table scans.
4339 
4340     Thus we treat it by first checking whether we have MyISAM handlers in
4341     the table, if not we simply ignore the call and if we have we will
4342     record the call but will not call any underlying handler yet. Then
4343     when performing the sequential scan we will check this recorded value
4344     and call extra_opt whenever we start scanning a new partition.
4345 
4346   HA_EXTRA_NO_CACHE:
4347     When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
4348     flush method in the Query_result_union class.
4349     See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
4350 
4351     It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
4352     if they are MyISAM handlers. Other handlers we can ignore the call
4353     for. If no cache is in use they will quickly return after finding
4354     this out. And we also ensure that all caches are disabled and no one
4355     is left by mistake.
4356     In the future this call will probably be deleted and we will instead call
4357     ::reset();
4358 
4359   HA_EXTRA_WRITE_CACHE:
4360     See above, called from various places. It is mostly used when we
4361     do INSERT ... SELECT
4362     No special handling to save cache space is developed currently.
4363 
4364   HA_EXTRA_PREPARE_FOR_UPDATE:
4365     This is called as part of a multi-table update. When the table to be
4366     updated is also scanned then this informs MyISAM handler to drop any
4367     caches if dynamic records are used (fixed size records do not care
4368     about this call). We pass this along to the first partition to scan, and
4369     flag that it is to be called after HA_EXTRA_CACHE when moving to the next
4370     partition to scan.
4371 
4372   HA_EXTRA_PREPARE_FOR_DROP:
4373     Only used by MyISAM, called in preparation for a DROP TABLE.
4374     It's used mostly by Windows that cannot handle dropping an open file.
4375     On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
4376 
4377   HA_EXTRA_READCHECK:
4378   HA_EXTRA_NO_READCHECK:
4379     Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
4380     this is not needed in SQL. The reason for this call is that MyISAM sets
4381     the READ_CHECK_USED in the open call so the call is needed for MyISAM
4382     to reset this feature.
4383     The idea with this parameter was to inform of doing/not doing a read
4384     check before applying an update. Since SQL always performs a read before
4385     applying the update No Read Check is needed in MyISAM as well.
4386 
4387     This is a cut from Docs/myisam.txt
4388      Sometimes you might want to force an update without checking whether
4389      another user has changed the record since you last read it. This is
4390      somewhat dangerous, so it should ideally not be used. That can be
4391      accomplished by wrapping the mi_update() call in two calls to mi_extra(),
4392      using these functions:
4393      HA_EXTRA_NO_READCHECK=5                 No readcheck on update
4394      HA_EXTRA_READCHECK=6                    Use readcheck (def)
4395 
4396 
4397   4) Operations only used by temporary tables for query processing
4398   ----------------------------------------------------------------
4399   HA_EXTRA_RESET_STATE:
4400     Same as reset() except that buffers are not released. If there is
4401     a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
4402     or to change type of cache between READ CACHE and WRITE CACHE.
4403 
4404     This extra function is always called immediately before calling
4405     delete_all_rows on the handler for temporary tables.
4406     There are cases however when HA_EXTRA_RESET_STATE isn't called in
4407     a similar case for a temporary table in sql_union.cc and in two other
4408     cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
4409     called afterwards.
4410     The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
4411     disable caching, delete all rows and enable WRITE CACHE. This is
4412     used for temporary tables containing distinct sums and a
4413     functional group.
4414 
4415     The only case that delete_all_rows is called on non-temporary tables
4416     is in sql_delete.cc when DELETE FROM table; is called by a user.
4417     In this case no special extra calls are performed before or after this
4418     call.
4419 
4420     The partition handler should not need to bother about this one. It
4421     should never be called.
4422 
4423   HA_EXTRA_NO_ROWS:
4424     Don't insert rows indication to HEAP and MyISAM, only used by temporary
4425     tables used in query processing.
4426     Not handled by partition handler.
4427 
4428   5) Operations only used by MyISAM internally
4429   --------------------------------------------
4430   HA_EXTRA_REINIT_CACHE:
4431     This call reinitializes the READ CACHE described above if there is one
4432     and otherwise the call is ignored.
4433 
4434     We can thus safely call it on all underlying handlers if they are
4435     MyISAM handlers. It is however never called so we don't handle it at all.
4436   HA_EXTRA_FLUSH_CACHE:
4437     Flush WRITE CACHE in MyISAM. It is only from one place in the code.
4438     This is in sql_insert.cc where it is called if the table_flags doesn't
4439     contain HA_DUPLICATE_POS. The only handler having the HA_DUPLICATE_POS
4440     set is the MyISAM handler and so the only handler not receiving this
4441     call is MyISAM.
4442     Thus in effect this call is called but never used. Could be removed
4443     from sql_insert.cc
4444   HA_EXTRA_NO_USER_CHANGE:
4445     Only used by MyISAM, never called.
4446     Simulates lock_type as locked.
4447   HA_EXTRA_WAIT_LOCK:
4448   HA_EXTRA_WAIT_NOLOCK:
4449     Only used by MyISAM, called from MyISAM handler but never from server
4450     code on top of the handler.
4451     Sets lock_wait on/off
4452   HA_EXTRA_NO_KEYS:
4453     Only used MyISAM, only used internally in MyISAM handler, never called
4454     from server level.
4455   HA_EXTRA_KEYREAD_CHANGE_POS:
4456   HA_EXTRA_REMEMBER_POS:
4457   HA_EXTRA_RESTORE_POS:
4458   HA_EXTRA_PRELOAD_BUFFER_SIZE:
4459   HA_EXTRA_CHANGE_KEY_TO_DUP:
4460   HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
4461     Only used by MyISAM, never called.
4462 
4463   6) Operations not used at all
4464   -----------------------------
  HA_EXTRA_KEY_CACHE:
  HA_EXTRA_NO_KEY_CACHE:
    These parameters are no longer used and could be removed.
4468 
4469   7) Operations only used by federated tables for query processing
4470   ----------------------------------------------------------------
4471   HA_EXTRA_INSERT_WITH_UPDATE:
4472     Inform handler that an "INSERT...ON DUPLICATE KEY UPDATE" will be
4473     executed. This condition is unset by HA_EXTRA_NO_IGNORE_DUP_KEY.
4474 
4475   8) Operations only used by NDB
4476   ------------------------------
4477   HA_EXTRA_DELETE_CANNOT_BATCH:
4478   HA_EXTRA_UPDATE_CANNOT_BATCH:
4479     Inform handler that delete_row()/update_row() cannot batch deletes/updates
4480     and should perform them immediately. This may be needed when table has
4481     AFTER DELETE/UPDATE triggers which access to subject table.
4482     These flags are reset by the handler::extra(HA_EXTRA_RESET) call.
4483 
4484   9) Operations only used by MERGE
4485   ------------------------------
4486   HA_EXTRA_ADD_CHILDREN_LIST:
4487   HA_EXTRA_ATTACH_CHILDREN:
4488   HA_EXTRA_IS_ATTACHED_CHILDREN:
4489   HA_EXTRA_DETACH_CHILDREN:
4490     Special actions for MERGE tables. Ignore.
4491 
4492   10) Operations only used by InnoDB
4493   ----------------------------------
4494   HA_EXTRA_EXPORT:
4495     Prepare table for export
4496     (e.g. quiesce the table and write table metadata).
4497 
4498   11) Operations only used by partitioning
4499   ------------------------------
4500   HA_EXTRA_SECONDARY_SORT_ROWID:
4501     INDEX_MERGE type of execution, needs to do secondary sort by
4502     ROWID (handler::ref).
4503 */
4504 
extra(enum ha_extra_function operation)4505 int ha_partition::extra(enum ha_extra_function operation)
4506 {
4507   DBUG_ENTER("ha_partition:extra");
4508   DBUG_PRINT("info", ("operation: %d", (int) operation));
4509 
4510   switch (operation) {
4511     /* Category 1), used by most handlers */
4512   case HA_EXTRA_KEYREAD:
4513   case HA_EXTRA_NO_KEYREAD:
4514   case HA_EXTRA_FLUSH:
4515   case HA_EXTRA_PREPARE_FOR_RENAME:
4516   case HA_EXTRA_FORCE_REOPEN:
4517     DBUG_RETURN(loop_extra(operation));
4518     break;
4519 
4520     /* Category 2), used by non-MyISAM handlers */
4521   case HA_EXTRA_IGNORE_DUP_KEY:
4522   case HA_EXTRA_NO_IGNORE_DUP_KEY:
4523   case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
4524   {
4525     if (!m_myisam)
4526       DBUG_RETURN(loop_extra(operation));
4527     break;
4528   }
4529 
4530   /* Category 3), used by MyISAM handlers */
4531   case HA_EXTRA_PREPARE_FOR_UPDATE:
4532     /*
4533       Needs to be run on the first partition in the range now, and
4534       later in late_extra_cache, when switching to a new partition to scan.
4535     */
4536     m_extra_prepare_for_update= TRUE;
4537     if (m_part_spec.start_part != NO_CURRENT_PART_ID)
4538     {
4539       if (!m_extra_cache)
4540         m_extra_cache_part_id= m_part_spec.start_part;
4541       assert(m_extra_cache_part_id == m_part_spec.start_part);
4542       (void) m_file[m_part_spec.start_part]->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
4543     }
4544     break;
4545   case HA_EXTRA_NORMAL:
4546   case HA_EXTRA_QUICK:
4547   case HA_EXTRA_PREPARE_FOR_DROP:
4548   case HA_EXTRA_FLUSH_CACHE:
4549   {
4550     if (m_myisam)
4551       DBUG_RETURN(loop_extra(operation));
4552     break;
4553   }
4554   case HA_EXTRA_NO_READCHECK:
4555   {
4556     /*
4557       This is only done as a part of ha_open, which is also used in
4558       ha_partition::open, so no need to do anything.
4559     */
4560     break;
4561   }
4562   case HA_EXTRA_CACHE:
4563   {
4564     prepare_extra_cache(0);
4565     break;
4566   }
4567   case HA_EXTRA_NO_CACHE:
4568   {
4569     int ret= 0;
4570     if (m_extra_cache_part_id != NO_CURRENT_PART_ID)
4571       ret= m_file[m_extra_cache_part_id]->extra(HA_EXTRA_NO_CACHE);
4572     m_extra_cache= FALSE;
4573     m_extra_cache_size= 0;
4574     m_extra_prepare_for_update= FALSE;
4575     m_extra_cache_part_id= NO_CURRENT_PART_ID;
4576     DBUG_RETURN(ret);
4577   }
4578   case HA_EXTRA_WRITE_CACHE:
4579   {
4580     m_extra_cache= FALSE;
4581     m_extra_cache_size= 0;
4582     m_extra_prepare_for_update= FALSE;
4583     m_extra_cache_part_id= NO_CURRENT_PART_ID;
4584     DBUG_RETURN(loop_extra(operation));
4585   }
4586   case HA_EXTRA_IGNORE_NO_KEY:
4587   case HA_EXTRA_NO_IGNORE_NO_KEY:
4588   {
4589     /*
4590       Ignore as these are specific to NDB for handling
4591       idempotency
4592      */
4593     break;
4594   }
4595   case HA_EXTRA_WRITE_CAN_REPLACE:
4596   case HA_EXTRA_WRITE_CANNOT_REPLACE:
4597   {
4598     /*
4599       Informs handler that write_row() can replace rows which conflict
4600       with row being inserted by PK/unique key without reporting error
4601       to the SQL-layer.
4602 
4603       This optimization is not safe for partitioned table in general case
4604       since we may have to put new version of row into partition which is
4605       different from partition in which old version resides (for example
4606       when we partition by non-PK column or by some column which is not
4607       part of unique key which were violated).
4608       And since NDB which is the only engine at the moment that supports
4609       this optimization handles partitioning on its own we simple disable
4610       it here. (BTW for NDB this optimization is safe since it supports
4611       only KEY partitioning and won't use this optimization for tables
4612       which have additional unique constraints).
4613     */
4614     break;
4615   }
4616     /* Category 7), used by federated handlers */
4617   case HA_EXTRA_INSERT_WITH_UPDATE:
4618     DBUG_RETURN(loop_extra(operation));
4619     /* Category 8) Operations only used by NDB */
4620   case HA_EXTRA_DELETE_CANNOT_BATCH:
4621   case HA_EXTRA_UPDATE_CANNOT_BATCH:
4622   {
4623     /* Currently only NDB use the *_CANNOT_BATCH */
4624     break;
4625   }
4626     /* Category 9) Operations only used by MERGE */
4627   case HA_EXTRA_ADD_CHILDREN_LIST:
4628   case HA_EXTRA_ATTACH_CHILDREN:
4629   case HA_EXTRA_IS_ATTACHED_CHILDREN:
4630   case HA_EXTRA_DETACH_CHILDREN:
4631   {
4632     /* Special actions for MERGE tables. Ignore. */
4633     break;
4634   }
4635   /*
4636     http://dev.mysql.com/doc/refman/5.1/en/partitioning-limitations.html
4637     says we no longer support logging to partitioned tables, so we fail
4638     here.
4639   */
4640   case HA_EXTRA_MARK_AS_LOG_TABLE:
4641     DBUG_RETURN(ER_UNSUPORTED_LOG_ENGINE);
4642     /* Category 10), used by InnoDB handlers */
4643   case HA_EXTRA_EXPORT:
4644     DBUG_RETURN(loop_extra(operation));
4645     /* Category 11) Operations only used by partitioning. */
4646   case HA_EXTRA_SECONDARY_SORT_ROWID:
4647   {
4648     // TODO: Remove this and add a flag to index_init instead,
4649     // so we can avoid allocating ref_length bytes for every used partition
4650     // in init_record_priority_queue()!
4651     /* index_init(sorted=true) must have been called! */
4652     assert(m_ordered);
4653     assert(m_ordered_rec_buffer);
4654     /* No index_read call must have been done! */
4655     assert(m_queue->empty());
4656     /* If not PK is set as secondary sort, do secondary sort by rowid/ref. */
4657     if (!m_curr_key_info[1])
4658     {
4659       m_ref_usage= Partition_helper::REF_USED_FOR_SORT;
4660       m_queue->m_fun= key_and_ref_cmp;
4661     }
4662     break;
4663   }
4664   case HA_EXTRA_RESET_STATE:
4665   {
4666     break;
4667   }
4668   default:
4669   {
4670     /* Temporary crash to discover what is wrong */
4671     assert(0);
4672     break;
4673   }
4674   }
4675   DBUG_RETURN(0);
4676 }
4677 
4678 
4679 /**
4680   Special extra call to reset extra parameters
4681 
4682   @return Operation status.
4683     @retval >0 Error code
4684     @retval 0  Success
4685 
4686   @note Called at end of each statement to reset buffers.
4687   To avoid excessive calls, the m_partitions_to_reset bitmap keep records
4688   of which partitions that have been used in extra(), external_lock() or
4689   start_stmt() and is needed to be called.
4690 */
4691 
reset(void)4692 int ha_partition::reset(void)
4693 {
4694   int result= 0;
4695   int tmp;
4696   uint i;
4697   DBUG_ENTER("ha_partition::reset");
4698 
4699   for (i= bitmap_get_first_set(&m_partitions_to_reset);
4700        i < m_tot_parts;
4701        i= bitmap_get_next_set(&m_partitions_to_reset, i))
4702   {
4703     if ((tmp= m_file[i]->ha_reset()))
4704       result= tmp;
4705   }
4706   bitmap_clear_all(&m_partitions_to_reset);
4707   DBUG_RETURN(result);
4708 }
4709 
4710 /*
4711   Special extra method for HA_EXTRA_CACHE with cachesize as extra parameter
4712 
4713   SYNOPSIS
4714     extra_opt()
4715     operation                      Must be HA_EXTRA_CACHE
4716     cachesize                      Size of cache in full table scan
4717 
4718   RETURN VALUE
4719     >0                   Error code
4720     0                    Success
4721 */
4722 
extra_opt(enum ha_extra_function operation,ulong cachesize)4723 int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize)
4724 {
4725   DBUG_ENTER("ha_partition::extra_opt()");
4726 
4727   assert(HA_EXTRA_CACHE == operation);
4728   prepare_extra_cache(cachesize);
4729   DBUG_RETURN(0);
4730 }
4731 
4732 
4733 /*
4734   Call extra on handler with HA_EXTRA_CACHE and cachesize
4735 
4736   SYNOPSIS
4737     prepare_extra_cache()
4738     cachesize                Size of cache for full table scan
4739 
4740   RETURN VALUE
4741     NONE
4742 */
4743 
prepare_extra_cache(uint cachesize)4744 void ha_partition::prepare_extra_cache(uint cachesize)
4745 {
4746   DBUG_ENTER("ha_partition::prepare_extra_cache()");
4747   DBUG_PRINT("info", ("cachesize %u", cachesize));
4748 
4749   m_extra_cache= TRUE;
4750   m_extra_cache_size= cachesize;
4751   if (m_part_spec.start_part != NO_CURRENT_PART_ID)
4752   {
4753     assert(bitmap_is_set(&m_partitions_to_reset,
4754                          m_part_spec.start_part));
4755     bitmap_set_bit(&m_partitions_to_reset, m_part_spec.start_part);
4756     late_extra_cache(m_part_spec.start_part);
4757   }
4758   DBUG_VOID_RETURN;
4759 }
4760 
4761 
4762 /*
4763   Call extra on all partitions
4764 
4765   SYNOPSIS
4766     loop_extra()
4767     operation             extra operation type
4768 
4769   RETURN VALUE
4770     >0                    Error code
4771     0                     Success
4772 */
4773 
loop_extra(enum ha_extra_function operation)4774 int ha_partition::loop_extra(enum ha_extra_function operation)
4775 {
4776   int result= 0, tmp;
4777   uint i;
4778   DBUG_ENTER("ha_partition::loop_extra()");
4779 
4780   for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
4781        i < m_tot_parts;
4782        i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
4783   {
4784     if ((tmp= m_file[i]->extra(operation)))
4785       result= tmp;
4786   }
4787   /* Add all used partitions to be called in reset(). */
4788   bitmap_union(&m_partitions_to_reset, &m_part_info->lock_partitions);
4789   DBUG_RETURN(result);
4790 }
4791 
4792 
4793 /*
4794   Call extra(HA_EXTRA_CACHE) on next partition_id
4795 
4796   SYNOPSIS
4797     late_extra_cache()
4798     partition_id               Partition id to call extra on
4799 
4800   RETURN VALUE
4801     NONE
4802 */
4803 
late_extra_cache(uint partition_id)4804 void ha_partition::late_extra_cache(uint partition_id)
4805 {
4806   handler *file;
4807   DBUG_ENTER("ha_partition::late_extra_cache");
4808   DBUG_PRINT("info", ("extra_cache %u prepare %u partid %u size %u",
4809                       m_extra_cache, m_extra_prepare_for_update,
4810                       partition_id, m_extra_cache_size));
4811 
4812   if (!m_extra_cache && !m_extra_prepare_for_update)
4813     DBUG_VOID_RETURN;
4814   file= m_file[partition_id];
4815   if (m_extra_cache)
4816   {
4817     if (m_extra_cache_size == 0)
4818       (void) file->extra(HA_EXTRA_CACHE);
4819     else
4820       (void) file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size);
4821   }
4822   if (m_extra_prepare_for_update)
4823   {
4824     (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
4825   }
4826   m_extra_cache_part_id= partition_id;
4827   DBUG_VOID_RETURN;
4828 }
4829 
4830 
4831 /*
4832   Call extra(HA_EXTRA_NO_CACHE) on next partition_id
4833 
4834   SYNOPSIS
4835     late_extra_no_cache()
4836     partition_id               Partition id to call extra on
4837 
4838   RETURN VALUE
4839     NONE
4840 */
4841 
late_extra_no_cache(uint partition_id)4842 void ha_partition::late_extra_no_cache(uint partition_id)
4843 {
4844   handler *file;
4845   DBUG_ENTER("ha_partition::late_extra_no_cache");
4846 
4847   if (!m_extra_cache && !m_extra_prepare_for_update)
4848     DBUG_VOID_RETURN;
4849   file= m_file[partition_id];
4850   (void) file->extra(HA_EXTRA_NO_CACHE);
4851   assert(partition_id == m_extra_cache_part_id);
4852   m_extra_cache_part_id= NO_CURRENT_PART_ID;
4853   DBUG_VOID_RETURN;
4854 }
4855 
4856 
4857 /****************************************************************************
4858                 MODULE optimiser support
4859 ****************************************************************************/
4860 
4861 /**
4862   Get keys to use for scanning.
4863 
4864   @return key_map of keys usable for scanning
4865 
4866   @note No need to use read_partitions here, since it does not depend on
4867   which partitions is used, only which storage engine used.
4868 */
4869 
keys_to_use_for_scanning()4870 const key_map *ha_partition::keys_to_use_for_scanning()
4871 {
4872   DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
4873   DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
4874 }
4875 
4876 
4877 /**
4878   Minimum number of rows to base optimizer estimate on.
4879 */
4880 
min_rows_for_estimate()4881 ha_rows ha_partition::min_rows_for_estimate()
4882 {
4883   uint i, max_used_partitions, tot_used_partitions;
4884   DBUG_ENTER("ha_partition::min_rows_for_estimate");
4885 
4886   tot_used_partitions= m_part_info->num_partitions_used();
4887 
4888   /*
4889     All partitions might have been left as unused during partition pruning
4890     due to, for example, an impossible WHERE condition. Nonetheless, the
4891     optimizer might still attempt to perform (e.g. range) analysis where an
4892     estimate of the the number of rows is calculated using records_in_range.
4893     Hence, to handle this and other possible cases, use zero as the minimum
4894     number of rows to base the estimate on if no partition is being used.
4895   */
4896   if (!tot_used_partitions)
4897     DBUG_RETURN(0);
4898 
4899   /*
4900     Allow O(log2(tot_partitions)) increase in number of used partitions.
4901     This gives O(tot_rows/log2(tot_partitions)) rows to base the estimate on.
4902     I.e when the total number of partitions doubles, allow one more
4903     partition to be checked.
4904   */
4905   i= 2;
4906   max_used_partitions= 1;
4907   while (i < m_tot_parts)
4908   {
4909     max_used_partitions++;
4910     i= i << 1;
4911   }
4912   if (max_used_partitions > tot_used_partitions)
4913     max_used_partitions= tot_used_partitions;
4914 
4915   /* stats.records is already updated by the info(HA_STATUS_VARIABLE) call. */
4916   DBUG_PRINT("info", ("max_used_partitions: %u tot_rows: %lu",
4917                       max_used_partitions,
4918                       (ulong) stats.records));
4919   DBUG_PRINT("info", ("tot_used_partitions: %u min_rows_to_check: %lu",
4920                       tot_used_partitions,
4921                       (ulong) stats.records * max_used_partitions
4922                               / tot_used_partitions));
4923   DBUG_RETURN(stats.records * max_used_partitions / tot_used_partitions);
4924 }
4925 
4926 
4927 /**
4928   Get the biggest used partition.
4929 
4930   Starting at the N:th biggest partition and skips all non used
4931   partitions, returning the biggest used partition found
4932 
4933   @param[in,out] part_index  Skip the *part_index biggest partitions
4934 
4935   @return The biggest used partition with index not lower than *part_index.
4936     @retval NO_CURRENT_PART_ID     No more partition used.
4937     @retval != NO_CURRENT_PART_ID  partition id of biggest used partition with
4938                                    index >= *part_index supplied. Note that
4939                                    *part_index will be updated to the next
4940                                    partition index to use.
4941 */
4942 
get_biggest_used_partition(uint * part_index)4943 uint ha_partition::get_biggest_used_partition(uint *part_index)
4944 {
4945   uint part_id;
4946   while ((*part_index) < m_tot_parts)
4947   {
4948     part_id= m_part_ids_sorted_by_num_of_records[(*part_index)++];
4949     if (m_part_info->is_partition_used(part_id))
4950       return part_id;
4951   }
4952   return NO_CURRENT_PART_ID;
4953 }
4954 
4955 
4956 /*
4957   Return time for a scan of the table
4958 
4959   SYNOPSIS
4960     scan_time()
4961 
4962   RETURN VALUE
4963     time for scan
4964 */
4965 
scan_time()4966 double ha_partition::scan_time()
4967 {
4968   double scan_time= 0;
4969   uint i;
4970   DBUG_ENTER("ha_partition::scan_time");
4971 
4972   for (i= m_part_info->get_first_used_partition();
4973        i < m_tot_parts;
4974        i= m_part_info->get_next_used_partition(i))
4975     scan_time+= m_file[i]->scan_time();
4976   DBUG_RETURN(scan_time);
4977 }
4978 
4979 
4980 /**
4981   Find number of records in a range.
4982   @param inx      Index number
4983   @param min_key  Start of range
4984   @param max_key  End of range
4985 
4986   @return Number of rows in range.
4987 
4988   Given a starting key, and an ending key estimate the number of rows that
4989   will exist between the two. max_key may be empty which in case determine
4990   if start_key matches any rows.
4991 */
4992 
ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
                                       key_range *max_key)
{
  ha_rows min_rows_to_check, rows, estimated_rows=0, checked_rows= 0;
  uint partition_index= 0, part_id;
  DBUG_ENTER("ha_partition::records_in_range");

  /* How many rows we must have sampled before extrapolating (0 if unused). */
  min_rows_to_check= min_rows_for_estimate();

  /* Visit partitions biggest-first so the sample fills up quickly. */
  while ((part_id= get_biggest_used_partition(&partition_index))
         != NO_CURRENT_PART_ID)
  {
    rows= m_file[part_id]->records_in_range(inx, min_key, max_key);

    DBUG_PRINT("info", ("part %u match %lu rows of %lu", part_id, (ulong) rows,
                        (ulong) m_file[part_id]->stats.records));

    /* Any partition reporting an error aborts the whole estimate. */
    if (rows == HA_POS_ERROR)
      DBUG_RETURN(HA_POS_ERROR);
    estimated_rows+= rows;
    checked_rows+= m_file[part_id]->stats.records;
    /*
      Returning 0 means no rows can be found, so we must continue
      this loop as long as we have estimated_rows == 0.
      Also many engines return 1 to indicate that there may exist
      a matching row, we do not normalize this by dividing by number of
      used partitions, but leave it to be returned as a sum, which will
      reflect that we will need to scan each partition's index.

      Note that this statistics may not always be correct, so we must
      continue even if the current partition has 0 rows, since we might have
      deleted rows from the current partition, or inserted to the next
      partition.
    */
    /*
      Stop early once enough rows were sampled: extrapolate the matched
      fraction to the whole table. checked_rows > 0 also guards the
      division below when min_rows_to_check is 0.
    */
    if (estimated_rows && checked_rows &&
        checked_rows >= min_rows_to_check)
    {
      DBUG_PRINT("info",
                 ("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
                  inx,
                  (ulong) (estimated_rows * stats.records / checked_rows),
                  (ulong) estimated_rows,
                  (ulong) stats.records,
                  (ulong) checked_rows));
      DBUG_RETURN(estimated_rows * stats.records / checked_rows);
    }
  }
  /* All used partitions checked; return the exact accumulated sum. */
  DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
                      inx,
                      (ulong) estimated_rows));
  DBUG_RETURN(estimated_rows);
}
5045 
5046 
5047 /**
5048   Estimate upper bound of number of rows.
5049 
5050   @return Number of rows.
5051 */
5052 
estimate_rows_upper_bound()5053 ha_rows ha_partition::estimate_rows_upper_bound()
5054 {
5055   ha_rows rows, tot_rows= 0;
5056   handler **file= m_file;
5057   DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
5058 
5059   do
5060   {
5061     if (m_part_info->is_partition_used(file - m_file))
5062     {
5063       rows= (*file)->estimate_rows_upper_bound();
5064       if (rows == HA_POS_ERROR)
5065         DBUG_RETURN(HA_POS_ERROR);
5066       tot_rows+= rows;
5067     }
5068   } while (*(++file));
5069   DBUG_RETURN(tot_rows);
5070 }
5071 
5072 
5073 /*
5074   Get time to read
5075 
5076   SYNOPSIS
5077     read_time()
5078     index                Index number used
5079     ranges               Number of ranges
5080     rows                 Number of rows
5081 
5082   RETURN VALUE
5083     time for read
5084 
5085   DESCRIPTION
5086     This will be optimised later to include whether or not the index can
5087     be used with partitioning. To achieve we need to add another parameter
5088     that specifies how many of the index fields that are bound in the ranges.
5089     Possibly added as a new call to handlers.
5090 */
5091 
read_time(uint index,uint ranges,ha_rows rows)5092 double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
5093 {
5094   DBUG_ENTER("ha_partition::read_time");
5095 
5096   DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
5097 }
5098 
5099 
5100 /**
5101   Number of rows in table. see handler.h
5102   @param[out] num_rows Number of records in the table (after pruning!)
5103   @return possible error code.
5104 */
5105 
records(ha_rows * num_rows)5106 int ha_partition::records(ha_rows *num_rows)
5107 {
5108   ha_rows tot_rows= 0;
5109   uint i;
5110   DBUG_ENTER("ha_partition::records");
5111 
5112   for (i= m_part_info->get_first_used_partition();
5113        i < m_tot_parts;
5114        i= m_part_info->get_next_used_partition(i))
5115   {
5116     int error= m_file[i]->ha_records(num_rows);
5117     if (error != 0)
5118       DBUG_RETURN(error);
5119     tot_rows+= *num_rows;
5120   }
5121   *num_rows= tot_rows;
5122   DBUG_RETURN(0);
5123 }
5124 
5125 
5126 /*
5127   Is it ok to switch to a new engine for this table
5128 
5129   SYNOPSIS
5130     can_switch_engine()
5131 
5132   RETURN VALUE
5133     TRUE                  Ok
5134     FALSE                 Not ok
5135 
5136   DESCRIPTION
5137     Used to ensure that tables with foreign key constraints are not moved
5138     to engines without foreign key support.
5139 */
5140 
can_switch_engines()5141 bool ha_partition::can_switch_engines()
5142 {
5143   handler **file;
5144   DBUG_ENTER("ha_partition::can_switch_engines");
5145 
5146   file= m_file;
5147   do
5148   {
5149     if (!(*file)->can_switch_engines())
5150     {
5151       assert(0);          // A ha_partition table should never have FKs!!!
5152       DBUG_RETURN(FALSE);
5153     }
5154   } while (*(++file));
5155   DBUG_RETURN(TRUE);
5156 }
5157 
5158 
5159 /*
5160   Is table cache supported
5161 
5162   SYNOPSIS
5163     table_cache_type()
5164 
5165 */
5166 
table_cache_type()5167 uint8 ha_partition::table_cache_type()
5168 {
5169   DBUG_ENTER("ha_partition::table_cache_type");
5170 
5171   DBUG_RETURN(m_file[0]->table_cache_type());
5172 }
5173 
5174 
5175 /****************************************************************************
5176                 MODULE print messages
5177 ****************************************************************************/
5178 
index_type(uint inx)5179 const char *ha_partition::index_type(uint inx)
5180 {
5181   uint first_used_partition;
5182   DBUG_ENTER("ha_partition::index_type");
5183 
5184   first_used_partition= m_part_info->get_first_used_partition();
5185 
5186   if (first_used_partition == MY_BIT_NONE)
5187   {
5188     assert(0);                             // How can this happen?
5189     DBUG_RETURN(handler::index_type(inx));
5190   }
5191 
5192   DBUG_RETURN(m_file[first_used_partition]->index_type(inx));
5193 }
5194 
5195 
get_row_type() const5196 enum row_type ha_partition::get_row_type() const
5197 {
5198   uint i;
5199   enum row_type type;
5200   DBUG_ENTER("ha_partition::get_row_type");
5201 
5202   i= m_part_info->get_first_used_partition();
5203   assert(i < m_tot_parts);
5204   if (i >= m_tot_parts)
5205     DBUG_RETURN(ROW_TYPE_NOT_USED);
5206 
5207   type= m_file[i]->get_row_type();
5208   DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
5209 
5210   for (i= bitmap_get_next_set(&m_part_info->lock_partitions, i);
5211        i < m_tot_parts;
5212        i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
5213   {
5214     enum row_type part_type= m_file[i]->get_row_type();
5215     DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
5216     if (part_type != type)
5217       DBUG_RETURN(ROW_TYPE_NOT_USED);
5218   }
5219 
5220   DBUG_RETURN(type);
5221 }
5222 
print_error(int error,myf errflag)5223 void ha_partition::print_error(int error, myf errflag)
5224 {
5225   DBUG_ENTER("ha_partition::print_error");
5226   if (print_partition_error(error, errflag))
5227   {
5228     /* Not a partitioning error. */
5229     /* In case m_file has not been initialized, like in bug#42438 */
5230     if (m_file)
5231     {
5232       if (m_last_part >= m_tot_parts)
5233       {
5234         m_last_part= 0;
5235       }
5236       m_file[m_last_part]->print_error(error, errflag);
5237     }
5238     else
5239       handler::print_error(error, errflag);
5240   }
5241   DBUG_VOID_RETURN;
5242 }
5243 
5244 
get_error_message(int error,String * buf)5245 bool ha_partition::get_error_message(int error, String *buf)
5246 {
5247   DBUG_ENTER("ha_partition::get_error_message");
5248 
5249   /* Should probably look for my own errors first */
5250 
5251   /* In case m_file has not been initialized, like in bug#42438 */
5252   if (m_file)
5253     DBUG_RETURN(m_file[m_last_part]->get_error_message(error, buf));
5254   DBUG_RETURN(handler::get_error_message(error, buf));
5255 
5256 }
5257 
5258 
5259 /****************************************************************************
5260                 MODULE in-place ALTER
5261 ****************************************************************************/
5262 /**
5263   Get table flags.
5264 */
5265 
table_flags() const5266 handler::Table_flags ha_partition::table_flags() const
5267 {
5268   uint first_used_partition= 0;
5269   DBUG_ENTER("ha_partition::table_flags");
5270   if (m_handler_status < handler_initialized ||
5271       m_handler_status >= handler_closed)
5272     DBUG_RETURN(PARTITION_ENABLED_TABLE_FLAGS);
5273 
5274   if (get_lock_type() != F_UNLCK)
5275   {
5276     /*
5277       The flags are cached after external_lock, and may depend on isolation
5278       level. So we should use a locked partition to get the correct flags.
5279     */
5280     first_used_partition= bitmap_get_first_set(&m_part_info->lock_partitions);
5281     if (first_used_partition == MY_BIT_NONE)
5282       first_used_partition= 0;
5283   }
5284   DBUG_RETURN((m_file[first_used_partition]->ha_table_flags() &
5285                  ~(PARTITION_DISABLED_TABLE_FLAGS)) |
5286                  (PARTITION_ENABLED_TABLE_FLAGS));
5287 }
5288 
5289 
5290 /**
5291   check if copy of data is needed in alter table.
5292 */
check_if_incompatible_data(HA_CREATE_INFO * create_info,uint table_changes)5293 bool ha_partition::check_if_incompatible_data(HA_CREATE_INFO *create_info,
5294                                               uint table_changes)
5295 {
5296   handler **file;
5297   bool ret= COMPATIBLE_DATA_YES;
5298 
5299   /*
5300     The check for any partitioning related changes have already been done
5301     in mysql_alter_table (by fix_partition_func), so it is only up to
5302     the underlying handlers.
5303   */
5304   for (file= m_file; *file; file++)
5305     if ((ret=  (*file)->check_if_incompatible_data(create_info,
5306                                                    table_changes)) !=
5307         COMPATIBLE_DATA_YES)
5308       break;
5309   return ret;
5310 }
5311 
5312 
5313 /**
5314   Support of in-place alter table.
5315 */
5316 
5317 /**
5318   Helper class for in-place alter, see handler.h
5319 */
5320 
5321 class ha_partition_inplace_ctx : public inplace_alter_handler_ctx
5322 {
5323 public:
5324   inplace_alter_handler_ctx **handler_ctx_array;
5325 private:
5326   uint m_tot_parts;
5327 
5328 public:
ha_partition_inplace_ctx(THD * thd,uint tot_parts)5329   ha_partition_inplace_ctx(THD *thd, uint tot_parts)
5330     : inplace_alter_handler_ctx(),
5331       handler_ctx_array(NULL),
5332       m_tot_parts(tot_parts)
5333   {}
5334 
~ha_partition_inplace_ctx()5335   ~ha_partition_inplace_ctx()
5336   {
5337     if (handler_ctx_array)
5338     {
5339       for (uint index= 0; index < m_tot_parts; index++)
5340         delete handler_ctx_array[index];
5341     }
5342   }
5343 };
5344 
5345 
enum_alter_inplace_result
ha_partition::check_if_supported_inplace_alter(TABLE *altered_table,
                                               Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  enum_alter_inplace_result result= HA_ALTER_INPLACE_NO_LOCK;
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool first_is_set= false;
  THD *thd= ha_thd();

  DBUG_ENTER("ha_partition::check_if_supported_inplace_alter");
  /*
    Support inplace change of KEY () -> KEY ALGORITHM = N ()
    and UPGRADE PARTITIONING.
    Any other change would set partition_changed in
    prep_alter_part_table() in mysql_alter_table().
  */
  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION ||
      ha_alter_info->alter_info->flags ==
        Alter_info::ALTER_UPGRADE_PARTITIONING)
    DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);

  /* We cannot allow INPLACE to change order of KEY partitioning fields! */
  if (ha_alter_info->handler_flags & Alter_inplace_info::ALTER_STORED_COLUMN_ORDER)
  {
    if (!m_part_info->same_key_column_order(
           &ha_alter_info->alter_info->create_list))
    {
      DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }
  }

  /* Allocated on the THD mem_root, so no leak on the early error returns. */
  part_inplace_ctx=
    new (thd->mem_root) ha_partition_inplace_ctx(thd, m_tot_parts);
  if (!part_inplace_ctx)
    DBUG_RETURN(HA_ALTER_ERROR);

  part_inplace_ctx->handler_ctx_array= (inplace_alter_handler_ctx **)
    thd->alloc(sizeof(inplace_alter_handler_ctx *) * (m_tot_parts + 1));
  if (!part_inplace_ctx->handler_ctx_array)
    DBUG_RETURN(HA_ALTER_ERROR);

  /* Set all to NULL, including the terminating one. */
  for (index= 0; index <= m_tot_parts; index++)
    part_inplace_ctx->handler_ctx_array[index]= NULL;

  /* Ask every partition handler and collect its per-partition context. */
  for (index= 0; index < m_tot_parts; index++)
  {
    enum_alter_inplace_result p_result=
      m_file[index]->check_if_supported_inplace_alter(altered_table,
                                                      ha_alter_info);
    part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;

    if (index == 0)
    {
      first_is_set= (ha_alter_info->handler_ctx != NULL);
    }
    else if (first_is_set != (ha_alter_info->handler_ctx != NULL))
    {
      /* Either none or all partitions must set handler_ctx! */
      assert(0);
      DBUG_RETURN(HA_ALTER_ERROR);
    }
    /* Aggregate to the most restrictive (lowest-valued) result. */
    if (p_result < result)
      result= p_result;
    if (result == HA_ALTER_ERROR)
      break;
  }

  ha_alter_info->handler_ctx= part_inplace_ctx;
  /*
    To indicate for future inplace calls that there are several
    partitions/handlers that need to be committed together,
    we set group_commit_ctx to the NULL terminated array of
    the partitions handlers.
  */
  ha_alter_info->group_commit_ctx= part_inplace_ctx->handler_ctx_array;

  DBUG_RETURN(result);
}
5426 
5427 
prepare_inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)5428 bool ha_partition::prepare_inplace_alter_table(TABLE *altered_table,
5429                                                Alter_inplace_info *ha_alter_info)
5430 {
5431   uint index= 0;
5432   bool error= false;
5433   ha_partition_inplace_ctx *part_inplace_ctx;
5434 
5435   DBUG_ENTER("ha_partition::prepare_inplace_alter_table");
5436 
5437   /*
5438     Changing to similar partitioning, only update metadata.
5439     Non allowed changes would be catched in prep_alter_part_table().
5440   */
5441   if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION ||
5442       ha_alter_info->alter_info->flags ==
5443         Alter_info::ALTER_UPGRADE_PARTITIONING)
5444     DBUG_RETURN(false);
5445 
5446   part_inplace_ctx=
5447     static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
5448 
5449   for (index= 0; index < m_tot_parts && !error; index++)
5450   {
5451     ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
5452     if (m_file[index]->ha_prepare_inplace_alter_table(altered_table,
5453                                                       ha_alter_info))
5454       error= true;
5455     part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
5456   }
5457   ha_alter_info->handler_ctx= part_inplace_ctx;
5458 
5459   DBUG_RETURN(error);
5460 }
5461 
5462 
inplace_alter_table(TABLE * altered_table,Alter_inplace_info * ha_alter_info)5463 bool ha_partition::inplace_alter_table(TABLE *altered_table,
5464                                        Alter_inplace_info *ha_alter_info)
5465 {
5466   uint index= 0;
5467   bool error= false;
5468   ha_partition_inplace_ctx *part_inplace_ctx;
5469 
5470   DBUG_ENTER("ha_partition::inplace_alter_table");
5471 
5472   /*
5473     Changing to similar partitioning, only update metadata.
5474     Non allowed changes would be catched in prep_alter_part_table().
5475   */
5476   if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION ||
5477       ha_alter_info->alter_info->flags ==
5478         Alter_info::ALTER_UPGRADE_PARTITIONING)
5479     DBUG_RETURN(false);
5480 
5481   part_inplace_ctx=
5482     static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
5483 
5484   for (index= 0; index < m_tot_parts && !error; index++)
5485   {
5486     ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
5487     if (m_file[index]->ha_inplace_alter_table(altered_table,
5488                                               ha_alter_info))
5489       error= true;
5490     part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
5491   }
5492   ha_alter_info->handler_ctx= part_inplace_ctx;
5493 
5494   DBUG_RETURN(error);
5495 }
5496 
5497 
/*
  Note that this function will try to roll back a failed ADD INDEX by
  executing DROP INDEX for the indexes that were committed (if any)
  before the error occurred. This means that the underlying storage
  engine must be able to drop an index in-place with an X-lock held.
  (As X-lock will be held here if new indexes are to be committed)
*/
bool ha_partition::commit_inplace_alter_table(TABLE *altered_table,
                                              Alter_inplace_info *ha_alter_info,
                                              bool commit)
{
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool error= false;

  DBUG_ENTER("ha_partition::commit_inplace_alter_table");

  /*
    Changing to similar partitioning, only update metadata.
    Non allowed changes would be catched in prep_alter_part_table().
  */
  if (ha_alter_info->alter_info->flags == Alter_info::ALTER_PARTITION ||
      ha_alter_info->alter_info->flags ==
        Alter_info::ALTER_UPGRADE_PARTITIONING)
    DBUG_RETURN(false);

  part_inplace_ctx=
    static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);

  if (commit)
  {
    assert(ha_alter_info->group_commit_ctx ==
           part_inplace_ctx->handler_ctx_array);
    /*
      Commit through the first partition; an engine that supports group
      commit will commit all partitions in this one call and reset
      group_commit_ctx to NULL.
    */
    ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[0];
    error= m_file[0]->ha_commit_inplace_alter_table(altered_table,
                                                    ha_alter_info, commit);
    if (error)
      goto end;
    if (ha_alter_info->group_commit_ctx)
    {
      /*
        If ha_alter_info->group_commit_ctx is not set to NULL,
        then the engine did only commit the first partition!
        The engine is probably new, since both innodb and the default
        implementation of handler::commit_inplace_alter_table sets it to NULL
        and simply return false, since it allows metadata changes only.
        Loop over all other partitions as to follow the protocol!
      */
      uint i;
      assert(0);
      for (i= 1; i < m_tot_parts; i++)
      {
        ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
        error|= m_file[i]->ha_commit_inplace_alter_table(altered_table,
                                                         ha_alter_info,
                                                         true);
      }
    }
  }
  else
  {
    uint i;
    for (i= 0; i < m_tot_parts; i++)
    {
      /* Rollback, commit == false,  is done for each partition! */
      ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
      if (m_file[i]->ha_commit_inplace_alter_table(altered_table,
                                                   ha_alter_info, false))
        error= true;
    }
  }
end:
  /* Restore the group context so it is destroyed as a whole. */
  ha_alter_info->handler_ctx= part_inplace_ctx;

  DBUG_RETURN(error);
}
5573 
5574 
notify_table_changed()5575 void ha_partition::notify_table_changed()
5576 {
5577   handler **file;
5578 
5579   DBUG_ENTER("ha_partition::notify_table_changed");
5580 
5581   for (file= m_file; *file; file++)
5582     (*file)->ha_notify_table_changed();
5583 
5584   DBUG_VOID_RETURN;
5585 }
5586 
discard_or_import_tablespace(my_bool discard)5587 int ha_partition::discard_or_import_tablespace(my_bool discard)
5588 {
5589   int error= 0;
5590   uint i;
5591 
5592   DBUG_ENTER("ha_partition::discard_or_import_tablespace");
5593 
5594   for (i= m_part_info->get_first_used_partition();
5595        i < m_tot_parts;
5596        i= m_part_info->get_next_used_partition(i))
5597   {
5598     error= m_file[i]->ha_discard_or_import_tablespace(discard);
5599     if (error)
5600       break;
5601   }
5602 
5603   DBUG_RETURN(error);
5604 }
5605 
/*
  If frm_error() is called then we will use this to find out what file
  extensions exist for the storage engine. This is also used by the default
  rename_table and delete_table method in handler.cc.
*/
5611 
/* NullS-terminated list of file extensions used by this handler. */
static const char *ha_partition_ext[]=
{
  ha_par_ext, NullS
};
5616 
bas_ext() const5617 const char **ha_partition::bas_ext() const
5618 { return ha_partition_ext; }
5619 
5620 
min_of_the_max_uint(uint (handler::* operator_func)(void)const) const5621 uint ha_partition::min_of_the_max_uint(
5622                        uint (handler::*operator_func)(void) const) const
5623 {
5624   handler **file;
5625   uint min_of_the_max= ((*m_file)->*operator_func)();
5626 
5627   for (file= m_file+1; *file; file++)
5628   {
5629     uint tmp= ((*file)->*operator_func)();
5630     set_if_smaller(min_of_the_max, tmp);
5631   }
5632   return min_of_the_max;
5633 }
5634 
5635 
min_of_the_max_uint(HA_CREATE_INFO * create_info,uint (handler::* operator_func)(HA_CREATE_INFO *)const) const5636 uint ha_partition::min_of_the_max_uint(HA_CREATE_INFO *create_info,
5637                        uint (handler::*operator_func)(HA_CREATE_INFO *) const) const
5638 {
5639   handler **file;
5640   uint min_of_the_max= ((*m_file)->*operator_func)(create_info);
5641 
5642   for (file= m_file+1; *file; file++)
5643   {
5644     uint tmp= ((*file)->*operator_func)(create_info);
5645     set_if_smaller(min_of_the_max, tmp);
5646   }
5647   return min_of_the_max;
5648 }
5649 
5650 
max_supported_key_parts() const5651 uint ha_partition::max_supported_key_parts() const
5652 {
5653   return min_of_the_max_uint(&handler::max_supported_key_parts);
5654 }
5655 
5656 
max_supported_key_length() const5657 uint ha_partition::max_supported_key_length() const
5658 {
5659   return min_of_the_max_uint(&handler::max_supported_key_length);
5660 }
5661 
5662 
max_supported_key_part_length(HA_CREATE_INFO * create_info) const5663 uint ha_partition::max_supported_key_part_length(HA_CREATE_INFO
5664                                                  *create_info) const
5665 {
5666   return
5667   min_of_the_max_uint(create_info, &handler::max_supported_key_part_length);
5668 }
5669 
5670 
max_supported_record_length() const5671 uint ha_partition::max_supported_record_length() const
5672 {
5673   return min_of_the_max_uint(&handler::max_supported_record_length);
5674 }
5675 
5676 
max_supported_keys() const5677 uint ha_partition::max_supported_keys() const
5678 {
5679   return min_of_the_max_uint(&handler::max_supported_keys);
5680 }
5681 
5682 
extra_rec_buf_length() const5683 uint ha_partition::extra_rec_buf_length() const
5684 {
5685   handler **file;
5686   uint max= (*m_file)->extra_rec_buf_length();
5687 
5688   for (file= m_file, file++; *file; file++)
5689     if (max < (*file)->extra_rec_buf_length())
5690       max= (*file)->extra_rec_buf_length();
5691   return max;
5692 }
5693 
5694 
min_record_length(uint options) const5695 uint ha_partition::min_record_length(uint options) const
5696 {
5697   handler **file;
5698   uint max= (*m_file)->min_record_length(options);
5699 
5700   for (file= m_file, file++; *file; file++)
5701     if (max < (*file)->min_record_length(options))
5702       max= (*file)->min_record_length(options);
5703   return max;
5704 }
5705 
5706 
5707 /****************************************************************************
5708                 MODULE compare records
5709 ****************************************************************************/
5710 /*
5711   Compare two positions
5712 
5713   SYNOPSIS
5714     cmp_ref()
5715     ref1                   First position
5716     ref2                   Second position
5717 
5718   RETURN VALUE
5719     <0                     ref1 < ref2
5720     0                      Equal
5721     >0                     ref1 > ref2
5722 
5723   DESCRIPTION
5724     We get two references and need to check if those records are the same.
5725     If they belong to different partitions we decide that they are not
5726     the same record. Otherwise we use the particular handler to decide if
5727     they are the same. Sort in partition id order if not equal.
5728 */
5729 
cmp_ref(const uchar * ref1,const uchar * ref2)5730 int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
5731 {
5732   int cmp;
5733   my_ptrdiff_t diff1, diff2;
5734   DBUG_ENTER("ha_partition::cmp_ref");
5735 
5736   cmp = m_file[0]->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
5737                            (ref2 + PARTITION_BYTES_IN_POS));
5738   if (cmp)
5739     DBUG_RETURN(cmp);
5740 
5741   if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1]))
5742   {
5743    /* This means that the references are same and are in same partition.*/
5744     DBUG_RETURN(0);
5745   }
5746 
5747   /*
5748     In Innodb we compare with either primary key value or global DB_ROW_ID so
5749     it is not possible that the two references are equal and are in different
5750     partitions, but in myisam it is possible since we are comparing offsets.
5751     Remove this assert if DB_ROW_ID is changed to be per partition.
5752   */
5753   assert(!m_innodb);
5754 
5755   diff1= ref2[1] - ref1[1];
5756   diff2= ref2[0] - ref1[0];
5757   if (diff1 > 0)
5758   {
5759     DBUG_RETURN(-1);
5760   }
5761   if (diff1 < 0)
5762   {
5763     DBUG_RETURN(+1);
5764   }
5765   if (diff2 > 0)
5766   {
5767     DBUG_RETURN(-1);
5768   }
5769   DBUG_RETURN(+1);
5770 }
5771 
5772 
5773 /****************************************************************************
5774                 MODULE condition pushdown
5775 ****************************************************************************/
5776 
5777 
/**
  Index condition pushdown registration
  @param keyno     Key number for the condition
  @param idx_cond  Item tree of the condition to test

  @return Remainder of non handled condition

  @note Only handles the full condition or nothing at all. MyISAM and InnoDB
  both support only full-condition or no pushdown.
*/
Item *ha_partition::idx_cond_push(uint keyno, Item* idx_cond)
{
  uint i;
  Item *res;
  DBUG_ENTER("ha_partition::idx_cond_push");
  DBUG_EXECUTE("where", print_where(idx_cond, "cond", QT_ORDINARY););
  DBUG_PRINT("info", ("keyno: %u, active_index: %u", keyno, active_index));
  assert(pushed_idx_cond == NULL);

  /* Try to push the condition to every used partition. */
  for (i= m_part_info->get_first_used_partition();
       i < m_tot_parts;
       i= m_part_info->get_next_used_partition(i))
  {
    /* A non-NULL result is the remainder the partition could not handle. */
    res= m_file[i]->idx_cond_push(keyno, idx_cond);
    if (res)
    {
      uint j;
      /*
        All partitions have the same structure, so if the first partition
        succeeds, then the rest will also succeed.
      */
      assert(i == m_part_info->get_first_used_partition());
      /* Only supports entire index conditions or no conditions! */
      assert(res == idx_cond);
      if (res != idx_cond)
        m_file[i]->cancel_pushed_idx_cond();
      /* cancel previous calls. */
      for (j= m_part_info->get_first_used_partition();
           j < i; // No need for cancel i, since no support
           j= m_part_info->get_next_used_partition(j))
      {
        m_file[j]->cancel_pushed_idx_cond();
      }
      /* Nothing was pushed: return the whole condition to the caller. */
      DBUG_RETURN(idx_cond);
    }
  }
  assert(pushed_idx_cond_keyno == MAX_KEY);
  /* All partitions accepted the condition; record it on the table level. */
  pushed_idx_cond= idx_cond;
  pushed_idx_cond_keyno= keyno;
  DBUG_PRINT("info", ("Index condition pushdown used for keyno: %u", keyno));
  DBUG_RETURN(NULL);
}
5830 
5831 
5832 /** Reset information about pushed index conditions */
cancel_pushed_idx_cond()5833 void ha_partition::cancel_pushed_idx_cond()
5834 {
5835   uint i;
5836   DBUG_ENTER("ha_partition::cancel_pushed_idx_cond");
5837   if (pushed_idx_cond)
5838   {
5839     for (i= m_part_info->get_first_used_partition();
5840          i < m_tot_parts;
5841          i= m_part_info->get_next_used_partition(i))
5842     {
5843       m_file[i]->cancel_pushed_idx_cond();
5844     }
5845     pushed_idx_cond= NULL;
5846     pushed_idx_cond_keyno= MAX_KEY;
5847   }
5848 
5849   DBUG_VOID_RETURN;
5850 }
5851 
5852 
5853 /****************************************************************************
5854                 MODULE auto increment
5855 ****************************************************************************/
5856 
5857 /**
5858   Initialize the shared auto increment value.
5859 
5860   @param no_lock  If HA_STATUS_NO_LOCK should be used in info(HA_STATUS_AUTO).
5861 
5862   Also sets stats.auto_increment_value.
5863 */
5864 
inline int ha_partition::initialize_auto_increment(bool no_lock)
{
  DBUG_ENTER("ha_partition::initialize_auto_increment");
#ifndef NDEBUG
  if (table_share->tmp_table == NO_TMP_TABLE)
  {
    /* For non-temporary tables the caller must hold the auto_inc mutex. */
    mysql_mutex_assert_owner(part_share->auto_inc_mutex);
  }
#endif
  assert(!part_share->auto_inc_initialized);

  /*
    The auto-inc mutex in the table_share is locked, so we do not need
    to have the handlers locked.
    HA_STATUS_NO_LOCK is not checked, since we cannot skip locking
    the mutex, because it is initialized.
  */
  handler *file, **file_array;
  ulonglong auto_increment_value= 0;
  uint no_lock_flag= no_lock ? HA_STATUS_NO_LOCK : 0;
  int ret_error, error= 0;
  file_array= m_file;
  DBUG_PRINT("info",
             ("checking all partitions for auto_increment_value"));
  do
  {
    /* Take the highest auto_increment_value over all partitions. */
    file= *file_array;
    ret_error= file->info(HA_STATUS_AUTO | no_lock_flag);
    set_if_bigger(auto_increment_value,
                  file->stats.auto_increment_value);
    /* Remember the first error but keep scanning remaining partitions. */
    if (ret_error && !error)
    {
      error= ret_error;
    }
  } while (*(++file_array));

  assert(auto_increment_value);
  stats.auto_increment_value= auto_increment_value;
  /*
    We only use the cached auto inc value if it is
    the first part of the key.
  */
  if (table_share->next_number_keypart == 0)
  {
    assert(part_share->next_auto_inc_val <= auto_increment_value);
    part_share->next_auto_inc_val= auto_increment_value;
    part_share->auto_inc_initialized= true;
    DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu",
                 (ulong) part_share->next_auto_inc_val));
  }
  DBUG_RETURN(error);
}
5917 
5918 
5919 /**
5920   This method is called by update_auto_increment which in turn is called
5921   by the individual handlers as part of write_row. We use the
5922   part_share->next_auto_inc_val, or search all
5923   partitions for the highest auto_increment_value if not initialized or
5924   if auto_increment field is a secondary part of a key, we must search
5925   every partition when holding a mutex to be sure of correctness.
5926 */
5927 
void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment,
                                      ulonglong nb_desired_values,
                                      ulonglong *first_value,
                                      ulonglong *nb_reserved_values)
{
  DBUG_ENTER("ha_partition::get_auto_increment");
  DBUG_PRINT("info", ("offset: %lu inc: %lu desired_values: %lu "
                      "first_value: %lu", (ulong) offset, (ulong) increment,
                      (ulong) nb_desired_values, (ulong) *first_value));
  assert(increment && nb_desired_values);
  *first_value= 0;
  if (table->s->next_number_keypart)
  {
    /*
      next_number_keypart is != 0 if the auto_increment column is a secondary
      column in the index (it is allowed in MyISAM)
    */
    DBUG_PRINT("info", ("next_number_keypart != 0"));
    ulonglong nb_reserved_values_part;
    ulonglong first_value_part, max_first_value;
    handler **file= m_file;
    first_value_part= max_first_value= *first_value;
    /* Must lock and find highest value among all partitions. */
    lock_auto_increment();
    do
    {
      /* Only nb_desired_values = 1 makes sense */
      (*file)->get_auto_increment(offset, increment, 1,
                                 &first_value_part, &nb_reserved_values_part);
      if (first_value_part == ULLONG_MAX) // error in one partition
      {
        /* Propagate the error marker and release the mutex before leaving. */
        *first_value= first_value_part;
        /* log that the error was between table/partition handler */
        sql_print_error("Partition failed to reserve auto_increment value");
        unlock_auto_increment();
        DBUG_VOID_RETURN;
      }
      DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part));
      set_if_bigger(max_first_value, first_value_part);
    } while (*(++file));
    /* Only a single value can be reserved in this mode. */
    *first_value= max_first_value;
    *nb_reserved_values= 1;
    unlock_auto_increment();
  }
  else
  {
    /* Normal case: auto_increment is the first key part; use the cache. */
    Partition_helper::get_auto_increment_first_field(increment,
                                                     nb_desired_values,
                                                     first_value,
                                                     nb_reserved_values);
  }
  DBUG_VOID_RETURN;
}
5981 
/** Get partition row type
@param[in] part_id Id of the partition for which the row type is retrieved
@return Partition row type */
get_partition_row_type(uint part_id)5985 enum row_type ha_partition::get_partition_row_type(
5986         uint part_id)
5987 {
5988 	return m_file[part_id]->get_row_type();
5989 }
5990 
/* Release reserved auto_increment values in the involved partitions. */
void ha_partition::release_auto_increment_all_parts()
{
  uint i;
  DBUG_ENTER("ha_partition::release_auto_increment_all_parts");

  /* Only used when the auto_increment column is a secondary key part. */
  assert(table->s->next_number_keypart);
  /*
    NOTE(review): the loop starts from the first used (read) partition but
    advances through the lock_partitions bitmap — confirm mixing the two
    bitmaps is intentional here.
  */
  for (i= m_part_info->get_first_used_partition();
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
  {
    m_file[i]->ha_release_auto_increment();
  }
  DBUG_VOID_RETURN;
}
6005 
6006 /****************************************************************************
6007                 MODULE initialize handler for HANDLER call
6008 ****************************************************************************/
6009 
init_table_handle_for_HANDLER()6010 void ha_partition::init_table_handle_for_HANDLER()
6011 {
6012   uint i;
6013   for (i= m_part_info->get_first_used_partition();
6014        i < m_tot_parts;
6015        i= m_part_info->get_next_used_partition(i))
6016     m_file[i]->init_table_handle_for_HANDLER();
6017   return;
6018 }
6019 
6020 
6021 /**
6022   Return the checksum of the partition.
6023 
6024   @param part_id Partition to checksum.
6025 
6026   @return Checksum or 0 if not supported.
6027 */
6028 
checksum_in_part(uint part_id) const6029 ha_checksum ha_partition::checksum_in_part(uint part_id) const
6030 {
6031   if ((table_flags() & HA_HAS_CHECKSUM))
6032   {
6033     return m_file[part_id]->checksum();
6034   }
6035   return 0;
6036 }
6037 
6038 /****************************************************************************
6039                 MODULE enable/disable indexes
6040 ****************************************************************************/
6041 
6042 /*
6043   Disable indexes for a while
6044   SYNOPSIS
6045     disable_indexes()
6046     mode                      Mode
6047   RETURN VALUES
6048     0                         Success
6049     != 0                      Error
6050 */
6051 
disable_indexes(uint mode)6052 int ha_partition::disable_indexes(uint mode)
6053 {
6054   handler **file;
6055   int error= 0;
6056 
6057   assert(bitmap_is_set_all(&(m_part_info->lock_partitions)));
6058   for (file= m_file; *file; file++)
6059   {
6060     if ((error= (*file)->ha_disable_indexes(mode)))
6061       break;
6062   }
6063   return error;
6064 }
6065 
6066 
6067 /*
6068   Enable indexes again
6069   SYNOPSIS
6070     enable_indexes()
6071     mode                      Mode
6072   RETURN VALUES
6073     0                         Success
6074     != 0                      Error
6075 */
6076 
enable_indexes(uint mode)6077 int ha_partition::enable_indexes(uint mode)
6078 {
6079   handler **file;
6080   int error= 0;
6081 
6082   assert(bitmap_is_set_all(&(m_part_info->lock_partitions)));
6083   for (file= m_file; *file; file++)
6084   {
6085     if ((error= (*file)->ha_enable_indexes(mode)))
6086       break;
6087   }
6088   return error;
6089 }
6090 
6091 
6092 /*
6093   Check if indexes are disabled
6094   SYNOPSIS
6095     indexes_are_disabled()
6096 
6097   RETURN VALUES
6098     0                      Indexes are enabled
6099     != 0                   Indexes are disabled
6100 */
6101 
indexes_are_disabled(void)6102 int ha_partition::indexes_are_disabled(void)
6103 {
6104   handler **file;
6105   int error= 0;
6106 
6107   assert(bitmap_is_set_all(&(m_part_info->lock_partitions)));
6108   for (file= m_file; *file; file++)
6109   {
6110     if ((error= (*file)->indexes_are_disabled()))
6111       break;
6112   }
6113   return error;
6114 }
6115 
6116 
6117 #define KEY_PARTITIONING_CHANGED_STR \
6118   "KEY () partitioning changed, please run:\n" \
6119   "ALTER TABLE %s.%s ALGORITHM = INPLACE %s"
6120 
int ha_partition::check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  int error= HA_ADMIN_NEEDS_CHECK;
  DBUG_ENTER("ha_partition::check_for_upgrade");

  /*
    This is called even without FOR UPGRADE,
    if the .frm version is lower than the current version.
    In that case return that it needs checking!
  */
  if (!(check_opt->sql_flags & TT_FOR_UPGRADE))
  {
    /* Engine natively supports partitioning: suggest upgrading the table. */
    if (m_file[0]->ht->partition_flags)
      DBUG_RETURN(HA_ADMIN_NEEDS_UPG_PART);

    DBUG_RETURN(error);
  }

  /*
    Partitions will be checked for during their ha_check!

    Check if KEY (sub)partitioning was used and any field's hash calculation
    differs from 5.1, see bug#14521864.
  */
  if (table->s->mysql_version < 50503 &&              // 5.1 table (<5.5.3)
      ((m_part_info->part_type == HASH_PARTITION &&   // KEY partitioned
        m_part_info->list_of_part_fields) ||
       (m_is_sub_partitioned &&                       // KEY subpartitioned
        m_part_info->list_of_subpart_fields)))
  {
    Field **field;
    if (m_is_sub_partitioned)
    {
      field= m_part_info->subpart_field_array;
    }
    else
    {
      field= m_part_info->part_field_array;
    }
    /* Scan the partitioning fields for types whose hash changed in 5.5. */
    for (; *field; field++)
    {
      switch ((*field)->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          /*
            Affected type found: tell the user how to fix the table by
            printing the old partition clause with ALGORITHM = 1.
          */
          THD *thd= ha_thd();
          char *part_buf;
          String db_name, table_name;
          uint part_buf_len;
          bool skip_generation= false;
          partition_info::enum_key_algorithm old_algorithm;
          old_algorithm= m_part_info->key_algorithm;
          error= HA_ADMIN_FAILED;
          append_identifier(ha_thd(), &db_name, table_share->db.str,
                            table_share->db.length);
          append_identifier(ha_thd(), &table_name, table_share->table_name.str,
                            table_share->table_name.length);
          if (m_part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE)
          {
            /*
              Only possible when someone tampered with .frm files,
              like during tests :)
            */
            skip_generation= true;
          }
          /* Temporarily switch to 5.1 hashing so the clause prints as old. */
          m_part_info->key_algorithm= partition_info::KEY_ALGORITHM_51;
          if (skip_generation ||
              !(part_buf= generate_partition_syntax(m_part_info,
                                                    &part_buf_len,
                                                    true,
                                                    true,
                                                    NULL,
                                                    NULL,
                                                    NULL)) ||
              print_admin_msg(thd, SQL_ADMIN_MSG_TEXT_SIZE + 1, "error",
                              table_share->db.str,
                              table->alias,
                              opt_op_name[CHECK_PARTS],
                              KEY_PARTITIONING_CHANGED_STR,
                              db_name.c_ptr_safe(),
                              table_name.c_ptr_safe(),
                              part_buf))
          {
            /* Error creating admin message (too long string?). */
            print_admin_msg(thd, MI_MAX_MSG_BUF, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[CHECK_PARTS],
                            KEY_PARTITIONING_CHANGED_STR,
                            db_name.c_ptr_safe(), table_name.c_ptr_safe(),
                            "<old partition clause>, but add ALGORITHM = 1"
                            " between 'KEY' and '(' to change the metadata"
                            " without the need of a full table rebuild.");
          }
          /* Restore the real key algorithm before returning. */
          m_part_info->key_algorithm= old_algorithm;
          DBUG_RETURN(error);
        }
      default:
        /* Not affected! */
        ;
      }
    }
  }

  if (m_file[0]->ht->partition_flags)
  {
    /* No longer needs ha_partition. */
    error= HA_ADMIN_NEEDS_UPG_PART;
  }

  DBUG_RETURN(error);
}
6245 
6246 /*
6247   We don't know which partition table will be updated before executing
6248   Write_rows_log_event, so update all partitions.
6249 */
rpl_before_write_rows()6250 void ha_partition::rpl_before_write_rows()
6251 {
6252   uint i;
6253   for (i= 0; i < m_tot_parts; i++)
6254   {
6255     m_file[i]->rpl_before_write_rows();
6256   }
6257 }
6258 
6259 /*
6260   Clear flag of all partitions after executing Write_rows_log_event.
6261 */
rpl_after_write_rows()6262 void ha_partition::rpl_after_write_rows()
6263 {
6264   uint i;
6265   for (i= 0; i < m_tot_parts; i++)
6266   {
6267     m_file[i]->rpl_after_write_rows();
6268   }
6269 }
6270 
6271 /*
6272   We don't know which partition table will be updated before executing
6273   Delete_rows_log_event, so update all partitions.
6274 */
rpl_before_delete_rows()6275 void ha_partition::rpl_before_delete_rows()
6276 {
6277 
6278   uint i;
6279   for (i= 0; i < m_tot_parts; i++)
6280   {
6281     m_file[i]->rpl_before_delete_rows();
6282   }
6283 }
6284 
6285 /*
6286   Clear flag of all partitions after executing Delete_rows_log_event.
6287 */
rpl_after_delete_rows()6288 void ha_partition::rpl_after_delete_rows()
6289 {
6290   uint i;
6291   for (i= 0; i < m_tot_parts; i++)
6292   {
6293     m_file[i]->rpl_after_delete_rows();
6294   }
6295 }
6296 
6297 /*
6298   We don't know which partition table will be updated before executing
6299   Update_rows_log_event, so update all partitions.
6300 */
rpl_before_update_rows()6301 void ha_partition::rpl_before_update_rows()
6302 {
6303   uint i;
6304   for (i= 0; i < m_tot_parts; i++)
6305   {
6306     m_file[i]->rpl_before_update_rows();
6307   }
6308 }
6309 
6310 /*
6311   Clear flag of all partitions after executing Update_rows_log_event.
6312 */
rpl_after_update_rows()6313 void ha_partition::rpl_after_update_rows()
6314 {
6315   uint i;
6316   for (i= 0; i < m_tot_parts; i++)
6317   {
6318     m_file[i]->rpl_after_update_rows();
6319   }
6320 }
6321 
6322 /*
6323   Check whether we need to perform row lookup when executing
6324   Update_rows_log_event or Delete_rows_log_event. Use the 1st
6325   partition is enough, see @c ha_tokudb::rpl_lookup_rows().
6326 */
rpl_lookup_rows()6327 bool ha_partition::rpl_lookup_rows()
6328 {
6329   return m_file[0]->rpl_lookup_rows();
6330 }
6331 
6332 /*
6333   Query storage engine to see if it can support handling specific replication
6334   method in its current configuration.
6335 */
rpl_can_handle_stm_event() const6336 bool ha_partition::rpl_can_handle_stm_event() const
6337 {
6338   return m_file[0]->rpl_can_handle_stm_event();
6339 }
6340 
/* Storage engine descriptor handed to the plugin declaration below. */
struct st_mysql_storage_engine partition_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
6343 
/* Server plugin registration for the partitioning helper engine. */
mysql_declare_plugin(partition)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,      /* plugin type              */
  &partition_storage_engine,        /* type-specific descriptor */
  "partition",                      /* plugin name              */
  "Mikael Ronstrom, MySQL AB",      /* author                   */
  "Partition Storage Engine Helper",/* description              */
  PLUGIN_LICENSE_GPL,               /* license                  */
  partition_initialize, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100, /* 1.0 */
  NULL,                       /* status variables                */
  NULL,                       /* system variables                */
  NULL,                       /* config options                  */
  0,                          /* flags                           */
}
mysql_declare_plugin_end;
6361