1 #ifndef HA_PARTITION_INCLUDED
2 #define HA_PARTITION_INCLUDED
3 
4 /*
5    Copyright (c) 2005, 2012, Oracle and/or its affiliates.
6    Copyright (c) 2009, 2021, MariaDB Corporation.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; version 2 of the License.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
20 
#include "sql_partition.h"      /* part_id_range, partition_element */
#include "queues.h"             /* QUEUE */
#include <new>                  /* std::nothrow */
23 
24 struct Ordered_blob_storage
25 {
26   String blob;
27   bool set_read_value;
Ordered_blob_storageOrdered_blob_storage28   Ordered_blob_storage() : set_read_value(false)
29   {}
30 };
31 
/*
  Layout constants.
  ORDERED_PART_NUM_OFFSET is the size of one Ordered_blob_storage ** pointer;
  ORDERED_REC_OFFSET adds PARTITION_BYTES_IN_POS bytes on top of that.
  NOTE(review): presumably each element of the ordered record buffer starts
  with a blob-storage pointer, followed by the partition number and then the
  record itself - confirm against ha_partition.cc.
*/
#define PARTITION_BYTES_IN_POS 2
#define ORDERED_PART_NUM_OFFSET sizeof(Ordered_blob_storage **)
#define ORDERED_REC_OFFSET (ORDERED_PART_NUM_OFFSET + PARTITION_BYTES_IN_POS)
35 
36 
/**
  Struct used for partition_name_hash.
  Each instance describes one named partition or subpartition.
*/
typedef struct st_part_name_def
{
  uchar *partition_name;   /* Name; NOTE(review): ownership is not visible here */
  uint length;             /* Presumably the length of partition_name - confirm */
  uint32 part_id;          /* Id of the [sub]partition carrying this name */
  my_bool is_subpart;      /* Set when the name belongs to a subpartition */
} PART_NAME_DEF;
45 
46 /** class where to save partitions Handler_share's */
47 class Parts_share_refs
48 {
49 public:
50   uint num_parts;                              /**< Size of ha_share array */
51   Handler_share **ha_shares;                   /**< Storage for each part */
Parts_share_refs()52   Parts_share_refs()
53   {
54     num_parts= 0;
55     ha_shares= NULL;
56   }
~Parts_share_refs()57   ~Parts_share_refs()
58   {
59     uint i;
60     for (i= 0; i < num_parts; i++)
61       delete ha_shares[i];
62     delete[] ha_shares;
63   }
init(uint arg_num_parts)64   bool init(uint arg_num_parts)
65   {
66     DBUG_ASSERT(!num_parts && !ha_shares);
67     num_parts= arg_num_parts;
68     /* Allocate an array of Handler_share pointers */
69     ha_shares= new Handler_share *[num_parts];
70     if (!ha_shares)
71     {
72       num_parts= 0;
73       return true;
74     }
75     memset(ha_shares, 0, sizeof(Handler_share*) * num_parts);
76     return false;
77   }
78 };
79 
80 class ha_partition;
81 
/*
  Partition Full Text Search info.
  NOTE(review): field roles below are inferred from names and types only;
  confirm against the ft_* methods of ha_partition.
*/
struct st_partition_ft_info
{
  struct _ft_vft        *please;        /* Presumably the FT function table */
  st_partition_ft_info  *next;          /* Link to another info object */
  ha_partition          *file;          /* The partition handler concerned */
  FT_INFO               **part_ft_info; /* Presumably one FT_INFO per partition */
};
90 
91 
#ifdef HAVE_PSI_MUTEX_INTERFACE
/* Instrumentation key handed to mysql_mutex_init() for Partition_share::auto_inc_mutex. */
extern PSI_mutex_key key_partition_auto_inc_mutex;
#endif
95 
96 /**
97   Partition specific Handler_share.
98 */
99 class Partition_share : public Handler_share
100 {
101 public:
102   bool auto_inc_initialized;
103   mysql_mutex_t auto_inc_mutex;                /**< protecting auto_inc val */
104   ulonglong next_auto_inc_val;                 /**< first non reserved value */
105   /**
106     Hash of partition names. Initialized in the first ha_partition::open()
107     for the table_share. After that it is read-only, i.e. no locking required.
108   */
109   bool partition_name_hash_initialized;
110   HASH partition_name_hash;
111   /** Storage for each partitions Handler_share */
112   Parts_share_refs partitions_share_refs;
Partition_share()113   Partition_share()
114     : auto_inc_initialized(false),
115     next_auto_inc_val(0),
116     partition_name_hash_initialized(false),
117     partition_names(NULL)
118   {
119     mysql_mutex_init(key_partition_auto_inc_mutex,
120                     &auto_inc_mutex,
121                     MY_MUTEX_INIT_FAST);
122   }
123 
~Partition_share()124   ~Partition_share()
125   {
126     mysql_mutex_destroy(&auto_inc_mutex);
127     if (partition_names)
128     {
129       my_free(partition_names);
130     }
131     if (partition_name_hash_initialized)
132     {
133       my_hash_free(&partition_name_hash);
134     }
135   }
136 
137   bool init(uint num_parts);
138 
139   /**
140     Release reserved auto increment values not used.
141     @param thd             Thread.
142     @param table_share     Table Share
143     @param next_insert_id  Next insert id (first non used auto inc value).
144     @param max_reserved    End of reserved auto inc range.
145   */
146   void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share,
147                                     const ulonglong next_insert_id,
148                                     const ulonglong max_reserved);
149 
150   /** lock mutex protecting auto increment value next_auto_inc_val. */
lock_auto_inc()151   inline void lock_auto_inc()
152   {
153     mysql_mutex_lock(&auto_inc_mutex);
154   }
155   /** unlock mutex protecting auto increment value next_auto_inc_val. */
unlock_auto_inc()156   inline void unlock_auto_inc()
157   {
158     mysql_mutex_unlock(&auto_inc_mutex);
159   }
160   /**
161     Populate partition_name_hash with partition and subpartition names
162     from part_info.
163     @param part_info  Partition info containing all partitions metadata.
164 
165     @return Operation status.
166       @retval false Success.
167       @retval true  Failure.
168   */
169   bool populate_partition_name_hash(partition_info *part_info);
170   /** Get partition name.
171 
172   @param part_id  Partition id (for subpartitioned table only subpartition
173                   names will be returned.)
174 
175   @return partition name or NULL if error.
176   */
177   const char *get_partition_name(size_t part_id) const;
178 private:
179   const uchar **partition_names;
180   /**
181     Insert [sub]partition name into  partition_name_hash
182     @param name        Partition name.
183     @param part_id     Partition id.
184     @param is_subpart  True if subpartition else partition.
185 
186     @return Operation status.
187       @retval false Success.
188       @retval true  Failure.
189   */
190   bool insert_partition_name_in_hash(const char *name,
191                                      uint part_id,
192                                      bool is_subpart);
193 };
194 
195 
196 /*
197   List of ranges to be scanned by ha_partition's MRR implementation
198 
199   This object is
200    - A KEY_MULTI_RANGE structure (the MRR range)
201    - Storage for the range endpoints that the KEY_MULTI_RANGE has pointers to
202    - list of such ranges (connected through the "next" pointer).
203 */
204 
typedef struct st_partition_key_multi_range
{
  /*
    Number of the range. The ranges are numbered in the order RANGE_SEQ_IF has
    emitted them, starting from 1. The numbering is used by ordered MRR scans.
  */
  uint id;
  /*
    Storage for the range endpoints.
    NOTE(review): presumably [0] is the start key and [1] the end key - confirm.
  */
  uchar *key[2];
  /*
    Sizes of allocated memory in key[]. These may be larger than the actual
    values as this structure is reused across MRR scans
  */
  uint length[2];

  /*
    The range.
    key_multi_range.ptr is a pointer to this PARTITION_KEY_MULTI_RANGE
    object
  */
  KEY_MULTI_RANGE key_multi_range;

  // Range id from the SQL layer
  range_id_t ptr;

  // The next element in the list of MRR ranges.
  st_partition_key_multi_range *next;
} PARTITION_KEY_MULTI_RANGE;
232 
233 
234 /*
235   List of ranges to be scanned in a certain [sub]partition
236 
237   The idea is that there's a list of ranges to be scanned in the table
238   (formed by PARTITION_KEY_MULTI_RANGE structures),
239   and for each [sub]partition, we only need to scan a subset of that list.
240 
241      PKMR1 --> PKMR2 --> PKMR3 -->... // list of PARTITION_KEY_MULTI_RANGE
242        ^                   ^
243        |                   |
244      PPKMR1 ----------> PPKMR2 -->... // list of PARTITION_PART_KEY_MULTI_RANGE
245 
246   This way, per-partition lists of PARTITION_PART_KEY_MULTI_RANGE have pointers
247   to the elements of the global list of PARTITION_KEY_MULTI_RANGE.
248 */
249 
/* One node of a per-[sub]partition list of ranges. */
typedef struct st_partition_part_key_multi_range
{
  /* The element of the global range list that this node refers to. */
  PARTITION_KEY_MULTI_RANGE *partition_key_multi_range;
  /* The next node in the same per-partition list. */
  st_partition_part_key_multi_range *next;
} PARTITION_PART_KEY_MULTI_RANGE;
255 
256 
257 class ha_partition;
258 
259 /*
260   The structure holding information about range sequence to be used with one
261   partition.
262   (pointer to this is used as seq_init_param for RANGE_SEQ_IF structure when
263    invoking MRR for an individual partition)
264 */
265 
typedef struct st_partition_part_key_multi_range_hld
{
  /* Owner object */
  ha_partition *partition;

  /* id of the partition this structure is for */
  uint32 part_id;

  /* Current range we're iterating through */
  PARTITION_PART_KEY_MULTI_RANGE *partition_part_key_multi_range;
} PARTITION_PART_KEY_MULTI_RANGE_HLD;
277 
278 
/*
  Comparison functions with C linkage, defined outside this header.
  NOTE(review): presumably installed as compare callbacks of the priority
  queue used for ordered scans (m_queue) - confirm in ha_partition.cc.
*/
extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
281 
282 class ha_partition :public handler
283 {
284 private:
  /*
    Kind of scan that is active; stored in m_index_scan_type.
    The numeric values are explicit and the value 2 is not assigned.
  */
  enum partition_index_scan_type
  {
    partition_index_read= 0,
    partition_index_first= 1,
    partition_index_last= 3,
    partition_index_read_last= 4,
    partition_read_range = 5,
    partition_no_index_scan= 6,
    partition_read_multi_range = 7,
    partition_ft_read= 8
  };
296   /* Data for the partition handler */
297   int  m_mode;                          // Open mode
298   uint m_open_test_lock;                // Open test_if_locked
299   uchar *m_file_buffer;                 // Content of the .par file
300   char *m_name_buffer_ptr;		// Pointer to first partition name
301   MEM_ROOT m_mem_root;
302   plugin_ref *m_engine_array;           // Array of types of the handlers
303   handler **m_file;                     // Array of references to handler inst.
304   uint m_file_tot_parts;                // Debug
305   handler **m_new_file;                 // Array of references to new handlers
306   handler **m_reorged_file;             // Reorganised partitions
307   handler **m_added_file;               // Added parts kept for errors
308   LEX_CSTRING *m_connect_string;
309   partition_info *m_part_info;          // local reference to partition
310   Field **m_part_field_array;           // Part field array locally to save acc
311   uchar *m_ordered_rec_buffer;          // Row and key buffer for ord. idx scan
312   st_partition_ft_info *ft_first;
313   st_partition_ft_info *ft_current;
314   /*
315     Current index.
316     When used in key_rec_cmp: If clustered pk, index compare
317     must compare pk if given index is same for two rows.
    So normally m_curr_key_info[0]= current index and m_curr_key_info[1]= NULL,
319     and if clustered pk, [0]= current index, [1]= pk, [2]= NULL
320   */
321   KEY *m_curr_key_info[3];              // Current index
322   uchar *m_rec0;                        // table->record[0]
323   const uchar *m_err_rec;               // record which gave error
324   QUEUE m_queue;                        // Prio queue used by sorted read
325 
326   /*
327     Length of an element in m_ordered_rec_buffer. The elements are composed of
328 
329       [part_no] [table->record copy] [underlying_table_rowid]
330 
331     underlying_table_rowid is only stored when the table has no extended keys.
332   */
333   size_t m_priority_queue_rec_len;
334 
335   /*
336     If true, then sorting records by key value also sorts them by their
337     underlying_table_rowid.
338   */
339   bool m_using_extended_keys;
340 
  /*
    Since the partition handler is a handler on top of other handlers, it
    is necessary to keep information about the characteristics of the
    underlying handlers. It is not possible to keep any handler instances
    for this, since the MySQL Server sometimes allocates handler objects
    without freeing them.
  */
  /* State of this handler object; the current value is kept in m_handler_status. */
  enum enum_handler_status
  {
    handler_not_initialized= 0,
    handler_initialized,
    handler_opened,
    handler_closed
  };
355   enum_handler_status m_handler_status;
356 
357   uint m_reorged_parts;                  // Number of reorganised parts
358   uint m_tot_parts;                      // Total number of partitions;
359   uint m_num_locks;                       // For engines like ha_blackhole, which needs no locks
360   uint m_last_part;                      // Last file that we update,write,read
361   part_id_range m_part_spec;             // Which parts to scan
362   uint m_scan_value;                     // Value passed in rnd_init
363                                          // call
364   uint m_ref_length;                     // Length of position in this
365                                          // handler object
366   key_range m_start_key;                 // index read key range
367   enum partition_index_scan_type m_index_scan_type;// What type of index
368                                                    // scan
369   uint m_top_entry;                      // Which partition is to
370                                          // deliver next result
371   uint m_rec_length;                     // Local copy of record length
372 
373   bool m_ordered;                        // Ordered/Unordered index scan
374   bool m_pkey_is_clustered;              // Is primary key clustered
375   bool m_create_handler;                 // Handler used to create table
376   bool m_is_sub_partitioned;             // Is subpartitioned
377   bool m_ordered_scan_ongoing;
378   bool m_rnd_init_and_first;
379   bool m_ft_init_and_first;
380 
381   /*
382     If set, this object was created with ha_partition::clone and doesn't
383     "own" the m_part_info structure.
384   */
385   ha_partition *m_is_clone_of;
386   MEM_ROOT *m_clone_mem_root;
387 
388   /*
389     We keep track if all underlying handlers are MyISAM since MyISAM has a
390     great number of extra flags not needed by other handlers.
391   */
392   bool m_myisam;                         // Are all underlying handlers
393                                          // MyISAM
394   /*
395     We keep track of InnoDB handlers below since it requires proper setting
396     of query_id in fields at index_init and index_read calls.
397   */
398   bool m_innodb;                        // Are all underlying handlers
399                                         // InnoDB
400   /*
401     When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying
402     handlers immediately. Instead we cache it and call the underlying
403     immediately before starting the scan on the partition. This is to
404     prevent allocating a READ CACHE for each partition in parallel when
405     performing a full table scan on MyISAM partitioned table.
406     This state is cleared by extra(HA_EXTRA_NO_CACHE).
407   */
408   bool m_extra_cache;
409   uint m_extra_cache_size;
410   /* The same goes for HA_EXTRA_PREPARE_FOR_UPDATE */
411   bool m_extra_prepare_for_update;
412   /* Which partition has active cache */
413   uint m_extra_cache_part_id;
414 
415   void init_handler_variables();
416   /*
417     Variables for lock structures.
418   */
419 
420   bool auto_increment_lock;             /**< lock reading/updating auto_inc */
421   /**
422     Flag to keep the auto_increment lock through out the statement.
423     This to ensure it will work with statement based replication.
424   */
425   bool auto_increment_safe_stmt_log_lock;
426   /** For optimizing ha_start_bulk_insert calls */
427   MY_BITMAP m_bulk_insert_started;
428   ha_rows   m_bulk_inserted_rows;
429   /** used for prediction of start_bulk_insert rows */
430   enum_monotonicity_info m_part_func_monotonicity_info;
431   part_id_range m_direct_update_part_spec;
432   bool                m_pre_calling;
433   bool                m_pre_call_use_parallel;
434   /* Keep track of bulk access requests */
435   bool                bulk_access_executing;
436 
437   /** keep track of locked partitions */
438   MY_BITMAP m_locked_partitions;
439   /** Stores shared auto_increment etc. */
440   Partition_share *part_share;
441   /** Temporary storage for new partitions Handler_shares during ALTER */
442   List<Parts_share_refs> m_new_partitions_share_refs;
443   /** Sorted array of partition ids in descending order of number of rows. */
444   uint32 *m_part_ids_sorted_by_num_of_records;
445   /* Compare function for my_qsort2, for reversed order. */
446   static int compare_number_of_records(ha_partition *me,
447                                        const uint32 *a,
448                                        const uint32 *b);
449   /** keep track of partitions to call ha_reset */
450   MY_BITMAP m_partitions_to_reset;
451   /** partitions that returned HA_ERR_KEY_NOT_FOUND. */
452   MY_BITMAP m_key_not_found_partitions;
453   bool m_key_not_found;
454   List<String> *m_partitions_to_open;
455   MY_BITMAP m_opened_partitions;
  /**
    This is one of the m_file-s that is guaranteed to be opened.
    It is set in open_read_partitions().
  */
458   handler *m_file_sample;
459 public:
get_child_handlers()460   handler **get_child_handlers()
461   {
462     return m_file;
463   }
get_part_spec()464   virtual part_id_range *get_part_spec()
465   {
466     return &m_part_spec;
467   }
get_no_current_part_id()468   virtual uint get_no_current_part_id()
469   {
470     return NO_CURRENT_PART_ID;
471   }
get_part_share()472   Partition_share *get_part_share() { return part_share; }
473   handler *clone(const char *name, MEM_ROOT *mem_root);
set_part_info(partition_info * part_info)474   virtual void set_part_info(partition_info *part_info)
475   {
476      m_part_info= part_info;
477      m_is_sub_partitioned= part_info->is_sub_partitioned();
478   }
479 
480   virtual void return_record_by_parent();
481 
vers_can_native(THD * thd)482   virtual bool vers_can_native(THD *thd)
483   {
484     if (thd->lex->part_info)
485     {
486       // PARTITION BY SYSTEM_TIME is not supported for now
487       return thd->lex->part_info->part_type != VERSIONING_PARTITION;
488     }
489     else
490     {
491       bool can= true;
492       for (uint i= 0; i < m_tot_parts && can; i++)
493         can= can && m_file[i]->vers_can_native(thd);
494       return can;
495     }
496   }
497 
498   /*
499     -------------------------------------------------------------------------
500     MODULE create/delete handler object
501     -------------------------------------------------------------------------
502     Object create/delete method. Normally called when a table object
503     exists. There is also a method to create the handler object with only
504     partition information. This is used from mysql_create_table when the
505     table is to be created and the engine type is deduced to be the
506     partition handler.
507     -------------------------------------------------------------------------
508   */
509     ha_partition(handlerton *hton, TABLE_SHARE * table);
510     ha_partition(handlerton *hton, partition_info * part_info);
511     ha_partition(handlerton *hton, TABLE_SHARE *share,
512                  partition_info *part_info_arg,
513                  ha_partition *clone_arg,
514                  MEM_ROOT *clone_mem_root_arg);
515    ~ha_partition();
516    void ha_partition_init();
517   /*
518     A partition handler has no characteristics in itself. It only inherits
519     those from the underlying handlers. Here we set-up those constants to
520     enable later calls of the methods to retrieve constants from the under-
521     lying handlers. Returns false if not successful.
522   */
523    bool initialize_partition(MEM_ROOT *mem_root);
524 
525   /*
526     -------------------------------------------------------------------------
527     MODULE meta data changes
528     -------------------------------------------------------------------------
529     Meta data routines to CREATE, DROP, RENAME table and often used at
530     ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..).
531 
532     create_partitioning_metadata is called before opening a new handler object
533     with openfrm to call create. It is used to create any local handler
534     object needed in opening the object in openfrm
535     -------------------------------------------------------------------------
536   */
537   virtual int delete_table(const char *from);
538   virtual int rename_table(const char *from, const char *to);
539   virtual int create(const char *name, TABLE *form,
540 		     HA_CREATE_INFO *create_info);
541   virtual int create_partitioning_metadata(const char *name,
542                                    const char *old_name, int action_flag);
543   virtual void update_create_info(HA_CREATE_INFO *create_info);
544   virtual int change_partitions(HA_CREATE_INFO *create_info,
545                                 const char *path,
546                                 ulonglong * const copied,
547                                 ulonglong * const deleted,
548                                 const uchar *pack_frm_data,
549                                 size_t pack_frm_len);
550   virtual int drop_partitions(const char *path);
551   virtual int rename_partitions(const char *path);
get_no_parts(const char * name,uint * num_parts)552   bool get_no_parts(const char *name, uint *num_parts)
553   {
554     DBUG_ENTER("ha_partition::get_no_parts");
555     *num_parts= m_tot_parts;
556     DBUG_RETURN(0);
557   }
558   virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
559   virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
560                                           uint table_changes);
update_part_create_info(HA_CREATE_INFO * create_info,uint part_id)561   void update_part_create_info(HA_CREATE_INFO *create_info, uint part_id)
562   {
563     m_file[part_id]->update_create_info(create_info);
564   }
565 private:
566   int copy_partitions(ulonglong * const copied, ulonglong * const deleted);
567   void cleanup_new_partition(uint part_count);
568   int prepare_new_partition(TABLE *table, HA_CREATE_INFO *create_info,
569                             handler *file, const char *part_name,
570                             partition_element *p_elem,
571                             uint disable_non_uniq_indexes);
572   /*
573     delete_table and rename_table uses very similar logic which
574     is packed into this routine.
575   */
576   uint del_ren_table(const char *from, const char *to);
577   /*
578     One method to create the table_name.par file containing the names of the
579     underlying partitions, their engine and the number of partitions.
580     And one method to read it in.
581   */
582   bool create_handler_file(const char *name);
583   bool setup_engine_array(MEM_ROOT *mem_root);
584   bool read_par_file(const char *name);
585   bool get_from_handler_file(const char *name, MEM_ROOT *mem_root,
586                              bool is_clone);
587   bool new_handlers_from_part_info(MEM_ROOT *mem_root);
588   bool create_handlers(MEM_ROOT *mem_root);
589   void clear_handler_file();
590   int set_up_table_before_create(TABLE *table_arg,
591                                  const char *partition_name_with_path,
592                                  HA_CREATE_INFO *info,
593                                  partition_element *p_elem);
594   partition_element *find_partition_element(uint part_id);
595   bool insert_partition_name_in_hash(const char *name, uint part_id,
596                                      bool is_subpart);
597   bool populate_partition_name_hash();
598   Partition_share *get_share();
599   bool set_ha_share_ref(Handler_share **ha_share);
600   void fix_data_dir(char* path);
601   bool init_partition_bitmaps();
602   void free_partition_bitmaps();
603 
604 public:
605 
606   /*
607     -------------------------------------------------------------------------
608     MODULE open/close object
609     -------------------------------------------------------------------------
610     Open and close handler object to ensure all underlying files and
611     objects allocated and deallocated for query handling is handled
612     properly.
613     -------------------------------------------------------------------------
614 
    A handler object is opened as part of its initialisation and before
    being used for normal queries (but not always before meta-data changes).
    If the object was opened it will also be closed before being deleted.
618   */
619   virtual int open(const char *name, int mode, uint test_if_locked);
620   virtual int close(void);
621 
622   /*
623     -------------------------------------------------------------------------
624     MODULE start/end statement
625     -------------------------------------------------------------------------
626     This module contains methods that are used to understand start/end of
627     statements, transaction boundaries, and aid for proper concurrency
628     control.
629     The partition handler need not implement abort and commit since this
630     will be handled by any underlying handlers implementing transactions.
631     There is only one call to each handler type involved per transaction
632     and these go directly to the handlers supporting transactions
633     -------------------------------------------------------------------------
634   */
635   virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
636 				     enum thr_lock_type lock_type);
637   virtual int external_lock(THD * thd, int lock_type);
engine_name()638   LEX_CSTRING *engine_name() { return hton_name(partition_ht()); }
639   /*
640     When table is locked a statement is started by calling start_stmt
641     instead of external_lock
642   */
643   virtual int start_stmt(THD * thd, thr_lock_type lock_type);
644   /*
645     Lock count is number of locked underlying handlers (I assume)
646   */
647   virtual uint lock_count(void) const;
648   /*
649     Call to unlock rows not to be updated in transaction
650   */
651   virtual void unlock_row();
652   /*
653     Check if semi consistent read
654   */
655   virtual bool was_semi_consistent_read();
656   /*
657     Call to hint about semi consistent read
658   */
659   virtual void try_semi_consistent_read(bool);
660 
661   /*
662     NOTE: due to performance and resource issues with many partitions,
663     we only use the m_psi on the ha_partition handler, excluding all
664     partitions m_psi.
665   */
666 #ifdef HAVE_M_PSI_PER_PARTITION
667   /*
668     Bind the table/handler thread to track table i/o.
669   */
670   virtual void unbind_psi();
671   virtual void rebind_psi();
672 #endif
673   /*
674     -------------------------------------------------------------------------
675     MODULE change record
676     -------------------------------------------------------------------------
677     This part of the handler interface is used to change the records
678     after INSERT, DELETE, UPDATE, REPLACE method calls but also other
679     special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE.
680     -------------------------------------------------------------------------
681 
682     These methods are used for insert (write_row), update (update_row)
683     and delete (delete_row). All methods to change data always work on
684     one row at a time. update_row and delete_row also contains the old
685     row.
686     delete_all_rows will delete all rows in the table in one call as a
687     special optimisation for DELETE from table;
688 
689     Bulk inserts are supported if all underlying handlers support it.
690     start_bulk_insert and end_bulk_insert is called before and after a
691     number of calls to write_row.
692   */
693   virtual int write_row(uchar * buf);
694   virtual bool start_bulk_update();
695   virtual int exec_bulk_update(ha_rows *dup_key_found);
696   virtual int end_bulk_update();
697   virtual int bulk_update_row(const uchar *old_data, const uchar *new_data,
698                               ha_rows *dup_key_found);
699   virtual int update_row(const uchar * old_data, const uchar * new_data);
700   virtual int direct_update_rows_init(List<Item> *update_fields);
701   virtual int pre_direct_update_rows_init(List<Item> *update_fields);
702   virtual int direct_update_rows(ha_rows *update_rows);
703   virtual int pre_direct_update_rows();
704   virtual bool start_bulk_delete();
705   virtual int end_bulk_delete();
706   virtual int delete_row(const uchar * buf);
707   virtual int direct_delete_rows_init();
708   virtual int pre_direct_delete_rows_init();
709   virtual int direct_delete_rows(ha_rows *delete_rows);
710   virtual int pre_direct_delete_rows();
711   virtual int delete_all_rows(void);
712   virtual int truncate();
713   virtual void start_bulk_insert(ha_rows rows, uint flags);
714   virtual int end_bulk_insert();
715 private:
716   ha_rows guess_bulk_insert_rows();
717   void start_part_bulk_insert(THD *thd, uint part_id);
718   long estimate_read_buffer_size(long original_size);
719 public:
720 
721   /*
722     Method for truncating a specific partition.
723     (i.e. ALTER TABLE t1 TRUNCATE PARTITION p).
724 
725     @remark This method is a partitioning-specific hook
726             and thus not a member of the general SE API.
727   */
728   int truncate_partition(Alter_info *, bool *binlog_stmt);
729 
is_fatal_error(int error,uint flags)730   virtual bool is_fatal_error(int error, uint flags)
731   {
732     if (!handler::is_fatal_error(error, flags) ||
733         error == HA_ERR_NO_PARTITION_FOUND ||
734         error == HA_ERR_NOT_IN_LOCK_PARTITIONS)
735       return FALSE;
736     return TRUE;
737   }
738 
739 
740   /*
741     -------------------------------------------------------------------------
742     MODULE full table scan
743     -------------------------------------------------------------------------
744     This module is used for the most basic access method for any table
745     handler. This is to fetch all data through a full table scan. No
746     indexes are needed to implement this part.
747     It contains one method to start the scan (rnd_init) that can also be
748     called multiple times (typical in a nested loop join). Then proceeding
749     to the next record (rnd_next) and closing the scan (rnd_end).
750     To remember a record for later access there is a method (position)
751     and there is a method used to retrieve the record based on the stored
752     position.
753     The position can be a file position, a primary key, a ROWID dependent
754     on the handler below.
755     -------------------------------------------------------------------------
756   */
757   /*
758     unlike index_init(), rnd_init() can be called two times
759     without rnd_end() in between (it only makes sense if scan=1).
760     then the second call should prepare for the new table scan
761     (e.g if rnd_init allocates the cursor, second call should
762     position it to the start of the table, no need to deallocate
763     and allocate it again
764   */
765   virtual int rnd_init(bool scan);
766   virtual int rnd_end();
767   virtual int rnd_next(uchar * buf);
768   virtual int rnd_pos(uchar * buf, uchar * pos);
769   virtual int rnd_pos_by_record(uchar *record);
770   virtual void position(const uchar * record);
771 
772   /*
773     -------------------------------------------------------------------------
774     MODULE index scan
775     -------------------------------------------------------------------------
776     This part of the handler interface is used to perform access through
777     indexes. The interface is defined as a scan interface but the handler
778     can also use key lookup if the index is a unique index or a primary
779     key index.
780     Index scans are mostly useful for SELECT queries but are an important
781     part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
782     and so forth.
783     Naturally an index is needed for an index scan and indexes can either
    be ordered or hash based. Some ordered indexes can return data in order
785     but not necessarily all of them.
786     There are many flags that define the behavior of indexes in the
787     various handlers. These methods are found in the optimizer module.
788     -------------------------------------------------------------------------
789 
790     index_read is called to start a scan of an index. The find_flag defines
791     the semantics of the scan. These flags are defined in
792     include/my_base.h
    index_read_idx is the same but also initializes the index before doing
794     the same thing as index_read. Thus it is similar to index_init followed
795     by index_read. This is also how we implement it.
796 
797     index_read/index_read_idx does also return the first row. Thus for
798     key lookups, the index_read will be the only call to the handler in
799     the index scan.
800 
801     index_init initializes an index before using it and index_end does
802     any end processing needed.
803   */
804   virtual int index_read_map(uchar * buf, const uchar * key,
805                              key_part_map keypart_map,
806                              enum ha_rkey_function find_flag);
807   virtual int index_init(uint idx, bool sorted);
808   virtual int index_end();
809 
810   /**
    @brief
812     Positions an index cursor to the index specified in the handle. Fetches the
813     row if available. If the key value is null, begin at first key of the
814     index.
815   */
816   virtual int index_read_idx_map(uchar *buf, uint index, const uchar *key,
817                                  key_part_map keypart_map,
818                                  enum ha_rkey_function find_flag);
819   /*
820     These methods are used to jump to next or previous entry in the index
821     scan. There are also methods to jump to first and last entry.
822   */
823   virtual int index_next(uchar * buf);
824   virtual int index_prev(uchar * buf);
825   virtual int index_first(uchar * buf);
826   virtual int index_last(uchar * buf);
827   virtual int index_next_same(uchar * buf, const uchar * key, uint keylen);
828 
829   int index_read_last_map(uchar *buf,
830                           const uchar *key,
831                           key_part_map keypart_map);
832 
833   /*
834     read_first_row is virtual method but is only implemented by
835     handler.cc, no storage engine has implemented it so neither
836     will the partition handler.
837 
838     virtual int read_first_row(uchar *buf, uint primary_key);
839   */
840 
841 
842   virtual int read_range_first(const key_range * start_key,
843 			       const key_range * end_key,
844 			       bool eq_range, bool sorted);
845   virtual int read_range_next();
846 
847 
848   HANDLER_BUFFER *m_mrr_buffer;
849   uint *m_mrr_buffer_size;
850   uchar *m_mrr_full_buffer;
851   uint m_mrr_full_buffer_size;
852   uint m_mrr_new_full_buffer_size;
853   MY_BITMAP m_mrr_used_partitions;
854   uint *m_stock_range_seq;
855   /* not used: uint m_current_range_seq; */
856 
857   /* Value of mrr_mode passed to ha_partition::multi_range_read_init */
858   uint m_mrr_mode;
859 
860   /* Value of n_ranges passed to ha_partition::multi_range_read_init */
861   uint m_mrr_n_ranges;
862 
863   /*
864     Ordered MRR mode:  m_range_info[N] has the range_id of the last record that
865     we've got from partition N
866   */
867   range_id_t *m_range_info;
868 
869   /*
870     TRUE <=> This ha_partition::multi_range_read_next() call is the first one
871   */
872   bool m_multi_range_read_first;
873 
874   /* not used: uint m_mrr_range_init_flags; */
875 
876   /* Number of elements in the list pointed by m_mrr_range_first. Not used */
877   uint m_mrr_range_length;
878 
879   /* Linked list of ranges to scan */
880   PARTITION_KEY_MULTI_RANGE *m_mrr_range_first;
881   PARTITION_KEY_MULTI_RANGE *m_mrr_range_current;
882 
883   /*
884     For each partition: number of ranges MRR scan will scan in the partition
885   */
886   uint *m_part_mrr_range_length;
887 
888   /* For each partition: List of ranges to scan in this partition */
889   PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_first;
890   PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_current;
891   PARTITION_PART_KEY_MULTI_RANGE_HLD *m_partition_part_key_multi_range_hld;
892 
893   /*
894     Sequence of ranges to be scanned (TODO: why not store this in
895     handler::mrr_{iter,funcs}?)
896   */
897   range_seq_t m_seq;
898   RANGE_SEQ_IF *m_seq_if;
899 
900   /* Range iterator structure to be supplied to partitions */
901   RANGE_SEQ_IF m_part_seq_if;
902 
903   virtual int multi_range_key_create_key(
904     RANGE_SEQ_IF *seq,
905     range_seq_t seq_it
906   );
907   virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
908                                               void *seq_init_param,
909                                               uint n_ranges, uint *bufsz,
910                                               uint *mrr_mode,
911                                               Cost_estimate *cost);
912   virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
913                                         uint key_parts, uint *bufsz,
914                                         uint *mrr_mode, Cost_estimate *cost);
915   virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
916                                     uint n_ranges, uint mrr_mode,
917                                     HANDLER_BUFFER *buf);
918   virtual int multi_range_read_next(range_id_t *range_info);
919   virtual int multi_range_read_explain_info(uint mrr_mode, char *str,
920                                             size_t size);
last_part()921   uint last_part() { return m_last_part; }
922 
923 private:
924   bool init_record_priority_queue();
925   void destroy_record_priority_queue();
926   int common_index_read(uchar * buf, bool have_start_key);
927   int common_first_last(uchar * buf);
928   int partition_scan_set_up(uchar * buf, bool idx_read_flag);
929   bool check_parallel_search();
930   int handle_pre_scan(bool reverse_order, bool use_parallel);
931   int handle_unordered_next(uchar * buf, bool next_same);
932   int handle_unordered_scan_next_partition(uchar * buf);
933   int handle_ordered_index_scan(uchar * buf, bool reverse_order);
934   int handle_ordered_index_scan_key_not_found();
935   int handle_ordered_next(uchar * buf, bool next_same);
936   int handle_ordered_prev(uchar * buf);
937   void return_top_record(uchar * buf);
938   void swap_blobs(uchar* rec_buf, Ordered_blob_storage ** storage, bool restore);
939 public:
940   /*
941     -------------------------------------------------------------------------
942     MODULE information calls
943     -------------------------------------------------------------------------
    These calls are used to inform the handler of specifics of the ongoing
945     scans and other actions. Most of these are used for optimisation
946     purposes.
947     -------------------------------------------------------------------------
948   */
949   virtual int info(uint);
950   void get_dynamic_partition_info(PARTITION_STATS *stat_info,
951                                   uint part_id);
952   void set_partitions_to_open(List<String> *partition_names);
953   int change_partitions_to_open(List<String> *partition_names);
954   int open_read_partitions(char *name_buff, size_t name_buff_size);
955   virtual int extra(enum ha_extra_function operation);
956   virtual int extra_opt(enum ha_extra_function operation, ulong arg);
957   virtual int reset(void);
958   virtual uint count_query_cache_dependant_tables(uint8 *tables_type);
959   virtual my_bool
960     register_query_cache_dependant_tables(THD *thd,
961                                           Query_cache *cache,
962                                           Query_cache_block_table **block,
963                                           uint *n);
964 
965 private:
966   typedef int handler_callback(handler *, void *);
967 
968   my_bool reg_query_cache_dependant_table(THD *thd,
969                                           char *engine_key,
970                                           uint engine_key_len,
971                                           char *query_key, uint query_key_len,
972                                           uint8 type,
973                                           Query_cache *cache,
974                                           Query_cache_block_table
975                                           **block_table,
976                                           handler *file, uint *n);
977   static const uint NO_CURRENT_PART_ID= NOT_A_PARTITION_ID;
978   int loop_partitions(handler_callback callback, void *param);
979   int loop_extra_alter(enum ha_extra_function operations);
980   void late_extra_cache(uint partition_id);
981   void late_extra_no_cache(uint partition_id);
982   void prepare_extra_cache(uint cachesize);
get_open_file_sample()983   handler *get_open_file_sample() const { return m_file_sample; }
984 public:
985 
986   /*
987     -------------------------------------------------------------------------
988     MODULE optimiser support
989     -------------------------------------------------------------------------
990     -------------------------------------------------------------------------
991   */
992 
993   /*
994     NOTE !!!!!!
995      -------------------------------------------------------------------------
996      -------------------------------------------------------------------------
997      One important part of the public handler interface that is not depicted in
998      the methods is the attribute records
999 
1000      which is defined in the base class. This is looked upon directly and is
1001      set by calling info(HA_STATUS_INFO) ?
1002      -------------------------------------------------------------------------
1003   */
1004 
1005 private:
1006   /* Helper functions for optimizer hints. */
1007   ha_rows min_rows_for_estimate();
1008   uint get_biggest_used_partition(uint *part_index);
1009 public:
1010 
1011   /*
1012     keys_to_use_for_scanning can probably be implemented as the
1013     intersection of all underlying handlers if mixed handlers are used.
1014     This method is used to derive whether an index can be used for
1015     index-only scanning when performing an ORDER BY query.
1016     Only called from one place in sql_select.cc
1017   */
1018   virtual const key_map *keys_to_use_for_scanning();
1019 
1020   /*
1021     Called in test_quick_select to determine if indexes should be used.
1022   */
1023   virtual double scan_time();
1024 
1025   /*
1026     The next method will never be called if you do not implement indexes.
1027   */
1028   virtual double read_time(uint index, uint ranges, ha_rows rows);
1029   /*
1030     For the given range how many records are estimated to be in this range.
1031     Used by optimiser to calculate cost of using a particular index.
1032   */
1033   virtual ha_rows records_in_range(uint inx, key_range * min_key,
1034 				   key_range * max_key);
1035 
1036   /*
    Upper bound of number of records returned in scan is sum of all
1038     underlying handlers.
1039   */
1040   virtual ha_rows estimate_rows_upper_bound();
1041 
1042   /*
1043     table_cache_type is implemented by the underlying handler but all
1044     underlying handlers must have the same implementation for it to work.
1045   */
1046   virtual uint8 table_cache_type();
1047   virtual ha_rows records();
1048 
1049   /* Calculate hash value for PARTITION BY KEY tables. */
1050   static uint32 calculate_key_hash_value(Field **field_array);
1051 
1052   /*
1053     -------------------------------------------------------------------------
1054     MODULE print messages
1055     -------------------------------------------------------------------------
    This module contains various methods that return text messages for
1057     table types, index type and error messages.
1058     -------------------------------------------------------------------------
1059   */
1060   /*
1061     The name of the index type that will be used for display
1062     Here we must ensure that all handlers use the same index type
1063     for each index created.
1064   */
1065   virtual const char *index_type(uint inx);
1066 
1067   /* The name of the table type that will be used for display purposes */
1068   virtual const char *table_type() const;
1069 
1070   /* The name of the row type used for the underlying tables. */
1071   virtual enum row_type get_row_type() const;
1072 
1073   /*
1074      Handler specific error messages
1075   */
1076   virtual void print_error(int error, myf errflag);
1077   virtual bool get_error_message(int error, String * buf);
1078   /*
1079    -------------------------------------------------------------------------
1080     MODULE handler characteristics
1081     -------------------------------------------------------------------------
1082     This module contains a number of methods defining limitations and
1083     characteristics of the handler. The partition handler will calculate
    these characteristics based on underlying handler characteristics.
1085     -------------------------------------------------------------------------
1086 
1087     This is a list of flags that says what the storage engine
1088     implements. The current table flags are documented in handler.h
1089     The partition handler will support whatever the underlying handlers
1090     support except when specifically mentioned below about exceptions
1091     to this rule.
1092     NOTE: This cannot be cached since it can depend on TRANSACTION ISOLATION
1093     LEVEL which is dynamic, see bug#39084.
1094 
1095     HA_READ_RND_SAME:
1096     Not currently used. (Means that the handler supports the rnd_same() call)
1097     (MyISAM, HEAP)
1098 
1099     HA_TABLE_SCAN_ON_INDEX:
1100     Used to avoid scanning full tables on an index. If this flag is set then
1101     the handler always has a primary key (hidden if not defined) and this
1102     index is used for scanning rather than a full table scan in all
1103     situations.
1104     (InnoDB, Federated)
1105 
1106     HA_REC_NOT_IN_SEQ:
1107     This flag is set for handlers that cannot guarantee that the rows are
1108     returned according to incremental positions (0, 1, 2, 3...).
1109     This also means that rnd_next() should return HA_ERR_RECORD_DELETED
1110     if it finds a deleted row.
1111     (MyISAM (not fixed length row), HEAP, InnoDB)
1112 
1113     HA_CAN_GEOMETRY:
1114     Can the storage engine handle spatial data.
1115     Used to check that no spatial attributes are declared unless
1116     the storage engine is capable of handling it.
1117     (MyISAM)
1118 
1119     HA_FAST_KEY_READ:
1120     Setting this flag indicates that the handler is equally fast in
1121     finding a row by key as by position.
1122     This flag is used in a very special situation in conjunction with
1123     filesort's. For further explanation see intro to init_read_record.
1124     (HEAP, InnoDB)
1125 
1126     HA_NULL_IN_KEY:
    Are NULL values allowed in indexes.
1128     If this is not allowed then it is not possible to use an index on a
1129     NULLable field.
1130     (HEAP, MyISAM, InnoDB)
1131 
1132     HA_DUPLICATE_POS:
    Tells that the position of the conflicting duplicate key
1134     record is stored in table->file->dupp_ref. (insert uses rnd_pos() on
1135     this to find the duplicated row)
1136     (MyISAM)
1137 
1138     HA_CAN_INDEX_BLOBS:
1139     Is the storage engine capable of defining an index of a prefix on
1140     a BLOB attribute.
1141     (Federated, MyISAM, InnoDB)
1142 
1143     HA_AUTO_PART_KEY:
1144     Auto increment fields can be part of a multi-part key. For second part
1145     auto-increment keys, the auto_incrementing is done in handler.cc
1146     (Federated, MyISAM)
1147 
1148     HA_REQUIRE_PRIMARY_KEY:
1149     Can't define a table without primary key (and cannot handle a table
1150     with hidden primary key)
1151     (No handler has this limitation currently)
1152 
1153     HA_WANTS_PRIMARY_KEY:
1154     Can't define a table without primary key except sequences
1155     (Only InnoDB has this when using innodb_force_primary_key == ON)
1156 
1157     HA_STATS_RECORDS_IS_EXACT:
1158     Does the counter of records after the info call specify an exact
1159     value or not. If it does this flag is set.
    Only MyISAM and HEAP use exact count.
1161 
1162     HA_CAN_INSERT_DELAYED:
1163     Can the storage engine support delayed inserts.
1164     To start with the partition handler will not support delayed inserts.
1165     Further investigation needed.
1166     (HEAP, MyISAM)
1167 
1168     HA_PRIMARY_KEY_IN_READ_INDEX:
1169     This parameter is set when the handler will also return the primary key
1170     when doing read-only-key on another index.
1171 
1172     HA_NOT_DELETE_WITH_CACHE:
1173     Seems to be an old MyISAM feature that is no longer used. No handler
1174     has it defined but it is checked in init_read_record.
1175     Further investigation needed.
1176     (No handler defines it)
1177 
1178     HA_NO_PREFIX_CHAR_KEYS:
    Indexes on prefixes of character fields are not allowed.
1180     (Federated)
1181 
1182     HA_CAN_FULLTEXT:
1183     Does the storage engine support fulltext indexes
1184     The partition handler will start by not supporting fulltext indexes.
1185     (MyISAM)
1186 
1187     HA_CAN_SQL_HANDLER:
1188     Can the HANDLER interface in the MySQL API be used towards this
1189     storage engine.
1190     (MyISAM, InnoDB)
1191 
1192     HA_NO_AUTO_INCREMENT:
1193     Set if the storage engine does not support auto increment fields.
1194     (Currently not set by any handler)
1195 
1196     HA_HAS_CHECKSUM:
1197     Special MyISAM feature. Has special SQL support in CREATE TABLE.
1198     No special handling needed by partition handler.
1199     (MyISAM)
1200 
1201     HA_FILE_BASED:
1202     Should file names always be in lower case (used by engines
    that map table names to file names).
1204     Since partition handler has a local file this flag is set.
1205     (Federated, MyISAM)
1206 
1207     HA_CAN_BIT_FIELD:
1208     Is the storage engine capable of handling bit fields?
1209     (MyISAM)
1210 
1211     HA_NEED_READ_RANGE_BUFFER:
1212     Is Read Multi-Range supported => need multi read range buffer
1213     This parameter specifies whether a buffer for read multi range
1214     is needed by the handler. Whether the handler supports this
1215     feature or not is dependent of whether the handler implements
1216     read_multi_range* calls or not. The only handler currently
1217     supporting this feature is NDB so the partition handler need
1218     not handle this call. There are methods in handler.cc that will
1219     transfer those calls into index_read and other calls in the
1220     index scan module.
1221     (No handler defines it)
1222 
1223     HA_PRIMARY_KEY_REQUIRED_FOR_POSITION:
1224     Does the storage engine need a PK for position?
1225     (InnoDB)
1226 
1227     HA_FILE_BASED is always set for partition handler since we use a
1228     special file for handling names of partitions, engine types.
1229     HA_REC_NOT_IN_SEQ is always set for partition handler since we cannot
1230     guarantee that the records will be returned in sequence.
1231     HA_DUPLICATE_POS,
1232     HA_CAN_INSERT_DELAYED, HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is disabled
1233     until further investigated.
1234   */
1235   virtual Table_flags table_flags() const;
1236 
1237   /*
1238     This is a bitmap of flags that says how the storage engine
1239     implements indexes. The current index flags are documented in
1240     handler.h. If you do not implement indexes, just return zero
1241     here.
1242 
1243     part is the key part to check. First key part is 0
    If all_parts is set, MySQL wants to know the flags for the combined
1245     index up to and including 'part'.
1246 
1247     HA_READ_NEXT:
1248     Does the index support read next, this is assumed in the server
1249     code and never checked so all indexes must support this.
1250     Note that the handler can be used even if it doesn't have any index.
1251     (HEAP, MyISAM, Federated, InnoDB)
1252 
1253     HA_READ_PREV:
1254     Can the index be used to scan backwards.
1255     (HEAP, MyISAM, InnoDB)
1256 
1257     HA_READ_ORDER:
1258     Can the index deliver its record in index order. Typically true for
1259     all ordered indexes and not true for hash indexes.
1260     In first step this is not true for partition handler until a merge
1261     sort has been implemented in partition handler.
1262     Used to set keymap part_of_sortkey
1263     This keymap is only used to find indexes usable for resolving an ORDER BY
1264     in the query. Thus in most cases index_read will work just fine without
1265     order in result production. When this flag is set it is however safe to
1266     order all output started by index_read since most engines do this. With
1267     read_multi_range calls there is a specific flag setting order or not
1268     order so in those cases ordering of index output can be avoided.
1269     (InnoDB, HEAP, MyISAM)
1270 
1271     HA_READ_RANGE:
1272     Specify whether index can handle ranges, typically true for all
1273     ordered indexes and not true for hash indexes.
1274     Used by optimiser to check if ranges (as key >= 5) can be optimised
1275     by index.
1276     (InnoDB, MyISAM, HEAP)
1277 
1278     HA_ONLY_WHOLE_INDEX:
1279     Can't use part key searches. This is typically true for hash indexes
1280     and typically not true for ordered indexes.
1281     (Federated, HEAP)
1282 
1283     HA_KEYREAD_ONLY:
1284     Does the storage engine support index-only scans on this index.
1285     Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD
1286     Used to set key_map keys_for_keyread and to check in optimiser for
1287     index-only scans.  When doing a read under HA_EXTRA_KEYREAD the handler
    only has to fill in the columns the key covers. If
1289     HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns
1290     must be updated in the row.
1291     (InnoDB, MyISAM)
1292   */
index_flags(uint inx,uint part,bool all_parts)1293   virtual ulong index_flags(uint inx, uint part, bool all_parts) const
1294   {
1295     /*
1296       The following code is not safe if you are using different
1297       storage engines or different index types per partition.
1298     */
1299     return m_file[0]->index_flags(inx, part, all_parts);
1300   }
1301 
1302   /**
1303     wrapper function for handlerton alter_table_flags, since
1304     the ha_partition_hton cannot know all its capabilities
1305   */
1306   virtual alter_table_operations alter_table_flags(alter_table_operations flags);
1307   /*
1308     unireg.cc will call the following to make sure that the storage engine
1309     can handle the data it is about to send.
1310 
    The maximum supported value is the minimum of all handlers in the table
1312   */
1313   uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const;
1314   virtual uint max_supported_record_length() const;
1315   virtual uint max_supported_keys() const;
1316   virtual uint max_supported_key_parts() const;
1317   virtual uint max_supported_key_length() const;
1318   virtual uint max_supported_key_part_length() const;
1319   virtual uint min_record_length(uint options) const;
1320 
1321   /*
1322     Primary key is clustered can only be true if all underlying handlers have
1323     this feature.
1324   */
primary_key_is_clustered()1325   virtual bool primary_key_is_clustered()
1326   { return m_pkey_is_clustered; }
1327 
1328   /*
1329     -------------------------------------------------------------------------
1330     MODULE compare records
1331     -------------------------------------------------------------------------
1332     cmp_ref checks if two references are the same. For most handlers this is
1333     a simple memcmp of the reference. However some handlers use primary key
1334     as reference and this can be the same even if memcmp says they are
1335     different. This is due to character sets and end spaces and so forth.
1336     For the partition handler the reference is first two bytes providing the
1337     partition identity of the referred record and then the reference of the
1338     underlying handler.
1339     Thus cmp_ref for the partition handler always returns FALSE for records
1340     not in the same partition and uses cmp_ref on the underlying handler
1341     to check whether the rest of the reference part is also the same.
1342     -------------------------------------------------------------------------
1343   */
1344   virtual int cmp_ref(const uchar * ref1, const uchar * ref2);
1345   /*
1346     -------------------------------------------------------------------------
1347     MODULE auto increment
1348     -------------------------------------------------------------------------
1349     This module is used to handle the support of auto increments.
1350 
1351     This variable in the handler is used as part of the handler interface
1352     It is maintained by the parent handler object and should not be
1353     touched by child handler objects (see handler.cc for its use).
1354 
1355     auto_increment_column_changed
1356      -------------------------------------------------------------------------
1357   */
1358   virtual bool need_info_for_auto_inc();
1359   virtual bool can_use_for_auto_inc_init();
1360   virtual void get_auto_increment(ulonglong offset, ulonglong increment,
1361                                   ulonglong nb_desired_values,
1362                                   ulonglong *first_value,
1363                                   ulonglong *nb_reserved_values);
1364   virtual void release_auto_increment();
1365 private:
1366   virtual int reset_auto_increment(ulonglong value);
1367   void update_next_auto_inc_val();
lock_auto_increment()1368   virtual void lock_auto_increment()
1369   {
1370     /* lock already taken */
1371     if (auto_increment_safe_stmt_log_lock)
1372       return;
1373     if (table_share->tmp_table == NO_TMP_TABLE)
1374     {
1375       part_share->lock_auto_inc();
1376       DBUG_ASSERT(!auto_increment_lock);
1377       auto_increment_lock= TRUE;
1378     }
1379   }
unlock_auto_increment()1380   virtual void unlock_auto_increment()
1381   {
1382     /*
1383       If auto_increment_safe_stmt_log_lock is true, we have to keep the lock.
1384       It will be set to false and thus unlocked at the end of the statement by
1385       ha_partition::release_auto_increment.
1386     */
1387     if (auto_increment_lock && !auto_increment_safe_stmt_log_lock)
1388     {
1389       auto_increment_lock= FALSE;
1390       part_share->unlock_auto_inc();
1391     }
1392   }
set_auto_increment_if_higher(Field * field)1393   virtual void set_auto_increment_if_higher(Field *field)
1394   {
1395     ulonglong nr= (((Field_num*) field)->unsigned_flag ||
1396                    field->val_int() > 0) ? field->val_int() : 0;
1397     lock_auto_increment();
1398     DBUG_ASSERT(part_share->auto_inc_initialized ||
1399                 !can_use_for_auto_inc_init());
1400     /* must check when the mutex is taken */
1401     if (nr >= part_share->next_auto_inc_val)
1402       part_share->next_auto_inc_val= nr + 1;
1403     unlock_auto_increment();
1404   }
1405 
check_insert_autoincrement()1406   void check_insert_autoincrement()
1407   {
1408     /*
1409       If we INSERT into the table having the AUTO_INCREMENT column,
1410       we have to read all partitions for the next autoincrement value
1411       unless we already did it.
1412     */
1413     if (!part_share->auto_inc_initialized &&
1414         ha_thd()->lex->sql_command == SQLCOM_INSERT &&
1415         table->found_next_number_field)
1416       bitmap_set_all(&m_part_info->read_partitions);
1417   }
1418 
1419 public:
1420 
1421   /*
1422      -------------------------------------------------------------------------
1423      MODULE initialize handler for HANDLER call
1424      -------------------------------------------------------------------------
1425      This method is a special InnoDB method called before a HANDLER query.
1426      -------------------------------------------------------------------------
1427   */
1428   virtual void init_table_handle_for_HANDLER();
1429 
1430   /*
1431     The remainder of this file defines the handler methods not implemented
1432     by the partition handler
1433   */
1434 
1435   /*
1436     -------------------------------------------------------------------------
1437     MODULE foreign key support
1438     -------------------------------------------------------------------------
1439     The following methods are used to implement foreign keys as supported by
1440     InnoDB. Implement this ??
1441     get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual
1442     description of how the CREATE TABLE part to define FOREIGN KEY's is done.
1443     free_foreign_key_create_info is used to free the memory area that provided
1444     this description.
1445     can_switch_engines checks if it is ok to switch to a new engine based on
1446     the foreign key info in the table.
1447     -------------------------------------------------------------------------
1448 
1449     virtual char* get_foreign_key_create_info()
1450     virtual void free_foreign_key_create_info(char* str)
1451 
1452     virtual int get_foreign_key_list(THD *thd,
1453     List<FOREIGN_KEY_INFO> *f_key_list)
1454     virtual uint referenced_by_foreign_key()
1455   */
1456     virtual bool can_switch_engines();
1457   /*
1458     -------------------------------------------------------------------------
1459     MODULE fulltext index
1460     -------------------------------------------------------------------------
1461   */
1462     void ft_close_search(FT_INFO *handler);
1463     virtual int ft_init();
1464     virtual int pre_ft_init();
1465     virtual void ft_end();
1466     virtual int pre_ft_end();
1467     virtual FT_INFO *ft_init_ext(uint flags, uint inx, String *key);
1468     virtual int ft_read(uchar *buf);
1469     virtual int pre_ft_read(bool use_parallel);
1470 
1471   /*
1472      -------------------------------------------------------------------------
1473      MODULE restart full table scan at position (MyISAM)
1474      -------------------------------------------------------------------------
1475      The following method is only used by MyISAM when used as
1476      temporary tables in a join.
1477      virtual int restart_rnd_next(uchar *buf, uchar *pos);
1478   */
1479 
1480   /*
1481     -------------------------------------------------------------------------
1482     MODULE in-place ALTER TABLE
1483     -------------------------------------------------------------------------
1484     These methods are in the handler interface. (used by innodb-plugin)
1485     They are used for in-place alter table:
1486     -------------------------------------------------------------------------
1487   */
1488     virtual enum_alter_inplace_result
1489       check_if_supported_inplace_alter(TABLE *altered_table,
1490                                        Alter_inplace_info *ha_alter_info);
1491     virtual bool prepare_inplace_alter_table(TABLE *altered_table,
1492                                              Alter_inplace_info *ha_alter_info);
1493     virtual bool inplace_alter_table(TABLE *altered_table,
1494                                      Alter_inplace_info *ha_alter_info);
1495     virtual bool commit_inplace_alter_table(TABLE *altered_table,
1496                                             Alter_inplace_info *ha_alter_info,
1497                                             bool commit);
1498     virtual void notify_table_changed();
1499 
1500   /*
1501     -------------------------------------------------------------------------
1502     MODULE tablespace support
1503     -------------------------------------------------------------------------
1504     Admin of table spaces is not applicable to the partition handler (InnoDB)
1505     This means that the following method is not implemented:
1506     -------------------------------------------------------------------------
1507     virtual int discard_or_import_tablespace(my_bool discard)
1508   */
1509 
1510   /*
1511     -------------------------------------------------------------------------
1512     MODULE admin MyISAM
1513     -------------------------------------------------------------------------
1514 
1515     -------------------------------------------------------------------------
1516       OPTIMIZE TABLE, CHECK TABLE, ANALYZE TABLE and REPAIR TABLE are
1517       mapped to a routine that handles looping over a given set of
1518       partitions and those routines send a flag indicating to execute on
1519       all partitions.
1520     -------------------------------------------------------------------------
1521   */
1522     virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt);
1523     virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt);
1524     virtual int check(THD* thd, HA_CHECK_OPT *check_opt);
1525     virtual int repair(THD* thd, HA_CHECK_OPT *check_opt);
1526     virtual bool check_and_repair(THD *thd);
1527     virtual bool auto_repair(int error) const;
1528     virtual bool is_crashed() const;
1529     virtual int check_for_upgrade(HA_CHECK_OPT *check_opt);
1530 
1531   /*
1532     -------------------------------------------------------------------------
1533     MODULE condition pushdown
1534     -------------------------------------------------------------------------
1535   */
1536     virtual const COND *cond_push(const COND *cond);
1537     virtual void cond_pop();
1538     virtual void clear_top_table_fields();
1539     virtual int info_push(uint info_type, void *info);
1540 
1541     private:
1542     int handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt, uint flags);
1543     int handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, uint part_id,
1544                         uint flag);
1545     /**
1546       Check if the rows are placed in the correct partition.  If the given
1547       argument is true, then move the rows to the correct partition.
1548     */
1549     int check_misplaced_rows(uint read_part_id, bool repair);
1550     void append_row_to_str(String &str);
1551     public:
1552 
1553   /* Enabled keycache for performance reasons, WL#4571 */
1554     virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt);
1555     virtual int preload_keys(THD* thd, HA_CHECK_OPT* check_opt);
1556     virtual TABLE_LIST *get_next_global_for_child();
1557 
1558   /*
1559     -------------------------------------------------------------------------
1560     MODULE enable/disable indexes
1561     -------------------------------------------------------------------------
1562     Enable/Disable Indexes are only supported by HEAP and MyISAM.
1563     -------------------------------------------------------------------------
1564   */
1565     virtual int disable_indexes(uint mode);
1566     virtual int enable_indexes(uint mode);
1567     virtual int indexes_are_disabled(void);
1568 
1569   /*
1570     -------------------------------------------------------------------------
1571     MODULE append_create_info
1572     -------------------------------------------------------------------------
1573     append_create_info is only used by MyISAM MERGE tables and the partition
1574     handler will not support this handler as underlying handler.
1575     Implement this??
1576     -------------------------------------------------------------------------
1577     virtual void append_create_info(String *packet)
1578   */
1579 
  /*
    The following relies heavily on the fact that all partitions
    use the same storage engine.

    When this limitation is lifted, the following hack should go away,
    and a proper interface for engines needs to be introduced:

      a PARTITION_SHARE structure that
        - has a pointer to the TABLE_SHARE,
        - is given to engines everywhere TABLE_SHARE is used now,
        - has members like option_struct and ha_data.
      Perhaps TABLE needs to be split the same way too...

    This can also be done before partitioning supports a mix of engines,
    but preferably together with other incompatible API changes.
  */
partition_ht()1595   virtual handlerton *partition_ht() const
1596   {
1597     handlerton *h= m_file[0]->ht;
1598     for (uint i=1; i < m_tot_parts; i++)
1599       DBUG_ASSERT(h == m_file[i]->ht);
1600     return h;
1601   }
1602 
part_records(partition_element * part_elem)1603   ha_rows part_records(partition_element *part_elem)
1604   {
1605     DBUG_ASSERT(m_part_info);
1606     uint32 sub_factor= m_part_info->num_subparts ? m_part_info->num_subparts : 1;
1607     uint32 part_id= part_elem->id * sub_factor;
1608     uint32 part_id_end= part_id + sub_factor;
1609     DBUG_ASSERT(part_id_end <= m_tot_parts);
1610     ha_rows part_recs= 0;
1611     for (; part_id < part_id_end; ++part_id)
1612     {
1613       handler *file= m_file[part_id];
1614       DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
1615       file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN);
1616       part_recs+= file->stats.records;
1617     }
1618     return part_recs;
1619   }
1620 
1621   friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
1622   friend int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
1623 };
1624 #endif /* HA_PARTITION_INCLUDED */
1625