1 #ifndef PARTITION_HANDLER_INCLUDED
2 #define PARTITION_HANDLER_INCLUDED
3 
4 /*
5    Copyright (c) 2005, 2021, Oracle and/or its affiliates.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License, version 2.0,
9    as published by the Free Software Foundation.
10 
11    This program is also distributed with certain software (including
12    but not limited to OpenSSL) that is licensed under separate terms,
13    as designated in a particular file or component or in included license
14    documentation.  The authors of MySQL hereby grant you an additional
15    permission to link the program and your derivative works with the
16    separately licensed software that they have included with MySQL.
17 
18    This program is distributed in the hope that it will be useful,
19    but WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21    GNU General Public License, version 2.0, for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
26 */
27 
28 #include "my_global.h"            // uint etc.
29 #include "my_base.h"              // ha_rows.
30 #include "handler.h"              // Handler_share
31 #include "sql_partition.h"        // part_id_range
32 #include "mysqld_error.h"         // ER_ILLEGAL_HA
33 #include "priority_queue.h"
34 #include "key.h"                  // key_rec_cmp
35 #include <vector>
36 
37 #define PARTITION_BYTES_IN_POS 2
38 
39 /* forward declarations */
40 typedef struct st_ha_create_information HA_CREATE_INFO;
41 typedef struct st_mem_root MEM_ROOT;
42 
43 static const uint NO_CURRENT_PART_ID= UINT_MAX32;
44 
45 /**
46   bits in Partition_handler::alter_flags():
47 
48   HA_PARTITION_FUNCTION_SUPPORTED indicates that the function is
49   supported at all.
  HA_FAST_CHANGE_PARTITION means that optimized variants of the changes
  exist but they are not necessarily done online.
52 
53   HA_ONLINE_DOUBLE_WRITE means that the handler supports writing to both
54   the new partition and to the old partitions when updating through the
55   old partitioning schema while performing a change of the partitioning.
56   This means that we can support updating of the table while performing
57   the copy phase of the change. For no lock at all also a double write
58   from new to old must exist and this is not required when this flag is
59   set.
60   This is actually removed even before it was introduced the first time.
61   The new idea is that handlers will handle the lock level already in
62   store_lock for ALTER TABLE partitions.
63   TODO: Implement this via the alter-inplace api.
64 */
65 #define HA_PARTITION_FUNCTION_SUPPORTED         (1L << 0)
66 #define HA_FAST_CHANGE_PARTITION                (1L << 1)
67 
/**
  Selector for the kind of operation to run on partitions.

  The numeric values are contiguous, starting at 0 in declaration order.
  NOTE(review): the enumerator names suggest the OPTIMIZE/ANALYZE/CHECK/
  REPAIR/key cache administrative commands; the users of this enum are
  not visible in this header.
*/
enum enum_part_operation {
  OPTIMIZE_PARTS        = 0,
  ANALYZE_PARTS         = 1,
  CHECK_PARTS           = 2,
  REPAIR_PARTS          = 3,
  ASSIGN_KEYCACHE_PARTS = 4,
  PRELOAD_KEYS_PARTS    = 5
};
76 
77 /** Struct used for partition_name_hash */
78 typedef struct st_part_name_def
79 {
80   uchar *partition_name;
81   uint length;
82   uint32 part_id;
83   my_bool is_subpart;
84 } PART_NAME_DEF;
85 
86 
87 /**
88   Initialize partitioning (currently only PSI keys).
89 */
90 void partitioning_init();
91 
92 
93 /**
94   Partition specific Handler_share.
95 */
96 class Partition_share : public Handler_share
97 {
98 public:
99   Partition_share();
100   ~Partition_share();
101 
102   /** Set if auto increment is used an initialized. */
103   bool auto_inc_initialized;
104   /**
105     Mutex protecting next_auto_inc_val.
106     Initialized if table uses auto increment.
107   */
108   mysql_mutex_t *auto_inc_mutex;
109   /** First non reserved auto increment value. */
110   ulonglong next_auto_inc_val;
111   /**
112     Hash of partition names. Initialized by the first handler instance of a
113     table_share calling populate_partition_name_hash().
114     After that it is read-only, i.e. no locking required for reading.
115   */
116   HASH partition_name_hash;
117   /** flag that the name hash is initialized, so it only will do it once. */
118   bool partition_name_hash_initialized;
119 
120   /**
121     Initializes and sets auto_inc_mutex.
122     Only needed to be called if the table have an auto increment.
123     Must hold TABLE_SHARE::LOCK_ha_data when calling.
124   */
125   bool init_auto_inc_mutex(TABLE_SHARE *table_share);
126   /**
127     Release reserved auto increment values not used.
128     @param thd             Thread.
129     @param table_share     Table Share
130     @param next_insert_id  Next insert id (first non used auto inc value).
131     @param max_reserved    End of reserved auto inc range.
132   */
133   void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share,
134                                     const ulonglong next_insert_id,
135                                     const ulonglong max_reserved);
136 
137   /** lock mutex protecting auto increment value next_auto_inc_val. */
lock_auto_inc()138   inline void lock_auto_inc()
139   {
140     assert(auto_inc_mutex);
141     mysql_mutex_lock(auto_inc_mutex);
142   }
143   /** unlock mutex protecting auto increment value next_auto_inc_val. */
unlock_auto_inc()144   inline void unlock_auto_inc()
145   {
146     assert(auto_inc_mutex);
147     mysql_mutex_unlock(auto_inc_mutex);
148   }
149   /**
150     Populate partition_name_hash with partition and subpartition names
151     from part_info.
152     @param part_info  Partition info containing all partitions metadata.
153 
154     @return Operation status.
155       @retval false Success.
156       @retval true  Failure.
157   */
158   bool populate_partition_name_hash(partition_info *part_info);
159   /** Get partition name.
160 
161   @param part_id  Partition id (for subpartitioned table only subpartition
162                   names will be returned.)
163 
164   @return partition name or NULL if error.
165   */
166   const char *get_partition_name(size_t part_id) const;
167 private:
168   const uchar **partition_names;
169   /**
170     Insert [sub]partition name into  partition_name_hash
171     @param name        Partition name.
172     @param part_id     Partition id.
173     @param is_subpart  True if subpartition else partition.
174 
175     @return Operation status.
176       @retval false Success.
177       @retval true  Failure.
178   */
179   bool insert_partition_name_in_hash(const char *name,
180                                      uint part_id,
181                                      bool is_subpart);
182 };
183 
184 
185 /**
186   Class for partitioning specific operations.
187 
188   Returned from handler::get_partition_handler().
189 */
190 class Partition_handler :public Sql_alloc
191 {
192 public:
Partition_handler()193   Partition_handler() {}
~Partition_handler()194   ~Partition_handler() {}
195 
196   /**
197     Get dynamic table information from partition.
198 
199     @param[out] stat_info  Statistics struct to fill in.
200     @param[out] check_sum  Check sum value to fill in if supported.
201     @param[in]  part_id    Partition to report for.
202 
203     @note stat_info and check_sum are initialized by caller.
204     check_sum is only expected to be updated if HA_HAS_CHECKSUM.
205   */
206   virtual void get_dynamic_partition_info(ha_statistics *stat_info,
207                                           ha_checksum *check_sum,
208                                           uint part_id) = 0;
209   /**
210     Get default number of partitions.
211 
212     Used during creating a partitioned table.
213 
214     @param info  Create info.
215     @return Number of default partitions.
216   */
get_default_num_partitions(HA_CREATE_INFO * info)217   virtual int get_default_num_partitions(HA_CREATE_INFO *info) { return 1;}
218   /**
219     Setup auto partitioning.
220 
221     Called for engines with HA_USE_AUTO_PARTITION to setup the partition info
222     object
223 
224     @param[in,out] part_info  Partition object to setup.
225   */
set_auto_partitions(partition_info * part_info)226   virtual void set_auto_partitions(partition_info *part_info) { return; }
227   /**
228     Get number of partitions for table in SE
229 
230     @param name normalized path(same as open) to the table
231 
232     @param[out] num_parts Number of partitions
233 
234     @retval false for success
235     @retval true for failure, for example table didn't exist in engine
236   */
get_num_parts(const char * name,uint * num_parts)237   virtual bool get_num_parts(const char *name,
238                             uint *num_parts)
239   {
240     *num_parts= 0;
241     return false;
242   }
243   /**
244     Set the partition info object to be used by the handler.
245 
246     @param part_info  Partition info to be used by the handler.
247     @param early      True if called when part_info only created and parsed,
248                       but not setup, checked or fixed.
249   */
250   virtual void set_part_info(partition_info *part_info, bool early) = 0;
251   /**
252     Initialize partition.
253 
254     @param mem_root  Memory root for memory allocations.
255 
256     @return Operation status
257       @retval false  Success.
258       @retval true   Failure.
259   */
initialize_partition(MEM_ROOT * mem_root)260   virtual bool initialize_partition(MEM_ROOT *mem_root) {return false;}
261 
262 
263   /**
264     Truncate partitions.
265 
266     Truncate all partitions matching table->part_info->read_partitions.
267     Handler level wrapper for truncating partitions, will ensure that
268     mark_trx_read_write() is called and also checks locking assertions.
269 
270     @return Operation status.
271       @retval    0  Success.
272       @retval != 0  Error code.
273   */
truncate_partition()274   int truncate_partition()
275   {
276     handler *file= get_handler();
277     if (!file)
278     {
279       return HA_ERR_WRONG_COMMAND;
280     }
281     assert(file->table_share->tmp_table != NO_TMP_TABLE ||
282            file->m_lock_type == F_WRLCK);
283     file->mark_trx_read_write();
284     return truncate_partition_low();
285   }
286   /**
287     Change partitions.
288 
289     Change partitions according to their partition_element::part_state set up
290     in prep_alter_part_table(). Will create new partitions and copy requested
291     partitions there. Also updating part_state to reflect current state.
292 
293     Handler level wrapper for changing partitions.
294     This is the reason for having Partition_handler a friend class of handler,
295     mark_trx_read_write() is called and also checks locking assertions.
296     to ensure that mark_trx_read_write() is called and checking the asserts.
297 
298     @param[in]     create_info  Table create info.
299     @param[in]     path         Path including table name.
300     @param[out]    copied       Number of rows copied.
301     @param[out]    deleted      Number of rows deleted.
302   */
change_partitions(HA_CREATE_INFO * create_info,const char * path,ulonglong * const copied,ulonglong * const deleted)303   int change_partitions(HA_CREATE_INFO *create_info,
304                         const char *path,
305                         ulonglong * const copied,
306                         ulonglong * const deleted)
307   {
308     handler *file= get_handler();
309     if (!file)
310     {
311       my_error(ER_ILLEGAL_HA, MYF(0), create_info->alias);
312       return HA_ERR_WRONG_COMMAND;
313     }
314     assert(file->table_share->tmp_table != NO_TMP_TABLE ||
315            file->m_lock_type != F_UNLCK);
316     file->mark_trx_read_write();
317     return change_partitions_low(create_info, path, copied, deleted);
318   }
319   /**
320     Alter flags.
321 
322     Given a set of alter table flags, return which is supported.
323 
324     @param flags  Alter table operation flags.
325 
326     @return Supported alter table flags.
327   */
alter_flags(uint flags)328   virtual uint alter_flags(uint flags) const
329   { return 0; }
330 
331   /**
332     Get partition row type from SE
333     @param       part_id    Id of partition for which row type to be retrieved
334     @return      Partition row type.
335   */
get_partition_row_type(uint part_id)336   virtual enum row_type get_partition_row_type(uint part_id) {
337     return ROW_TYPE_NOT_USED;
338   }
339 
340 private:
341   /**
342     Truncate partition.
343 
344     Low-level primitive for handler, implementing
345     Partition_handler::truncate_partition().
346 
347     @return Operation status
348       @retval    0  Success.
349       @retval != 0  Error code.
350   */
truncate_partition_low()351   virtual int truncate_partition_low()
352   { return HA_ERR_WRONG_COMMAND; }
353   /**
354     Truncate partition.
355 
356     Low-level primitive for handler, implementing
357     Partition_handler::change_partitions().
358 
359     @param[in]     create_info  Table create info.
360     @param[in]     path         Path including table name.
361     @param[out]    copied       Number of rows copied.
362     @param[out]    deleted      Number of rows deleted.
363 
364     @return Operation status
365       @retval    0  Success.
366       @retval != 0  Error code.
367   */
change_partitions_low(HA_CREATE_INFO * create_info,const char * path,ulonglong * const copied,ulonglong * const deleted)368   virtual int change_partitions_low(HA_CREATE_INFO *create_info,
369                                     const char *path,
370                                     ulonglong * const copied,
371                                     ulonglong * const deleted)
372   {
373     my_error(ER_ILLEGAL_HA, MYF(0), create_info->alias);
374     return HA_ERR_WRONG_COMMAND;
375   }
376   /**
377     Return the table handler.
378 
379     For some partitioning specific functions it is still needed to access
380     the handler directly for transaction handling (mark_trx_read_write())
381     and to assert correct locking.
382 
383     @return handler or NULL if not supported.
384   */
get_handler()385   virtual handler *get_handler()
386   { return NULL; }
387 };
388 
389 
390 /// Maps compare function to strict weak ordering required by Priority_queue.
391 struct Key_rec_less
392 {
393   typedef int (*key_compare_fun)(KEY**, uchar *, uchar *);
394 
Key_rec_lessKey_rec_less395   explicit Key_rec_less(KEY **keys)
396     : m_keys(keys), m_fun(key_rec_cmp), m_max_at_top(false)
397   {
398   }
399 
operatorKey_rec_less400   bool operator()(uchar *first, uchar *second)
401   {
402     const int cmpval=
403      (*m_fun)(m_keys, first + m_rec_offset, second + m_rec_offset);
404     return m_max_at_top ? cmpval < 0 : cmpval > 0;
405   }
406 
407   KEY **m_keys;
408   key_compare_fun m_fun;
409   uint m_rec_offset;
410   bool m_max_at_top;
411 };
412 
413 
414 /**
415   Partition_helper is a helper class that implements most generic partitioning
416   functionality such as:
417   table scan, index scan (both ordered and non-ordered),
418   insert (write_row()), delete and update.
419   And includes ALTER TABLE ... ADD/COALESCE/DROP/REORGANIZE/... PARTITION
420   support.
421   It also implements a cache for the auto increment value and check/repair for
422   rows in wrong partition.
423 
424   How to use it:
425   Inherit it and implement:
426   - *_in_part() functions for row operations.
427   - prepare_for_new_partitions(), create_new_partition(), close_new_partitions()
428     write_row_in_new_part() for handling 'fast' alter partition.
429 */
430 class Partition_helper : public Sql_alloc
431 {
432   typedef Priority_queue<uchar *, std::vector<uchar*>, Key_rec_less> Prio_queue;
433 public:
434   Partition_helper(handler *main_handler);
435   ~Partition_helper();
436 
437   /**
438     Set partition info.
439 
440     To be called from Partition_handler.
441 
442     @param  part_info  Partition info to use.
443     @param  early      True if called when part_info only created and parsed,
444                        but not setup, checked or fixed.
445   */
446   virtual void set_part_info_low(partition_info *part_info, bool early);
447   /**
448     Initialize variables used before the table is opened.
449 
450     @param mem_root  Memory root to allocate things from (not yet used).
451 
452     @return Operation status.
453       @retval false success.
454       @retval true  failure.
455   */
init_partitioning(MEM_ROOT * mem_root)456   inline bool init_partitioning(MEM_ROOT *mem_root)
457   {
458 #ifndef NDEBUG
459     m_key_not_found_partitions.bitmap= NULL;
460 #endif
461     return false;
462   }
463 
464 
465   /**
466     INSERT/UPDATE/DELETE functions.
467     @see handler.h
468     @{
469   */
470 
471   /**
472     Insert a row to the partitioned table.
473 
474     @param buf The row in MySQL Row Format.
475 
476     @return Operation status.
477       @retval    0 Success
478       @retval != 0 Error code
479   */
480   int ph_write_row(uchar *buf);
481   /**
482     Update an existing row in the partitioned table.
483 
484     Yes, update_row() does what you expect, it updates a row. old_data will
485     have the previous row record in it, while new_data will have the newest
486     data in it.
487     Keep in mind that the server can do updates based on ordering if an
488     ORDER BY clause was used. Consecutive ordering is not guaranteed.
489 
490     If the new record belongs to a different partition than the old record
491     then it will be inserted into the new partition and deleted from the old.
492 
493     new_data is always record[0]
494     old_data is always record[1]
495 
496     @param old_data  The old record in MySQL Row Format.
497     @param new_data  The new record in MySQL Row Format.
498     @param lookup_rows Indicator for TokuDB read free replication.
499 
500     @return Operation status.
501       @retval    0 Success
502       @retval != 0 Error code
503   */
504   int ph_update_row(const uchar *old_data, uchar *new_data,
505                     bool lookup_rows = true);
506   /**
507     Delete an existing row in the partitioned table.
508 
509     This will delete a row. buf will contain a copy of the row to be deleted.
510     The server will call this right after the current row has been read
511     (from either a previous rnd_xxx() or index_xxx() call).
512     If you keep a pointer to the last row or can access a primary key it will
513     make doing the deletion quite a bit easier.
    Keep in mind that the server does not guarantee consecutive deletions.
515     ORDER BY clauses can be used.
516 
517     buf is either record[0] or record[1]
518 
519     @param buf  The record in MySQL Row Format.
520     @param lookup_rows Indicator for TokuDB read free replication.
521 
522     @return Operation status.
523       @retval    0 Success
524       @retval != 0 Error code
525   */
526   int ph_delete_row(const uchar *buf, bool lookup_rows = true);
527 
528   /** @} */
529 
530   /** Release unused auto increment values. */
531   void ph_release_auto_increment();
532   /**
    Calculate key hash value from a null-terminated array of fields.
534     Support function for KEY partitioning.
535 
536     @param field_array   An array of the fields in KEY partitioning
537 
538     @return hash_value calculated
539 
540     @note Uses the hash function on the character set of the field.
541     Integer and floating point fields use the binary character set by default.
542   */
543   static uint32 ph_calculate_key_hash_value(Field **field_array);
544   /** Get checksum for table.
545     @return Checksum or 0 if not supported (which also may be a correct checksum!).
546   */
547   ha_checksum ph_checksum() const;
548 
549   /**
550     MODULE full table scan
551 
552     This module is used for the most basic access method for any table
553     handler. This is to fetch all data through a full table scan. No
554     indexes are needed to implement this part.
555     It contains one method to start the scan (rnd_init) that can also be
556     called multiple times (typical in a nested loop join). Then proceeding
557     to the next record (rnd_next) and closing the scan (rnd_end).
558     To remember a record for later access there is a method (position)
559     and there is a method used to retrieve the record based on the stored
560     position.
561     The position can be a file position, a primary key, a ROWID dependent
562     on the handler below.
563 
    Unlike index_init(), rnd_init() can be called two times
    without rnd_end() in between (it only makes sense if scan=1).
    Then the second call should prepare for the new table scan
    (e.g. if rnd_init allocates the cursor, the second call should
    position it to the start of the table; no need to deallocate
    and allocate it again).
570     @see handler.h
571     @{
572   */
573 
574   int ph_rnd_init(bool scan);
575   int ph_rnd_end();
576   int ph_rnd_next(uchar *buf);
577   void ph_position(const uchar *record);
578   int ph_rnd_pos(uchar *buf, uchar *pos);
579 
580   /** @} */
581 
582   /**
583     MODULE index scan
584 
585     This part of the handler interface is used to perform access through
586     indexes. The interface is defined as a scan interface but the handler
587     can also use key lookup if the index is a unique index or a primary
588     key index.
589     Index scans are mostly useful for SELECT queries but are an important
590     part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
591     and so forth.
    Naturally an index is needed for an index scan and indexes can either
    be ordered or hash based. Some ordered indexes can return data in order
    but not necessarily all of them.
595     There are many flags that define the behavior of indexes in the
596     various handlers. These methods are found in the optimizer module.
597     -------------------------------------------------------------------------
598 
599     index_read is called to start a scan of an index. The find_flag defines
600     the semantics of the scan. These flags are defined in
601     include/my_base.h
    index_read_idx is the same but also initializes the index before doing
    the same thing as index_read. Thus it is similar to index_init followed
    by index_read. This is also how we implement it.
605 
606     index_read/index_read_idx does also return the first row. Thus for
607     key lookups, the index_read will be the only call to the handler in
608     the index scan.
609 
610     index_init initializes an index before using it and index_end does
611     any end processing needed.
612     @{
613   */
614 
615   int ph_index_init_setup(uint key_nr, bool sorted);
616   int ph_index_init(uint key_nr, bool sorted);
617   int ph_index_end();
618   /*
619     These methods are used to jump to next or previous entry in the index
620     scan. There are also methods to jump to first and last entry.
621   */
622   int ph_index_first(uchar *buf);
623   int ph_index_last(uchar *buf);
624   int ph_index_next(uchar *buf);
625   int ph_index_next_same(uchar *buf, const uchar *key, uint keylen);
626   int ph_index_prev(uchar *buf);
627   int ph_index_read_map(uchar *buf,
628                         const uchar *key,
629                         key_part_map keypart_map,
630                         enum ha_rkey_function find_flag);
631   int ph_index_read_last_map(uchar *buf,
632                              const uchar *key,
633                              key_part_map keypart_map);
634   int ph_index_read_idx_map(uchar *buf,
635                             uint index,
636                             const uchar *key,
637                             key_part_map keypart_map,
638                             enum ha_rkey_function find_flag);
639   int ph_read_range_first(const key_range *start_key,
640                           const key_range *end_key,
641                           bool eq_range_arg,
642                           bool sorted);
643   int ph_read_range_next();
644   /** @} */
645 
646   /**
647     Functions matching Partition_handler API.
648     @{
649   */
650 
651   /**
652     Get statistics from a specific partition.
653     @param[out] stat_info  Area to report values into.
654     @param[out] check_sum  Check sum of partition.
655     @param[in]  part_id    Partition to report from.
656   */
657   virtual void get_dynamic_partition_info_low(ha_statistics *stat_info,
658                                               ha_checksum *check_sum,
659                                               uint part_id);
660 
661   /**
662     Implement the partition changes defined by ALTER TABLE of partitions.
663 
664     Add and copy if needed a number of partitions, during this operation
665     only read operation is ongoing in the server. This is used by
666     ADD PARTITION all types as well as by REORGANIZE PARTITION. For
667     one-phased implementations it is used also by DROP and COALESCE
668     PARTITIONs.
669     One-phased implementation needs the new frm file, other handlers will
670     get zero length and a NULL reference here.
671 
672     @param[in]  create_info       HA_CREATE_INFO object describing all
673                                   fields and indexes in table
674     @param[in]  path              Complete path of db and table name
675     @param[out] copied            Output parameter where number of copied
676                                   records are added
677     @param[out] deleted           Output parameter where number of deleted
678                                   records are added
679 
680     @return Operation status
681       @retval    0 Success
682       @retval != 0 Failure
683   */
684   virtual int change_partitions(HA_CREATE_INFO *create_info,
685                                 const char *path,
686                                 ulonglong * const copied,
687                                 ulonglong * const deleted);
688   /** @} */
689 
690 protected:
691   /* Common helper functions to be used by inheriting engines. */
692 
693   /*
694     open/close functions.
695   */
696 
697   /**
698     Set m_part_share, Allocate internal bitmaps etc. used by open tables.
699 
    @param part_share  Partition share to set as m_part_share.
701 
702     @return Operation status.
703       @retval false success.
704       @retval true  failure.
705   */
706   bool open_partitioning(Partition_share *part_share);
707   /**
708     Close partitioning for a table.
709 
710     Frees memory and release other resources.
711   */
712   void close_partitioning();
713 
714   /**
715     Lock auto increment value if needed.
716   */
lock_auto_increment()717   inline void lock_auto_increment()
718   {
719     /* lock already taken */
720     if (m_auto_increment_safe_stmt_log_lock)
721       return;
722     assert(!m_auto_increment_lock);
723     if(m_table->s->tmp_table == NO_TMP_TABLE)
724     {
725       m_auto_increment_lock= true;
726       m_part_share->lock_auto_inc();
727     }
728   }
729   /**
730     unlock auto increment.
731   */
unlock_auto_increment()732   inline void unlock_auto_increment()
733   {
734     /*
735       If m_auto_increment_safe_stmt_log_lock is true, we have to keep the lock.
736       It will be set to false and thus unlocked at the end of the statement by
737       ha_partition::release_auto_increment.
738     */
739     if(m_auto_increment_lock && !m_auto_increment_safe_stmt_log_lock)
740     {
741       m_part_share->unlock_auto_inc();
742       m_auto_increment_lock= false;
743     }
744   }
745   /**
746     Get auto increment.
747 
748     Only to be used for auto increment values that are the first field in
749     an unique index.
750 
751     @param[in]  increment           Increment between generated numbers.
752     @param[in]  nb_desired_values   Number of values requested.
753     @param[out] first_value         First reserved value (ULLONG_MAX on error).
754     @param[out] nb_reserved_values  Number of values reserved.
755   */
756   void get_auto_increment_first_field(ulonglong increment,
757                                       ulonglong nb_desired_values,
758                                       ulonglong *first_value,
759                                       ulonglong *nb_reserved_values);
760 
761   /**
762     Initialize the record priority queue used for sorted index scans.
763     @return Operation status.
764       @retval    0   Success.
765       @retval != 0   Error code.
766   */
767   int init_record_priority_queue();
768   /**
769     Destroy the record priority queue used for sorted index scans.
770   */
771   void destroy_record_priority_queue();
772   /*
773     Administrative support functions.
774   */
775 
776   /** Print partitioning specific error.
777     @param error   Error code.
778     @param errflag Error flag.
779     @return false if error is printed else true.
780   */
781   bool print_partition_error(int error, myf errflag);
782   /**
783     Print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE.
784 
785     Modeled after mi_check_print_msg.
786 
787     @param thd         Thread context.
788     @param len         Needed length for message buffer.
789     @param msg_type    Message type.
790     @param db_name     Database name.
791     @param table_name  Table name.
792     @param op_name     Operation name.
793     @param fmt         Message (in printf format with additional arguments).
794 
795     @return Operation status.
796       @retval false for success else true.
797   */
798   bool print_admin_msg(THD *thd,
799                        uint len,
800                        const char *msg_type,
801                        const char *db_name,
802                        const char *table_name,
803                        const char *op_name,
804                        const char *fmt,
805                        ...);
806   /**
807     Check/fix misplaced rows.
808 
809     @param part_id  Partition to check/fix.
810     @param repair   If true, move misplaced rows to correct partition.
811 
812     @return Operation status.
813       @retval    0  Success
814       @retval != 0  Error
815   */
816   int check_misplaced_rows(uint part_id, bool repair);
817   /**
818     Set used partitions bitmap from Alter_info.
819 
820     @return false if success else true.
821   */
822   bool set_altered_partitions();
823 
824 private:
825   enum partition_index_scan_type
826   {
827     PARTITION_INDEX_READ= 1,
828     PARTITION_INDEX_FIRST,
829     PARTITION_INDEX_FIRST_UNORDERED,
830     PARTITION_INDEX_LAST,
831     PARTITION_INDEX_READ_LAST,
832     PARTITION_READ_RANGE,
833     PARTITION_NO_INDEX_SCAN
834   };
835 
836   /** handler to use (ha_partition, ha_innopart etc.) */
837   handler *m_handler;
838 
839   /*
840     Access methods to protected areas in handler to avoid adding
841     friend class Partition_helper in class handler.
842   */
843   virtual THD *get_thd() const = 0;
844   virtual TABLE *get_table() const = 0;
845   virtual bool get_eq_range() const = 0;
846   virtual void set_eq_range(bool eq_range) = 0;
847   virtual void set_range_key_part(KEY_PART_INFO *key_part) = 0;
848 
849   /*
850     Implementation of per partition operation by instantiated engine.
851     These must be implemented in the 'real' partition_helper subclass.
852   */
853 
854   /**
855     Write a row in the specified partition.
856 
857     @see handler::write_row().
858 
859     @param  part_id  Partition to write to.
860     @param  buf      Buffer with data to write.
861 
862     @return Operation status.
863       @retval    0  Success.
864       @retval != 0  Error code.
865   */
866   virtual int write_row_in_part(uint part_id, uchar *buf) = 0;
867   /**
868     Update a row in the specified partition.
869 
870     @see handler::update_row().
871 
872     @param  part_id   Partition to update in.
873     @param  old_data  Buffer containing old row.
874     @param  new_data  Buffer containing new row.
875 
876     @return Operation status.
877       @retval    0  Success.
878       @retval != 0  Error code.
879   */
880   virtual int update_row_in_part(uint new_part_id,
881                                  const uchar *old_data,
882                                  uchar *new_data) = 0;
883   /**
884     Delete an existing row in the specified partition.
885 
886     @see handler::delete_row().
887 
888     @param  part_id  Partition to delete from.
889     @param  buf      Buffer containing row to delete.
890 
891     @return Operation status.
892       @retval    0  Success.
893       @retval != 0  Error code.
894   */
895   virtual int delete_row_in_part(uint part_id, const uchar *buf) = 0;
896   /**
897     Initialize the shared auto increment value.
898 
899     @param no_lock  If HA_STATUS_NO_LOCK should be used in info(HA_STATUS_AUTO).
900 
901     Also sets stats.auto_increment_value.
902   */
903   virtual int initialize_auto_increment(bool no_lock) = 0;
904   /** Release auto_increment in all underlying partitions. */
release_auto_increment_all_parts()905   virtual void release_auto_increment_all_parts() {}
906   /** Save or persist the current max auto increment. */
save_auto_increment(ulonglong nr)907   virtual void save_auto_increment(ulonglong nr) {}
908   /**
909     Per partition equivalent of rnd_* and index_* functions.
910 
911     @see class handler.
912   */
913   virtual int rnd_init_in_part(uint part_id, bool table_scan) = 0;
914   int ph_rnd_next_in_part(uint part_id, uchar *buf);
915   virtual int rnd_next_in_part(uint part_id, uchar *buf) = 0;
916   virtual int rnd_end_in_part(uint part_id, bool scan) = 0;
917   virtual void position_in_last_part(uchar *ref, const uchar *row) = 0;
918   /* If ph_rnd_pos is used then this needs to be implemented! */
rnd_pos_in_part(uint part_id,uchar * buf,uchar * pos)919   virtual int rnd_pos_in_part(uint part_id, uchar *buf, uchar *pos)
920   { assert(0); return HA_ERR_WRONG_COMMAND; }
index_init_in_part(uint part,uint keynr,bool sorted)921   virtual int index_init_in_part(uint part, uint keynr, bool sorted)
922   { assert(0); return HA_ERR_WRONG_COMMAND; }
index_end_in_part(uint part)923   virtual int index_end_in_part(uint part)
924   { assert(0); return HA_ERR_WRONG_COMMAND; }
925   virtual int index_first_in_part(uint part, uchar *buf) = 0;
926   virtual int index_last_in_part(uint part, uchar *buf) = 0;
927   virtual int index_prev_in_part(uint part, uchar *buf) = 0;
928   virtual int index_next_in_part(uint part, uchar *buf) = 0;
929   virtual int index_next_same_in_part(uint part,
930                                       uchar *buf,
931                                       const uchar *key,
932                                       uint length) = 0;
933   virtual int index_read_map_in_part(uint part,
934                                      uchar *buf,
935                                      const uchar *key,
936                                      key_part_map keypart_map,
937                                      enum ha_rkey_function find_flag) = 0;
938   virtual int index_read_last_map_in_part(uint part,
939                                           uchar *buf,
940                                           const uchar *key,
941                                           key_part_map keypart_map) = 0;
942   /**
943     Do read_range_first in the specified partition.
944     If buf is set, then copy the result there instead of table->record[0].
945   */
946   virtual int read_range_first_in_part(uint part,
947                                        uchar *buf,
948                                        const key_range *start_key,
949                                        const key_range *end_key,
950                                        bool eq_range,
951                                        bool sorted) = 0;
952   /**
953     Do read_range_next in the specified partition.
954     If buf is set, then copy the result there instead of table->record[0].
955   */
956   virtual int read_range_next_in_part(uint part, uchar *buf) = 0;
957   virtual int index_read_idx_map_in_part(uint part,
958                                          uchar *buf,
959                                          uint index,
960                                          const uchar *key,
961                                          key_part_map keypart_map,
962                                          enum ha_rkey_function find_flag) = 0;
963   /**
964     Initialize engine specific resources for the record priority queue
965     used duing ordered index reads for multiple partitions.
966 
967     @param used_parts  Number of partitions used in query
968                        (number of set bits in m_part_info->read_partitions).
969 
970     @return Operation status.
971       @retval    0   Success.
972       @retval != 0   Error code.
973   */
init_record_priority_queue_for_parts(uint used_parts)974   virtual int init_record_priority_queue_for_parts(uint used_parts)
975   {
976     return 0;
977   }
978   /**
979     Destroy and release engine specific resources used by the record
980     priority queue.
981   */
destroy_record_priority_queue_for_parts()982   virtual void destroy_record_priority_queue_for_parts() {}
983   /**
984     Checksum for a partition.
985 
986     @param part_id  Partition to checksum.
987   */
checksum_in_part(uint part_id)988   virtual ha_checksum checksum_in_part(uint part_id) const
989   { assert(0); return 0; }
990   /**
991     Copy a cached row.
992 
993     Used when copying a row from the record priority queue to the return buffer.
994     For some engines, like InnoDB, only marked columns must be copied,
995     to preserve non-read columns.
996 
997     @param[out] to_rec    Buffer to copy to.
998     @param[in]  from_rec  Buffer to copy from.
999   */
copy_cached_row(uchar * to_rec,const uchar * from_rec)1000   virtual void copy_cached_row(uchar *to_rec, const uchar *from_rec)
1001   { memcpy(to_rec, from_rec, m_rec_length); }
1002   /**
1003     Prepare for creating new partitions during ALTER TABLE ... PARTITION.
1004     @param  num_partitions  Number of new partitions to be created.
1005     @param  only_create     True if only creating the partition
1006                             (no open/lock is needed).
1007 
1008     @return Operation status.
1009       @retval    0  Success.
1010       @retval != 0  Error code.
1011   */
1012   virtual int prepare_for_new_partitions(uint num_partitions,
1013                                          bool only_create) = 0;
1014   /**
1015     Create a new partition to be filled during ALTER TABLE ... PARTITION.
1016     @param   table         Table to create the partition in.
1017     @param   create_info   Table/partition specific create info.
1018     @param   part_name     Partition name.
1019     @param   new_part_id   Partition id in new table.
1020     @param   part_elem     Partition element.
1021 
1022     @return Operation status.
1023       @retval    0  Success.
1024       @retval != 0  Error code.
1025   */
1026   virtual int create_new_partition(TABLE *table,
1027                                    HA_CREATE_INFO *create_info,
1028                                    const char *part_name,
1029                                    uint new_part_id,
1030                                    partition_element *part_elem) = 0;
1031   /**
1032     Close and finalize new partitions.
1033   */
1034   virtual void close_new_partitions() = 0;
1035   /**
1036     write row to new partition.
1037     @param  new_part   New partition to write to.
1038 
1039     @return Operation status.
1040       @retval    0  Success.
1041       @retval != 0  Error code.
1042   */
1043   virtual int write_row_in_new_part(uint new_part) = 0;
1044 
1045   /* Internal helper functions*/
1046   /**
1047     Update auto increment value if current row contains a higher value.
1048   */
1049   inline void set_auto_increment_if_higher();
1050   /**
1051     Common routine to set up index scans.
1052 
1053     Find out which partitions we'll need to read when scanning the specified
1054     range.
1055 
1056     If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
1057     as we will not need to do merge ordering.
1058 
1059     @param buf            Buffer to later return record in (this function
1060                           needs it to calculate partitioning function values)
1061 
1062     @param idx_read_flag  True <=> m_start_key has range start endpoint which
1063                           probably can be used to determine the set of
1064                           partitions to scan.
1065                           False <=> there is no start endpoint.
1066 
1067     @return Operation status.
1068       @retval   0  Success
1069       @retval !=0  Error code
1070   */
1071   int partition_scan_set_up(uchar *buf, bool idx_read_flag);
1072   /**
1073     Common routine to handle index_next with unordered results.
1074 
1075     These routines are used to scan partitions without considering order.
1076     This is performed in two situations.
1077     1) In read_multi_range this is the normal case
1078     2) When performing any type of index_read, index_first, index_last where
1079     all fields in the partition function is bound. In this case the index
1080     scan is performed on only one partition and thus it isn't necessary to
1081     perform any sort.
1082 
1083     @param[out] buf        Read row in MySQL Row Format.
1084     @param[in]  next_same  Called from index_next_same.
1085 
1086     @return Operation status.
1087       @retval HA_ERR_END_OF_FILE  End of scan
1088       @retval 0                   Success
1089       @retval other               Error code
1090   */
1091   int handle_unordered_next(uchar *buf, bool is_next_same);
1092   /**
1093     Handle index_next when changing to new partition.
1094 
1095     This routine is used to start the index scan on the next partition.
1096     Both initial start and after completing scan on one partition.
1097 
1098     @param[out] buf  Read row in MySQL Row Format
1099 
1100     @return Operation status.
1101       @retval HA_ERR_END_OF_FILE  End of scan
1102       @retval 0                   Success
1103       @retval other               Error code
1104   */
1105   int handle_unordered_scan_next_partition(uchar *buf);
1106   /**
1107     Common routine to start index scan with ordered results.
1108 
1109     @param[out] buf  Read row in MySQL Row Format
1110 
1111     @return Operation status
1112       @retval HA_ERR_END_OF_FILE    End of scan
1113       @retval HA_ERR_KEY_NOT_FOUND  End of scan
1114       @retval 0                     Success
1115       @retval other                 Error code
1116   */
1117   int handle_ordered_index_scan(uchar *buf);
1118   /**
1119     Add index_next/prev results from partitions without exact match.
1120 
1121     If there where any partitions that returned HA_ERR_KEY_NOT_FOUND when
1122     ha_index_read_map was done, those partitions must be included in the
1123     following index_next/prev call.
1124 
1125     @return Operation status
1126       @retval HA_ERR_END_OF_FILE    End of scan
1127       @retval 0                     Success
1128       @retval other                 Error code
1129   */
1130   int handle_ordered_index_scan_key_not_found();
1131   /**
1132     Common routine to handle index_prev with ordered results.
1133 
1134     @param[out] buf  Read row in MySQL Row Format.
1135 
1136     @return Operation status.
1137       @retval HA_ERR_END_OF_FILE  End of scan
1138       @retval 0                   Success
1139       @retval other               Error code
1140   */
1141   int handle_ordered_prev(uchar *buf);
1142   /**
1143     Common routine to handle index_next with ordered results.
1144 
1145     @param[out] buf        Read row in MySQL Row Format.
1146     @param[in]  next_same  Called from index_next_same.
1147 
1148     @return Operation status.
1149       @retval HA_ERR_END_OF_FILE  End of scan
1150       @retval 0                   Success
1151       @retval other               Error code
1152   */
1153   int handle_ordered_next(uchar *buf, bool is_next_same);
1154   /**
1155     Common routine for a number of index_read variants.
1156 
1157     @param[out] buf             Buffer where the record should be returned.
1158     @param[in]  have_start_key  TRUE <=> the left endpoint is available, i.e.
1159                                 we're in index_read call or in read_range_first
1160                                 call and the range has left endpoint.
1161                                 FALSE <=> there is no left endpoint (we're in
1162                                 read_range_first() call and the range has no
1163                                 left endpoint).
1164 
1165     @return Operation status
1166       @retval 0                    OK
1167       @retval HA_ERR_END_OF_FILE   Whole index scanned, without finding the record.
1168       @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
1169       @retval other                Error code.
1170   */
1171   int common_index_read(uchar *buf, bool have_start_key);
1172   /**
1173     Common routine for index_first/index_last.
1174 
1175     @param[out] buf  Read row in MySQL Row Format.
1176 
1177     @return Operation status.
1178       @retval    0  Success
1179       @retval != 0  Error code
1180   */
1181   int common_first_last(uchar *buf);
1182   /**
1183     Return the top record in sort order.
1184 
1185     @param[out] buf  Row returned in MySQL Row Format.
1186   */
1187   void return_top_record(uchar *buf);
1188   /**
1189     Copy partitions as part of ALTER TABLE of partitions.
1190 
1191     change_partitions has done all the preparations, now it is time to
1192     actually copy the data from the reorganized partitions to the new
1193     partitions.
1194 
1195     @param[out] copied   Number of records copied.
1196     @param[out] deleted  Number of records deleted.
1197 
1198     @return Operation status
1199       @retval  0  Success
1200       @retval >0  Error code
1201   */
1202   virtual int copy_partitions(ulonglong * const copied,
1203                               ulonglong * const deleted);
1204 
1205   /**
1206     Set table->read_set taking partitioning expressions into account.
1207   */
1208   void set_partition_read_set();
1209 
1210   /*
1211     These could be private as well,
1212     but easier to expose them to derived classes to use.
1213   */
1214 protected:
1215 
1216   /** Convenience pointer to table from m_handler (i.e. m_handler->table). */
1217   TABLE *m_table;
1218   /** All internal partitioning data! @{ */
1219   /** Tables partitioning info (same as table->part_info) */
1220   partition_info *m_part_info;
1221   /** Is primary key clustered. */
1222   bool m_pkey_is_clustered;
1223   /** Cached value of m_part_info->is_sub_partitioned(). */
1224   bool m_is_sub_partitioned;
1225   /** Partition share for auto_inc handling. */
1226   Partition_share *m_part_share;
1227   /** Total number of partitions. */
1228   uint m_tot_parts;
1229   uint m_last_part;                      // Last accessed partition.
1230   const uchar *m_err_rec;                // record which gave error.
1231   bool m_auto_increment_safe_stmt_log_lock;
1232   bool m_auto_increment_lock;
1233   part_id_range m_part_spec;             // Which parts to scan
1234   uint m_scan_value;                     // Value passed in rnd_init
1235                                          // call
1236   key_range m_start_key;                 // index read key range
1237   enum partition_index_scan_type m_index_scan_type;// What type of index
1238                                                    // scan
1239   uint m_rec_length;                     // Local copy of record length
1240 
1241   bool m_ordered;                        // Ordered/Unordered index scan.
1242   bool m_ordered_scan_ongoing;           // Ordered index scan ongoing.
1243   bool m_reverse_order;                  // Scanning in reverse order (prev).
1244   /** Row and key buffer for ordered index scan. */
1245   uchar *m_ordered_rec_buffer;
1246   /** Prio queue used by sorted read. */
1247   Prio_queue *m_queue;
1248   /** Which partition is to deliver next result. */
1249   uint m_top_entry;
1250   /** Offset in m_ordered_rec_buffer from part buffer to its record buffer. */
1251   uint m_rec_offset;
1252   /**
1253     Current index used for sorting.
1254     If clustered PK exists, then it will be used as secondary index to
1255     sort on if the first is equal in key_rec_cmp.
1256     So if clustered pk: m_curr_key_info[0]= current index and
1257     m_curr_key_info[1]= pk and [2]= NULL.
1258     Otherwise [0]= current index, [1]= NULL, and we will
1259     sort by rowid as secondary sort key if equal first key.
1260   */
1261   KEY *m_curr_key_info[3];
1262   enum enum_using_ref {
1263     /** handler::ref is not copied to the PQ. */
1264     REF_NOT_USED= 0,
1265     /**
1266       handler::ref is copied to the PQ but does not need to be used in sorting.
1267     */
1268     REF_STORED_IN_PQ,
1269     /** handler::ref is copied to the PQ and must be used during sorting. */
1270     REF_USED_FOR_SORT};
1271   /** How handler::ref is used in the priority queue. */
1272   enum_using_ref m_ref_usage;
1273   /** Set if previous index_* call returned HA_ERR_KEY_NOT_FOUND. */
1274   bool m_key_not_found;
1275   /** Partitions that returned HA_ERR_KEY_NOT_FOUND. */
1276   MY_BITMAP m_key_not_found_partitions;
1277   /** @} */
1278 };
1279 #endif /* PARTITION_HANDLER_INCLUDED */
1280