1 /***************************************************************************** 2 3 Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License, version 2.0, as published by the 7 Free Software Foundation. 8 9 This program is also distributed with certain software (including but not 10 limited to OpenSSL) that is licensed under separate terms, as designated in a 11 particular file or component or in included license documentation. The authors 12 of MySQL hereby grant you an additional permission to link the program and 13 your derivative works with the separately licensed software that they have 14 included with MySQL. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19 for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25 *****************************************************************************/ 26 27 /** @file include/row0pread-histogram.h 28 Parallel read histogram interface. 29 30 Created 2019-04-20 by Darshan M N. */ 31 32 #ifndef row0pread_histogram_h 33 #define row0pread_histogram_h 34 35 #include <random> 36 #include "row0pread.h" 37 #include "ut0counter.h" 38 39 class Histogram_sampler { 40 public: 41 /** Constructor. 42 @param[in] max_threads Maximum number of threads to use. 43 @param[in] sampling_seed seed to be used for sampling 44 @param[in] sampling_percentage percentage of sampling that needs to be done 45 @param[in] sampling_method sampling method to be used for sampling */ 46 explicit Histogram_sampler(size_t max_threads, int sampling_seed, 47 double sampling_percentage, 48 enum_sampling_method sampling_method); 49 50 /** Destructor. */ 51 ~Histogram_sampler(); 52 53 /** Initialize the sampler context. 54 @param[in] trx Transaction used for parallel read. 55 @param[in] index clustered index. 56 @param[in] prebuilt prebuilt info 57 @retval true on success. */ 58 bool init(trx_t *trx, dict_index_t *index, row_prebuilt_t *prebuilt); 59 60 /** Buffer next row. 61 @return error code */ 62 dberr_t buffer_next(); 63 64 /** End parallel read in case the reader thread is still active and wait for 65 its exit. This can happen if we're ending sampling prematurely. */ 66 void buffer_end(); 67 68 /** Set the buffer. 69 @param[in] buf buffer to be used to store the row converted to MySQL 70 format. */ set(byte * buf)71 void set(byte *buf) { m_buf = buf; } 72 73 /** Start the sampling process. 74 @return DB_SUCCESS or error code. */ 75 dberr_t run(); 76 77 /** Check if the processing of the record needs to be skipped. 78 In case of record belonging to non-leaf page, we decide if the child page 79 pertaining to the record needs to be skipped. 80 In case of record belonging to leaf page, we read the page regardless. 81 @return true if it needs to be skipped, else false. */ 82 bool skip(); 83 84 private: 85 /** Wait till there is a request to buffer the next row. */ 86 void wait_for_start_of_buffering(); 87 88 /** Wait till the buffering of the row is complete. */ 89 void wait_for_end_of_buffering(); 90 91 /** Signal that the next row needs to be buffered. */ 92 void signal_start_of_buffering(); 93 94 /** Signal that the buffering of the row is complete. */ 95 void signal_end_of_buffering(); 96 97 /** Set the error state. 98 @param[in] err Error state to set to. */ set_error_state(dberr_t err)99 void set_error_state(dberr_t err) { m_err = err; } 100 101 /** @return true if in error state. */ is_error_set()102 bool is_error_set() const MY_ATTRIBUTE((warn_unused_result)) { 103 return (m_err != DB_SUCCESS); 104 } 105 106 /** Each parallel reader thread's init function. 107 @param[in] reader_thread_ctx context information related to the thread 108 @return DB_SUCCESS or error code. */ 109 dberr_t start_callback(Parallel_reader::Thread_ctx *reader_thread_ctx) 110 MY_ATTRIBUTE((warn_unused_result)); 111 112 /** Each parallel reader thread's end function. 113 @param[in] reader_thread_ctx context information related to the thread 114 @return DB_SUCCESS or error code. */ 115 dberr_t finish_callback(Parallel_reader::Thread_ctx *reader_thread_ctx) 116 MY_ATTRIBUTE((warn_unused_result)); 117 118 /** Convert the row in InnoDB format to MySQL format and store in the buffer 119 for server to use. 120 @param[in] ctx Parallel read context. 121 @param[in] rec record that needs to be converted 122 @param[in] offsets offsets belonging to the record 123 @param[in] index index of the record 124 @param[in] prebuilt Row meta-data cache. 125 @return DB_SUCCESS or error code. */ 126 dberr_t sample_rec(const Parallel_reader::Ctx *ctx, const rec_t *rec, 127 ulint *offsets, const dict_index_t *index, 128 row_prebuilt_t *prebuilt); 129 130 /** For each record in a non-leaf block at level 1 (if leaf level is 0) 131 check if the child page needs to be sampled and if so sample all the rows in 132 the child page. 133 @param[in] ctx Parallel read context. 134 @param[in] prebuilt Row meta-data cache. 135 @return error code */ 136 dberr_t process_non_leaf_rec(const Parallel_reader::Ctx *ctx, 137 row_prebuilt_t *prebuilt) 138 MY_ATTRIBUTE((warn_unused_result)); 139 140 /** Process the record in the leaf page. This would happen only when the root 141 page is the leaf page and in such a case we process the page regardless of 142 the sampling percentage. 143 @param[in] ctx Parallel read context. 144 @param[in] prebuilt Row meta-data cache. 145 @return error code */ 146 dberr_t process_leaf_rec(const Parallel_reader::Ctx *ctx, 147 row_prebuilt_t *prebuilt) 148 MY_ATTRIBUTE((warn_unused_result)); 149 150 private: 151 /** Buffer to store the sampled row which is in the MySQL format. */ 152 byte *m_buf{nullptr}; 153 154 /** Event to notify if the next row needs to be buffered. */ 155 os_event_t m_start_buffer_event; 156 157 /** Event to notify if the next row has been buffered. */ 158 os_event_t m_end_buffer_event; 159 160 /** Error code when the row was buffered. */ 161 dberr_t m_err{DB_SUCCESS}; 162 163 /** The parallel reader. */ 164 Parallel_reader m_parallel_reader; 165 166 /** Random generator engine used to provide us random uniformly distrubuted 167 values required to decide if the row in question needs to be sampled or 168 not. */ 169 std::mt19937 m_random_generator; 170 171 /** Uniform distribution used by the random generator. */ 172 static std::uniform_real_distribution<double> m_distribution; 173 174 /** Sampling method to be used for sampling. */ 175 enum_sampling_method m_sampling_method{enum_sampling_method::NONE}; 176 177 /** Sampling percentage to be used for sampling */ 178 double m_sampling_percentage{}; 179 180 /** Sampling seed to be used for sampling */ 181 int m_sampling_seed{}; 182 183 /** Number of rows sampled */ 184 std::atomic_size_t m_n_sampled; 185 }; 186 187 #endif /* !row0pread_histogram_h */ 188