1 /*
2 Copyright (c) 2017, Facebook, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
16
17 /* This C++ file's header */
18 #include "./rdb_io_watchdog.h"
19
20 /* C++ standard header files */
21 #include <string>
22 #include <vector>
23
24 /* Rdb_io_watchdog doesn't work on Windows [yet] */
25 #ifdef HAVE_TIMER_DELETE
26
27 namespace myrocks {
28
expire_io_callback(union sigval timer_data)29 void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
30 DBUG_ASSERT(timer_data.sival_ptr != nullptr);
31
32 // The treatment of any pending signal generated by the deleted timer is
33 // unspecified. Therefore we still need to handle the rare case where we
34 // finished the I/O operation right before the timer was deleted and callback
35 // was in flight.
36 if (!m_io_in_progress.load()) {
37 return;
38 }
39
40 // At this point we know that I/O has been stuck in `write()` for more than
41 // `m_write_timeout` seconds. We'll log a message and shut down the service.
42 // NO_LINT_DEBUG
43 sql_print_error(
44 "MyRocks has detected a combination of I/O requests which "
45 "have cumulatively been blocking for more than %u seconds. "
46 "Shutting the service down.",
47 m_write_timeout);
48
49 abort();
50 }
51
io_check_callback(union sigval timer_data)52 void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
53 RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
54
55 DBUG_ASSERT(timer_data.sival_ptr != nullptr);
56
57 struct sigevent e;
58
59 e.sigev_notify = SIGEV_THREAD;
60 e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
61 e.sigev_value.sival_ptr = this;
62 e.sigev_notify_attributes = nullptr;
63
64 int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);
65
66 if (unlikely(ret)) {
67 // NO_LINT_DEBUG
68 sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
69 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
70 return;
71 }
72
73 struct itimerspec timer_spec;
74 memset(&timer_spec, 0, sizeof(timer_spec));
75
76 // One time execution only for the watchdog. No interval.
77 timer_spec.it_value.tv_sec = m_write_timeout;
78
79 ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);
80
81 if (unlikely(ret)) {
82 // NO_LINT_DEBUG
83 sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
84 errno);
85 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
86 return;
87 }
88
89 m_io_in_progress.store(true);
90
91 // Verify the write access to all directories we care about.
92 for (const std::string &directory : m_dirs_to_check) {
93 ret = check_write_access(directory);
94
95 // We'll log a warning and attept to continue to see if the problem happens
96 // in other cases as well.
97 if (unlikely(ret != HA_EXIT_SUCCESS)) {
98 // NO_LINT_DEBUG
99 sql_print_warning("Unable to verify write access to %s (error code %d).",
100 directory.c_str(), ret);
101 }
102 }
103
104 m_io_in_progress.store(false);
105
106 // Clean up the watchdog timer.
107 ret = timer_delete(m_io_check_watchdog_timer);
108
109 if (unlikely(ret)) {
110 // NO_LINT_DEBUG
111 sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
112 }
113
114 m_io_check_watchdog_timer = nullptr;
115
116 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
117 }
118
check_write_access(const std::string & dirname) const119 int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
120 DBUG_ASSERT(!dirname.empty());
121 DBUG_ASSERT(m_buf != nullptr);
122
123 const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;
124
125 // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
126 // buffer cache.
127 int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
128 S_IRWXU | S_IWUSR);
129
130 if (unlikely(fd == -1)) {
131 return fd;
132 }
133
134 int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);
135
136 if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
137 return ret;
138 }
139
140 ret = close(fd);
141
142 if (unlikely(ret)) {
143 return ret;
144 }
145
146 ret = unlink(fname.c_str());
147
148 if (unlikely(ret)) {
149 return ret;
150 }
151
152 return HA_EXIT_SUCCESS;
153 }
154
reset_timeout(const uint32_t write_timeout)155 int Rdb_io_watchdog::reset_timeout(const uint32_t write_timeout) {
156 // This function will be called either from a thread initializing MyRocks
157 // engine or handling system variable changes. We need to account for the
158 // possibility of I/O callback executing at the same time. If that happens
159 // then we'll wait for it to finish.
160 RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
161
162 struct sigevent e;
163
164 // In all the cases all the active timers needs to be stopped.
165 int ret = stop_timers();
166
167 if (unlikely(ret)) {
168 // NO_LINT_DEBUG
169 sql_print_warning("Stopping I/O timers failed with %d.", errno);
170 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
171 return ret;
172 }
173
174 m_write_timeout = write_timeout;
175 m_io_in_progress.store(false);
176
177 // Zero means that the I/O timer will be disabled. Therefore there's nothing
178 // for us to do here.
179 if (!write_timeout) {
180 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
181 return HA_EXIT_SUCCESS;
182 }
183
184 free(m_buf);
185
186 ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
187 RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);
188
189 if (unlikely(ret)) {
190 m_buf = nullptr;
191 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
192 // NB! The value of errno is not set.
193 return ret;
194 }
195
196 DBUG_ASSERT(m_buf != nullptr);
197 memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);
198
199 // Common case gets handled here - we'll create a timer with a specific
200 // interval to check a set of directories for write access.
201 DBUG_ASSERT(m_dirs_to_check.size() > 0);
202
203 e.sigev_notify = SIGEV_THREAD;
204 e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
205 e.sigev_value.sival_ptr = this;
206 e.sigev_notify_attributes = nullptr;
207
208 ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);
209
210 if (unlikely(ret)) {
211 // NO_LINT_DEBUG
212 sql_print_warning("Creating a I/O timer failed with %d.", errno);
213 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
214 return ret;
215 }
216
217 struct itimerspec timer_spec;
218 memset(&timer_spec, 0, sizeof(timer_spec));
219
220 // I/O timer will need to execute on a certain interval.
221 timer_spec.it_value.tv_sec = m_write_timeout;
222 timer_spec.it_interval.tv_sec = m_write_timeout;
223
224 ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);
225
226 if (unlikely(ret)) {
227 // NO_LINT_DEBUG
228 sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
229 errno);
230 }
231
232 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
233
234 return HA_EXIT_SUCCESS;
235 }
236
237 } // namespace myrocks
238
239 #endif
240
241