1 /*
2    Copyright (c) 2017, Facebook, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
16 
17 /* This C++ file's header */
18 #include "./rdb_io_watchdog.h"
19 
20 /* C++ standard header files */
21 #include <string>
22 #include <vector>
23 
24 /* Rdb_io_watchdog doesn't work on Windows [yet] */
25 #ifdef HAVE_TIMER_DELETE
26 
27 namespace myrocks {
28 
expire_io_callback(union sigval timer_data)29 void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
30   DBUG_ASSERT(timer_data.sival_ptr != nullptr);
31 
32   // The treatment of any pending signal generated by the deleted timer is
33   // unspecified. Therefore we still need to handle the rare case where we
34   // finished the I/O operation right before the timer was deleted and callback
35   // was in flight.
36   if (!m_io_in_progress.load()) {
37     return;
38   }
39 
40   // At this point we know that I/O has been stuck in `write()` for more than
41   // `m_write_timeout` seconds. We'll log a message and shut down the service.
42   // NO_LINT_DEBUG
43   sql_print_error(
44       "MyRocks has detected a combination of I/O requests which "
45       "have cumulatively been blocking for more than %u seconds. "
46       "Shutting the service down.",
47       m_write_timeout);
48 
49   abort();
50 }
51 
io_check_callback(union sigval timer_data)52 void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
53   RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
54 
55   DBUG_ASSERT(timer_data.sival_ptr != nullptr);
56 
57   struct sigevent e;
58 
59   e.sigev_notify = SIGEV_THREAD;
60   e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
61   e.sigev_value.sival_ptr = this;
62   e.sigev_notify_attributes = nullptr;
63 
64   int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);
65 
66   if (unlikely(ret)) {
67     // NO_LINT_DEBUG
68     sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
69     RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
70     return;
71   }
72 
73   struct itimerspec timer_spec;
74   memset(&timer_spec, 0, sizeof(timer_spec));
75 
76   // One time execution only for the watchdog. No interval.
77   timer_spec.it_value.tv_sec = m_write_timeout;
78 
79   ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);
80 
81   if (unlikely(ret)) {
82     // NO_LINT_DEBUG
83     sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
84                       errno);
85     RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
86     return;
87   }
88 
89   m_io_in_progress.store(true);
90 
91   // Verify the write access to all directories we care about.
92   for (const std::string &directory : m_dirs_to_check) {
93     ret = check_write_access(directory);
94 
95     // We'll log a warning and attept to continue to see if the problem happens
96     // in other cases as well.
97     if (unlikely(ret != HA_EXIT_SUCCESS)) {
98       // NO_LINT_DEBUG
99       sql_print_warning("Unable to verify write access to %s (error code %d).",
100                         directory.c_str(), ret);
101     }
102   }
103 
104   m_io_in_progress.store(false);
105 
106   // Clean up the watchdog timer.
107   ret = timer_delete(m_io_check_watchdog_timer);
108 
109   if (unlikely(ret)) {
110     // NO_LINT_DEBUG
111     sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
112   }
113 
114   m_io_check_watchdog_timer = nullptr;
115 
116   RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
117 }
118 
check_write_access(const std::string & dirname) const119 int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
120   DBUG_ASSERT(!dirname.empty());
121   DBUG_ASSERT(m_buf != nullptr);
122 
123   const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;
124 
125   // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
126   // buffer cache.
127   int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
128                 S_IRWXU | S_IWUSR);
129 
130   if (unlikely(fd == -1)) {
131     return fd;
132   }
133 
134   int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);
135 
136   if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
137     return ret;
138   }
139 
140   ret = close(fd);
141 
142   if (unlikely(ret)) {
143     return ret;
144   }
145 
146   ret = unlink(fname.c_str());
147 
148   if (unlikely(ret)) {
149     return ret;
150   }
151 
152   return HA_EXIT_SUCCESS;
153 }
154 
reset_timeout(const uint32_t write_timeout)155 int Rdb_io_watchdog::reset_timeout(const uint32_t write_timeout) {
156   // This function will be called either from a thread initializing MyRocks
157   // engine or handling system variable changes. We need to account for the
158   // possibility of I/O callback executing at the same time. If that happens
159   // then we'll wait for it to finish.
160   RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
161 
162   struct sigevent e;
163 
164   // In all the cases all the active timers needs to be stopped.
165   int ret = stop_timers();
166 
167   if (unlikely(ret)) {
168     // NO_LINT_DEBUG
169     sql_print_warning("Stopping I/O timers failed with %d.", errno);
170     RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
171     return ret;
172   }
173 
174   m_write_timeout = write_timeout;
175   m_io_in_progress.store(false);
176 
177   // Zero means that the I/O timer will be disabled. Therefore there's nothing
178   // for us to do here.
179   if (!write_timeout) {
180     RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
181     return HA_EXIT_SUCCESS;
182   }
183 
184   free(m_buf);
185 
186   ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
187                        RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);
188 
189   if (unlikely(ret)) {
190     m_buf = nullptr;
191     RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
192     // NB! The value of errno is not set.
193     return ret;
194   }
195 
196   DBUG_ASSERT(m_buf != nullptr);
197   memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);
198 
199   // Common case gets handled here - we'll create a timer with a specific
200   // interval to check a set of directories for write access.
201   DBUG_ASSERT(m_dirs_to_check.size() > 0);
202 
203   e.sigev_notify = SIGEV_THREAD;
204   e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
205   e.sigev_value.sival_ptr = this;
206   e.sigev_notify_attributes = nullptr;
207 
208   ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);
209 
210   if (unlikely(ret)) {
211     // NO_LINT_DEBUG
212     sql_print_warning("Creating a I/O timer failed with %d.", errno);
213     RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
214     return ret;
215   }
216 
217   struct itimerspec timer_spec;
218   memset(&timer_spec, 0, sizeof(timer_spec));
219 
220   // I/O timer will need to execute on a certain interval.
221   timer_spec.it_value.tv_sec = m_write_timeout;
222   timer_spec.it_interval.tv_sec = m_write_timeout;
223 
224   ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);
225 
226   if (unlikely(ret)) {
227     // NO_LINT_DEBUG
228     sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
229                       errno);
230   }
231 
232   RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
233 
234   return HA_EXIT_SUCCESS;
235 }
236 
237 }  // namespace myrocks
238 
239 #endif
240 
241