1 /*
2    implementation of the update record control
3 
4    Copyright (C) Andrew Tridgell  2007
5    Copyright (C) Ronnie Sahlberg  2007
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/time.h"
24 
25 #include <talloc.h>
26 #include <tevent.h>
27 
28 #include "lib/tdb_wrap/tdb_wrap.h"
29 #include "lib/util/debug.h"
30 #include "lib/util/samba_util.h"
31 #include "lib/util/sys_rw.h"
32 #include "lib/util/util_process.h"
33 
34 #include "ctdb_private.h"
35 #include "ctdb_client.h"
36 
37 #include "common/system.h"
38 #include "common/common.h"
39 #include "common/logging.h"
40 
/*
  per-request state of one update record control
 */
struct ctdb_persistent_write_state {
	/* database the records are written to */
	struct ctdb_db_context *ctdb_db;

	/* marshalled records taken from the control payload */
	struct ctdb_marshall_buffer *m;

	/* control request that is answered once the write has finished */
	struct ctdb_req_control_old *c;

	/* UPDATE_FLAGS_* bits */
	uint32_t flags;
};
47 
/* only update records that already exist locally, never create new ones */
#define UPDATE_FLAGS_REPLACE_ONLY	0x1
50 
51 /*
52   called from a child process to write the data
53  */
ctdb_persistent_store(struct ctdb_persistent_write_state * state)54 static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
55 {
56 	unsigned int i;
57 	int ret;
58 	struct ctdb_rec_data_old *rec = NULL;
59 	struct ctdb_marshall_buffer *m = state->m;
60 
61 	ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
62 	if (ret == -1) {
63 		DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
64 				 state->ctdb_db->db_id));
65 		return -1;
66 	}
67 
68 	for (i=0;i<m->count;i++) {
69 		struct ctdb_ltdb_header oldheader;
70 		struct ctdb_ltdb_header header;
71 		TDB_DATA key, data, olddata;
72 		TALLOC_CTX *tmp_ctx = talloc_new(state);
73 
74 		rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
75 
76 		if (rec == NULL) {
77 			D_ERR("Failed to get next record %u for db_id 0x%08x "
78 			      "in ctdb_persistent_store\n",
79 			      i,
80 			      state->ctdb_db->db_id);
81 			talloc_free(tmp_ctx);
82 			goto failed;
83 		}
84 
85 		/* we must check if the record exists or not because
86 		   ctdb_ltdb_fetch will unconditionally create a record
87 		 */
88 		if (state->flags & UPDATE_FLAGS_REPLACE_ONLY) {
89 			TDB_DATA trec;
90 			trec = tdb_fetch(state->ctdb_db->ltdb->tdb, key);
91 			if (trec.dsize == 0) {
92 				talloc_free(tmp_ctx);
93 				continue;
94 			}
95 			free(trec.dptr);
96 		}
97 
98 		/* fetch the old header and ensure the rsn is less than the new rsn */
99 		ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
100 		if (ret != 0) {
101 			DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
102 					 state->ctdb_db->db_id));
103 			talloc_free(tmp_ctx);
104 			goto failed;
105 		}
106 
107 		if (oldheader.rsn >= header.rsn &&
108 		    (olddata.dsize != data.dsize ||
109 		     memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
110 			DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
111 					  state->ctdb_db->db_id,
112 					  (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
113 			talloc_free(tmp_ctx);
114 			goto failed;
115 		}
116 
117 		talloc_free(tmp_ctx);
118 
119 		ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
120 		if (ret != 0) {
121 			DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
122 					  state->ctdb_db->db_id));
123 			goto failed;
124 		}
125 	}
126 
127 	ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
128 	if (ret == -1) {
129 		DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
130 				 state->ctdb_db->db_id));
131 		return -1;
132 	}
133 
134 	return 0;
135 
136 failed:
137 	tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
138 	return -1;
139 }
140 
141 
142 /*
143   called when we the child has completed the persistent write
144   on our behalf
145  */
ctdb_persistent_write_callback(int status,void * private_data)146 static void ctdb_persistent_write_callback(int status, void *private_data)
147 {
148 	struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
149 								   struct ctdb_persistent_write_state);
150 
151 
152 	ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
153 
154 	talloc_free(state);
155 }
156 
157 /*
158   called if our lockwait child times out
159  */
ctdb_persistent_lock_timeout(struct tevent_context * ev,struct tevent_timer * te,struct timeval t,void * private_data)160 static void ctdb_persistent_lock_timeout(struct tevent_context *ev,
161 					 struct tevent_timer *te,
162 					 struct timeval t, void *private_data)
163 {
164 	struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
165 								   struct ctdb_persistent_write_state);
166 	ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
167 	talloc_free(state);
168 }
169 
/*
  bookkeeping for one child process that writes records on behalf of
  the daemon
 */
struct childwrite_handle {
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db;

	/* fd event watching the read end of the pipe */
	struct tevent_fd *fde;

	/* status pipe: fd[0] is read by the parent, fd[1] is written
	   by the child */
	int fd[2];

	/* pid of the writing child */
	pid_t child;

	/* callback and its argument, invoked with the child's status */
	void *private_data;
	void (*callback)(int status, void *private_data);

	/* time the child was started, used for the latency statistics */
	struct timeval start_time;
};
180 
childwrite_destructor(struct childwrite_handle * h)181 static int childwrite_destructor(struct childwrite_handle *h)
182 {
183 	CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
184 	ctdb_kill(h->ctdb, h->child, SIGKILL);
185 	return 0;
186 }
187 
188 /* called when the child process has finished writing the record to the
189    database
190 */
childwrite_handler(struct tevent_context * ev,struct tevent_fd * fde,uint16_t flags,void * private_data)191 static void childwrite_handler(struct tevent_context *ev,
192 			       struct tevent_fd *fde,
193 			       uint16_t flags, void *private_data)
194 {
195 	struct childwrite_handle *h = talloc_get_type(private_data,
196 						     struct childwrite_handle);
197 	void *p = h->private_data;
198 	void (*callback)(int, void *) = h->callback;
199 	pid_t child = h->child;
200 	TALLOC_CTX *tmp_ctx = talloc_new(ev);
201 	int ret;
202 	char c;
203 
204 	CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
205 	CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
206 
207 	/* the handle needs to go away when the context is gone - when
208 	   the handle goes away this implicitly closes the pipe, which
209 	   kills the child */
210 	talloc_steal(tmp_ctx, h);
211 
212 	talloc_set_destructor(h, NULL);
213 
214 	ret = sys_read(h->fd[0], &c, 1);
215 	if (ret < 1) {
216 		DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
217 		c = 1;
218 	}
219 
220 	callback(c, p);
221 
222 	ctdb_kill(h->ctdb, child, SIGKILL);
223 	talloc_free(tmp_ctx);
224 }
225 
226 /* this creates a child process which will take out a tdb transaction
227    and write the record to the database.
228 */
ctdb_childwrite(struct ctdb_db_context * ctdb_db,void (* callback)(int,void * private_data),struct ctdb_persistent_write_state * state)229 static struct childwrite_handle *ctdb_childwrite(
230 				struct ctdb_db_context *ctdb_db,
231 				void (*callback)(int, void *private_data),
232 				struct ctdb_persistent_write_state *state)
233 {
234 	struct childwrite_handle *result;
235 	int ret;
236 	pid_t parent = getpid();
237 
238 	CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
239 	CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
240 
241 	if (!(result = talloc_zero(state, struct childwrite_handle))) {
242 		CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
243 		return NULL;
244 	}
245 
246 	ret = pipe(result->fd);
247 
248 	if (ret != 0) {
249 		talloc_free(result);
250 		CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
251 		return NULL;
252 	}
253 
254 	result->child = ctdb_fork(ctdb_db->ctdb);
255 
256 	if (result->child == (pid_t)-1) {
257 		close(result->fd[0]);
258 		close(result->fd[1]);
259 		talloc_free(result);
260 		CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
261 		return NULL;
262 	}
263 
264 	result->callback = callback;
265 	result->private_data = state;
266 	result->ctdb = ctdb_db->ctdb;
267 	result->ctdb_db = ctdb_db;
268 
269 	if (result->child == 0) {
270 		char c = 0;
271 
272 		close(result->fd[0]);
273 		prctl_set_comment("ctdb_write_persistent");
274 		ret = ctdb_persistent_store(state);
275 		if (ret != 0) {
276 			DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
277 			c = 1;
278 		}
279 
280 		sys_write(result->fd[1], &c, 1);
281 
282 		ctdb_wait_for_process_to_exit(parent);
283 		_exit(0);
284 	}
285 
286 	close(result->fd[1]);
287 	set_close_on_exec(result->fd[0]);
288 
289 	talloc_set_destructor(result, childwrite_destructor);
290 
291 	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
292 
293 	result->fde = tevent_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
294 				    TEVENT_FD_READ, childwrite_handler,
295 				    (void *)result);
296 	if (result->fde == NULL) {
297 		talloc_free(result);
298 		CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
299 		return NULL;
300 	}
301 	tevent_fd_set_auto_close(result->fde);
302 
303 	result->start_time = timeval_current();
304 
305 	return result;
306 }
307 
308 /*
309    update a record on this node if the new record has a higher rsn than the
310    current record
311  */
ctdb_control_update_record(struct ctdb_context * ctdb,struct ctdb_req_control_old * c,TDB_DATA recdata,bool * async_reply)312 int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
313 				   struct ctdb_req_control_old *c, TDB_DATA recdata,
314 				   bool *async_reply)
315 {
316 	struct ctdb_db_context *ctdb_db;
317 	struct ctdb_persistent_write_state *state;
318 	struct childwrite_handle *handle;
319 	struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
320 
321 	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
322 		DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
323 		return -1;
324 	}
325 
326 	ctdb_db = find_ctdb_db(ctdb, m->db_id);
327 	if (ctdb_db == NULL) {
328 		DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
329 		return -1;
330 	}
331 
332 	if (ctdb_db->unhealthy_reason) {
333 		DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
334 				 ctdb_db->db_name, ctdb_db->unhealthy_reason));
335 		return -1;
336 	}
337 
338 	state = talloc(ctdb, struct ctdb_persistent_write_state);
339 	CTDB_NO_MEMORY(ctdb, state);
340 
341 	state->ctdb_db = ctdb_db;
342 	state->c       = c;
343 	state->m       = m;
344 	state->flags   = 0;
345 	if (ctdb_db_volatile(ctdb_db)) {
346 		state->flags   = UPDATE_FLAGS_REPLACE_ONLY;
347 	}
348 
349 	/* create a child process to take out a transaction and
350 	   write the data.
351 	*/
352 	handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
353 	if (handle == NULL) {
354 		DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
355 		talloc_free(state);
356 		return -1;
357 	}
358 
359 	/* we need to wait for the replies */
360 	*async_reply = true;
361 
362 	/* need to keep the control structure around */
363 	talloc_steal(state, c);
364 
365 	/* but we won't wait forever */
366 	tevent_add_timer(ctdb->ev, state,
367 			 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
368 			 ctdb_persistent_lock_timeout, state);
369 
370 	return 0;
371 }
372 
373