1 /*
2    persistent store logic
3 
4    Copyright (C) Andrew Tridgell  2007
5    Copyright (C) Ronnie Sahlberg  2007
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "replace.h"
22 #include "system/filesys.h"
23 #include "system/network.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26 
27 #include <talloc.h>
28 #include <tevent.h>
29 
30 #include "lib/tdb_wrap/tdb_wrap.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 
34 #include "ctdb_private.h"
35 
36 #include "common/reqid.h"
37 #include "common/common.h"
38 #include "common/logging.h"
39 
/*
  Book-keeping for one in-flight trans3_commit control.

  An instance is allocated by ctdb_control_trans3_commit() and hung off
  ctdb_db->persistent_state; it is released once the client has been
  answered (all replies in, timeout, or end of recovery).
 */
struct ctdb_persistent_state {
	/* daemon context the commit was started on */
	struct ctdb_context *ctdb;

	/* database being committed; the destructor clears its
	   persistent_state pointer (used by trans3_commit) */
	struct ctdb_db_context *ctdb_db;

	/* client that issued the commit; the destructor resets its
	   db_id to 0 (used by trans3_commit) */
	struct ctdb_client *client;

	/* the original control request that is eventually replied to */
	struct ctdb_req_control_old *c;

	/* error message of the most recent failed update_record reply */
	const char *errormsg;

	/* number of update_record controls still awaiting a
	   successful reply */
	uint32_t num_pending;

	/* status of the most recent failed update_record reply */
	int32_t status;

	/* number of update_record replies that reported failure */
	uint32_t num_failed;

	/* number of update_record controls that were sent out */
	uint32_t num_sent;
};
50 
51 /*
52   1) all nodes fail, and all nodes reply
53   2) some nodes fail, all nodes reply
54   3) some nodes timeout
55   4) all nodes succeed
56  */
57 
58 /*
59   called when a node has acknowledged a ctdb_control_update_record call
60  */
ctdb_persistent_callback(struct ctdb_context * ctdb,int32_t status,TDB_DATA data,const char * errormsg,void * private_data)61 static void ctdb_persistent_callback(struct ctdb_context *ctdb,
62 				     int32_t status, TDB_DATA data,
63 				     const char *errormsg,
64 				     void *private_data)
65 {
66 	struct ctdb_persistent_state *state = talloc_get_type(private_data,
67 							      struct ctdb_persistent_state);
68 
69 	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
70 		DEBUG(DEBUG_INFO, ("ctdb_persistent_callback: ignoring reply "
71 				   "during recovery\n"));
72 		return;
73 	}
74 
75 	if (status != 0) {
76 		DEBUG(DEBUG_ERR,("ctdb_persistent_callback failed with status %d (%s)\n",
77 			 status, errormsg?errormsg:"no error message given"));
78 		state->status = status;
79 		state->errormsg = errormsg;
80 		state->num_failed++;
81 
82 		/*
83 		 * If a node failed to complete the update_record control,
84 		 * then either a recovery is already running or something
85 		 * bad is going on. So trigger a recovery and let the
86 		 * recovery finish the transaction, sending back the reply
87 		 * for the trans3_commit control to the client.
88 		 */
89 		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
90 		return;
91 	}
92 
93 	state->num_pending--;
94 
95 	if (state->num_pending != 0) {
96 		return;
97 	}
98 
99 	ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, state->errormsg);
100 	talloc_free(state);
101 }
102 
103 /*
104   called if persistent store times out
105  */
ctdb_persistent_store_timeout(struct tevent_context * ev,struct tevent_timer * te,struct timeval t,void * private_data)106 static void ctdb_persistent_store_timeout(struct tevent_context *ev,
107 					  struct tevent_timer *te,
108 					  struct timeval t, void *private_data)
109 {
110 	struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state);
111 
112 	if (state->ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
113 		DEBUG(DEBUG_INFO, ("ctdb_persistent_store_timeout: ignoring "
114 				   "timeout during recovery\n"));
115 		return;
116 	}
117 
118 	ctdb_request_control_reply(state->ctdb, state->c, NULL, 1,
119 				   "timeout in ctdb_persistent_state");
120 
121 	talloc_free(state);
122 }
123 
124 /**
125  * Finish pending trans3 commit controls, i.e. send
126  * reply to the client. This is called by the end-recovery
127  * control to fix the situation when a recovery interrupts
128  * the usual progress of a transaction.
129  */
ctdb_persistent_finish_trans3_commits(struct ctdb_context * ctdb)130 void ctdb_persistent_finish_trans3_commits(struct ctdb_context *ctdb)
131 {
132 	struct ctdb_db_context *ctdb_db;
133 
134 	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
135 		DEBUG(DEBUG_INFO, ("ctdb_persistent_finish_trans3_commits: "
136 				   "skipping execution when recovery is "
137 				   "active\n"));
138 		return;
139 	}
140 
141 	for (ctdb_db = ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
142 		struct ctdb_persistent_state *state;
143 
144 		if (ctdb_db->persistent_state == NULL) {
145 			continue;
146 		}
147 
148 		state = ctdb_db->persistent_state;
149 
150 		ctdb_request_control_reply(ctdb, state->c, NULL, 2,
151 					   "trans3 commit ended by recovery");
152 
153 		/* The destructor sets ctdb_db->persistent_state to NULL. */
154 		talloc_free(state);
155 	}
156 }
157 
ctdb_persistent_state_destructor(struct ctdb_persistent_state * state)158 static int ctdb_persistent_state_destructor(struct ctdb_persistent_state *state)
159 {
160 	if (state->client != NULL) {
161 		state->client->db_id = 0;
162 	}
163 
164 	if (state->ctdb_db != NULL) {
165 		state->ctdb_db->persistent_state = NULL;
166 	}
167 
168 	return 0;
169 }
170 
171 /*
172  * Store a set of persistent records.
173  * This is used to roll out a transaction to all nodes.
174  */
ctdb_control_trans3_commit(struct ctdb_context * ctdb,struct ctdb_req_control_old * c,TDB_DATA recdata,bool * async_reply)175 int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
176 				   struct ctdb_req_control_old *c,
177 				   TDB_DATA recdata, bool *async_reply)
178 {
179 	struct ctdb_client *client;
180 	struct ctdb_persistent_state *state;
181 	unsigned int i;
182 	struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
183 	struct ctdb_db_context *ctdb_db;
184 
185 	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
186 		DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans3_commit when recovery active\n"));
187 		return -1;
188 	}
189 
190 	client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
191 	if (client == NULL) {
192 		DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store "
193 				 "to a client. Returning error\n"));
194 		return -1;
195 	}
196 
197 	if (client->db_id != 0) {
198 		DEBUG(DEBUG_ERR,(__location__ " ERROR: trans3_commit: "
199 				 "client-db_id[0x%08x] != 0 "
200 				 "(client_id[0x%08x]): trans3_commit active?\n",
201 				 client->db_id, client->client_id));
202 		return -1;
203 	}
204 
205 	ctdb_db = find_ctdb_db(ctdb, m->db_id);
206 	if (ctdb_db == NULL) {
207 		DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans3_commit: "
208 				 "Unknown database db_id[0x%08x]\n", m->db_id));
209 		return -1;
210 	}
211 
212 	if (ctdb_db->persistent_state != NULL) {
213 		DEBUG(DEBUG_ERR, (__location__ " Error: "
214 				  "ctdb_control_trans3_commit "
215 				  "called while a transaction commit is "
216 				  "active. db_id[0x%08x]\n", m->db_id));
217 		return -1;
218 	}
219 
220 	ctdb_db->persistent_state = talloc_zero(ctdb_db,
221 						struct ctdb_persistent_state);
222 	CTDB_NO_MEMORY(ctdb, ctdb_db->persistent_state);
223 
224 	client->db_id = m->db_id;
225 
226 	state = ctdb_db->persistent_state;
227 	state->ctdb = ctdb;
228 	state->ctdb_db = ctdb_db;
229 	state->c    = c;
230 	state->client = client;
231 
232 	talloc_set_destructor(state, ctdb_persistent_state_destructor);
233 
234 	for (i = 0; i < ctdb->vnn_map->size; i++) {
235 		struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
236 		int ret;
237 
238 		/* only send to active nodes */
239 		if (node->flags & NODE_FLAGS_INACTIVE) {
240 			continue;
241 		}
242 
243 		ret = ctdb_daemon_send_control(ctdb, node->pnn, 0,
244 					       CTDB_CONTROL_UPDATE_RECORD,
245 					       c->client_id, 0, recdata,
246 					       ctdb_persistent_callback,
247 					       state);
248 		if (ret == -1) {
249 			DEBUG(DEBUG_ERR,("Unable to send "
250 					 "CTDB_CONTROL_UPDATE_RECORD "
251 					 "to pnn %u\n", node->pnn));
252 			talloc_free(state);
253 			return -1;
254 		}
255 
256 		state->num_pending++;
257 		state->num_sent++;
258 	}
259 
260 	if (state->num_pending == 0) {
261 		talloc_free(state);
262 		return 0;
263 	}
264 
265 	/* we need to wait for the replies */
266 	*async_reply = true;
267 
268 	/* need to keep the control structure around */
269 	talloc_steal(state, c);
270 
271 	/* but we won't wait forever */
272 	tevent_add_timer(ctdb->ev, state,
273 			 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
274 			 ctdb_persistent_store_timeout, state);
275 
276 	return 0;
277 }
278 
279 
280 /*
281   backwards compatibility:
282 
283   start a persistent store operation. passing both the key, header and
284   data to the daemon. If the client disconnects before it has issued
285   a persistent_update call to the daemon we trigger a full recovery
286   to ensure the databases are brought back in sync.
287   for now we ignore the recdata that the client has passed to us.
288  */
ctdb_control_start_persistent_update(struct ctdb_context * ctdb,struct ctdb_req_control_old * c,TDB_DATA recdata)289 int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
290 				      struct ctdb_req_control_old *c,
291 				      TDB_DATA recdata)
292 {
293 	struct ctdb_client *client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
294 
295 	if (client == NULL) {
296 		DEBUG(DEBUG_ERR,(__location__ " can not match start_persistent_update to a client. Returning error\n"));
297 		return -1;
298 	}
299 
300 	client->num_persistent_updates++;
301 
302 	return 0;
303 }
304 
305 /*
306   backwards compatibility:
307 
308   called to tell ctdbd that it is no longer doing a persistent update
309 */
ctdb_control_cancel_persistent_update(struct ctdb_context * ctdb,struct ctdb_req_control_old * c,TDB_DATA recdata)310 int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
311 					      struct ctdb_req_control_old *c,
312 					      TDB_DATA recdata)
313 {
314 	struct ctdb_client *client = reqid_find(ctdb->idr, c->client_id, struct ctdb_client);
315 
316 	if (client == NULL) {
317 		DEBUG(DEBUG_ERR,(__location__ " can not match cancel_persistent_update to a client. Returning error\n"));
318 		return -1;
319 	}
320 
321 	if (client->num_persistent_updates > 0) {
322 		client->num_persistent_updates--;
323 	}
324 
325 	return 0;
326 }
327 
ctdb_get_db_seqnum(struct ctdb_context * ctdb,uint32_t db_id,uint64_t * seqnum)328 static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
329 				  uint32_t db_id,
330 				  uint64_t *seqnum)
331 {
332 	int32_t ret;
333 	struct ctdb_db_context *ctdb_db;
334 	const char *keyname = CTDB_DB_SEQNUM_KEY;
335 	TDB_DATA key;
336 	TDB_DATA data;
337 	TALLOC_CTX *mem_ctx = talloc_new(ctdb);
338 	struct ctdb_ltdb_header header;
339 
340 	ctdb_db = find_ctdb_db(ctdb, db_id);
341 	if (!ctdb_db) {
342 		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
343 		ret = -1;
344 		goto done;
345 	}
346 
347 	if (! ctdb_db_allow_access(ctdb_db)) {
348 		ret = -1;
349 		goto done;
350 	}
351 
352 	key.dptr = (uint8_t *)discard_const(keyname);
353 	key.dsize = strlen(keyname) + 1;
354 
355 	ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, &header, mem_ctx, &data);
356 	if (ret != 0) {
357 		goto done;
358 	}
359 
360 	if (data.dsize != sizeof(uint64_t)) {
361 		*seqnum = 0;
362 		goto done;
363 	}
364 
365 	*seqnum = *(uint64_t *)data.dptr;
366 
367 done:
368 	talloc_free(mem_ctx);
369 	return ret;
370 }
371 
372 /**
373  * Get the sequence number of a persistent database.
374  */
ctdb_control_get_db_seqnum(struct ctdb_context * ctdb,TDB_DATA indata,TDB_DATA * outdata)375 int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
376 				   TDB_DATA indata,
377 				   TDB_DATA *outdata)
378 {
379 	uint32_t db_id;
380 	int32_t ret;
381 	uint64_t seqnum;
382 
383 	db_id = *(uint32_t *)indata.dptr;
384 	ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
385 	if (ret != 0) {
386 		goto done;
387 	}
388 
389 	outdata->dsize = sizeof(uint64_t);
390 	outdata->dptr = talloc_memdup(outdata, &seqnum, sizeof(uint64_t));
391 	if (outdata->dptr == NULL) {
392 		ret = -1;
393 	}
394 
395 done:
396 	return ret;
397 }
398