1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 *
22 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include "rcm_impl.h"
27 #include "rcm_module.h"
28
/*
 * Global locks
 */
mutex_t rcm_req_lock;	/* protects global dr & info request list */

/*
 * Daemon state file
 *
 * state_fd is the descriptor opened on RCM_STATE_FILE by rcmd_lock_init();
 * the dr request list is a shared mapping of that file.
 */
static int state_fd;
#define	RCM_STATE_FILE	"/var/run/rcm_daemon_state"
#define	N_REQ_CHUNK	10	/* grow 10 entries at a time */

/*
 * Daemon timeout value (seconds); used by rcmd_start_timer() when the
 * caller passes a timeout of 0.
 */
#define	RCM_DAEMON_TIMEOUT	300	/* 5 minutes idle time */
45
/*
 * Struct for a list of outstanding rcm requests
 *
 * A slot is treated as free when its state is RCM_STATE_REMOVE or its
 * device name is the empty string (see get_req_entry()).
 */
typedef struct {
	int	seq_num;		/* sequence number of request */
	int	state;			/* current state */
	pid_t	pid;			/* pid of initiator */
	uint_t	flag;			/* request flags */
	int	type;			/* resource(device) type */
	timespec_t interval;		/* suspend interval */
	char	device[MAXPATHLEN];	/* name of device or resource */
} req_t;
58
/*
 * Header of a request list, followed in memory by n_req_max req_t slots.
 * The total size is sizeof (req_list_t) + (n_req_max - 1) * sizeof (req_t),
 * which is how get_req_entry() sizes the list when it grows.
 */
typedef struct {
	int	n_req;		/* number of slots currently in use */
	int	n_req_max;	/* number of req_t's to follow */
	int	n_seq_max;	/* last sequence number */
	int	idle_timeout;	/* persist idle timeout value */
	req_t	req[1];
	/* more req_t follows */
} req_list_t;
67
/* dr request list: a shared mapping of RCM_STATE_FILE (rcmd_lock_init()) */
static req_list_t *dr_req_list;
/* info request list: heap memory from s_calloc()/s_realloc() */
static req_list_t *info_req_list;

/* strings reported through add_busy_rsrc_to_list() and rsrc_dr_info() */
static const char *locked_info = "DR operation in progress";
static const char *locked_err = "Resource is busy";

static int rcmd_get_state();
static void add_to_polling_list(pid_t);
static void remove_from_polling_list(pid_t);

void start_polling_thread();
static void stop_polling_thread();
80
81 /*
82 * Initialize request lists required for locking
83 */
84 void
rcmd_lock_init(void)85 rcmd_lock_init(void)
86 {
87 int size;
88 struct stat fbuf;
89
90 /*
91 * Start info list with one slot, then grow on demand.
92 */
93 info_req_list = s_calloc(1, sizeof (req_list_t));
94 info_req_list->n_req_max = 1;
95
96 /*
97 * Open daemon state file and map in contents
98 */
99 state_fd = open(RCM_STATE_FILE, O_CREAT|O_RDWR, 0600);
100 if (state_fd == -1) {
101 rcm_log_message(RCM_ERROR, gettext("cannot open %s: %s\n"),
102 RCM_STATE_FILE, strerror(errno));
103 rcmd_exit(errno);
104 }
105
106 if (fstat(state_fd, &fbuf) != 0) {
107 rcm_log_message(RCM_ERROR, gettext("cannot stat %s: %s\n"),
108 RCM_STATE_FILE, strerror(errno));
109 rcmd_exit(errno);
110 }
111
112 size = fbuf.st_size;
113 if (size == 0) {
114 size = sizeof (req_list_t);
115 if (ftruncate(state_fd, size) != 0) {
116 rcm_log_message(RCM_ERROR,
117 gettext("cannot truncate %s: %s\n"),
118 RCM_STATE_FILE, strerror(errno));
119 rcmd_exit(errno);
120 }
121 }
122
123 /*LINTED*/
124 dr_req_list = (req_list_t *)mmap(NULL, size, PROT_READ|PROT_WRITE,
125 MAP_SHARED, state_fd, 0);
126 if (dr_req_list == MAP_FAILED) {
127 rcm_log_message(RCM_ERROR, gettext("cannot mmap %s: %s\n"),
128 RCM_STATE_FILE, strerror(errno));
129 rcmd_exit(errno);
130 }
131
132 /*
133 * Initial size is one entry
134 */
135 if (dr_req_list->n_req_max == 0) {
136 dr_req_list->n_req_max = 1;
137 (void) fsync(state_fd);
138 return;
139 }
140
141 rcm_log_message(RCM_DEBUG, "n_req = %d, n_req_max = %d\n",
142 dr_req_list->n_req, dr_req_list->n_req_max);
143
144 /*
145 * Recover the daemon state
146 */
147 clean_dr_list();
148 }
149
150 /*
151 * Get a unique sequence number--to be called with rcm_req_lock held.
152 */
153 static int
get_seq_number()154 get_seq_number()
155 {
156 int number;
157
158 if (dr_req_list == NULL)
159 return (0);
160
161 dr_req_list->n_seq_max++;
162 number = (dr_req_list->n_seq_max << SEQ_NUM_SHIFT);
163 (void) fsync(state_fd);
164
165 return (number);
166 }
167
168 /*
169 * Find entry in list with the same resource name and sequence number.
170 * If seq_num == -1, no seq_num matching is required.
171 */
172 static req_t *
find_req_entry(char * device,uint_t flag,int seq_num,req_list_t * list)173 find_req_entry(char *device, uint_t flag, int seq_num, req_list_t *list)
174 {
175 int i;
176
177 /*
178 * Look for entry with the same resource and seq_num.
179 * Also match RCM_FILESYS field in flag.
180 */
181 for (i = 0; i < list->n_req_max; i++) {
182 if (list->req[i].state == RCM_STATE_REMOVE)
183 /* stale entry */
184 continue;
185 /*
186 * We need to distiguish a file system root from the directory
187 * it is mounted on.
188 *
189 * Applications are not aware of any difference between the
190 * two, but the system keeps track of it internally by
191 * checking for mount points while traversing file path.
192 * In a similar spirit, RCM is keeping this difference as
193 * an implementation detail.
194 */
195 if ((strcmp(device, list->req[i].device) != 0) ||
196 (list->req[i].flag & RCM_FILESYS) != (flag & RCM_FILESYS))
197 /* different resource */
198 continue;
199
200 if ((seq_num != -1) && ((seq_num >> SEQ_NUM_SHIFT) !=
201 (list->req[i].seq_num >> SEQ_NUM_SHIFT)))
202 /* different base seqnum */
203 continue;
204
205 return (&list->req[i]);
206 }
207
208 return (NULL);
209 }
210
211 /*
212 * Get the next empty req_t entry. If no entry exists, grow the list.
213 */
214 static req_t *
get_req_entry(req_list_t ** listp)215 get_req_entry(req_list_t **listp)
216 {
217 int i;
218 int n_req = (*listp)->n_req;
219 int n_req_max = (*listp)->n_req_max;
220
221 /*
222 * If the list is full, grow the list and return the first
223 * entry in the new portion.
224 */
225 if (n_req == n_req_max) {
226 int newsize;
227
228 n_req_max += N_REQ_CHUNK;
229 newsize = sizeof (req_list_t) + (n_req_max - 1) *
230 sizeof (req_t);
231
232 if (listp == &info_req_list) {
233 *listp = s_realloc(*listp, newsize);
234 } else if (ftruncate(state_fd, newsize) != 0) {
235 rcm_log_message(RCM_ERROR,
236 gettext("cannot truncate %s: %s\n"),
237 RCM_STATE_FILE, strerror(errno));
238 rcmd_exit(errno);
239 /*LINTED*/
240 } else if ((*listp = (req_list_t *)mmap(NULL, newsize,
241 PROT_READ|PROT_WRITE, MAP_SHARED, state_fd, 0)) ==
242 MAP_FAILED) {
243 rcm_log_message(RCM_ERROR,
244 gettext("cannot mmap %s: %s\n"),
245 RCM_STATE_FILE, strerror(errno));
246 rcmd_exit(errno);
247 }
248
249 /* Initialize the new entries */
250 for (i = (*listp)->n_req_max; i < n_req_max; i++) {
251 (*listp)->req[i].state = RCM_STATE_REMOVE;
252 (void) strcpy((*listp)->req[i].device, "");
253 }
254
255 (*listp)->n_req_max = n_req_max;
256 (*listp)->n_req++;
257 return (&(*listp)->req[n_req]);
258 }
259
260 /*
261 * List contains empty slots, find it.
262 */
263 for (i = 0; i < n_req_max; i++) {
264 if (((*listp)->req[i].device[0] == '\0') ||
265 ((*listp)->req[i].state == RCM_STATE_REMOVE)) {
266 break;
267 }
268 }
269
270 assert(i < n_req_max); /* empty slot must exist */
271
272 (*listp)->n_req++;
273 return (&(*listp)->req[i]);
274 }
275
276 /*
277 * When one resource depends on multiple resources, it's possible that
278 * rcm_get_info can be called multiple times on the resource, resulting
279 * in duplicate information. By assigning a unique sequence number to
280 * each rcm_get_info operation, this duplication can be eliminated.
281 *
282 * Insert a dr entry in info_req_list
283 */
284 int
info_req_add(char * rsrcname,uint_t flag,int seq_num)285 info_req_add(char *rsrcname, uint_t flag, int seq_num)
286 {
287 int error = 0;
288 char *device;
289 req_t *req;
290
291 rcm_log_message(RCM_TRACE2, "info_req_add(%s, %d)\n",
292 rsrcname, seq_num);
293
294 device = resolve_name(rsrcname);
295 (void) mutex_lock(&rcm_req_lock);
296
297 /*
298 * Look for entry with the same resource and seq_num.
299 * If it exists, we return an error so that such
300 * information is not gathered more than once.
301 */
302 if (find_req_entry(device, flag, seq_num, info_req_list) != NULL) {
303 rcm_log_message(RCM_DEBUG, "getinfo cycle: %s %d \n",
304 device, seq_num);
305 error = -1;
306 goto out;
307 }
308
309 /*
310 * Get empty entry and fill in seq_num and device.
311 */
312 req = get_req_entry(&info_req_list);
313 req->seq_num = seq_num;
314 req->state = RCM_STATE_ONLINE; /* mark that the entry is in use */
315 req->flag = flag;
316 (void) strcpy(req->device, device);
317
318 out:
319 (void) mutex_unlock(&rcm_req_lock);
320 free(device);
321
322 return (error);
323 }
324
325 /*
326 * Remove all entries associated with seq_num from info_req_list
327 */
328 void
info_req_remove(int seq_num)329 info_req_remove(int seq_num)
330 {
331 int i;
332
333 rcm_log_message(RCM_TRACE3, "info_req_remove(%d)\n", seq_num);
334
335 seq_num >>= SEQ_NUM_SHIFT;
336 (void) mutex_lock(&rcm_req_lock);
337
338 /* remove all entries with seq_num */
339 for (i = 0; i < info_req_list->n_req_max; i++) {
340 if (info_req_list->req[i].state == RCM_STATE_REMOVE)
341 continue;
342
343 if ((info_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != seq_num)
344 continue;
345
346 info_req_list->req[i].state = RCM_STATE_REMOVE;
347 info_req_list->n_req--;
348 }
349
350 /*
351 * We don't shrink the info_req_list size for now.
352 */
353 (void) mutex_unlock(&rcm_req_lock);
354 }
355
356 /*
357 * Checking lock conflicts. There is a conflict if:
358 * - attempt to DR a node when either its ancester or descendent
359 * is in the process of DR
360 * - attempt to register for a node when its ancester is locked for DR
361 */
362 static int
check_lock(char * device,uint_t flag,int cflag,rcm_info_t ** info)363 check_lock(char *device, uint_t flag, int cflag, rcm_info_t **info)
364 {
365 int i, ret = RCM_SUCCESS;
366
367 if (info)
368 *info = NULL;
369
370 /*
371 * During daemon initialization, don't check locks
372 */
373 if (dr_req_list == NULL)
374 return (ret);
375
376 for (i = 0; i < dr_req_list->n_req; i++) {
377 req_t *req = &dr_req_list->req[i];
378 char *dr_dev = req->device;
379
380 /*
381 * Skip empty entries
382 */
383 if ((req->state == RCM_STATE_REMOVE) || (dr_dev[0] == '\0'))
384 continue;
385
386 /*
387 * Make sure that none of the ancestors of dr_dev is
388 * being operated upon.
389 */
390 if (EQUAL(device, dr_dev) || DESCENDENT(device, dr_dev)) {
391 /*
392 * An exception to this is the filesystem.
393 * We should allowed a filesystem rooted at a
394 * child directory to be unmounted.
395 */
396 if ((flag & RCM_FILESYS) && (!EQUAL(device, dr_dev) ||
397 ((dr_req_list->req[i].flag & RCM_FILESYS) == 0)))
398 continue;
399
400 assert(info != 0);
401
402 add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
403 dr_req_list->req[i].state,
404 dr_req_list->req[i].seq_num, NULL, locked_info,
405 locked_err, NULL, info);
406 ret = RCM_CONFLICT;
407 break;
408 }
409
410 if ((cflag == LOCK_FOR_DR) && DESCENDENT(dr_dev, device)) {
411 /*
412 * Check descendents only for DR request.
413 *
414 * Could have multiple descendents doing DR,
415 * we want to find them all.
416 */
417 assert(info != 0);
418
419 add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
420 dr_req_list->req[i].state,
421 dr_req_list->req[i].seq_num, NULL, locked_info,
422 locked_err, NULL, info);
423 ret = RCM_CONFLICT;
424 /* don't break here, need to find all conflicts */
425 }
426 }
427
428 return (ret);
429 }
430
431 /*
432 * Check for lock conflicts for DR operation or client registration
433 */
434 int
rsrc_check_lock_conflicts(char * rsrcname,uint_t flag,int cflag,rcm_info_t ** info)435 rsrc_check_lock_conflicts(char *rsrcname, uint_t flag, int cflag,
436 rcm_info_t **info)
437 {
438 int result;
439 char *device;
440
441 device = resolve_name(rsrcname);
442 result = check_lock(device, flag, cflag, info);
443 free(device);
444
445 return (result);
446 }
447
448 static int
transition_state(int state)449 transition_state(int state)
450 {
451 /*
452 * If the resource state is in transition, ask caller to
453 * try again.
454 */
455 switch (state) {
456 case RCM_STATE_OFFLINING:
457 case RCM_STATE_SUSPENDING:
458 case RCM_STATE_RESUMING:
459 case RCM_STATE_ONLINING:
460 case RCM_STATE_REMOVING:
461
462 return (1);
463
464 default:
465 /*FALLTHROUGH*/
466 break;
467 }
468 return (0);
469 }
470
471 /*
472 * Update a dr entry in dr_req_list
473 */
474 /*ARGSUSED*/
475 static int
dr_req_update_entry(char * device,pid_t pid,uint_t flag,int state,int seq_num,timespec_t * interval,rcm_info_t ** infop)476 dr_req_update_entry(char *device, pid_t pid, uint_t flag, int state,
477 int seq_num, timespec_t *interval, rcm_info_t **infop)
478 {
479 req_t *req;
480
481 /*
482 * Find request entry. If not found, return RCM_FAILURE
483 */
484 req = find_req_entry(device, flag, -1, dr_req_list);
485
486 if (req == NULL) {
487 switch (state) {
488 case RCM_STATE_OFFLINE_QUERYING:
489 case RCM_STATE_SUSPEND_QUERYING:
490 case RCM_STATE_OFFLINING:
491 case RCM_STATE_SUSPENDING:
492 /* could be re-do operation, no error message */
493 break;
494
495 default:
496 rcm_log_message(RCM_DEBUG,
497 "update non-existing resource %s\n", device);
498 }
499 return (RCM_FAILURE);
500 }
501
502 /*
503 * During initialization, update is unconditional (forced)
504 * in order to bring the daemon up in a sane state.
505 */
506 if (rcmd_get_state() == RCMD_INIT)
507 goto update;
508
509 /*
510 * Don't allow update with mismatched initiator pid. This could happen
511 * as part of normal operation.
512 */
513 if (pid != req->pid) {
514 rcm_log_message(RCM_INFO,
515 gettext("mismatched dr initiator pid: %ld %ld\n"),
516 req->pid, pid);
517 goto failure;
518 }
519
520 rcm_log_message(RCM_TRACE4,
521 "dr_req_update_entry: state=%d, device=%s\n",
522 req->state, req->device);
523
524 /*
525 * Check that the state transition is valid
526 */
527 switch (state) {
528 case RCM_STATE_OFFLINE_QUERYING:
529 case RCM_STATE_OFFLINING:
530 /*
531 * This is the case of re-offlining, which applies only
532 * if a previous attempt failed.
533 */
534 if ((req->state != RCM_STATE_OFFLINE_FAIL) &&
535 (req->state != RCM_STATE_OFFLINE_QUERYING) &&
536 (req->state != RCM_STATE_OFFLINE_QUERY) &&
537 (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
538 (req->state != RCM_STATE_OFFLINE)) {
539 rcm_log_message(RCM_WARNING,
540 gettext("%s: invalid offlining from state %d\n"),
541 device, req->state);
542 goto failure;
543 }
544 break;
545
546 case RCM_STATE_SUSPEND_QUERYING:
547 case RCM_STATE_SUSPENDING:
548 /*
549 * This is the case of re-suspending, which applies only
550 * if a previous attempt failed.
551 */
552 if ((req->state != RCM_STATE_SUSPEND_FAIL) &&
553 (req->state != RCM_STATE_SUSPEND_QUERYING) &&
554 (req->state != RCM_STATE_SUSPEND_QUERY) &&
555 (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
556 (req->state != RCM_STATE_SUSPEND)) {
557 rcm_log_message(RCM_WARNING,
558 gettext("%s: invalid suspending from state %d\n"),
559 device, req->state);
560 goto failure;
561 }
562 break;
563
564 case RCM_STATE_RESUMING:
565 if ((req->state != RCM_STATE_SUSPEND) &&
566 (req->state != RCM_STATE_SUSPEND_QUERYING) &&
567 (req->state != RCM_STATE_SUSPEND_QUERY) &&
568 (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
569 (req->state != RCM_STATE_SUSPEND_FAIL)) {
570 rcm_log_message(RCM_DEBUG,
571 "%s: invalid resuming from state %d\n",
572 device, req->state);
573 goto failure;
574 }
575 break;
576
577 case RCM_STATE_ONLINING:
578 if ((req->state != RCM_STATE_OFFLINE) &&
579 (req->state != RCM_STATE_OFFLINE_QUERYING) &&
580 (req->state != RCM_STATE_OFFLINE_QUERY) &&
581 (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
582 (req->state != RCM_STATE_OFFLINE_FAIL)) {
583 rcm_log_message(RCM_INFO,
584 gettext("%s: invalid onlining from state %d\n"),
585 device, req->state);
586 goto failure;
587 }
588 break;
589
590 case RCM_STATE_REMOVING:
591 if ((req->state != RCM_STATE_OFFLINE) &&
592 (req->state != RCM_STATE_OFFLINE_FAIL)) {
593 rcm_log_message(RCM_INFO,
594 gettext("%s: invalid removing from state %d\n"),
595 device, req->state);
596 goto failure;
597 }
598 break;
599
600 case RCM_STATE_SUSPEND_FAIL:
601 assert(req->state == RCM_STATE_SUSPENDING);
602 break;
603
604 case RCM_STATE_OFFLINE_FAIL:
605 assert(req->state == RCM_STATE_OFFLINING);
606 break;
607
608 case RCM_STATE_SUSPEND:
609 assert(req->state == RCM_STATE_SUSPENDING);
610 break;
611
612 case RCM_STATE_OFFLINE:
613 assert(req->state == RCM_STATE_OFFLINING);
614 break;
615
616 case RCM_STATE_ONLINE:
617 assert((req->state == RCM_STATE_RESUMING) ||
618 (req->state == RCM_STATE_ONLINING));
619 break;
620
621 default: /* shouldn't be here */
622 rcm_log_message(RCM_ERROR,
623 gettext("invalid update to dr state: %d\n"), state);
624 return (RCM_FAILURE);
625 }
626
627 update:
628 /*
629 * update the state, interval, and sequence number; sync state file
630 */
631 req->state = state;
632 req->seq_num = seq_num;
633
634 if (interval)
635 req->interval = *interval;
636 else
637 bzero(&req->interval, sizeof (timespec_t));
638
639 (void) fsync(state_fd);
640 return (RCM_SUCCESS);
641
642 failure:
643 if (infop != NULL) {
644 add_busy_rsrc_to_list(req->device, req->pid, req->state,
645 req->seq_num, NULL, locked_info, locked_err, NULL, infop);
646 }
647
648 /*
649 * A request may be left in a transition state because the operator
650 * typed ctrl-C. In this case, the daemon thread continues to run
651 * and will eventually put the state in a non-transitional state.
652 *
653 * To be safe, we return EAGAIN to allow librcm to loop and retry.
654 * If we are called from a module, loop & retry could result in a
655 * deadlock. The called will check for this case and turn EAGAIN
656 * into RCM_CONFLICT.
657 */
658 if (transition_state(req->state)) {
659 return (EAGAIN);
660 }
661
662 return (RCM_CONFLICT);
663 }
664
665 /*
666 * Insert a dr entry in dr_req_list
667 */
668 int
dr_req_add(char * rsrcname,pid_t pid,uint_t flag,int state,int seq_num,timespec_t * interval,rcm_info_t ** info)669 dr_req_add(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
670 timespec_t *interval, rcm_info_t **info)
671 {
672 int error;
673 char *device;
674 req_t *req;
675
676 rcm_log_message(RCM_TRACE3, "dr_req_add(%s, %ld, 0x%x, %d, %d, %p)\n",
677 rsrcname, pid, flag, state, seq_num, (void *)info);
678
679 device = resolve_name(rsrcname);
680 if (device == NULL)
681 return (EINVAL);
682
683 (void) mutex_lock(&rcm_req_lock);
684
685 /*
686 * In the re-offline/suspend case, attempt to update dr request.
687 *
688 * If this succeeds, return success;
689 * If this fails because of a conflict, return error;
690 * If this this fails because no entry exists, add a new entry.
691 */
692 error = dr_req_update_entry(device, pid, flag, state, seq_num, interval,
693 info);
694
695 switch (error) {
696 case RCM_FAILURE:
697 /* proceed to add a new entry */
698 break;
699
700 case RCM_CONFLICT:
701 case RCM_SUCCESS:
702 case EAGAIN:
703 default:
704 goto out;
705 }
706
707 /*
708 * Check for lock conflicts
709 */
710 error = check_lock(device, flag, LOCK_FOR_DR, info);
711 if (error != RCM_SUCCESS) {
712 error = RCM_CONFLICT;
713 goto out;
714 }
715
716 /*
717 * Get empty request entry, fill in values and sync state file
718 */
719 req = get_req_entry(&dr_req_list);
720
721 req->seq_num = seq_num;
722 req->pid = pid;
723 req->flag = flag;
724 req->state = state;
725 req->type = rsrc_get_type(device);
726 (void) strcpy(req->device, device);
727
728 /* cache interval for failure recovery */
729 if (interval)
730 req->interval = *interval;
731 else
732 bzero(&req->interval, sizeof (timespec_t));
733
734 (void) fsync(state_fd);
735
736 /*
737 * Add initiator pid to polling list
738 */
739 add_to_polling_list(req->pid);
740
741 out:
742 (void) mutex_unlock(&rcm_req_lock);
743 free(device);
744
745 return (error);
746 }
747
748 /*
749 * Update a dr entry in dr_req_list
750 */
751 /*ARGSUSED*/
752 int
dr_req_update(char * rsrcname,pid_t pid,uint_t flag,int state,int seq_num,rcm_info_t ** info)753 dr_req_update(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
754 rcm_info_t **info)
755 {
756 int error;
757 char *device = resolve_name(rsrcname);
758
759 rcm_log_message(RCM_TRACE3, "dr_req_update(%s, %ld, 0x%x, %d, %d)\n",
760 rsrcname, pid, flag, state, seq_num);
761
762 (void) mutex_lock(&rcm_req_lock);
763 error = dr_req_update_entry(device, pid, flag, state, seq_num, NULL,
764 info);
765 (void) mutex_unlock(&rcm_req_lock);
766 free(device);
767
768 return (error);
769 }
770
771 /*
772 * This function scans the DR request list for the next, non-removed
773 * entry that is part of the specified sequence. The 'device' name
774 * of the entry is copied into the provided 'rsrc' buffer.
775 *
776 * The 'rsrc' buffer is required because the DR request list is only
777 * locked during the duration of this lookup. Giving a direct pointer
778 * to something in the list would be unsafe.
779 */
780 int
dr_req_lookup(int seq_num,char * rsrc)781 dr_req_lookup(int seq_num, char *rsrc)
782 {
783 int i;
784 int len;
785 int base = (seq_num >> SEQ_NUM_SHIFT);
786 int retval = RCM_FAILURE;
787
788 if (rsrc == NULL) {
789 return (RCM_FAILURE);
790 }
791
792 (void) mutex_lock(&rcm_req_lock);
793
794 for (i = 0; i < dr_req_list->n_req_max; i++) {
795
796 /* Skip removed or non-matching entries */
797 if ((dr_req_list->req[i].state == RCM_STATE_REMOVE) ||
798 ((dr_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != base)) {
799 continue;
800 }
801
802 /* Copy the next-matching 'device' name into 'rsrc' */
803 len = strlcpy(rsrc, dr_req_list->req[i].device, MAXPATHLEN);
804 if (len < MAXPATHLEN) {
805 retval = RCM_SUCCESS;
806 }
807 break;
808 }
809
810 (void) mutex_unlock(&rcm_req_lock);
811
812 return (retval);
813 }
814
815 /*
816 * Remove a dr entry in dr_req_list
817 */
818 void
dr_req_remove(char * rsrcname,uint_t flag)819 dr_req_remove(char *rsrcname, uint_t flag)
820 {
821 req_t *req;
822 char *device = resolve_name(rsrcname);
823
824 rcm_log_message(RCM_TRACE3, "dr_req_remove(%s)\n", rsrcname);
825
826 (void) mutex_lock(&rcm_req_lock);
827
828 /* find entry */
829 req = find_req_entry(device, flag, -1, dr_req_list);
830 free(device);
831
832 if (req == NULL) {
833 (void) mutex_unlock(&rcm_req_lock);
834 rcm_log_message(RCM_WARNING,
835 gettext("dr_req entry %s not found\n"), rsrcname);
836 return;
837 }
838
839 req->state = RCM_STATE_REMOVE;
840 dr_req_list->n_req--;
841 (void) fsync(state_fd);
842
843 /*
844 * remove pid from polling list
845 */
846 remove_from_polling_list(req->pid);
847
848 /*
849 * We don't shrink the dr_req_list size for now.
850 * Shouldn't cause big memory leaks.
851 */
852 (void) mutex_unlock(&rcm_req_lock);
853 }
854
855 /*
856 * Return the list of ongoing dr operation requests
857 */
858 rcm_info_t *
rsrc_dr_info()859 rsrc_dr_info()
860 {
861 int i;
862 rcm_info_t *info;
863 rcm_info_t *result = NULL;
864 char *rsrc;
865 int len;
866
867 rcm_log_message(RCM_TRACE2, "rsrc_dr_info()\n");
868
869 (void) mutex_lock(&rcm_req_lock);
870 for (i = 0; i < dr_req_list->n_req_max; i++) {
871 if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
872 continue;
873
874 if (dr_req_list->req[i].device[0] == '\0')
875 continue;
876
877 if (dr_req_list->req[i].flag & RCM_FILESYS) {
878 len = strlen(dr_req_list->req[i].device) + 5;
879 rsrc = s_malloc(len);
880 (void) snprintf(rsrc, len, "%s(fs)",
881 dr_req_list->req[i].device);
882 } else {
883 rsrc = s_strdup(dr_req_list->req[i].device);
884 }
885
886 info = s_calloc(1, sizeof (*info));
887 if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) {
888 rcm_log_message(RCM_ERROR,
889 gettext("failed (nvlist_alloc=%s).\n"),
890 strerror(errno));
891 rcmd_exit(errno);
892 }
893
894 if (errno = nvlist_add_string(info->info, RCM_RSRCNAME, rsrc)) {
895 rcm_log_message(RCM_ERROR,
896 gettext("failed (nvlist_add=%s).\n"),
897 strerror(errno));
898 rcmd_exit(errno);
899 }
900 (void) free(rsrc);
901
902 if (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID,
903 dr_req_list->req[i].pid)) {
904 rcm_log_message(RCM_ERROR,
905 gettext("failed (nvlist_add=%s).\n"),
906 strerror(errno));
907 rcmd_exit(errno);
908 }
909
910 if (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM,
911 dr_req_list->req[i].seq_num)) {
912 rcm_log_message(RCM_ERROR,
913 gettext("failed (nvlist_add=%s).\n"),
914 strerror(errno));
915 rcmd_exit(errno);
916 }
917
918 if (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE,
919 dr_req_list->req[i].state)) {
920 rcm_log_message(RCM_ERROR,
921 gettext("failed (nvlist_add=%s).\n"),
922 strerror(errno));
923 rcmd_exit(errno);
924 }
925
926 if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO,
927 (char *)locked_info)) {
928 rcm_log_message(RCM_ERROR,
929 gettext("failed (nvlist_add=%s).\n"),
930 strerror(errno));
931 rcmd_exit(errno);
932 }
933
934 info->next = result;
935 result = info;
936 }
937 (void) mutex_unlock(&rcm_req_lock);
938
939 return (result);
940 }
941
942 /*
943 * Eliminate entries whose dr initiator is no longer running
944 * and recover daemon state during daemon restart.
945 *
946 * This routine is called from either during daemon initialization
947 * after all modules have registered resources or from the cleanup
948 * thread. In either case, it is the only thread running in the
949 * daemon.
950 */
951 void
clean_dr_list()952 clean_dr_list()
953 {
954 int i;
955 struct clean_list {
956 struct clean_list *next;
957 char *rsrcname;
958 pid_t pid;
959 int seq_num;
960 int state;
961 timespec_t interval;
962 } *tmp, *list = NULL;
963 char *rsrcnames[2];
964
965 rcm_log_message(RCM_TRACE3,
966 "clean_dr_list(): look for stale dr initiators\n");
967
968 rsrcnames[1] = NULL;
969
970 /*
971 * Make a list of entries to recover. This is necessary because
972 * the recovery operation will modify dr_req_list.
973 */
974 (void) mutex_lock(&rcm_req_lock);
975 for (i = 0; i < dr_req_list->n_req_max; i++) {
976 /* skip empty entries */
977 if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
978 continue;
979
980 if (dr_req_list->req[i].device[0] == '\0')
981 continue;
982
983 /* skip cascade operations */
984 if (dr_req_list->req[i].seq_num & SEQ_NUM_MASK)
985 continue;
986
987 /*
988 * In the cleanup case, ignore entries with initiators alive
989 */
990 if ((rcmd_get_state() == RCMD_CLEANUP) &&
991 proc_exist(dr_req_list->req[i].pid))
992 continue;
993
994 rcm_log_message(RCM_TRACE1,
995 "found stale entry: %s\n", dr_req_list->req[i].device);
996
997 tmp = s_malloc(sizeof (*tmp));
998 tmp->rsrcname = s_strdup(dr_req_list->req[i].device);
999 tmp->state = dr_req_list->req[i].state;
1000 tmp->pid = dr_req_list->req[i].pid;
1001 tmp->seq_num = dr_req_list->req[i].seq_num;
1002 tmp->interval = dr_req_list->req[i].interval;
1003 tmp->next = list;
1004 list = tmp;
1005 }
1006 (void) mutex_unlock(&rcm_req_lock);
1007
1008 if (list == NULL)
1009 return;
1010
1011 /*
1012 * If everything worked normally, we shouldn't be here.
1013 * Since we are here, something went wrong, so say something.
1014 */
1015 if (rcmd_get_state() == RCMD_INIT) {
1016 rcm_log_message(RCM_NOTICE, gettext("rcm_daemon died "
1017 "unexpectedly, recovering previous daemon state\n"));
1018 } else {
1019 rcm_log_message(RCM_INFO, gettext("one or more dr initiator "
1020 "died, attempting automatic recovery\n"));
1021 }
1022
1023 while (list) {
1024 tmp = list;
1025 list = tmp->next;
1026
1027 switch (tmp->state) {
1028 case RCM_STATE_OFFLINE_QUERY:
1029 case RCM_STATE_OFFLINE_QUERY_FAIL:
1030 rsrcnames[0] = tmp->rsrcname;
1031 if (proc_exist(tmp->pid)) {
1032 /* redo */
1033 (void) process_resource_offline(rsrcnames,
1034 tmp->pid, RCM_QUERY, tmp->seq_num, NULL);
1035 } else {
1036 /* undo */
1037 (void) notify_resource_online(rsrcnames,
1038 tmp->pid, 0, tmp->seq_num, NULL);
1039 }
1040 break;
1041
1042 case RCM_STATE_OFFLINE:
1043 case RCM_STATE_OFFLINE_FAIL:
1044 rsrcnames[0] = tmp->rsrcname;
1045 if (proc_exist(tmp->pid)) {
1046 /* redo */
1047 (void) process_resource_offline(rsrcnames,
1048 tmp->pid, 0, tmp->seq_num, NULL);
1049 } else {
1050 /* undo */
1051 (void) notify_resource_online(rsrcnames,
1052 tmp->pid, 0, tmp->seq_num, NULL);
1053 }
1054 break;
1055
1056 case RCM_STATE_SUSPEND_QUERY:
1057 case RCM_STATE_SUSPEND_QUERY_FAIL:
1058 rsrcnames[0] = tmp->rsrcname;
1059 if (proc_exist(tmp->pid)) {
1060 /* redo */
1061 (void) process_resource_suspend(rsrcnames,
1062 tmp->pid, RCM_QUERY, tmp->seq_num,
1063 &tmp->interval, NULL);
1064 } else {
1065 /* undo */
1066 (void) notify_resource_resume(rsrcnames,
1067 tmp->pid, 0, tmp->seq_num, NULL);
1068 }
1069 break;
1070
1071 case RCM_STATE_SUSPEND:
1072 case RCM_STATE_SUSPEND_FAIL:
1073 rsrcnames[0] = tmp->rsrcname;
1074 if (proc_exist(tmp->pid)) {
1075 /* redo */
1076 (void) process_resource_suspend(rsrcnames,
1077 tmp->pid, 0, tmp->seq_num, &tmp->interval,
1078 NULL);
1079 } else {
1080 /* undo */
1081 (void) notify_resource_resume(rsrcnames,
1082 tmp->pid, 0, tmp->seq_num, NULL);
1083 }
1084 break;
1085
1086 case RCM_STATE_OFFLINING:
1087 case RCM_STATE_ONLINING:
1088 rsrcnames[0] = tmp->rsrcname;
1089 (void) notify_resource_online(rsrcnames, tmp->pid, 0,
1090 tmp->seq_num, NULL);
1091 break;
1092
1093 case RCM_STATE_SUSPENDING:
1094 case RCM_STATE_RESUMING:
1095 rsrcnames[0] = tmp->rsrcname;
1096 (void) notify_resource_resume(rsrcnames, tmp->pid, 0,
1097 tmp->seq_num, NULL);
1098 break;
1099
1100 case RCM_STATE_REMOVING:
1101 rsrcnames[0] = tmp->rsrcname;
1102 (void) notify_resource_remove(rsrcnames, tmp->pid, 0,
1103 tmp->seq_num, NULL);
1104 break;
1105
1106 default:
1107 rcm_log_message(RCM_WARNING,
1108 gettext("%s in unknown state %d\n"),
1109 tmp->rsrcname, tmp->state);
1110 break;
1111 }
1112 free(tmp->rsrcname);
1113 free(tmp);
1114 }
1115 }
1116
/*
 * Selected thread blocking based on event type
 *
 * The fields used in this file are: lock and cv (mutex and condition
 * variable), state (RCMD_INIT/RCMD_NORMAL/RCMD_CLEANUP), thr_count
 * (threads inside the daemon, or -1 while cleanup owns it), wanted
 * (number of waiters) and last_update (time of the last rcmd_thr_decr()).
 */
barrier_t barrier;
1121
1122 /*
1123 * Change barrier state:
1124 * RCMD_INIT - daemon is intializing, only register allowed
1125 * RCMD_NORMAL - normal daemon processing
1126 * RCMD_CLEANUP - cleanup thread is waiting or running
1127 */
1128 int
rcmd_get_state()1129 rcmd_get_state()
1130 {
1131 return (barrier.state);
1132 }
1133
1134 void
rcmd_set_state(int state)1135 rcmd_set_state(int state)
1136 {
1137 /*
1138 * The state transition is as follows:
1139 * INIT --> NORMAL <---> CLEANUP
1140 * The implementation favors the cleanup thread
1141 */
1142
1143 (void) mutex_lock(&barrier.lock);
1144 barrier.state = state;
1145
1146 switch (state) {
1147 case RCMD_CLEANUP:
1148 /*
1149 * Wait for existing threads to exit
1150 */
1151 barrier.wanted++;
1152 while (barrier.thr_count != 0)
1153 (void) cond_wait(&barrier.cv, &barrier.lock);
1154 barrier.wanted--;
1155 barrier.thr_count = -1;
1156 break;
1157
1158 case RCMD_INIT:
1159 case RCMD_NORMAL:
1160 default:
1161 if (barrier.thr_count == -1)
1162 barrier.thr_count = 0;
1163 if (barrier.wanted)
1164 (void) cond_broadcast(&barrier.cv);
1165 break;
1166 }
1167
1168 (void) mutex_unlock(&barrier.lock);
1169 }
1170
1171 /*
1172 * Increment daemon thread count
1173 */
1174 int
rcmd_thr_incr(int cmd)1175 rcmd_thr_incr(int cmd)
1176 {
1177 int seq_num;
1178
1179 (void) mutex_lock(&barrier.lock);
1180 /*
1181 * Set wanted flag
1182 */
1183 barrier.wanted++;
1184
1185 /*
1186 * Wait till it is safe for daemon to perform the operation
1187 *
1188 * NOTE: if a module registers by passing a request to the
1189 * client proccess, we may need to allow register
1190 * to come through during daemon initialization.
1191 */
1192 while (barrier.state != RCMD_NORMAL)
1193 (void) cond_wait(&barrier.cv, &barrier.lock);
1194
1195 if ((cmd == CMD_EVENT) ||
1196 (cmd == CMD_REGISTER) ||
1197 (cmd == CMD_UNREGISTER)) {
1198 /*
1199 * Event passthru and register ops don't need sequence number
1200 */
1201 seq_num = -1;
1202 } else {
1203 /*
1204 * Non register operation gets a sequence number
1205 */
1206 seq_num = get_seq_number();
1207 }
1208 barrier.wanted--;
1209 barrier.thr_count++;
1210 (void) mutex_unlock(&barrier.lock);
1211
1212 if ((cmd == CMD_OFFLINE) ||
1213 (cmd == CMD_SUSPEND) ||
1214 (cmd == CMD_GETINFO)) {
1215 /*
1216 * For these operations, need to ask modules to
1217 * register any new resources that came online.
1218 *
1219 * This is because mount/umount are not instrumented
1220 * to register with rcm before using system resources.
1221 * Certain registration ops may fail during sync, which
1222 * indicates race conditions. This cannot be avoided
1223 * without changing mount/umount.
1224 */
1225 rcmd_db_sync();
1226 }
1227
1228 return (seq_num);
1229 }
1230
1231 /*
1232 * Decrement thread count
1233 */
1234 void
rcmd_thr_decr()1235 rcmd_thr_decr()
1236 {
1237 /*
1238 * Decrement thread count and wake up reload/cleanup thread.
1239 */
1240 (void) mutex_lock(&barrier.lock);
1241 barrier.last_update = time(NULL);
1242 if (--barrier.thr_count == 0)
1243 (void) cond_broadcast(&barrier.cv);
1244 (void) mutex_unlock(&barrier.lock);
1245 }
1246
1247 /*
1248 * Wakeup all waiting threads as a result of SIGHUP
1249 */
1250 static int sighup_received = 0;
1251
1252 void
rcmd_thr_signal()1253 rcmd_thr_signal()
1254 {
1255 (void) mutex_lock(&barrier.lock);
1256 sighup_received = 1;
1257 (void) cond_broadcast(&barrier.cv);
1258 (void) mutex_unlock(&barrier.lock);
1259 }
1260
1261 void
rcmd_start_timer(int timeout)1262 rcmd_start_timer(int timeout)
1263 {
1264 timestruc_t abstime;
1265
1266 if (timeout == 0)
1267 timeout = RCM_DAEMON_TIMEOUT; /* default to 5 minutes */
1268 else
1269 dr_req_list->idle_timeout = timeout; /* persist timeout */
1270
1271 if (timeout > 0) {
1272 abstime.tv_sec = time(NULL) + timeout;
1273 }
1274
1275 (void) mutex_lock(&barrier.lock);
1276 for (;;) {
1277 int idletime;
1278 int is_active;
1279
1280 if (timeout > 0)
1281 (void) cond_timedwait(&barrier.cv, &barrier.lock,
1282 &abstime);
1283 else
1284 (void) cond_wait(&barrier.cv, &barrier.lock);
1285
1286 /*
1287 * If sighup received, change timeout to 0 so the daemon is
1288 * shut down at the first possible moment
1289 */
1290 if (sighup_received)
1291 timeout = 0;
1292
1293 /*
1294 * If timeout is negative, never shutdown the daemon
1295 */
1296 if (timeout < 0)
1297 continue;
1298
1299 /*
1300 * Check for ongoing/pending activity
1301 */
1302 is_active = (barrier.thr_count || barrier.wanted ||
1303 (dr_req_list->n_req != 0));
1304 if (is_active) {
1305 abstime.tv_sec = time(NULL) + timeout;
1306 continue;
1307 }
1308
1309 /*
1310 * If idletime is less than timeout, continue to wait
1311 */
1312 idletime = time(NULL) - barrier.last_update;
1313 if (idletime < timeout) {
1314 abstime.tv_sec = barrier.last_update + timeout;
1315 continue;
1316 }
1317 break;
1318 }
1319
1320 (void) script_main_fini();
1321
1322 rcm_log_message(RCM_INFO, gettext("rcm_daemon is shut down.\n"));
1323 }
1324
1325 /*
1326 * Code related to polling client pid's
1327 * Not declared as static so that we can find this structure easily
1328 * in the core file.
1329 */
1330 struct {
1331 int n_pids;
1332 int n_max_pids;
1333 thread_t poll_tid; /* poll thread id */
1334 int signaled;
1335 pid_t *pids;
1336 int *refcnt;
1337 struct pollfd *fds;
1338 cond_t cv; /* the associated lock is rcm_req_lock */
1339 } polllist;
1340
1341 static int
find_pid_index(pid_t pid)1342 find_pid_index(pid_t pid)
1343 {
1344 int i;
1345
1346 for (i = 0; i < polllist.n_pids; i++) {
1347 if (polllist.pids[i] == pid) {
1348 return (i);
1349 }
1350 }
1351 return (-1);
1352 }
1353
1354 /*
1355 * Resize buffer for new pids
1356 */
1357 static int
get_pid_index()1358 get_pid_index()
1359 {
1360 const int n_chunk = 10;
1361
1362 int n_max;
1363 int index = polllist.n_pids;
1364
1365 if (polllist.n_pids < polllist.n_max_pids) {
1366 polllist.n_pids++;
1367 return (index);
1368 }
1369
1370 if (polllist.n_max_pids == 0) {
1371 n_max = n_chunk;
1372 polllist.pids = s_calloc(n_max, sizeof (pid_t));
1373 polllist.refcnt = s_calloc(n_max, sizeof (int));
1374 polllist.fds = s_calloc(n_max, sizeof (struct pollfd));
1375 } else {
1376 n_max = polllist.n_max_pids + n_chunk;
1377 polllist.pids = s_realloc(polllist.pids,
1378 n_max * sizeof (pid_t));
1379 polllist.refcnt = s_realloc(polllist.refcnt,
1380 n_max * sizeof (int));
1381 polllist.fds = s_realloc(polllist.fds,
1382 n_max * sizeof (struct pollfd));
1383 }
1384 polllist.n_max_pids = n_max;
1385 polllist.n_pids++;
1386 return (index);
1387 }
1388
1389 /*
1390 * rcm_req_lock must be held
1391 */
1392 static void
add_to_polling_list(pid_t pid)1393 add_to_polling_list(pid_t pid)
1394 {
1395 int fd, index;
1396 char procfile[MAXPATHLEN];
1397
1398 if (pid == (pid_t)0)
1399 return;
1400
1401 rcm_log_message(RCM_TRACE1, "add_to_polling_list(%ld)\n", pid);
1402
1403 /*
1404 * Need to stop the poll thread before manipulating the polllist
1405 * since poll thread may possibly be using polllist.fds[] and
1406 * polllist.n_pids. As an optimization, first check if the pid
1407 * is already in the polllist. If it is, there is no need to
1408 * stop the poll thread. Just increment the pid reference count
1409 * and return;
1410 */
1411 index = find_pid_index(pid);
1412 if (index != -1) {
1413 polllist.refcnt[index]++;
1414 return;
1415 }
1416
1417 stop_polling_thread();
1418
1419 /*
1420 * In an attempt to stop the poll thread we may have released
1421 * and reacquired rcm_req_lock. So find the index again.
1422 */
1423 index = find_pid_index(pid);
1424 if (index != -1) {
1425 polllist.refcnt[index]++;
1426 goto done;
1427 }
1428
1429 /*
1430 * Open a /proc file
1431 */
1432 (void) sprintf(procfile, "/proc/%ld/as", pid);
1433 if ((fd = open(procfile, O_RDONLY)) == -1) {
1434 rcm_log_message(RCM_NOTICE, gettext("open(%s): %s\n"),
1435 procfile, strerror(errno));
1436 goto done;
1437 }
1438
1439 /*
1440 * add pid to polllist
1441 */
1442 index = get_pid_index();
1443 polllist.pids[index] = pid;
1444 polllist.refcnt[index] = 1;
1445 polllist.fds[index].fd = fd;
1446 polllist.fds[index].events = 0;
1447 polllist.fds[index].revents = 0;
1448
1449 rcm_log_message(RCM_DEBUG, "add pid %ld at index %ld\n", pid, index);
1450
1451 done:
1452 start_polling_thread();
1453 }
1454
1455 /*
1456 * rcm_req_lock must be held
1457 */
1458 static void
remove_from_polling_list(pid_t pid)1459 remove_from_polling_list(pid_t pid)
1460 {
1461 int i, index;
1462
1463 if (pid == (pid_t)0)
1464 return;
1465
1466 rcm_log_message(RCM_TRACE1, "remove_from_polling_list(%ld)\n", pid);
1467
1468 /*
1469 * Need to stop the poll thread before manipulating the polllist
1470 * since poll thread may possibly be using polllist.fds[] and
1471 * polllist.n_pids. As an optimization, first check the pid
1472 * reference count. If the pid reference count is greater than 1
1473 * there is no need to stop the polling thread.
1474 */
1475
1476 index = find_pid_index(pid);
1477 if (index == -1) {
1478 rcm_log_message(RCM_NOTICE,
1479 gettext("error removing pid %ld from polling list\n"), pid);
1480 return;
1481 }
1482
1483 /*
1484 * decrement the pid refcnt
1485 */
1486 if (polllist.refcnt[index] > 1) {
1487 polllist.refcnt[index]--;
1488 return;
1489 }
1490
1491 stop_polling_thread();
1492
1493 /*
1494 * In an attempt to stop the poll thread we may have released
1495 * and reacquired rcm_req_lock. So find the index again.
1496 */
1497 index = find_pid_index(pid);
1498 if (index == -1) {
1499 rcm_log_message(RCM_NOTICE,
1500 gettext("error removing pid %ld from polling list\n"), pid);
1501 goto done;
1502 }
1503
1504 if (--polllist.refcnt[index] > 0)
1505 goto done;
1506
1507 /*
1508 * refcnt down to zero, delete pid from polling list
1509 */
1510 (void) close(polllist.fds[index].fd);
1511 polllist.n_pids--;
1512
1513 for (i = index; i < polllist.n_pids; i++) {
1514 polllist.pids[i] = polllist.pids[i + 1];
1515 polllist.refcnt[i] = polllist.refcnt[i + 1];
1516 bcopy(&polllist.fds[i + 1], &polllist.fds[i],
1517 sizeof (struct pollfd));
1518 }
1519
1520 rcm_log_message(RCM_DEBUG, "remove pid %ld at index %d\n", pid, index);
1521
1522 done:
1523 start_polling_thread();
1524 }
1525
1526 void
init_poll_thread()1527 init_poll_thread()
1528 {
1529 polllist.poll_tid = (thread_t)-1;
1530 }
1531
1532 void
cleanup_poll_thread()1533 cleanup_poll_thread()
1534 {
1535 (void) mutex_lock(&rcm_req_lock);
1536 if (polllist.poll_tid == thr_self()) {
1537 rcm_log_message(RCM_TRACE2,
1538 "cleanup_poll_thread: n_pids = %d\n", polllist.n_pids);
1539 polllist.poll_tid = (thread_t)-1;
1540 (void) cond_broadcast(&polllist.cv);
1541 }
1542 (void) mutex_unlock(&rcm_req_lock);
1543 }
1544
1545 /*ARGSUSED*/
1546 static void *
pollfunc(void * arg)1547 pollfunc(void *arg)
1548 {
1549 sigset_t mask;
1550
1551 rcm_log_message(RCM_TRACE2, "poll thread started. n_pids = %d\n",
1552 polllist.n_pids);
1553
1554 /*
1555 * Unblock SIGUSR1 to allow polling thread to be killed
1556 */
1557 (void) sigemptyset(&mask);
1558 (void) sigaddset(&mask, SIGUSR1);
1559 (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
1560
1561 (void) poll(polllist.fds, polllist.n_pids, (time_t)-1);
1562
1563 /*
1564 * block SIGUSR1 to avoid being killed while holding a lock
1565 */
1566 (void) sigemptyset(&mask);
1567 (void) sigaddset(&mask, SIGUSR1);
1568 (void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);
1569
1570 rcm_log_message(RCM_TRACE2, "returned from poll()\n");
1571
1572 cleanup_poll_thread();
1573
1574 (void) mutex_lock(&barrier.lock);
1575 need_cleanup = 1;
1576 (void) cond_broadcast(&barrier.cv);
1577 (void) mutex_unlock(&barrier.lock);
1578
1579 return (NULL);
1580 }
1581
1582 /*
1583 * rcm_req_lock must be held
1584 */
1585 void
start_polling_thread()1586 start_polling_thread()
1587 {
1588 int err;
1589
1590 if (rcmd_get_state() != RCMD_NORMAL)
1591 return;
1592
1593 if (polllist.poll_tid != (thread_t)-1 || polllist.n_pids == 0)
1594 return;
1595
1596 if ((err = thr_create(NULL, 0, pollfunc, NULL, THR_DETACHED,
1597 &polllist.poll_tid)) == 0)
1598 polllist.signaled = 0;
1599 else
1600 rcm_log_message(RCM_ERROR,
1601 gettext("failed to create polling thread: %s\n"),
1602 strerror(err));
1603 }
1604
1605 /*
1606 * rcm_req_lock must be held
1607 */
1608 static void
stop_polling_thread()1609 stop_polling_thread()
1610 {
1611 int err;
1612
1613 while (polllist.poll_tid != (thread_t)-1) {
1614 if (polllist.signaled == 0) {
1615 if ((err = thr_kill(polllist.poll_tid, SIGUSR1)) == 0)
1616 polllist.signaled = 1;
1617 else
1618 /*
1619 * thr_kill shouldn't have failed since the
1620 * poll thread id and the signal are valid.
1621 * So log an error. Since when thr_kill
1622 * fails no signal is sent (as per man page),
1623 * the cond_wait below will wait until the
1624 * the poll thread exits by some other means.
1625 * The poll thread, for example, exits on its
1626 * own when any DR initiator process that it
1627 * is currently polling exits.
1628 */
1629 rcm_log_message(RCM_ERROR,
1630 gettext(
1631 "fail to kill polling thread %d: %s\n"),
1632 polllist.poll_tid, strerror(err));
1633 }
1634 (void) cond_wait(&polllist.cv, &rcm_req_lock);
1635 }
1636 }
1637