xref: /minix/minix/servers/rs/manager.c (revision 77e79d33)
1 /*
2  * Changes:
3  *   Nov 22, 2009:	added basic live update support  (Cristiano Giuffrida)
4  *   Mar 02, 2009:	Extended isolation policies  (Jorrit N. Herder)
5  *   Jul 22, 2005:	Created  (Jorrit N. Herder)
6  */
7 
8 #include <paths.h>
9 
10 #include <sys/exec_elf.h>
11 
12 #include "inc.h"
13 
14 #include "kernel/proc.h"
15 
16 static int run_script(struct rproc *rp);
17 
18 /*===========================================================================*
19  *				caller_is_root				     *
20  *===========================================================================*/
21 static int caller_is_root(endpoint)
22 endpoint_t endpoint;				/* caller endpoint */
23 {
24   uid_t euid;
25 
26   /* Check if caller has root user ID. */
27   euid = getnuid(endpoint);
28   if (rs_verbose && euid != 0)
29   {
30 	printf("RS: got unauthorized request from endpoint %d\n", endpoint);
31   }
32 
33   return euid == 0;
34 }
35 
36 /*===========================================================================*
37  *				caller_can_control			     *
38  *===========================================================================*/
39 static int caller_can_control(endpoint, target_rp)
40 endpoint_t endpoint;
41 struct rproc *target_rp;
42 {
43   int control_allowed = 0;
44   register struct rproc *rp;
45   register struct rprocpub *rpub;
46   char *proc_name;
47   int c;
48 
49   proc_name = target_rp->r_pub->proc_name;
50 
51   /* Check if label is listed in caller's isolation policy. */
52   for (rp = BEG_RPROC_ADDR; rp < END_RPROC_ADDR; rp++) {
53 	if (!(rp->r_flags & RS_IN_USE))
54 		continue;
55 
56 	rpub = rp->r_pub;
57 	if (rpub->endpoint == endpoint) {
58 		break;
59 	}
60   }
61   if (rp == END_RPROC_ADDR) return 0;
62 
63   for (c = 0; c < rp->r_nr_control; c++) {
64 	if (strcmp(rp->r_control[c], proc_name) == 0) {
65 		control_allowed = 1;
66 		break;
67 	}
68   }
69 
70   if (rs_verbose)
71 	printf("RS: allowing %u control over %s via policy: %s\n",
72 		endpoint, target_rp->r_pub->label,
73 		control_allowed ? "yes" : "no");
74 
75   return control_allowed;
76 }
77 
78 /*===========================================================================*
79  *			     check_call_permission			     *
80  *===========================================================================*/
81 int check_call_permission(caller, call, rp)
82 endpoint_t caller;
83 int call;
84 struct rproc *rp;
85 {
86 /* Check if the caller has permission to execute a particular call. */
87   struct rprocpub *rpub;
88   int call_allowed;
89 
90   /* Caller should be either root or have control privileges. */
91   call_allowed = caller_is_root(caller);
92   if(rp) {
93       call_allowed |= caller_can_control(caller, rp);
94   }
95   if(!call_allowed) {
96       return EPERM;
97   }
98 
99   if(rp) {
100       rpub = rp->r_pub;
101 
102       /* Only allow RS_EDIT if the target is a user process. */
103       if(!(rp->r_priv.s_flags & SYS_PROC)) {
104           if(call != RS_EDIT) return EPERM;
105       }
106 
107       /* Disallow the call if an update is in progress. */
108       if(RUPDATE_IS_UPDATING()) {
109       	  return EBUSY;
110       }
111 
112       /* Disallow the call if another call is in progress for the service. */
113       if((rp->r_flags & RS_LATEREPLY)
114           || (rp->r_flags & RS_INITIALIZING)) {
115           return EBUSY;
116       }
117 
118       /* Only allow RS_DOWN and RS_RESTART if the service has terminated. */
119       if(rp->r_flags & RS_TERMINATED) {
120           if(call != RS_DOWN && call != RS_RESTART) return EPERM;
121       }
122 
123       /* Disallow RS_DOWN for core system services. */
124       if (rpub->sys_flags & SF_CORE_SRV) {
125           if(call == RS_DOWN) return EPERM;
126       }
127   }
128 
129   return OK;
130 }
131 
132 /*===========================================================================*
133  *				copy_rs_start				     *
134  *===========================================================================*/
135 int copy_rs_start(src_e, src_rs_start, dst_rs_start)
136 endpoint_t src_e;
137 char *src_rs_start;
138 struct rs_start *dst_rs_start;
139 {
140   int r;
141 
142   r = sys_datacopy(src_e, (vir_bytes) src_rs_start,
143   	SELF, (vir_bytes) dst_rs_start, sizeof(struct rs_start));
144 
145   return r;
146 }
147 
148 /*===========================================================================*
149  *				copy_label				     *
150  *===========================================================================*/
151 int copy_label(src_e, src_label, src_len, dst_label, dst_len)
152 endpoint_t src_e;
153 char *src_label;
154 size_t src_len;
155 char *dst_label;
156 size_t dst_len;
157 {
158   int s, len;
159 
160   len = MIN(dst_len-1, src_len);
161 
162   s = sys_datacopy(src_e, (vir_bytes) src_label,
163 	SELF, (vir_bytes) dst_label, len);
164   if (s != OK) return s;
165 
166   dst_label[len] = 0;
167 
168   return OK;
169 }
170 
171 /*===========================================================================*
172  *			      init_state_data				     *
173  *===========================================================================*/
174 int init_state_data(endpoint_t src_e, int prepare_state,
175     struct rs_state_data *src_rs_state_data,
176     struct rs_state_data *dst_rs_state_data)
177 {
178   int s, i, j, num_ipc_filters = 0;
179   struct rs_ipc_filter_el (*rs_ipc_filter_els)[IPCF_MAX_ELEMENTS];
180   struct rs_ipc_filter_el rs_ipc_filter[IPCF_MAX_ELEMENTS];
181   size_t rs_ipc_filter_size = sizeof(rs_ipc_filter);
182   ipc_filter_el_t (*ipcf_els_buff)[IPCF_MAX_ELEMENTS];
183   size_t ipcf_els_buff_size;
184 
185   dst_rs_state_data->size = 0;
186   dst_rs_state_data->eval_addr = NULL;
187   dst_rs_state_data->eval_len = 0;
188   dst_rs_state_data->ipcf_els = NULL;
189   dst_rs_state_data->ipcf_els_size  = 0;
190   if(src_rs_state_data->size != sizeof(struct rs_state_data)) {
191       return E2BIG;
192   }
193 
194   /* Initialize eval expression. */
195   if(prepare_state == SEF_LU_STATE_EVAL) {
196       if(src_rs_state_data->eval_len == 0 || !src_rs_state_data->eval_addr) {
197           return EINVAL;
198       }
199       dst_rs_state_data->eval_addr = malloc(src_rs_state_data->eval_len+1);
200       dst_rs_state_data->eval_len = src_rs_state_data->eval_len;
201       if(!dst_rs_state_data->eval_addr) {
202           return ENOMEM;
203       }
204       s = sys_datacopy(src_e, (vir_bytes) src_rs_state_data->eval_addr,
205           SELF, (vir_bytes) dst_rs_state_data->eval_addr,
206           dst_rs_state_data->eval_len);
207       if(s != OK) {
208           return s;
209       }
210       *((char*)dst_rs_state_data->eval_addr + dst_rs_state_data->eval_len) = '\0';
211       dst_rs_state_data->size = src_rs_state_data->size;
212   }
213 
214   /* Initialize ipc filters. */
215   if(src_rs_state_data->ipcf_els_size % rs_ipc_filter_size) {
216       return E2BIG;
217   }
218   rs_ipc_filter_els = src_rs_state_data->ipcf_els;
219   num_ipc_filters = src_rs_state_data->ipcf_els_size / rs_ipc_filter_size;
220   if(!rs_ipc_filter_els) {
221       return OK;
222   }
223 
224   ipcf_els_buff_size = sizeof(ipc_filter_el_t)*IPCF_MAX_ELEMENTS*num_ipc_filters;
225   if(src_e == VM_PROC_NR) {
226       ipcf_els_buff_size += sizeof(ipc_filter_el_t)*IPCF_MAX_ELEMENTS;
227   }
228   ipcf_els_buff = malloc(ipcf_els_buff_size);
229   if(!ipcf_els_buff) {
230       return ENOMEM;
231   }
232   memset(ipcf_els_buff, 0, ipcf_els_buff_size);
233   for(i=0;i<num_ipc_filters;i++) {
234       s = sys_datacopy(src_e, (vir_bytes) rs_ipc_filter_els[i],
235           SELF, (vir_bytes) rs_ipc_filter, rs_ipc_filter_size);
236       if(s != OK) {
237           return s;
238       }
239       for(j=0;j<IPCF_MAX_ELEMENTS && rs_ipc_filter[j].flags;j++) {
240           endpoint_t m_source = 0;
241           int m_type = 0;
242           int flags = rs_ipc_filter[j].flags;
243           if(flags & IPCF_MATCH_M_TYPE) {
244               m_type = rs_ipc_filter[j].m_type;
245           }
246           if(flags & IPCF_MATCH_M_SOURCE) {
247               if(ds_retrieve_label_endpt(rs_ipc_filter[j].m_label,&m_source) != OK) {
248                   /* try to see if an endpoint was provided as label */
249                   char *buff;
250                   if(!strcmp("ANY_USR", rs_ipc_filter[j].m_label)) {
251                       m_source = ANY_USR;
252                   }
253                   else if(!strcmp("ANY_SYS", rs_ipc_filter[j].m_label)) {
254                       m_source = ANY_SYS;
255                   }
256                   else if(!strcmp("ANY_TSK", rs_ipc_filter[j].m_label)) {
257                       m_source = ANY_TSK;
258                   }
259                   else {
260                       errno=0;
261                       m_source = strtol(rs_ipc_filter[j].m_label, &buff, 10);
262                       if(errno || strcmp(buff, "")) {
263                             return ESRCH;
264                       }
265                   }
266               }
267           }
268           ipcf_els_buff[i][j].flags = flags;
269           ipcf_els_buff[i][j].m_source = m_source;
270           ipcf_els_buff[i][j].m_type = m_type;
271       }
272   }
273   if(src_e == VM_PROC_NR) {
274       /* Make sure VM can still talk to us at update time. */
275       ipcf_els_buff[i][0].flags = (IPCF_EL_WHITELIST|IPCF_MATCH_M_SOURCE|IPCF_MATCH_M_TYPE);
276       ipcf_els_buff[i][0].m_source = RS_PROC_NR;
277       ipcf_els_buff[i][0].m_type = VM_RS_UPDATE;
278   }
279   dst_rs_state_data->size = src_rs_state_data->size;
280   dst_rs_state_data->ipcf_els = ipcf_els_buff;
281   dst_rs_state_data->ipcf_els_size = ipcf_els_buff_size;
282 
283   return OK;
284 }
285 
286 /*===========================================================================*
287  *			        build_cmd_dep				     *
288  *===========================================================================*/
289 void build_cmd_dep(struct rproc *rp)
290 {
291   struct rprocpub *rpub;
292   int arg_count;
293   int len;
294   char *cmd_ptr;
295 
296   rpub = rp->r_pub;
297 
298   /* Build argument vector to be passed to execute call. The format of the
299    * arguments vector is: path, arguments, NULL.
300    */
301   strcpy(rp->r_args, rp->r_cmd);		/* copy raw command */
302   arg_count = 0;				/* initialize arg count */
303   rp->r_argv[arg_count++] = rp->r_args;		/* start with path */
304   cmd_ptr = rp->r_args;				/* do some parsing */
305   while(*cmd_ptr != '\0') {			/* stop at end of string */
306       if (*cmd_ptr == ' ') {			/* next argument */
307           *cmd_ptr = '\0';			/* terminate previous */
308 	  while (*++cmd_ptr == ' ') ; 		/* skip spaces */
309 	  if (*cmd_ptr == '\0') break;		/* no arg following */
310 	  /* There are ARGV_ELEMENTS elements; must leave one for null */
311 	  if (arg_count>=ARGV_ELEMENTS-1) {	/* arg vector full */
312 		printf("RS: build_cmd_dep: too many args\n");
313 	  	break;
314 	  }
315 	  assert(arg_count < ARGV_ELEMENTS);
316           rp->r_argv[arg_count++] = cmd_ptr;	/* add to arg vector */
317       }
318       cmd_ptr ++;				/* continue parsing */
319   }
320   assert(arg_count < ARGV_ELEMENTS);
321   rp->r_argv[arg_count] = NULL;			/* end with NULL pointer */
322   rp->r_argc = arg_count;
323 }
324 
325 /*===========================================================================*
326  *				end_srv_init				     *
327  *===========================================================================*/
328 void end_srv_init(struct rproc *rp)
329 {
330   struct rprocpub *rpub;
331   int r;
332 
333   rpub = rp->r_pub;
334 
335   /* See if a late reply has to be sent. */
336   late_reply(rp, OK);
337 
338   /* If the service has completed initialization after a crash
339    * make the new instance active and cleanup the old replica.
340    * If the service was part of a scheduled update, schedule the new
341    * replica for the same update.
342    */
343   if(rp->r_prev_rp) {
344       if(SRV_IS_UPD_SCHEDULED(rp->r_prev_rp)) {
345           rupdate_upd_move(rp->r_prev_rp, rp);
346       }
347       cleanup_service(rp->r_prev_rp);
348       rp->r_prev_rp = NULL;
349       rp->r_restarts += 1;
350 
351       if(rs_verbose)
352           printf("RS: %s completed restart\n", srv_to_string(rp));
353   }
354   rp->r_next_rp = NULL;
355 }
356 
357 /*===========================================================================*
358  *			     kill_service_debug				     *
359  *===========================================================================*/
360 int kill_service_debug(file, line, rp, errstr, err)
361 char *file;
362 int line;
363 struct rproc *rp;
364 char *errstr;
365 int err;
366 {
367 /* Crash a system service and don't let it restart. */
368   if(errstr && !shutting_down) {
369       printf("RS: %s (error %d)\n", errstr, err);
370   }
371   rp->r_flags |= RS_EXITING;				/* expect exit */
372   crash_service_debug(file, line, rp);			/* simulate crash */
373 
374   return err;
375 }
376 
377 /*===========================================================================*
378  *			    crash_service_debug				     *
379  *===========================================================================*/
380 int crash_service_debug(file, line, rp)
381 char *file;
382 int line;
383 struct rproc *rp;
384 {
385 /* Simluate a crash in a system service. */
386   struct rprocpub *rpub;
387 
388   rpub = rp->r_pub;
389 
390   if(rs_verbose)
391       printf("RS: %s %skilled at %s:%d\n", srv_to_string(rp),
392           rp->r_flags & RS_EXITING ? "lethally " : "", file, line);
393 
394   /* RS should simply exit() directly. */
395   if(rpub->endpoint == RS_PROC_NR) {
396       exit(1);
397   }
398 
399   return sys_kill(rpub->endpoint, SIGKILL);
400 }
401 
402 /*===========================================================================*
403  *			  cleanup_service_debug				     *
404  *===========================================================================*/
405 void cleanup_service_debug(file, line, rp)
406 char *file;
407 int line;
408 struct rproc *rp;
409 {
410   struct rprocpub *rpub;
411   int detach, cleanup_script;
412   int s;
413 
414   rpub = rp->r_pub;
415 
416   if(!(rp->r_flags & RS_DEAD)) {
417       if(rs_verbose)
418           printf("RS: %s marked for cleanup at %s:%d\n", srv_to_string(rp),
419               file, line);
420 
421       /* Unlink service the first time. */
422       if(rp->r_next_rp) {
423           rp->r_next_rp->r_prev_rp = NULL;
424           rp->r_next_rp = NULL;
425       }
426       if(rp->r_prev_rp) {
427           rp->r_prev_rp->r_next_rp = NULL;
428           rp->r_prev_rp = NULL;
429       }
430       if(rp->r_new_rp) {
431           rp->r_new_rp->r_old_rp = NULL;
432           rp->r_new_rp = NULL;
433       }
434       if(rp->r_old_rp) {
435           rp->r_old_rp->r_new_rp = NULL;
436           rp->r_old_rp = NULL;
437       }
438       rp->r_flags |= RS_DEAD;
439 
440       /* Make sure the service can no longer run and unblock IPC callers. */
441       sys_privctl(rpub->endpoint, SYS_PRIV_DISALLOW, NULL);
442       sys_privctl(rpub->endpoint, SYS_PRIV_CLEAR_IPC_REFS, NULL);
443       rp->r_flags &= ~RS_ACTIVE;
444 
445       /* Send a late reply if there is any pending. */
446       late_reply(rp, OK);
447 
448       return;
449   }
450 
451   cleanup_script = rp->r_flags & RS_CLEANUP_SCRIPT;
452   detach = rp->r_flags & RS_CLEANUP_DETACH;
453 
454   /* Cleanup the service when not detaching. */
455   if(!detach) {
456       if(rs_verbose)
457           printf("RS: %s cleaned up at %s:%d\n", srv_to_string(rp),
458               file, line);
459 
460       /* Tell scheduler this process is finished */
461       if ((s = sched_stop(rp->r_scheduler, rpub->endpoint)) != OK) {
462             printf("RS: warning: scheduler won't give up process: %d\n", s);
463       }
464 
465       /* Ask PM to exit the service */
466       if(rp->r_pid == -1) {
467           printf("RS: warning: attempt to kill pid -1!\n");
468       }
469       else {
470           srv_kill(rp->r_pid, SIGKILL);
471       }
472   }
473 
474   /* See if we need to run a script now. */
475   if(cleanup_script) {
476       rp->r_flags &= ~RS_CLEANUP_SCRIPT;
477       s = run_script(rp);
478       if(s != OK) {
479           printf("RS: warning: cannot run cleanup script: %d\n", s);
480       }
481   }
482 
483   if(detach) {
484       /* Detach service when asked to. */
485       detach_service(rp);
486   }
487   else {
488       /* Free slot otherwise, unless we're about to reuse it */
489       if (!(rp->r_flags & RS_REINCARNATE))
490           free_slot(rp);
491   }
492 }
493 
494 /*===========================================================================*
495  *			     detach_service_debug			     *
496  *===========================================================================*/
497 void detach_service_debug(file, line, rp)
498 char *file;
499 int line;
500 struct rproc *rp;
501 {
502 /* Detach the given system service. */
503   static unsigned long detach_counter = 0;
504   char label[RS_MAX_LABEL_LEN];
505   struct rprocpub *rpub;
506 
507   rpub = rp->r_pub;
508 
509   /* Publish a new unique label for the system service. */
510   rpub->label[RS_MAX_LABEL_LEN-1] = '\0';
511   strcpy(label, rpub->label);
512   snprintf(rpub->label, RS_MAX_LABEL_LEN, "%lu.%s", ++detach_counter, label);
513   ds_publish_label(rpub->label, rpub->endpoint, DSF_OVERWRITE);
514 
515   if(rs_verbose)
516       printf("RS: %s detached at %s:%d\n", srv_to_string(rp),
517           file, line);
518 
519   /* Allow the service to run. */
520   rp->r_flags = RS_IN_USE | RS_ACTIVE;
521   rpub->sys_flags &= ~(SF_CORE_SRV|SF_DET_RESTART);
522   rp->r_period = 0;
523   rpub->dev_nr = 0;
524   sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL);
525 }
526 
527 /*===========================================================================*
528  *				create_service				     *
529  *===========================================================================*/
530 int create_service(rp)
531 struct rproc *rp;
532 {
533 /* Create the given system service. */
534   int child_proc_nr_e, child_proc_nr_n;		/* child process slot */
535   pid_t child_pid;				/* child's process id */
536   int s, use_copy, has_replica;
537   extern char **environ;
538   struct rprocpub *rpub;
539 
540   rpub = rp->r_pub;
541   use_copy= (rpub->sys_flags & SF_USE_COPY);
542   has_replica= (rp->r_old_rp
543       || (rp->r_prev_rp && !(rp->r_prev_rp->r_flags & RS_TERMINATED)));
544 
545   /* Do we need an existing replica to create the service? */
546   if(!has_replica && (rpub->sys_flags & SF_NEED_REPL)) {
547       printf("RS: unable to create service '%s' without a replica\n",
548           rpub->label);
549       free_slot(rp);
550       return(EPERM);
551   }
552 
553   /* Do we need an in-memory copy to create the service? */
554   if(!use_copy && (rpub->sys_flags & SF_NEED_COPY)) {
555       printf("RS: unable to create service '%s' without an in-memory copy\n",
556           rpub->label);
557       free_slot(rp);
558       return(EPERM);
559   }
560 
561   /* Do we have a copy or a command to create the service? */
562   if(!use_copy && !strcmp(rp->r_cmd, "")) {
563       printf("RS: unable to create service '%s' without a copy or command\n",
564           rpub->label);
565       free_slot(rp);
566       return(EPERM);
567   }
568 
569   /* Now fork and branch for parent and child process (and check for error).
570    * After fork()ing, we need to pin RS memory again or pagefaults will occur
571    * on future writes.
572    */
573   if(rs_verbose)
574       printf("RS: forking child with srv_fork()...\n");
575   child_pid= srv_fork(rp->r_uid, 0);	/* Force group to wheel for now */
576   if(child_pid < 0) {
577       printf("RS: srv_fork() failed (error %d)\n", child_pid);
578       free_slot(rp);
579       return(child_pid);
580   }
581 
582   /* Get endpoint of the child. */
583   if ((s = getprocnr(child_pid, &child_proc_nr_e)) != 0)
584 	panic("unable to get child endpoint: %d", s);
585 
586   /* There is now a child process. Update the system process table. */
587   child_proc_nr_n = _ENDPOINT_P(child_proc_nr_e);
588   rp->r_flags = RS_IN_USE;			/* mark slot in use */
589   rpub->endpoint = child_proc_nr_e;		/* set child endpoint */
590   rp->r_pid = child_pid;			/* set child pid */
591   rp->r_check_tm = 0;				/* not checked yet */
592   rp->r_alive_tm = getticks(); 			/* currently alive */
593   rp->r_stop_tm = 0;				/* not exiting yet */
594   rp->r_backoff = 0;				/* not to be restarted */
595   rproc_ptr[child_proc_nr_n] = rp;		/* mapping for fast access */
596   rpub->in_use = TRUE;				/* public entry is now in use */
597 
598   /* Set and synch the privilege structure for the new service. */
599   if ((s = sys_privctl(child_proc_nr_e, SYS_PRIV_SET_SYS, &rp->r_priv)) != OK
600 	|| (s = sys_getpriv(&rp->r_priv, child_proc_nr_e)) != OK) {
601 	printf("RS: unable to set privilege structure: %d\n", s);
602 	cleanup_service(rp);
603 	vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
604 	return ENOMEM;
605   }
606 
607   /* Set the scheduler for this process */
608   if ((s = sched_init_proc(rp)) != OK) {
609 	printf("RS: unable to start scheduling: %d\n", s);
610 	cleanup_service(rp);
611 	vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
612 	return s;
613   }
614 
615   /* Copy the executable image into the child process. If no copy exists,
616    * allocate one and free it right after exec completes.
617    */
618   if(use_copy) {
619       if(rs_verbose)
620           printf("RS: %s uses an in-memory copy\n",
621               srv_to_string(rp));
622   }
623   else {
624       if ((s = read_exec(rp)) != OK) {
625           printf("RS: read_exec failed: %d\n", s);
626           cleanup_service(rp);
627           vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
628           return s;
629       }
630   }
631   if(rs_verbose)
632         printf("RS: execing child with srv_execve()...\n");
633   s = srv_execve(child_proc_nr_e, rp->r_exec, rp->r_exec_len, rpub->proc_name,
634         rp->r_argv, environ);
635   vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
636   if (s != OK) {
637         printf("RS: srv_execve failed: %d\n", s);
638         cleanup_service(rp);
639         return s;
640   }
641   if(!use_copy) {
642         free_exec(rp);
643   }
644 
645   /* The purpose of non-blocking forks is to avoid involving VFS in the forking
646    * process, because VFS may be blocked on a sendrec() to a MFS that is
647    * waiting for a endpoint update for a dead driver. We have just published
648    * that update, but VFS may still be blocked. As a result, VFS may not yet
649    * have received PM's fork message. Hence, if we call mapdriver()
650    * immediately, VFS may not know about the process and thus refuse to add the
651    * driver entry. The following temporary hack works around this by forcing
652    * blocking communication from PM to VFS. Once VFS has been made non-blocking
653    * towards MFS instances, this hack and the big part of srv_fork() can go.
654    */
655   setuid(0);
656 
657   /* If this is a RS instance, pin memory. */
658   if(rp->r_priv.s_flags & ROOT_SYS_PROC) {
659       if(rs_verbose)
660           printf("RS: pinning memory of RS instance %s\n", srv_to_string(rp));
661 
662       s = vm_memctl(rpub->endpoint, VM_RS_MEM_PIN, 0, 0);
663       if(s != OK) {
664           printf("vm_memctl failed: %d\n", s);
665           cleanup_service(rp);
666           return s;
667       }
668   }
669 
670   /* If this is a VM instance, let VM know now. */
671   if(rp->r_priv.s_flags & VM_SYS_PROC) {
672       struct rproc *rs_rp;
673       struct rproc **rs_rps;
674       int i, nr_rs_rps;
675 
676       if(rs_verbose)
677           printf("RS: informing VM of instance %s\n", srv_to_string(rp));
678 
679       s = vm_memctl(rpub->endpoint, VM_RS_MEM_MAKE_VM, 0, 0);
680       if(s != OK) {
681           printf("vm_memctl failed: %d\n", s);
682           cleanup_service(rp);
683           return s;
684       }
685 
686       /* VM may start actually pinning memory for us only now.
687        * Ask again for all our instances.
688        */
689       rs_rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
690       get_service_instances(rs_rp, &rs_rps, &nr_rs_rps);
691       for(i=0;i<nr_rs_rps;i++) {
692           vm_memctl(rs_rps[i]->r_pub->endpoint, VM_RS_MEM_PIN, 0, 0);
693       }
694   }
695 
696   /* Tell VM about allowed calls. */
697   if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0], TRUE)) != OK) {
698       printf("RS: vm_set_priv failed: %d\n", s);
699       cleanup_service(rp);
700       return s;
701   }
702 
703   if(rs_verbose)
704       printf("RS: %s created\n", srv_to_string(rp));
705 
706   return OK;
707 }
708 
709 /*===========================================================================*
710  *				clone_service				     *
711  *===========================================================================*/
712 int clone_service(struct rproc *rp, int instance_flag, int init_flags)
713 {
714 /* Clone the given system service instance. */
715   struct rproc *replica_rp;
716   struct rprocpub *replica_rpub;
717   struct rproc **rp_link;
718   struct rproc **replica_link;
719   struct rproc *rs_rp;
720   int rs_flags;
721   int r;
722 
723   if(rs_verbose)
724       printf("RS: %s creating a replica\n", srv_to_string(rp));
725 
726   /* VM can only reliably support one replica at the time for now.
727    * XXX TO-DO: Fix VM's rs_memctl_make_vm_instance to allow multiple replicas.
728    */
729   if(rp->r_pub->endpoint == VM_PROC_NR && instance_flag == LU_SYS_PROC
730       && rp->r_next_rp) {
731       cleanup_service_now(rp->r_next_rp);
732       rp->r_next_rp = NULL;
733   }
734 
735   /* Clone slot. */
736   if((r = clone_slot(rp, &replica_rp)) != OK) {
737       return r;
738   }
739   replica_rpub = replica_rp->r_pub;
740 
741   /* Clone is a live updated or restarted service instance? */
742   if(instance_flag == LU_SYS_PROC) {
743       rp_link = &rp->r_new_rp;
744       replica_link = &replica_rp->r_old_rp;
745   }
746   else {
747       rp_link = &rp->r_next_rp;
748       replica_link = &replica_rp->r_prev_rp;
749   }
750   replica_rp->r_priv.s_flags |= instance_flag;
751   replica_rp->r_priv.s_init_flags |= init_flags;
752 
753   /* Link the two slots. */
754   *rp_link = replica_rp;
755   *replica_link = rp;
756 
757   /* Create a new replica of the service. */
758   r = create_service(replica_rp);
759   if(r != OK) {
760       *rp_link = NULL;
761       return r;
762   }
763 
764   /* If this instance is for restarting RS, set up a backup signal manager. */
765   rs_flags = (ROOT_SYS_PROC | RST_SYS_PROC);
766   if((replica_rp->r_priv.s_flags & rs_flags) == rs_flags) {
767       rs_rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
768 
769       /* Update signal managers. */
770       r = update_sig_mgrs(rs_rp, SELF, replica_rpub->endpoint);
771       if(r == OK) {
772           r = update_sig_mgrs(replica_rp, SELF, NONE);
773       }
774       if(r != OK) {
775           *rp_link = NULL;
776           return kill_service(replica_rp, "update_sig_mgrs failed", r);
777       }
778   }
779 
780   return OK;
781 }
782 
783 /*===========================================================================*
784  *				publish_service				     *
785  *===========================================================================*/
786 int publish_service(rp)
787 struct rproc *rp;				/* pointer to service slot */
788 {
789 /* Publish a service. */
790   int r;
791   struct rprocpub *rpub;
792   struct rs_pci pci_acl;
793   message m;
794   endpoint_t ep;
795 
796   rpub = rp->r_pub;
797 
798   /* Register label with DS. */
799   r = ds_publish_label(rpub->label, rpub->endpoint, DSF_OVERWRITE);
800   if (r != OK) {
801       return kill_service(rp, "ds_publish_label call failed", r);
802   }
803 
804   /* If the service is a driver, map it. */
805   if (rpub->dev_nr > 0) {
806       /* The purpose of non-blocking forks is to avoid involving VFS in the
807        * forking process, because VFS may be blocked on a ipc_sendrec() to a MFS
808        * that is waiting for a endpoint update for a dead driver. We have just
809        * published that update, but VFS may still be blocked. As a result, VFS
810        * may not yet have received PM's fork message. Hence, if we call
811        * mapdriver() immediately, VFS may not know about the process and thus
812        * refuse to add the driver entry. The following temporary hack works
813        * around this by forcing blocking communication from PM to VFS. Once VFS
814        * has been made non-blocking towards MFS instances, this hack and the
815        * big part of srv_fork() can go.
816        */
817       setuid(0);
818 
819       if ((r = mapdriver(rpub->label, rpub->dev_nr)) != OK) {
820           return kill_service(rp, "couldn't map driver", r);
821       }
822   }
823 
824 #if USE_PCI
825   /* If PCI properties are set, inform the PCI driver about the new service. */
826   if(rpub->pci_acl.rsp_nr_device || rpub->pci_acl.rsp_nr_class) {
827       pci_acl = rpub->pci_acl;
828       strcpy(pci_acl.rsp_label, rpub->label);
829       pci_acl.rsp_endpoint= rpub->endpoint;
830 
831       r = pci_set_acl(&pci_acl);
832       if (r != OK) {
833           return kill_service(rp, "pci_set_acl call failed", r);
834       }
835   }
836 #endif /* USE_PCI */
837 
838   if (rpub->devman_id != 0) {
839 	  r = ds_retrieve_label_endpt("devman",&ep);
840 
841 	  if (r != OK) {
842 		return kill_service(rp, "devman not running?", r);
843 	  }
844 	  m.m_type = DEVMAN_BIND;
845 	  m.DEVMAN_ENDPOINT  = rpub->endpoint;
846 	  m.DEVMAN_DEVICE_ID = rpub->devman_id;
847 	  r = ipc_sendrec(ep, &m);
848 	  if (r != OK || m.DEVMAN_RESULT != OK) {
849 		 return kill_service(rp, "devman bind device failed", r);
850 	  }
851   }
852 
853   if(rs_verbose)
854       printf("RS: %s published\n", srv_to_string(rp));
855 
856   return OK;
857 }
858 
859 /*===========================================================================*
860  *			      unpublish_service				     *
861  *===========================================================================*/
862 int unpublish_service(rp)
863 struct rproc *rp;				/* pointer to service slot */
864 {
865 /* Unpublish a service. */
866   struct rprocpub *rpub;
867   int r, result;
868   message m;
869   endpoint_t ep;
870 
871 
872   rpub = rp->r_pub;
873   result = OK;
874 
875   /* Unregister label with DS. */
876   r = ds_delete_label(rpub->label);
877   if (r != OK && !shutting_down) {
878      printf("RS: ds_delete_label call failed (error %d)\n", r);
879      result = r;
880   }
881 
882   /* No need to inform VFS and VM, cleanup is done on exit automatically. */
883 
884 #if USE_PCI
885   /* If PCI properties are set, inform the PCI driver. */
886   if(rpub->pci_acl.rsp_nr_device || rpub->pci_acl.rsp_nr_class) {
887       r = pci_del_acl(rpub->endpoint);
888       if (r != OK && !shutting_down) {
889           printf("RS: pci_del_acl call failed (error %d)\n", r);
890           result = r;
891       }
892   }
893 #endif /* USE_PCI */
894 
895   if (rpub->devman_id != 0) {
896 	  r = ds_retrieve_label_endpt("devman",&ep);
897 
898 	  if (r != OK) {
899 		   printf("RS: devman not running?");
900 	  } else {
901 		m.m_type = DEVMAN_UNBIND;
902 		m.DEVMAN_ENDPOINT  = rpub->endpoint;
903 		m.DEVMAN_DEVICE_ID = rpub->devman_id;
904 		r = ipc_sendrec(ep, &m);
905 
906 		if (r != OK || m.DEVMAN_RESULT != OK) {
907 			 printf("RS: devman unbind device failed");
908 		}
909 	  }
910   }
911 
912   if(rs_verbose)
913       printf("RS: %s unpublished\n", srv_to_string(rp));
914 
915   return result;
916 }
917 
918 /*===========================================================================*
919  *				run_service				     *
920  *===========================================================================*/
921 int run_service(struct rproc *rp, int init_type, int init_flags)
922 {
923 /* Let a newly created service run. */
924   struct rprocpub *rpub;
925   int s;
926 
927   rpub = rp->r_pub;
928 
929   /* Allow the service to run. */
930   if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
931       return kill_service(rp, "unable to allow the service to run",s);
932   }
933 
934   /* Initialize service. */
935   if((s = init_service(rp, init_type, init_flags)) != OK) {
936       return kill_service(rp, "unable to initialize service", s);
937   }
938 
939   if(rs_verbose)
940       printf("RS: %s allowed to run\n", srv_to_string(rp));
941 
942   return OK;
943 }
944 
945 /*===========================================================================*
946  *				start_service				     *
947  *===========================================================================*/
948 int start_service(struct rproc *rp, int init_flags)
949 {
950 /* Start a system service. */
951   int r;
952   struct rprocpub *rpub;
953 
954   rpub = rp->r_pub;
955 
956   /* Create and make active. */
957   rp->r_priv.s_init_flags |= init_flags;
958   r = create_service(rp);
959   if(r != OK) {
960       return r;
961   }
962   activate_service(rp, NULL);
963 
964   /* Publish service properties. */
965   r = publish_service(rp);
966   if (r != OK) {
967       return r;
968   }
969 
970   /* Run. */
971   r = run_service(rp, SEF_INIT_FRESH, init_flags);
972   if(r != OK) {
973       return r;
974   }
975 
976   if(rs_verbose)
977       printf("RS: %s started with major %d\n", srv_to_string(rp),
978           rpub->dev_nr);
979 
980   return OK;
981 }
982 
983 /*===========================================================================*
984  *				stop_service				     *
985  *===========================================================================*/
986 void stop_service(struct rproc *rp,int how)
987 {
988   struct rprocpub *rpub;
989   int signo;
990 
991   rpub = rp->r_pub;
992 
993   /* Try to stop the system service. First send a SIGTERM signal to ask the
994    * system service to terminate. If the service didn't install a signal
995    * handler, it will be killed. If it did and ignores the signal, we'll
996    * find out because we record the time here and send a SIGKILL.
997    */
998   if(rs_verbose)
999       printf("RS: %s signaled with SIGTERM\n", srv_to_string(rp));
1000 
1001   signo = rpub->endpoint != RS_PROC_NR ? SIGTERM : SIGHUP; /* SIGHUP for RS. */
1002 
1003   rp->r_flags |= how;				/* what to on exit? */
1004   sys_kill(rpub->endpoint, signo);		/* first try friendly */
1005   rp->r_stop_tm = getticks(); 			/* record current time */
1006 }
1007 
1008 /*===========================================================================*
1009  *			      activate_service				     *
1010  *===========================================================================*/
1011 void activate_service(struct rproc *rp, struct rproc *ex_rp)
1012 {
1013 /* Activate a service instance and deactivate another one if requested. */
1014 
1015   if(ex_rp && (ex_rp->r_flags & RS_ACTIVE) ) {
1016       ex_rp->r_flags &= ~RS_ACTIVE;
1017       if(rs_verbose)
1018           printf("RS: %s becomes inactive\n", srv_to_string(ex_rp));
1019   }
1020 
1021   if(! (rp->r_flags & RS_ACTIVE) ) {
1022       rp->r_flags |= RS_ACTIVE;
1023       if(rs_verbose)
1024           printf("RS: %s becomes active\n", srv_to_string(rp));
1025   }
1026 }
1027 
1028 /*===========================================================================*
1029  *			      reincarnate_service			     *
1030  *===========================================================================*/
1031 void reincarnate_service(struct rproc *old_rp)
1032 {
1033 /* Restart a service as if it were never started before. */
1034   struct rproc *rp;
1035   int r, restarts;
1036 
1037   if ((r = clone_slot(old_rp, &rp)) != OK) {
1038       printf("RS: Failed to clone the slot: %d\n", r);
1039       return;
1040   }
1041 
1042   rp->r_flags = RS_IN_USE;
1043   rproc_ptr[_ENDPOINT_P(rp->r_pub->endpoint)] = NULL;
1044 
1045   restarts = rp->r_restarts;
1046   start_service(rp, SEF_INIT_FRESH);
1047   rp->r_restarts = restarts + 1;
1048 }
1049 
1050 /*===========================================================================*
1051  *			      terminate_service				     *
1052  *===========================================================================*/
void terminate_service(struct rproc *rp)
{
/* Handle a termination event for a system service.
 *
 * Depending on the slot's flags the service is either cleaned up for good
 * (RS_EXITING), restarted right away (RS_REFRESHING, or first unexpected
 * exit), or given a backoff value and left alone (repeated unexpected exit).
 * A termination during initialization of a service under update only rolls
 * the update back and returns.
 */
  struct rproc **rps;
  struct rprocpub *rpub;
  int nr_rps, norestart;
  int i, r;

  rpub = rp->r_pub;

  if(rs_verbose)
     printf("RS: %s terminated\n", srv_to_string(rp));

  /* Deal with failures during initialization. */
  if(rp->r_flags & RS_INITIALIZING) {
      /* If updating, rollback. */
      if(SRV_IS_UPDATING(rp)) {
          printf("RS: update failed: state transfer failed. Rolling back...\n");
          end_update(rp->r_init_err, RS_REPLY);
          rp->r_init_err = ERESTART;	/* back to the default init error */
          return;
      }

      if (rpub->sys_flags & SF_NO_BIN_EXP) {
          /* If service was deliberately started with binary exponential offset
	   * disabled, we're going to assume we want to refresh a service upon
	   * failure.
	   */
          if(rs_verbose)
              printf("RS: service '%s' exited during initialization; "
		     "refreshing\n", rpub->label);
          rp->r_flags |= RS_REFRESHING; /* restart initialization. */
      } else {
          if(rs_verbose)
              printf("RS: service '%s' exited during initialization; "
                     "exiting\n", rpub->label);
          rp->r_flags |= RS_EXITING; /* don't restart. */
      }
  }

  /* If an update process is in progress, end it before doing anything else.
   * This is to be on the safe side, since there may be some weird dependencies
   * with services under update, while we perform recovery actions.
   */
  if(RUPDATE_IS_UPDATING()) {
      printf("RS: aborting the update after a crash...\n");
      abort_update_proc(ERESTART);
  }

  /* Force exit when no restart is requested. Note that norestart is only
   * set when the service was not already marked RS_EXITING above, so the
   * cleanup flags below are not applied to services that were exiting anyway.
   */
  norestart = !(rp->r_flags & RS_EXITING) && (rp->r_pub->sys_flags & SF_NORESTART);
  if(norestart) {
      rp->r_flags |= RS_EXITING;
      if((rp->r_pub->sys_flags & SF_DET_RESTART)
          && (rp->r_restarts < MAX_DET_RESTART)) {
          /* Detach at cleanup time. */
          rp->r_flags |= RS_CLEANUP_DETACH;
      }
      if(rp->r_script[0] != '\0') {
          /* Run script at cleanup time. */
          rp->r_flags |= RS_CLEANUP_SCRIPT;
      }
  }

  if (rp->r_flags & RS_EXITING) {
      /* If a core system service is exiting, we are in trouble. */
      if ((rp->r_pub->sys_flags & SF_CORE_SRV) && !shutting_down) {
          printf("core system service died: %s\n", srv_to_string(rp));
	  _exit(1);
      }

      /* If this service was scheduled for the update, abort the update now. */
      if(SRV_IS_UPD_SCHEDULED(rp)) {
          printf("RS: aborting the scheduled update, one of the services part of it is exiting...\n");
          abort_update_proc(EDEADSRCDST);
      }

      /* See if a late reply has to be sent. The conditional operator binds
       * looser than ||, so the reply is OK for an RS_DOWN request, or for an
       * RS_REFRESH request on a no-restart service, and EDEADEPT otherwise.
       */
      r = (rp->r_caller_request == RS_DOWN
          || (rp->r_caller_request == RS_REFRESH && norestart) ? OK : EDEADEPT);
      late_reply(rp, r);

      /* Unpublish the service. The result is deliberately not checked. */
      unpublish_service(rp);

      /* Cleanup all the instances of the service. */
      get_service_instances(rp, &rps, &nr_rps);
      for(i=0;i<nr_rps;i++) {
          cleanup_service(rps[i]);
      }

      /* If the service is reincarnating, its slot has not been cleaned up.
       * Check for this flag now, and attempt to start the service again.
       * If this fails, start_service() itself will perform cleanup.
       */
      if (rp->r_flags & RS_REINCARNATE) {
          rp->r_flags &= ~RS_REINCARNATE;
          reincarnate_service(rp);
      }
  }
  else if(rp->r_flags & RS_REFRESHING) {
      /* Restart service. */
      restart_service(rp);
  }
  else {
      /* Determine what to do. If this is the first unexpected
       * exit, immediately restart this service. Otherwise use
       * a binary exponential backoff.
       */
      if (rp->r_restarts > 0) {
          if (!(rpub->sys_flags & SF_NO_BIN_EXP)) {
              /* 2^restarts, with the exponent capped at BACKOFF_BITS-2 and
               * the result capped at MAX_BACKOFF; services using an
               * in-memory copy never get a backoff above 1.
               */
              rp->r_backoff = 1 << MIN(rp->r_restarts,(BACKOFF_BITS-2));
              rp->r_backoff = MIN(rp->r_backoff,MAX_BACKOFF);
              if ((rpub->sys_flags & SF_USE_COPY) && rp->r_backoff > 1)
                  rp->r_backoff= 1;
	  }
	  else {
              rp->r_backoff = 1;
	  }
          /* No restart here; presumably r_backoff is acted upon elsewhere
           * -- TODO confirm where the delayed restart is triggered.
           */
          return;
      }

      /* Restart service. */
      restart_service(rp);
  }
}
1179 
1180 /*===========================================================================*
1181  *				run_script				     *
1182  *===========================================================================*/
1183 static int run_script(struct rproc *rp)
1184 {
1185 	int r, endpoint;
1186 	pid_t pid;
1187 	char *reason;
1188 	char incarnation_str[20];	/* Enough for a counter? */
1189 	char *envp[1] = { NULL };
1190 	struct rprocpub *rpub;
1191 
1192 	rpub = rp->r_pub;
1193 	if (rp->r_flags & RS_REFRESHING)
1194 		reason= "restart";
1195 	else if (rp->r_flags & RS_NOPINGREPLY)
1196 		reason= "no-heartbeat";
1197 	else reason= "terminated";
1198 	snprintf(incarnation_str, sizeof(incarnation_str), "%d", rp->r_restarts);
1199 
1200  	if(rs_verbose) {
1201 		printf("RS: %s:\n", srv_to_string(rp));
1202 		printf("RS:     calling script '%s'\n", rp->r_script);
1203 		printf("RS:     reason: '%s'\n", reason);
1204 		printf("RS:     incarnation: '%s'\n", incarnation_str);
1205 	}
1206 
1207 	pid= fork();
1208 	switch(pid)
1209 	{
1210 	case -1:
1211 		return errno;
1212 	case 0:
1213 		execle(_PATH_BSHELL, "sh", rp->r_script, rpub->label, reason,
1214 			incarnation_str, (char*) NULL, envp);
1215 		printf("RS: run_script: execl '%s' failed: %s\n",
1216 			rp->r_script, strerror(errno));
1217 		exit(1);
1218 	default:
1219 		/* Set the privilege structure for the child process. */
1220 		if ((r = getprocnr(pid, &endpoint)) != 0)
1221 			panic("unable to get child endpoint: %d", r);
1222 		if ((r = sys_privctl(endpoint, SYS_PRIV_SET_USER, NULL))
1223 			!= OK) {
1224 			return kill_service(rp,"can't set script privileges",r);
1225 		}
1226 		/* Set the script's privileges on other servers. */
1227 		vm_set_priv(endpoint, NULL, FALSE);
1228 		if ((r = vm_set_priv(endpoint, NULL, FALSE)) != OK) {
1229 			return kill_service(rp,"can't set script VM privs",r);
1230 		}
1231 		/* Allow the script to run. */
1232 		if ((r = sys_privctl(endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
1233 			return kill_service(rp,"can't let the script run",r);
1234 		}
1235 		/* Pin RS memory again after fork()ing. */
1236 		vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
1237 	}
1238 	return OK;
1239 }
1240 
1241 /*===========================================================================*
1242  *			      restart_service				     *
1243  *===========================================================================*/
1244 void restart_service(struct rproc *rp)
1245 {
1246 /* Restart service via a recovery script or directly. */
1247   struct rproc *replica_rp;
1248   int r;
1249 
1250   /* See if a late reply has to be sent. */
1251   late_reply(rp, OK);
1252 
1253   /* Run a recovery script if available. */
1254   if (rp->r_script[0] != '\0') {
1255       r = run_script(rp);
1256       if(r != OK) {
1257           kill_service(rp, "unable to run script", errno);
1258       }
1259       return;
1260   }
1261 
1262   /* Restart directly. We need a replica if not already available. */
1263   if(rp->r_next_rp == NULL) {
1264       /* Create the replica. */
1265       r = clone_service(rp, RST_SYS_PROC, 0);
1266       if(r != OK) {
1267           kill_service(rp, "unable to clone service", r);
1268           return;
1269       }
1270   }
1271   replica_rp = rp->r_next_rp;
1272 
1273   /* Update the service into the replica. */
1274   r = update_service(&rp, &replica_rp, RS_SWAP, 0);
1275   if(r != OK) {
1276       kill_service(rp, "unable to update into new replica", r);
1277       return;
1278   }
1279 
1280   /* Let the new replica run. */
1281   r = run_service(replica_rp, SEF_INIT_RESTART, 0);
1282   if(r != OK) {
1283       kill_service(rp, "unable to let the replica run", r);
1284       return;
1285   }
1286 
1287   /* See if the old version needs to be detached. */
1288   if((rp->r_pub->sys_flags & SF_DET_RESTART)
1289       && (rp->r_restarts < MAX_DET_RESTART)) {
1290       rp->r_flags |= RS_CLEANUP_DETACH;
1291   }
1292 
1293   if(rs_verbose)
1294       printf("RS: %s restarted into %s\n",
1295           srv_to_string(rp), srv_to_string(replica_rp));
1296 }
1297 
1298 /*===========================================================================*
1299  *		         inherit_service_defaults			     *
1300  *===========================================================================*/
1301 void inherit_service_defaults(def_rp, rp)
1302 struct rproc *def_rp;
1303 struct rproc *rp;
1304 {
1305   struct rprocpub *def_rpub;
1306   struct rprocpub *rpub;
1307 
1308   def_rpub = def_rp->r_pub;
1309   rpub = rp->r_pub;
1310 
1311   /* Device and PCI settings. These properties cannot change. */
1312   rpub->dev_nr = def_rpub->dev_nr;
1313   rpub->pci_acl = def_rpub->pci_acl;
1314 
1315   /* Immutable system and privilege flags. */
1316   rpub->sys_flags &= ~IMM_SF;
1317   rpub->sys_flags |= (def_rpub->sys_flags & IMM_SF);
1318   rp->r_priv.s_flags &= ~IMM_F;
1319   rp->r_priv.s_flags |= (def_rp->r_priv.s_flags & IMM_F);
1320 
1321   /* Allowed traps. They cannot change. */
1322   rp->r_priv.s_trap_mask = def_rp->r_priv.s_trap_mask;
1323 }
1324 
1325 /*===========================================================================*
1326  *		           get_service_instances			     *
1327  *===========================================================================*/
1328 void get_service_instances(rp, rps, length)
1329 struct rproc *rp;
1330 struct rproc ***rps;
1331 int *length;
1332 {
1333 /* Retrieve all the service instances of a given service. */
1334   static struct rproc *instances[5];
1335   int nr_instances;
1336 
1337   nr_instances = 0;
1338   instances[nr_instances++] = rp;
1339   if(rp->r_prev_rp) instances[nr_instances++] = rp->r_prev_rp;
1340   if(rp->r_next_rp) instances[nr_instances++] = rp->r_next_rp;
1341   if(rp->r_old_rp) instances[nr_instances++] = rp->r_old_rp;
1342   if(rp->r_new_rp) instances[nr_instances++] = rp->r_new_rp;
1343 
1344   *rps = instances;
1345   *length = nr_instances;
1346 }
1347 
1348 /*===========================================================================*
1349  *				share_exec				     *
1350  *===========================================================================*/
1351 void share_exec(rp_dst, rp_src)
1352 struct rproc *rp_dst, *rp_src;
1353 {
1354   if(rs_verbose)
1355       printf("RS: %s shares exec image with %s\n",
1356           srv_to_string(rp_dst), srv_to_string(rp_src));
1357 
1358   /* Share exec image from rp_src to rp_dst. */
1359   rp_dst->r_exec_len = rp_src->r_exec_len;
1360   rp_dst->r_exec = rp_src->r_exec;
1361 }
1362 
1363 /*===========================================================================*
1364  *				read_exec				     *
1365  *===========================================================================*/
1366 int read_exec(rp)
1367 struct rproc *rp;
1368 {
1369   int e, r, fd;
1370   char *e_name;
1371   struct stat sb;
1372 
1373   e_name= rp->r_argv[0];
1374   if(rs_verbose)
1375       printf("RS: service '%s' reads exec image from: %s\n", rp->r_pub->label,
1376           e_name);
1377 
1378   r= stat(e_name, &sb);
1379   if (r != 0)
1380       return -errno;
1381 
1382   if (sb.st_size < sizeof(Elf_Ehdr))
1383       return ENOEXEC;
1384 
1385   fd= open(e_name, O_RDONLY);
1386   if (fd == -1)
1387       return -errno;
1388 
1389   rp->r_exec_len= sb.st_size;
1390   rp->r_exec= malloc(rp->r_exec_len);
1391   if (rp->r_exec == NULL)
1392   {
1393       printf("RS: read_exec: unable to allocate %zu bytes\n",
1394           rp->r_exec_len);
1395       close(fd);
1396       return ENOMEM;
1397   }
1398 
1399   r= read(fd, rp->r_exec, rp->r_exec_len);
1400   e= errno;
1401   close(fd);
1402   if (r == rp->r_exec_len)
1403       return OK;
1404 
1405   printf("RS: read_exec: read failed %d, errno %d\n", r, e);
1406 
1407   free_exec(rp);
1408 
1409   if (r >= 0)
1410       return EIO;
1411   else
1412       return -e;
1413 }
1414 
1415 /*===========================================================================*
1416  *				free_exec				     *
1417  *===========================================================================*/
1418 void free_exec(rp)
1419 struct rproc *rp;
1420 {
1421 /* Free an exec image. */
1422   int slot_nr, has_shared_exec;
1423   struct rproc *other_rp;
1424 
1425   /* Search for some other slot sharing the same exec image. */
1426   has_shared_exec = FALSE;
1427   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1428       other_rp = &rproc[slot_nr];		/* get pointer to slot */
1429       if (other_rp->r_flags & RS_IN_USE && other_rp != rp
1430           && other_rp->r_exec == rp->r_exec) {  /* found! */
1431           has_shared_exec = TRUE;
1432           break;
1433       }
1434   }
1435 
1436   /* If nobody uses our copy of the exec image, we can try to get rid of it. */
1437   if(!has_shared_exec) {
1438       if(rs_verbose)
1439           printf("RS: %s frees exec image\n", srv_to_string(rp));
1440       free(rp->r_exec);
1441   }
1442   else {
1443       if(rs_verbose)
1444           printf("RS: %s no longer sharing exec image with %s\n",
1445               srv_to_string(rp), srv_to_string(other_rp));
1446   }
1447   rp->r_exec = NULL;
1448   rp->r_exec_len = 0;
1449 }
1450 
1451 /*===========================================================================*
1452  *				 edit_slot				     *
1453  *===========================================================================*/
1454 int edit_slot(rp, rs_start, source)
1455 struct rproc *rp;
1456 struct rs_start *rs_start;
1457 endpoint_t source;
1458 {
1459 /* Edit a given slot to override existing settings. */
1460   struct rprocpub *rpub;
1461   char *label;
1462   int len;
1463   int s, i;
1464   int basic_kc[] =  { SYS_BASIC_CALLS, NULL_C };
1465   int basic_vmc[] =  { VM_BASIC_CALLS, NULL_C };
1466 
1467   rpub = rp->r_pub;
1468 
1469   /* Update IPC target list. */
1470   if (rs_start->rss_ipclen==0 || rs_start->rss_ipclen+1>sizeof(rp->r_ipc_list)){
1471       printf("RS: edit_slot: ipc list empty or long for '%s'\n", rpub->label);
1472       return EINVAL;
1473   }
1474   s=sys_datacopy(source, (vir_bytes) rs_start->rss_ipc,
1475       SELF, (vir_bytes) rp->r_ipc_list, rs_start->rss_ipclen);
1476   if (s != OK) return(s);
1477   rp->r_ipc_list[rs_start->rss_ipclen]= '\0';
1478 
1479   /* Update IRQs. */
1480   if(rs_start->rss_nr_irq == RSS_IRQ_ALL) {
1481       rs_start->rss_nr_irq = 0;
1482   }
1483   else {
1484       rp->r_priv.s_flags |= CHECK_IRQ;
1485   }
1486   if (rs_start->rss_nr_irq > NR_IRQ) {
1487       printf("RS: edit_slot: too many IRQs requested\n");
1488       return EINVAL;
1489   }
1490   rp->r_nr_irq= rp->r_priv.s_nr_irq= rs_start->rss_nr_irq;
1491   for (i= 0; i<rp->r_priv.s_nr_irq; i++) {
1492       rp->r_irq_tab[i]= rp->r_priv.s_irq_tab[i]= rs_start->rss_irq[i];
1493       if(rs_verbose)
1494           printf("RS: edit_slot: IRQ %d\n", rp->r_priv.s_irq_tab[i]);
1495   }
1496 
1497   /* Update I/O ranges. */
1498   if(rs_start->rss_nr_io == RSS_IO_ALL) {
1499       rs_start->rss_nr_io = 0;
1500   }
1501   else {
1502       rp->r_priv.s_flags |= CHECK_IO_PORT;
1503   }
1504   if (rs_start->rss_nr_io > NR_IO_RANGE) {
1505       printf("RS: edit_slot: too many I/O ranges requested\n");
1506       return EINVAL;
1507   }
1508   rp->r_nr_io_range= rp->r_priv.s_nr_io_range= rs_start->rss_nr_io;
1509   for (i= 0; i<rp->r_priv.s_nr_io_range; i++) {
1510       rp->r_priv.s_io_tab[i].ior_base= rs_start->rss_io[i].base;
1511       rp->r_priv.s_io_tab[i].ior_limit=
1512           rs_start->rss_io[i].base+rs_start->rss_io[i].len-1;
1513       rp->r_io_tab[i] = rp->r_priv.s_io_tab[i];
1514       if(rs_verbose)
1515           printf("RS: edit_slot: I/O [%x..%x]\n",
1516               rp->r_priv.s_io_tab[i].ior_base,
1517               rp->r_priv.s_io_tab[i].ior_limit);
1518   }
1519 
1520   /* Update kernel call mask. Inherit basic kernel calls when asked to. */
1521   memcpy(rp->r_priv.s_k_call_mask, rs_start->rss_system,
1522       sizeof(rp->r_priv.s_k_call_mask));
1523   if(rs_start->rss_flags & RSS_SYS_BASIC_CALLS) {
1524       fill_call_mask(basic_kc, NR_SYS_CALLS,
1525           rp->r_priv.s_k_call_mask, KERNEL_CALL, FALSE);
1526   }
1527 
1528   /* Update VM call mask. Inherit basic VM calls. */
1529   memcpy(rpub->vm_call_mask, rs_start->rss_vm,
1530       sizeof(rpub->vm_call_mask));
1531   if(rs_start->rss_flags & RSS_VM_BASIC_CALLS) {
1532       fill_call_mask(basic_vmc, NR_VM_CALLS,
1533           rpub->vm_call_mask, VM_RQ_BASE, FALSE);
1534   }
1535 
1536   /* Update control labels. */
1537   if(rs_start->rss_nr_control > 0) {
1538       int i, s;
1539       if (rs_start->rss_nr_control > RS_NR_CONTROL) {
1540           printf("RS: edit_slot: too many control labels\n");
1541           return EINVAL;
1542       }
1543       for (i=0; i<rs_start->rss_nr_control; i++) {
1544           s = copy_label(source, rs_start->rss_control[i].l_addr,
1545               rs_start->rss_control[i].l_len, rp->r_control[i],
1546               sizeof(rp->r_control[i]));
1547           if(s != OK)
1548               return s;
1549       }
1550       rp->r_nr_control = rs_start->rss_nr_control;
1551 
1552       if (rs_verbose) {
1553           printf("RS: edit_slot: control labels:");
1554           for (i=0; i<rp->r_nr_control; i++)
1555               printf(" %s", rp->r_control[i]);
1556           printf("\n");
1557       }
1558   }
1559 
1560   /* Update signal manager. */
1561   rp->r_priv.s_sig_mgr = rs_start->rss_sigmgr;
1562 
1563   /* Update scheduling properties if possible. */
1564   if(rp->r_scheduler != NONE) {
1565       rp->r_scheduler = rs_start->rss_scheduler;
1566       rp->r_priority = rs_start->rss_priority;
1567       rp->r_quantum = rs_start->rss_quantum;
1568       rp->r_cpu = rs_start->rss_cpu;
1569   }
1570 
1571   /* Update command and arguments. */
1572   if (rs_start->rss_cmdlen > MAX_COMMAND_LEN-1) return(E2BIG);
1573   s=sys_datacopy(source, (vir_bytes) rs_start->rss_cmd,
1574       SELF, (vir_bytes) rp->r_cmd, rs_start->rss_cmdlen);
1575   if (s != OK) return(s);
1576   rp->r_cmd[rs_start->rss_cmdlen] = '\0';	/* ensure it is terminated */
1577   if (rp->r_cmd[0] != '/') return(EINVAL);	/* insist on absolute path */
1578 
1579   /* Build cmd dependencies (argv). */
1580   build_cmd_dep(rp);
1581 
1582   /* Copy in the program name. */
1583   if (rs_start->rss_prognamelen > sizeof(rpub->proc_name)-1) return(E2BIG);
1584   s=sys_datacopy(source, (vir_bytes) rs_start->rss_progname,
1585       SELF, (vir_bytes) rpub->proc_name, rs_start->rss_prognamelen);
1586   if (s != OK) return(s);
1587   rpub->proc_name[rs_start->rss_prognamelen] = '\0';
1588 
1589   /* Update label if not already set. */
1590   if(!strcmp(rpub->label, "")) {
1591       if(rs_start->rss_label.l_len > 0) {
1592           /* RS_UP caller has supplied a custom label for this service. */
1593           int s = copy_label(source, rs_start->rss_label.l_addr,
1594               rs_start->rss_label.l_len, rpub->label, sizeof(rpub->label));
1595           if(s != OK)
1596               return s;
1597           if(rs_verbose)
1598               printf("RS: edit_slot: using label (custom) '%s'\n", rpub->label);
1599       } else {
1600           /* Default label for the service. */
1601           label = rpub->proc_name;
1602           len= strlen(label);
1603           memcpy(rpub->label, label, len);
1604           rpub->label[len]= '\0';
1605           if(rs_verbose)
1606               printf("RS: edit_slot: using label (from proc_name) '%s'\n",
1607                   rpub->label);
1608       }
1609   }
1610 
1611   /* Update recovery script. */
1612   if (rs_start->rss_scriptlen > MAX_SCRIPT_LEN-1) return(E2BIG);
1613   if (rs_start->rss_script != NULL && rs_start->rss_scriptlen > 0
1614       && !(rpub->sys_flags & SF_CORE_SRV)) {
1615       s=sys_datacopy(source, (vir_bytes) rs_start->rss_script,
1616           SELF, (vir_bytes) rp->r_script, rs_start->rss_scriptlen);
1617       if (s != OK) return(s);
1618       rp->r_script[rs_start->rss_scriptlen] = '\0';
1619       rpub->sys_flags |= SF_USE_SCRIPT;
1620   }
1621 
1622   /* Update system flags and in-memory copy. */
1623   if ((rs_start->rss_flags & RSS_COPY) && !(rpub->sys_flags & SF_USE_COPY)) {
1624       int exst_cpy;
1625       struct rproc *rp2;
1626       struct rprocpub *rpub2;
1627       exst_cpy = 0;
1628 
1629       if(rs_start->rss_flags & RSS_REUSE) {
1630           int i;
1631 
1632           for(i = 0; i < NR_SYS_PROCS; i++) {
1633               rp2 = &rproc[i];
1634               rpub2 = rproc[i].r_pub;
1635               if(strcmp(rpub->proc_name, rpub2->proc_name) == 0 &&
1636                   (rpub2->sys_flags & SF_USE_COPY)) {
1637                   /* We have found the same binary that's
1638                    * already been copied */
1639                   exst_cpy = 1;
1640                   break;
1641               }
1642           }
1643       }
1644 
1645       s = OK;
1646       if(!exst_cpy)
1647           s = read_exec(rp);
1648       else
1649           share_exec(rp, rp2);
1650 
1651       if (s != OK)
1652           return s;
1653 
1654       rpub->sys_flags |= SF_USE_COPY;
1655   }
1656   if (rs_start->rss_flags & RSS_REPLICA) {
1657       rpub->sys_flags |= SF_USE_REPL;
1658   }
1659   if (rs_start->rss_flags & RSS_NO_BIN_EXP) {
1660       rpub->sys_flags |= SF_NO_BIN_EXP;
1661   }
1662   if (rs_start->rss_flags & RSS_DETACH) {
1663       rpub->sys_flags |= SF_DET_RESTART;
1664   }
1665   else {
1666       rpub->sys_flags &= ~SF_DET_RESTART;
1667   }
1668   if (rs_start->rss_flags & RSS_NORESTART) {
1669       if(rpub->sys_flags & SF_CORE_SRV) {
1670           return EPERM;
1671       }
1672       rpub->sys_flags |= SF_NORESTART;
1673   }
1674   else {
1675       rpub->sys_flags &= ~SF_NORESTART;
1676   }
1677 
1678   /* Update period. */
1679   if(rpub->endpoint != RS_PROC_NR) {
1680       rp->r_period = rs_start->rss_period;
1681   }
1682 
1683   /* Update restarts. */
1684   if(rs_start->rss_restarts) {
1685       rp->r_restarts = rs_start->rss_restarts;
1686   }
1687 
1688   /* Update number of ASR live updates. */
1689   if(rs_start->rss_asr_count >= 0) {
1690       rp->r_asr_count = rs_start->rss_asr_count;
1691   }
1692 
1693   /* (Re)initialize privilege settings. */
1694   init_privs(rp, &rp->r_priv);
1695 
1696   return OK;
1697 }
1698 
1699 /*===========================================================================*
1700  *				 init_slot				     *
1701  *===========================================================================*/
1702 int init_slot(rp, rs_start, source)
1703 struct rproc *rp;
1704 struct rs_start *rs_start;
1705 endpoint_t source;
1706 {
1707 /* Initialize a slot as requested by the client. */
1708   struct rprocpub *rpub;
1709   int i;
1710 
1711   rpub = rp->r_pub;
1712 
1713   /* All dynamically created services get the same sys and privilege flags, and
1714    * allowed traps. Other privilege settings can be specified at runtime. The
1715    * privilege id is dynamically allocated by the kernel.
1716    */
1717   rpub->sys_flags = DSRV_SF;             /* system flags */
1718   rp->r_priv.s_flags = DSRV_F;           /* privilege flags */
1719   rp->r_priv.s_init_flags = DSRV_I;      /* init flags */
1720   rp->r_priv.s_trap_mask = DSRV_T;       /* allowed traps */
1721   rp->r_priv.s_bak_sig_mgr = NONE;       /* backup signal manager */
1722 
1723   /* Initialize uid. */
1724   rp->r_uid= rs_start->rss_uid;
1725 
1726   /* Initialize device driver settings. */
1727   rpub->dev_nr = rs_start->rss_major;
1728   rpub->devman_id = rs_start->devman_id;
1729 
1730   /* Initialize pci settings. */
1731   if (rs_start->rss_nr_pci_id > RS_NR_PCI_DEVICE) {
1732       printf("RS: init_slot: too many PCI device IDs\n");
1733       return EINVAL;
1734   }
1735   rpub->pci_acl.rsp_nr_device = rs_start->rss_nr_pci_id;
1736   for (i= 0; i<rpub->pci_acl.rsp_nr_device; i++) {
1737       rpub->pci_acl.rsp_device[i].vid= rs_start->rss_pci_id[i].vid;
1738       rpub->pci_acl.rsp_device[i].did= rs_start->rss_pci_id[i].did;
1739       rpub->pci_acl.rsp_device[i].sub_vid= rs_start->rss_pci_id[i].sub_vid;
1740       rpub->pci_acl.rsp_device[i].sub_did= rs_start->rss_pci_id[i].sub_did;
1741       if(rs_verbose)
1742           printf("RS: init_slot: PCI %04x/%04x (sub %04x:%04x)\n",
1743               rpub->pci_acl.rsp_device[i].vid,
1744               rpub->pci_acl.rsp_device[i].did,
1745               rpub->pci_acl.rsp_device[i].sub_vid,
1746               rpub->pci_acl.rsp_device[i].sub_did);
1747   }
1748   if (rs_start->rss_nr_pci_class > RS_NR_PCI_CLASS) {
1749       printf("RS: init_slot: too many PCI class IDs\n");
1750       return EINVAL;
1751   }
1752   rpub->pci_acl.rsp_nr_class= rs_start->rss_nr_pci_class;
1753   for (i= 0; i<rpub->pci_acl.rsp_nr_class; i++) {
1754       rpub->pci_acl.rsp_class[i].pciclass=rs_start->rss_pci_class[i].pciclass;
1755       rpub->pci_acl.rsp_class[i].mask= rs_start->rss_pci_class[i].mask;
1756       if(rs_verbose)
1757           printf("RS: init_slot: PCI class %06x mask %06x\n",
1758               (unsigned int) rpub->pci_acl.rsp_class[i].pciclass,
1759               (unsigned int) rpub->pci_acl.rsp_class[i].mask);
1760   }
1761 
1762   /* Initialize some fields. */
1763   rp->r_asr_count = 0;				/* no ASR updates yet */
1764   rp->r_restarts = 0; 				/* no restarts yet */
1765   rp->r_old_rp = NULL;			        /* no old version yet */
1766   rp->r_new_rp = NULL;			        /* no new version yet */
1767   rp->r_prev_rp = NULL;			        /* no prev replica yet */
1768   rp->r_next_rp = NULL;			        /* no next replica yet */
1769   rp->r_exec = NULL;                            /* no in-memory copy yet */
1770   rp->r_exec_len = 0;
1771   rp->r_script[0]= '\0';                        /* no recovery script yet */
1772   rpub->label[0]= '\0';                         /* no label yet */
1773   rp->r_scheduler = -1;                         /* no scheduler yet */
1774   rp->r_priv.s_sig_mgr = -1;                    /* no signal manager yet */
1775   rp->r_map_prealloc_addr = 0;                  /* no preallocated memory */
1776   rp->r_map_prealloc_len = 0;
1777   rp->r_init_err = ERESTART;                    /* default init error `*/
1778 
1779   /* Initialize editable slot settings. */
1780   return edit_slot(rp, rs_start, source);
1781 }
1782 
1783 /*===========================================================================*
1784  *				clone_slot				     *
1785  *===========================================================================*/
1786 int clone_slot(rp, clone_rpp)
1787 struct rproc *rp;
1788 struct rproc **clone_rpp;
1789 {
1790   int r;
1791   struct rproc *clone_rp;
1792   struct rprocpub *rpub, *clone_rpub;
1793 
1794   /* Allocate a system service slot for the clone. */
1795   r = alloc_slot(&clone_rp);
1796   if(r != OK) {
1797       printf("RS: clone_slot: unable to allocate a new slot: %d\n", r);
1798       return r;
1799   }
1800 
1801   rpub = rp->r_pub;
1802   clone_rpub = clone_rp->r_pub;
1803 
1804   /* Synch the privilege structure of the source with the kernel. */
1805   if ((r = sys_getpriv(&(rp->r_priv), rpub->endpoint)) != OK) {
1806       panic("unable to synch privilege structure: %d", r);
1807   }
1808 
1809   /* Shallow copy. */
1810   *clone_rp = *rp;
1811   *clone_rpub = *rpub;
1812 
1813   /* Deep copy. */
1814   clone_rp->r_init_err = ERESTART; /* default init error */
1815   clone_rp->r_flags &= ~RS_ACTIVE; /* the clone is not active yet */
1816   clone_rp->r_pid = -1;            /* no pid yet */
1817   clone_rpub->endpoint = -1;       /* no endpoint yet */
1818   clone_rp->r_pub = clone_rpub;    /* restore pointer to public entry */
1819   build_cmd_dep(clone_rp);         /* rebuild cmd dependencies */
1820   if(clone_rpub->sys_flags & SF_USE_COPY) {
1821       share_exec(clone_rp, rp);        /* share exec image */
1822   }
1823   clone_rp->r_old_rp = NULL;	   /* no old version yet */
1824   clone_rp->r_new_rp = NULL;	   /* no new version yet */
1825   clone_rp->r_prev_rp = NULL;	   /* no prev replica yet */
1826   clone_rp->r_next_rp = NULL;	   /* no next replica yet */
1827 
1828   /* Force dynamic privilege id. */
1829   clone_rp->r_priv.s_flags |= DYN_PRIV_ID;
1830 
1831   /* Clear instance flags. */
1832   clone_rp->r_priv.s_flags &= ~(LU_SYS_PROC | RST_SYS_PROC);
1833   clone_rp->r_priv.s_init_flags = 0;
1834 
1835   *clone_rpp = clone_rp;
1836   return OK;
1837 }
1838 
1839 /*===========================================================================*
1840  *			    swap_slot_pointer				     *
1841  *===========================================================================*/
static void swap_slot_pointer(struct rproc **rpp, struct rproc *src_rp,
    struct rproc *dst_rp)
{
/* If '*rpp' refers to one of the two given slots, redirect it to the other
 * one. Pointers to any other slot (or NULL) are left unchanged.
 */
  struct rproc *current = *rpp;

  if (current == src_rp) {
      *rpp = dst_rp;
      return;
  }
  if (current == dst_rp) {
      *rpp = src_rp;
  }
}
1852 
1853 /*===========================================================================*
1854  *				swap_slot				     *
1855  *===========================================================================*/
1856 void swap_slot(src_rpp, dst_rpp)
1857 struct rproc **src_rpp;
1858 struct rproc **dst_rpp;
1859 {
1860 /* Swap two service slots. */
1861   struct rproc *src_rp, *dst_rp;
1862   struct rprocpub *src_rpub, *dst_rpub;
1863   struct rproc orig_src_rproc, orig_dst_rproc;
1864   struct rprocpub orig_src_rprocpub, orig_dst_rprocpub;
1865   struct rprocupd *prev_rpupd, *rpupd;
1866 
1867   src_rp = *src_rpp;
1868   dst_rp = *dst_rpp;
1869   src_rpub = src_rp->r_pub;
1870   dst_rpub = dst_rp->r_pub;
1871 
1872   /* Save existing data first. */
1873   orig_src_rproc = *src_rp;
1874   orig_src_rprocpub = *src_rpub;
1875   orig_dst_rproc = *dst_rp;
1876   orig_dst_rprocpub = *dst_rpub;
1877 
1878   /* Swap slots. */
1879   *src_rp = orig_dst_rproc;
1880   *src_rpub = orig_dst_rprocpub;
1881   *dst_rp = orig_src_rproc;
1882   *dst_rpub = orig_src_rprocpub;
1883 
1884   /* Restore public entries and update descriptors. */
1885   src_rp->r_pub = orig_src_rproc.r_pub;
1886   dst_rp->r_pub = orig_dst_rproc.r_pub;
1887   src_rp->r_upd = orig_src_rproc.r_upd;
1888   dst_rp->r_upd = orig_dst_rproc.r_upd;
1889 
1890   /* Rebuild command dependencies. */
1891   build_cmd_dep(src_rp);
1892   build_cmd_dep(dst_rp);
1893 
1894   /* Swap local slot pointers. */
1895   swap_slot_pointer(&src_rp->r_prev_rp, src_rp, dst_rp);
1896   swap_slot_pointer(&src_rp->r_next_rp, src_rp, dst_rp);
1897   swap_slot_pointer(&src_rp->r_old_rp, src_rp, dst_rp);
1898   swap_slot_pointer(&src_rp->r_new_rp, src_rp, dst_rp);
1899   swap_slot_pointer(&dst_rp->r_prev_rp, src_rp, dst_rp);
1900   swap_slot_pointer(&dst_rp->r_next_rp, src_rp, dst_rp);
1901   swap_slot_pointer(&dst_rp->r_old_rp, src_rp, dst_rp);
1902   swap_slot_pointer(&dst_rp->r_new_rp, src_rp, dst_rp);
1903 
1904   /* Swap global slot pointers. */
1905   RUPDATE_ITER(rupdate.first_rpupd, prev_rpupd, rpupd,
1906       swap_slot_pointer(&rpupd->rp, src_rp, dst_rp);
1907   );
1908   swap_slot_pointer(&rproc_ptr[_ENDPOINT_P(src_rp->r_pub->endpoint)],
1909       src_rp, dst_rp);
1910   swap_slot_pointer(&rproc_ptr[_ENDPOINT_P(dst_rp->r_pub->endpoint)],
1911       src_rp, dst_rp);
1912 
1913   /* Adjust input pointers. */
1914   *src_rpp = dst_rp;
1915   *dst_rpp = src_rp;
1916 }
1917 
1918 /*===========================================================================*
1919  *			   lookup_slot_by_label				     *
1920  *===========================================================================*/
1921 struct rproc* lookup_slot_by_label(char *label)
1922 {
1923 /* Lookup a service slot matching the given label. */
1924   int slot_nr;
1925   struct rproc *rp;
1926   struct rprocpub *rpub;
1927 
1928   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1929       rp = &rproc[slot_nr];
1930       if (!(rp->r_flags & RS_ACTIVE)) {
1931           continue;
1932       }
1933       rpub = rp->r_pub;
1934       if (strcmp(rpub->label, label) == 0) {
1935           return rp;
1936       }
1937   }
1938 
1939   return NULL;
1940 }
1941 
1942 /*===========================================================================*
1943  *			   lookup_slot_by_pid				     *
1944  *===========================================================================*/
1945 struct rproc* lookup_slot_by_pid(pid_t pid)
1946 {
1947 /* Lookup a service slot matching the given pid. */
1948   int slot_nr;
1949   struct rproc *rp;
1950 
1951   if(pid < 0) {
1952       return NULL;
1953   }
1954 
1955   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1956       rp = &rproc[slot_nr];
1957       if (!(rp->r_flags & RS_IN_USE)) {
1958           continue;
1959       }
1960       if (rp->r_pid == pid) {
1961           return rp;
1962       }
1963   }
1964 
1965   return NULL;
1966 }
1967 
1968 /*===========================================================================*
1969  *			   lookup_slot_by_dev_nr			     *
1970  *===========================================================================*/
1971 struct rproc* lookup_slot_by_dev_nr(dev_t dev_nr)
1972 {
1973 /* Lookup a service slot matching the given device number. */
1974   int slot_nr;
1975   struct rproc *rp;
1976   struct rprocpub *rpub;
1977 
1978   if(dev_nr <= 0) {
1979       return NULL;
1980   }
1981 
1982   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1983       rp = &rproc[slot_nr];
1984       rpub = rp->r_pub;
1985       if (!(rp->r_flags & RS_IN_USE)) {
1986           continue;
1987       }
1988       if (rpub->dev_nr == dev_nr) {
1989           return rp;
1990       }
1991   }
1992 
1993   return NULL;
1994 }
1995 
1996 /*===========================================================================*
1997  *			   lookup_slot_by_flags				     *
1998  *===========================================================================*/
1999 struct rproc* lookup_slot_by_flags(int flags)
2000 {
2001 /* Lookup a service slot matching the given flags. */
2002   int slot_nr;
2003   struct rproc *rp;
2004 
2005   if(!flags) {
2006       return NULL;
2007   }
2008 
2009   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
2010       rp = &rproc[slot_nr];
2011       if (!(rp->r_flags & RS_IN_USE)) {
2012           continue;
2013       }
2014       if (rp->r_flags & flags) {
2015           return rp;
2016       }
2017   }
2018 
2019   return NULL;
2020 }
2021 
2022 /*===========================================================================*
2023  *				alloc_slot				     *
2024  *===========================================================================*/
2025 int alloc_slot(rpp)
2026 struct rproc **rpp;
2027 {
2028 /* Alloc a new system service slot. */
2029   int slot_nr;
2030 
2031   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
2032       *rpp = &rproc[slot_nr];			/* get pointer to slot */
2033       if (!((*rpp)->r_flags & RS_IN_USE)) 	/* check if available */
2034 	  break;
2035   }
2036   if (slot_nr >= NR_SYS_PROCS) {
2037 	return ENOMEM;
2038   }
2039 
2040   return OK;
2041 }
2042 
2043 /*===========================================================================*
2044  *				free_slot				     *
2045  *===========================================================================*/
2046 void free_slot(rp)
2047 struct rproc *rp;
2048 {
2049 /* Free a system service slot. */
2050   struct rprocpub *rpub;
2051 
2052   rpub = rp->r_pub;
2053 
2054   /* Send a late reply if there is any pending. */
2055   late_reply(rp, OK);
2056 
2057   /* Free memory if necessary. */
2058   if(rpub->sys_flags & SF_USE_COPY) {
2059       free_exec(rp);
2060   }
2061 
2062   /* Mark slot as no longer in use.. */
2063   rp->r_flags = 0;
2064   rp->r_pid = -1;
2065   rpub->in_use = FALSE;
2066   rproc_ptr[_ENDPOINT_P(rpub->endpoint)] = NULL;
2067 }
2068 
2069 
2070 /*===========================================================================*
2071  *				get_next_name				     *
2072  *===========================================================================*/
2073 static char *get_next_name(ptr, name, caller_label)
2074 char *ptr;
2075 char *name;
2076 char *caller_label;
2077 {
2078 	/* Get the next name from the list of (IPC) program names.
2079 	 */
2080 	char *p, *q;
2081 	size_t len;
2082 
2083 	for (p= ptr; p[0] != '\0'; p= q)
2084 	{
2085 		/* Skip leading space */
2086 		while (p[0] != '\0' && isspace((unsigned char)p[0]))
2087 			p++;
2088 
2089 		/* Find start of next word */
2090 		q= p;
2091 		while (q[0] != '\0' && !isspace((unsigned char)q[0]))
2092 			q++;
2093 		if (q == p)
2094 			continue;
2095 		len= q-p;
2096 		if (len > RS_MAX_LABEL_LEN)
2097 		{
2098 			printf(
2099 	"rs:get_next_name: bad ipc list entry '%.*s' for %s: too long\n",
2100 				(int) len, p, caller_label);
2101 			continue;
2102 		}
2103 		memcpy(name, p, len);
2104 		name[len]= '\0';
2105 
2106 		return q; /* found another */
2107 	}
2108 
2109 	return NULL; /* done */
2110 }
2111 
2112 /*===========================================================================*
2113  *				add_forward_ipc				     *
2114  *===========================================================================*/
2115 void add_forward_ipc(rp, privp)
2116 struct rproc *rp;
2117 struct priv *privp;
2118 {
2119 	/* Add IPC send permissions to a process based on that process's IPC
2120 	 * list.
2121 	 */
2122 	char name[RS_MAX_LABEL_LEN+1], *p;
2123 	struct rproc *rrp;
2124 	endpoint_t endpoint;
2125 	int r;
2126 	int priv_id;
2127 	struct priv priv;
2128 	struct rprocpub *rpub;
2129 
2130 	rpub = rp->r_pub;
2131 	p = rp->r_ipc_list;
2132 
2133 	while ((p = get_next_name(p, name, rpub->label)) != NULL) {
2134 
2135 		if (strcmp(name, "SYSTEM") == 0)
2136 			endpoint= SYSTEM;
2137 		else if (strcmp(name, "USER") == 0)
2138 			endpoint= INIT_PROC_NR; /* all user procs */
2139 		else
2140 		{
2141 			/* Set a privilege bit for every process matching the
2142 			 * given process name. It is perfectly fine if this
2143 			 * loop does not find any matches, as the target
2144 			 * process(es) may not have been started yet. See
2145 			 * add_backward_ipc() below.
2146 			 */
2147 			for (rrp=BEG_RPROC_ADDR; rrp<END_RPROC_ADDR; rrp++) {
2148 				if (!(rrp->r_flags & RS_IN_USE))
2149 					continue;
2150 
2151 				if (!strcmp(rrp->r_pub->proc_name, name)) {
2152 #if PRIV_DEBUG
2153 					printf("  RS: add_forward_ipc: setting"
2154 						" sendto bit for %d...\n",
2155 						rrp->r_pub->endpoint);
2156 #endif
2157 
2158 					priv_id= rrp->r_priv.s_id;
2159 					set_sys_bit(privp->s_ipc_to, priv_id);
2160 				}
2161 			}
2162 
2163 			continue;
2164 		}
2165 
2166 		/* This code only applies to the exception cases. */
2167 		if ((r = sys_getpriv(&priv, endpoint)) < 0)
2168 		{
2169 			printf(
2170 		"add_forward_ipc: unable to get priv_id for '%s': %d\n",
2171 				name, r);
2172 			continue;
2173 		}
2174 
2175 #if PRIV_DEBUG
2176 		printf("  RS: add_forward_ipc: setting sendto bit for %d...\n",
2177 			endpoint);
2178 #endif
2179 		priv_id= priv.s_id;
2180 		set_sys_bit(privp->s_ipc_to, priv_id);
2181 	}
2182 }
2183 
2184 
2185 /*===========================================================================*
2186  *				add_backward_ipc			     *
2187  *===========================================================================*/
2188 void add_backward_ipc(rp, privp)
2189 struct rproc *rp;
2190 struct priv *privp;
2191 {
2192 	/* Add IPC send permissions to a process based on other processes' IPC
2193 	 * lists. This is enough to allow each such two processes to talk to
2194 	 * each other, as the kernel guarantees send mask symmetry. We need to
2195 	 * add these permissions now because the current process may not yet
2196 	 * have existed at the time that the other process was initialized.
2197 	 */
2198 	char name[RS_MAX_LABEL_LEN+1], *p;
2199 	struct rproc *rrp;
2200 	struct rprocpub *rrpub;
2201 	char *proc_name;
2202 	int priv_id, is_ipc_all, is_ipc_all_sys;
2203 
2204 	proc_name = rp->r_pub->proc_name;
2205 
2206 	for (rrp=BEG_RPROC_ADDR; rrp<END_RPROC_ADDR; rrp++) {
2207 		if (!(rrp->r_flags & RS_IN_USE))
2208 			continue;
2209 
2210 		if (!rrp->r_ipc_list[0])
2211 			continue;
2212 
2213 		/* If the process being checked is set to allow IPC to all
2214 		 * other processes, or for all other system processes and the
2215 		 * target process is a system process, add a permission bit.
2216 		 */
2217 		rrpub = rrp->r_pub;
2218 
2219 		is_ipc_all = !strcmp(rrp->r_ipc_list, RSS_IPC_ALL);
2220 		is_ipc_all_sys = !strcmp(rrp->r_ipc_list, RSS_IPC_ALL_SYS);
2221 
2222 		if (is_ipc_all ||
2223 			(is_ipc_all_sys && (privp->s_flags & SYS_PROC))) {
2224 #if PRIV_DEBUG
2225 			printf("  RS: add_backward_ipc: setting sendto bit "
2226 				"for %d...\n", rrpub->endpoint);
2227 #endif
2228 			priv_id= rrp->r_priv.s_id;
2229 			set_sys_bit(privp->s_ipc_to, priv_id);
2230 
2231 			continue;
2232 		}
2233 
2234 		/* An IPC target list was provided for the process being
2235 		 * checked here. Make sure that the name of the new process
2236 		 * is in that process's list. There may be multiple matches.
2237 		 */
2238 		p = rrp->r_ipc_list;
2239 
2240 		while ((p = get_next_name(p, name, rrpub->label)) != NULL) {
2241 			if (!strcmp(proc_name, name)) {
2242 #if PRIV_DEBUG
2243 				printf("  RS: add_backward_ipc: setting sendto"
2244 					" bit for %d...\n",
2245 					rrpub->endpoint);
2246 #endif
2247 				priv_id= rrp->r_priv.s_id;
2248 				set_sys_bit(privp->s_ipc_to, priv_id);
2249 			}
2250 		}
2251 	}
2252 }
2253 
2254 
2255 /*===========================================================================*
2256  *				init_privs				     *
2257  *===========================================================================*/
2258 void init_privs(rp, privp)
2259 struct rproc *rp;
2260 struct priv *privp;
2261 {
2262 	int i;
2263 	int is_ipc_all, is_ipc_all_sys;
2264 
2265 	/* Clear s_ipc_to */
2266 	fill_send_mask(&privp->s_ipc_to, FALSE);
2267 
2268 	is_ipc_all = !strcmp(rp->r_ipc_list, RSS_IPC_ALL);
2269 	is_ipc_all_sys = !strcmp(rp->r_ipc_list, RSS_IPC_ALL_SYS);
2270 
2271 #if PRIV_DEBUG
2272 	printf("  RS: init_privs: ipc list is '%s'...\n", rp->r_ipc_list);
2273 #endif
2274 
2275 	if (!is_ipc_all && !is_ipc_all_sys)
2276 	{
2277 		add_forward_ipc(rp, privp);
2278 		add_backward_ipc(rp, privp);
2279 
2280 	}
2281 	else
2282 	{
2283 		for (i= 0; i<NR_SYS_PROCS; i++)
2284 		{
2285 			if (is_ipc_all || i != USER_PRIV_ID)
2286 				set_sys_bit(privp->s_ipc_to, i);
2287 		}
2288 	}
2289 }
2290 
2291