xref: /minix/minix/servers/rs/manager.c (revision fb9c64b2)
1 /*
2  * Changes:
3  *   Nov 22, 2009:	added basic live update support  (Cristiano Giuffrida)
4  *   Mar 02, 2009:	Extended isolation policies  (Jorrit N. Herder)
5  *   Jul 22, 2005:	Created  (Jorrit N. Herder)
6  */
7 
8 #include <paths.h>
9 
10 #include <sys/exec_elf.h>
11 
12 #include "inc.h"
13 
14 #include "kernel/proc.h"
15 
16 static int run_script(struct rproc *rp);
17 
18 /*===========================================================================*
19  *				caller_is_root				     *
20  *===========================================================================*/
21 static int caller_is_root(endpoint)
22 endpoint_t endpoint;				/* caller endpoint */
23 {
24   uid_t euid;
25 
26   /* Check if caller has root user ID. */
27   euid = getnuid(endpoint);
28   if (rs_verbose && euid != 0)
29   {
30 	printf("RS: got unauthorized request from endpoint %d\n", endpoint);
31   }
32 
33   return euid == 0;
34 }
35 
36 /*===========================================================================*
37  *				caller_can_control			     *
38  *===========================================================================*/
39 static int caller_can_control(endpoint, target_rp)
40 endpoint_t endpoint;
41 struct rproc *target_rp;
42 {
43   int control_allowed = 0;
44   register struct rproc *rp;
45   register struct rprocpub *rpub;
46   char *proc_name;
47   int c;
48 
49   proc_name = target_rp->r_pub->proc_name;
50 
51   /* Check if label is listed in caller's isolation policy. */
52   for (rp = BEG_RPROC_ADDR; rp < END_RPROC_ADDR; rp++) {
53 	if (!(rp->r_flags & RS_IN_USE))
54 		continue;
55 
56 	rpub = rp->r_pub;
57 	if (rpub->endpoint == endpoint) {
58 		break;
59 	}
60   }
61   if (rp == END_RPROC_ADDR) return 0;
62 
63   for (c = 0; c < rp->r_nr_control; c++) {
64 	if (strcmp(rp->r_control[c], proc_name) == 0) {
65 		control_allowed = 1;
66 		break;
67 	}
68   }
69 
70   if (rs_verbose)
71 	printf("RS: allowing %u control over %s via policy: %s\n",
72 		endpoint, target_rp->r_pub->label,
73 		control_allowed ? "yes" : "no");
74 
75   return control_allowed;
76 }
77 
78 /*===========================================================================*
79  *			     check_call_permission			     *
80  *===========================================================================*/
81 int check_call_permission(caller, call, rp)
82 endpoint_t caller;
83 int call;
84 struct rproc *rp;
85 {
86 /* Check if the caller has permission to execute a particular call. */
87   struct rprocpub *rpub;
88   int call_allowed;
89 
90   /* Caller should be either root or have control privileges. */
91   call_allowed = caller_is_root(caller);
92   if(rp) {
93       call_allowed |= caller_can_control(caller, rp);
94   }
95   if(!call_allowed) {
96       return EPERM;
97   }
98 
99   if(rp) {
100       rpub = rp->r_pub;
101 
102       /* Only allow RS_EDIT if the target is a user process. */
103       if(!(rp->r_priv.s_flags & SYS_PROC)) {
104           if(call != RS_EDIT) return EPERM;
105       }
106 
107       /* Disallow the call if an update is in progress. */
108       if(RUPDATE_IS_UPDATING()) {
109       	  return EBUSY;
110       }
111 
112       /* Disallow the call if another call is in progress for the service. */
113       if((rp->r_flags & RS_LATEREPLY)
114           || (rp->r_flags & RS_INITIALIZING)) {
115           return EBUSY;
116       }
117 
118       /* Only allow RS_DOWN and RS_RESTART if the service has terminated. */
119       if(rp->r_flags & RS_TERMINATED) {
120           if(call != RS_DOWN && call != RS_RESTART) return EPERM;
121       }
122 
123       /* Disallow RS_DOWN for core system services. */
124       if (rpub->sys_flags & SF_CORE_SRV) {
125           if(call == RS_DOWN) return EPERM;
126       }
127   }
128 
129   return OK;
130 }
131 
132 /*===========================================================================*
133  *				copy_rs_start				     *
134  *===========================================================================*/
135 int copy_rs_start(src_e, src_rs_start, dst_rs_start)
136 endpoint_t src_e;
137 char *src_rs_start;
138 struct rs_start *dst_rs_start;
139 {
140   int r;
141 
142   r = sys_datacopy(src_e, (vir_bytes) src_rs_start,
143   	SELF, (vir_bytes) dst_rs_start, sizeof(struct rs_start));
144 
145   return r;
146 }
147 
148 /*===========================================================================*
149  *				copy_label				     *
150  *===========================================================================*/
151 int copy_label(src_e, src_label, src_len, dst_label, dst_len)
152 endpoint_t src_e;
153 char *src_label;
154 size_t src_len;
155 char *dst_label;
156 size_t dst_len;
157 {
158   int s, len;
159 
160   len = MIN(dst_len-1, src_len);
161 
162   s = sys_datacopy(src_e, (vir_bytes) src_label,
163 	SELF, (vir_bytes) dst_label, len);
164   if (s != OK) return s;
165 
166   dst_label[len] = 0;
167 
168   return OK;
169 }
170 
171 /*===========================================================================*
172  *			      init_state_data				     *
173  *===========================================================================*/
174 int init_state_data(endpoint_t src_e, int prepare_state,
175     struct rs_state_data *src_rs_state_data,
176     struct rs_state_data *dst_rs_state_data)
177 {
178   int s, i, j, num_ipc_filters = 0;
179   struct rs_ipc_filter_el (*rs_ipc_filter_els)[IPCF_MAX_ELEMENTS];
180   struct rs_ipc_filter_el rs_ipc_filter[IPCF_MAX_ELEMENTS];
181   size_t rs_ipc_filter_size = sizeof(rs_ipc_filter);
182   ipc_filter_el_t (*ipcf_els_buff)[IPCF_MAX_ELEMENTS];
183   size_t ipcf_els_buff_size;
184 
185   dst_rs_state_data->size = 0;
186   dst_rs_state_data->eval_addr = NULL;
187   dst_rs_state_data->eval_len = 0;
188   dst_rs_state_data->ipcf_els = NULL;
189   dst_rs_state_data->ipcf_els_size  = 0;
190   if(src_rs_state_data->size != sizeof(struct rs_state_data)) {
191       return E2BIG;
192   }
193 
194   /* Initialize eval expression. */
195   if(prepare_state == SEF_LU_STATE_EVAL) {
196       if(src_rs_state_data->eval_len == 0 || !src_rs_state_data->eval_addr) {
197           return EINVAL;
198       }
199       dst_rs_state_data->eval_addr = malloc(src_rs_state_data->eval_len+1);
200       dst_rs_state_data->eval_len = src_rs_state_data->eval_len;
201       if(!dst_rs_state_data->eval_addr) {
202           return ENOMEM;
203       }
204       s = sys_datacopy(src_e, (vir_bytes) src_rs_state_data->eval_addr,
205           SELF, (vir_bytes) dst_rs_state_data->eval_addr,
206           dst_rs_state_data->eval_len);
207       if(s != OK) {
208           return s;
209       }
210       *((char*)dst_rs_state_data->eval_addr + dst_rs_state_data->eval_len) = '\0';
211       dst_rs_state_data->size = src_rs_state_data->size;
212   }
213 
214   /* Initialize ipc filters. */
215   if(src_rs_state_data->ipcf_els_size % rs_ipc_filter_size) {
216       return E2BIG;
217   }
218   rs_ipc_filter_els = src_rs_state_data->ipcf_els;
219   num_ipc_filters = src_rs_state_data->ipcf_els_size / rs_ipc_filter_size;
220   if(!rs_ipc_filter_els) {
221       return OK;
222   }
223 
224   ipcf_els_buff_size = sizeof(ipc_filter_el_t)*IPCF_MAX_ELEMENTS*num_ipc_filters;
225   if(src_e == VM_PROC_NR) {
226       ipcf_els_buff_size += sizeof(ipc_filter_el_t)*IPCF_MAX_ELEMENTS;
227   }
228   ipcf_els_buff = malloc(ipcf_els_buff_size);
229   if(!ipcf_els_buff) {
230       return ENOMEM;
231   }
232   memset(ipcf_els_buff, 0, ipcf_els_buff_size);
233   for(i=0;i<num_ipc_filters;i++) {
234       s = sys_datacopy(src_e, (vir_bytes) rs_ipc_filter_els[i],
235           SELF, (vir_bytes) rs_ipc_filter, rs_ipc_filter_size);
236       if(s != OK) {
237           return s;
238       }
239       for(j=0;j<IPCF_MAX_ELEMENTS && rs_ipc_filter[j].flags;j++) {
240           endpoint_t m_source = 0;
241           int m_type = 0;
242           int flags = rs_ipc_filter[j].flags;
243           if(flags & IPCF_MATCH_M_TYPE) {
244               m_type = rs_ipc_filter[j].m_type;
245           }
246           if(flags & IPCF_MATCH_M_SOURCE) {
247               if(ds_retrieve_label_endpt(rs_ipc_filter[j].m_label,&m_source) != OK) {
248                   /* try to see if an endpoint was provided as label */
249                   char *buff;
250                   if(!strcmp("ANY_USR", rs_ipc_filter[j].m_label)) {
251                       m_source = ANY_USR;
252                   }
253                   else if(!strcmp("ANY_SYS", rs_ipc_filter[j].m_label)) {
254                       m_source = ANY_SYS;
255                   }
256                   else if(!strcmp("ANY_TSK", rs_ipc_filter[j].m_label)) {
257                       m_source = ANY_TSK;
258                   }
259                   else {
260                       errno=0;
261                       m_source = strtol(rs_ipc_filter[j].m_label, &buff, 10);
262                       if(errno || strcmp(buff, "")) {
263                             return ESRCH;
264                       }
265                   }
266               }
267           }
268           ipcf_els_buff[i][j].flags = flags;
269           ipcf_els_buff[i][j].m_source = m_source;
270           ipcf_els_buff[i][j].m_type = m_type;
271       }
272   }
273   if(src_e == VM_PROC_NR) {
274       /* Make sure VM can still talk to us at update time. */
275       ipcf_els_buff[i][0].flags = (IPCF_EL_WHITELIST|IPCF_MATCH_M_SOURCE|IPCF_MATCH_M_TYPE);
276       ipcf_els_buff[i][0].m_source = RS_PROC_NR;
277       ipcf_els_buff[i][0].m_type = VM_RS_UPDATE;
278   }
279   dst_rs_state_data->size = src_rs_state_data->size;
280   dst_rs_state_data->ipcf_els = ipcf_els_buff;
281   dst_rs_state_data->ipcf_els_size = ipcf_els_buff_size;
282 
283   return OK;
284 }
285 
286 /*===========================================================================*
287  *			        build_cmd_dep				     *
288  *===========================================================================*/
289 void build_cmd_dep(struct rproc *rp)
290 {
291   struct rprocpub *rpub;
292   int arg_count;
293   int len;
294   char *cmd_ptr;
295 
296   rpub = rp->r_pub;
297 
298   /* Build argument vector to be passed to execute call. The format of the
299    * arguments vector is: path, arguments, NULL.
300    */
301   strcpy(rp->r_args, rp->r_cmd);		/* copy raw command */
302   arg_count = 0;				/* initialize arg count */
303   rp->r_argv[arg_count++] = rp->r_args;		/* start with path */
304   cmd_ptr = rp->r_args;				/* do some parsing */
305   while(*cmd_ptr != '\0') {			/* stop at end of string */
306       if (*cmd_ptr == ' ') {			/* next argument */
307           *cmd_ptr = '\0';			/* terminate previous */
308 	  while (*++cmd_ptr == ' ') ; 		/* skip spaces */
309 	  if (*cmd_ptr == '\0') break;		/* no arg following */
310 	  /* There are ARGV_ELEMENTS elements; must leave one for null */
311 	  if (arg_count>=ARGV_ELEMENTS-1) {	/* arg vector full */
312 		printf("RS: build_cmd_dep: too many args\n");
313 	  	break;
314 	  }
315 	  assert(arg_count < ARGV_ELEMENTS);
316           rp->r_argv[arg_count++] = cmd_ptr;	/* add to arg vector */
317       }
318       cmd_ptr ++;				/* continue parsing */
319   }
320   assert(arg_count < ARGV_ELEMENTS);
321   rp->r_argv[arg_count] = NULL;			/* end with NULL pointer */
322   rp->r_argc = arg_count;
323 }
324 
325 /*===========================================================================*
326  *				end_srv_init				     *
327  *===========================================================================*/
328 void end_srv_init(struct rproc *rp)
329 {
330   struct rprocpub *rpub;
331   int r;
332 
333   rpub = rp->r_pub;
334 
335   /* See if a late reply has to be sent. */
336   late_reply(rp, OK);
337 
338   /* If the service has completed initialization after a crash
339    * make the new instance active and cleanup the old replica.
340    * If the service was part of a scheduled update, schedule the new
341    * replica for the same update.
342    */
343   if(rp->r_prev_rp) {
344       if(SRV_IS_UPD_SCHEDULED(rp->r_prev_rp)) {
345           rupdate_upd_move(rp->r_prev_rp, rp);
346       }
347       cleanup_service(rp->r_prev_rp);
348       rp->r_prev_rp = NULL;
349       rp->r_restarts += 1;
350 
351       if(rs_verbose)
352           printf("RS: %s completed restart\n", srv_to_string(rp));
353   }
354   rp->r_next_rp = NULL;
355 }
356 
357 /*===========================================================================*
358  *			     kill_service_debug				     *
359  *===========================================================================*/
360 int kill_service_debug(file, line, rp, errstr, err)
361 char *file;
362 int line;
363 struct rproc *rp;
364 char *errstr;
365 int err;
366 {
367 /* Crash a system service and don't let it restart. */
368   if(errstr && !shutting_down) {
369       printf("RS: %s (error %d)\n", errstr, err);
370   }
371   rp->r_flags |= RS_EXITING;				/* expect exit */
372   crash_service_debug(file, line, rp);			/* simulate crash */
373 
374   return err;
375 }
376 
377 /*===========================================================================*
378  *			    crash_service_debug				     *
379  *===========================================================================*/
380 int crash_service_debug(file, line, rp)
381 char *file;
382 int line;
383 struct rproc *rp;
384 {
385 /* Simluate a crash in a system service. */
386   struct rprocpub *rpub;
387 
388   rpub = rp->r_pub;
389 
390   if(rs_verbose)
391       printf("RS: %s %skilled at %s:%d\n", srv_to_string(rp),
392           rp->r_flags & RS_EXITING ? "lethally " : "", file, line);
393 
394   /* RS should simply exit() directly. */
395   if(rpub->endpoint == RS_PROC_NR) {
396       exit(1);
397   }
398 
399   return sys_kill(rpub->endpoint, SIGKILL);
400 }
401 
402 /*===========================================================================*
403  *			  cleanup_service_debug				     *
404  *===========================================================================*/
405 void cleanup_service_debug(file, line, rp)
406 char *file;
407 int line;
408 struct rproc *rp;
409 {
410   struct rprocpub *rpub;
411   int detach, cleanup_script;
412   int s;
413 
414   rpub = rp->r_pub;
415 
416   if(!(rp->r_flags & RS_DEAD)) {
417       if(rs_verbose)
418           printf("RS: %s marked for cleanup at %s:%d\n", srv_to_string(rp),
419               file, line);
420 
421       /* Unlink service the first time. */
422       if(rp->r_next_rp) {
423           rp->r_next_rp->r_prev_rp = NULL;
424           rp->r_next_rp = NULL;
425       }
426       if(rp->r_prev_rp) {
427           rp->r_prev_rp->r_next_rp = NULL;
428           rp->r_prev_rp = NULL;
429       }
430       if(rp->r_new_rp) {
431           rp->r_new_rp->r_old_rp = NULL;
432           rp->r_new_rp = NULL;
433       }
434       if(rp->r_old_rp) {
435           rp->r_old_rp->r_new_rp = NULL;
436           rp->r_old_rp = NULL;
437       }
438       rp->r_flags |= RS_DEAD;
439 
440       /* Make sure the service can no longer run and unblock IPC callers. */
441       sys_privctl(rpub->endpoint, SYS_PRIV_DISALLOW, NULL);
442       sys_privctl(rpub->endpoint, SYS_PRIV_CLEAR_IPC_REFS, NULL);
443       rp->r_flags &= ~RS_ACTIVE;
444 
445       /* Send a late reply if there is any pending. */
446       late_reply(rp, OK);
447 
448       return;
449   }
450 
451   cleanup_script = rp->r_flags & RS_CLEANUP_SCRIPT;
452   detach = rp->r_flags & RS_CLEANUP_DETACH;
453 
454   /* Cleanup the service when not detaching. */
455   if(!detach) {
456       if(rs_verbose)
457           printf("RS: %s cleaned up at %s:%d\n", srv_to_string(rp),
458               file, line);
459 
460       /* Tell scheduler this process is finished */
461       if ((s = sched_stop(rp->r_scheduler, rpub->endpoint)) != OK) {
462             printf("RS: warning: scheduler won't give up process: %d\n", s);
463       }
464 
465       /* Ask PM to exit the service */
466       if(rp->r_pid == -1) {
467           printf("RS: warning: attempt to kill pid -1!\n");
468       }
469       else {
470           srv_kill(rp->r_pid, SIGKILL);
471       }
472   }
473 
474   /* See if we need to run a script now. */
475   if(cleanup_script) {
476       rp->r_flags &= ~RS_CLEANUP_SCRIPT;
477       s = run_script(rp);
478       if(s != OK) {
479           printf("RS: warning: cannot run cleanup script: %d\n", s);
480       }
481   }
482 
483   if(detach) {
484       /* Detach service when asked to. */
485       detach_service(rp);
486   }
487   else {
488       /* Free slot otherwise, unless we're about to reuse it */
489       if (!(rp->r_flags & RS_REINCARNATE))
490           free_slot(rp);
491   }
492 }
493 
494 /*===========================================================================*
495  *			     detach_service_debug			     *
496  *===========================================================================*/
497 void detach_service_debug(file, line, rp)
498 char *file;
499 int line;
500 struct rproc *rp;
501 {
502 /* Detach the given system service. */
503   static unsigned long detach_counter = 0;
504   char label[RS_MAX_LABEL_LEN];
505   struct rprocpub *rpub;
506 
507   rpub = rp->r_pub;
508 
509   /* Publish a new unique label for the system service. */
510   rpub->label[RS_MAX_LABEL_LEN-1] = '\0';
511   strcpy(label, rpub->label);
512   snprintf(rpub->label, RS_MAX_LABEL_LEN, "%lu.%s", ++detach_counter, label);
513   ds_publish_label(rpub->label, rpub->endpoint, DSF_OVERWRITE);
514 
515   if(rs_verbose)
516       printf("RS: %s detached at %s:%d\n", srv_to_string(rp),
517           file, line);
518 
519   /* Allow the service to run. */
520   rp->r_flags = RS_IN_USE | RS_ACTIVE;
521   rpub->sys_flags &= ~(SF_CORE_SRV|SF_DET_RESTART);
522   rp->r_period = 0;
523   rpub->dev_nr = 0;
524   rpub->nr_domain = 0;
525   sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL);
526 }
527 
528 /*===========================================================================*
529  *				create_service				     *
530  *===========================================================================*/
531 int create_service(rp)
532 struct rproc *rp;
533 {
534 /* Create the given system service. */
535   int child_proc_nr_e, child_proc_nr_n;		/* child process slot */
536   pid_t child_pid;				/* child's process id */
537   int s, use_copy, has_replica;
538   extern char **environ;
539   struct rprocpub *rpub;
540 
541   rpub = rp->r_pub;
542   use_copy= (rpub->sys_flags & SF_USE_COPY);
543   has_replica= (rp->r_old_rp
544       || (rp->r_prev_rp && !(rp->r_prev_rp->r_flags & RS_TERMINATED)));
545 
546   /* Do we need an existing replica to create the service? */
547   if(!has_replica && (rpub->sys_flags & SF_NEED_REPL)) {
548       printf("RS: unable to create service '%s' without a replica\n",
549           rpub->label);
550       free_slot(rp);
551       return(EPERM);
552   }
553 
554   /* Do we need an in-memory copy to create the service? */
555   if(!use_copy && (rpub->sys_flags & SF_NEED_COPY)) {
556       printf("RS: unable to create service '%s' without an in-memory copy\n",
557           rpub->label);
558       free_slot(rp);
559       return(EPERM);
560   }
561 
562   /* Do we have a copy or a command to create the service? */
563   if(!use_copy && !strcmp(rp->r_cmd, "")) {
564       printf("RS: unable to create service '%s' without a copy or command\n",
565           rpub->label);
566       free_slot(rp);
567       return(EPERM);
568   }
569 
570   /* Now fork and branch for parent and child process (and check for error).
571    * After fork()ing, we need to pin RS memory again or pagefaults will occur
572    * on future writes.
573    */
574   if(rs_verbose)
575       printf("RS: forking child with srv_fork()...\n");
576   child_pid= srv_fork(rp->r_uid, 0);	/* Force group to wheel for now */
577   if(child_pid < 0) {
578       printf("RS: srv_fork() failed (error %d)\n", child_pid);
579       free_slot(rp);
580       return(child_pid);
581   }
582 
583   /* Get endpoint of the child. */
584   if ((s = getprocnr(child_pid, &child_proc_nr_e)) != 0)
585 	panic("unable to get child endpoint: %d", s);
586 
587   /* There is now a child process. Update the system process table. */
588   child_proc_nr_n = _ENDPOINT_P(child_proc_nr_e);
589   rp->r_flags = RS_IN_USE;			/* mark slot in use */
590   rpub->endpoint = child_proc_nr_e;		/* set child endpoint */
591   rp->r_pid = child_pid;			/* set child pid */
592   rp->r_check_tm = 0;				/* not checked yet */
593   rp->r_alive_tm = getticks(); 			/* currently alive */
594   rp->r_stop_tm = 0;				/* not exiting yet */
595   rp->r_backoff = 0;				/* not to be restarted */
596   rproc_ptr[child_proc_nr_n] = rp;		/* mapping for fast access */
597   rpub->in_use = TRUE;				/* public entry is now in use */
598 
599   /* Set and synch the privilege structure for the new service. */
600   if ((s = sys_privctl(child_proc_nr_e, SYS_PRIV_SET_SYS, &rp->r_priv)) != OK
601 	|| (s = sys_getpriv(&rp->r_priv, child_proc_nr_e)) != OK) {
602 	printf("RS: unable to set privilege structure: %d\n", s);
603 	cleanup_service(rp);
604 	vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
605 	return ENOMEM;
606   }
607 
608   /* Set the scheduler for this process */
609   if ((s = sched_init_proc(rp)) != OK) {
610 	printf("RS: unable to start scheduling: %d\n", s);
611 	cleanup_service(rp);
612 	vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
613 	return s;
614   }
615 
616   /* Copy the executable image into the child process. If no copy exists,
617    * allocate one and free it right after exec completes.
618    */
619   if(use_copy) {
620       if(rs_verbose)
621           printf("RS: %s uses an in-memory copy\n",
622               srv_to_string(rp));
623   }
624   else {
625       if ((s = read_exec(rp)) != OK) {
626           printf("RS: read_exec failed: %d\n", s);
627           cleanup_service(rp);
628           vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
629           return s;
630       }
631   }
632   if(rs_verbose)
633         printf("RS: execing child with srv_execve()...\n");
634   s = srv_execve(child_proc_nr_e, rp->r_exec, rp->r_exec_len, rpub->proc_name,
635         rp->r_argv, environ);
636   vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
637   if (s != OK) {
638         printf("RS: srv_execve failed: %d\n", s);
639         cleanup_service(rp);
640         return s;
641   }
642   if(!use_copy) {
643         free_exec(rp);
644   }
645 
646   /* The purpose of non-blocking forks is to avoid involving VFS in the forking
647    * process, because VFS may be blocked on a sendrec() to a MFS that is
648    * waiting for a endpoint update for a dead driver. We have just published
649    * that update, but VFS may still be blocked. As a result, VFS may not yet
650    * have received PM's fork message. Hence, if we call mapdriver()
651    * immediately, VFS may not know about the process and thus refuse to add the
652    * driver entry. The following temporary hack works around this by forcing
653    * blocking communication from PM to VFS. Once VFS has been made non-blocking
654    * towards MFS instances, this hack and the big part of srv_fork() can go.
655    */
656   setuid(0);
657 
658   /* If this is a RS instance, pin memory. */
659   if(rp->r_priv.s_flags & ROOT_SYS_PROC) {
660       if(rs_verbose)
661           printf("RS: pinning memory of RS instance %s\n", srv_to_string(rp));
662 
663       s = vm_memctl(rpub->endpoint, VM_RS_MEM_PIN, 0, 0);
664       if(s != OK) {
665           printf("vm_memctl failed: %d\n", s);
666           cleanup_service(rp);
667           return s;
668       }
669   }
670 
671   /* If this is a VM instance, let VM know now. */
672   if(rp->r_priv.s_flags & VM_SYS_PROC) {
673       struct rproc *rs_rp;
674       struct rproc **rs_rps;
675       int i, nr_rs_rps;
676 
677       if(rs_verbose)
678           printf("RS: informing VM of instance %s\n", srv_to_string(rp));
679 
680       s = vm_memctl(rpub->endpoint, VM_RS_MEM_MAKE_VM, 0, 0);
681       if(s != OK) {
682           printf("vm_memctl failed: %d\n", s);
683           cleanup_service(rp);
684           return s;
685       }
686 
687       /* VM may start actually pinning memory for us only now.
688        * Ask again for all our instances.
689        */
690       rs_rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
691       get_service_instances(rs_rp, &rs_rps, &nr_rs_rps);
692       for(i=0;i<nr_rs_rps;i++) {
693           vm_memctl(rs_rps[i]->r_pub->endpoint, VM_RS_MEM_PIN, 0, 0);
694       }
695   }
696 
697   /* Tell VM about allowed calls. */
698   if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0], TRUE)) != OK) {
699       printf("RS: vm_set_priv failed: %d\n", s);
700       cleanup_service(rp);
701       return s;
702   }
703 
704   if(rs_verbose)
705       printf("RS: %s created\n", srv_to_string(rp));
706 
707   return OK;
708 }
709 
710 /*===========================================================================*
711  *				clone_service				     *
712  *===========================================================================*/
713 int clone_service(struct rproc *rp, int instance_flag, int init_flags)
714 {
715 /* Clone the given system service instance. */
716   struct rproc *replica_rp;
717   struct rprocpub *replica_rpub;
718   struct rproc **rp_link;
719   struct rproc **replica_link;
720   struct rproc *rs_rp;
721   int rs_flags;
722   int r;
723 
724   if(rs_verbose)
725       printf("RS: %s creating a replica\n", srv_to_string(rp));
726 
727   /* VM can only reliably support one replica at the time for now.
728    * XXX TO-DO: Fix VM's rs_memctl_make_vm_instance to allow multiple replicas.
729    */
730   if(rp->r_pub->endpoint == VM_PROC_NR && instance_flag == LU_SYS_PROC
731       && rp->r_next_rp) {
732       cleanup_service_now(rp->r_next_rp);
733       rp->r_next_rp = NULL;
734   }
735 
736   /* Clone slot. */
737   if((r = clone_slot(rp, &replica_rp)) != OK) {
738       return r;
739   }
740   replica_rpub = replica_rp->r_pub;
741 
742   /* Clone is a live updated or restarted service instance? */
743   if(instance_flag == LU_SYS_PROC) {
744       rp_link = &rp->r_new_rp;
745       replica_link = &replica_rp->r_old_rp;
746   }
747   else {
748       rp_link = &rp->r_next_rp;
749       replica_link = &replica_rp->r_prev_rp;
750   }
751   replica_rp->r_priv.s_flags |= instance_flag;
752   replica_rp->r_priv.s_init_flags |= init_flags;
753 
754   /* Link the two slots. */
755   *rp_link = replica_rp;
756   *replica_link = rp;
757 
758   /* Create a new replica of the service. */
759   r = create_service(replica_rp);
760   if(r != OK) {
761       *rp_link = NULL;
762       return r;
763   }
764 
765   /* If this instance is for restarting RS, set up a backup signal manager. */
766   rs_flags = (ROOT_SYS_PROC | RST_SYS_PROC);
767   if((replica_rp->r_priv.s_flags & rs_flags) == rs_flags) {
768       rs_rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
769 
770       /* Update signal managers. */
771       r = update_sig_mgrs(rs_rp, SELF, replica_rpub->endpoint);
772       if(r == OK) {
773           r = update_sig_mgrs(replica_rp, SELF, NONE);
774       }
775       if(r != OK) {
776           *rp_link = NULL;
777           return kill_service(replica_rp, "update_sig_mgrs failed", r);
778       }
779   }
780 
781   return OK;
782 }
783 
784 /*===========================================================================*
785  *				publish_service				     *
786  *===========================================================================*/
787 int publish_service(rp)
788 struct rproc *rp;				/* pointer to service slot */
789 {
790 /* Publish a service. */
791   int r;
792   struct rprocpub *rpub;
793   struct rs_pci pci_acl;
794   message m;
795   endpoint_t ep;
796 
797   rpub = rp->r_pub;
798 
799   /* Register label with DS. */
800   r = ds_publish_label(rpub->label, rpub->endpoint, DSF_OVERWRITE);
801   if (r != OK) {
802       return kill_service(rp, "ds_publish_label call failed", r);
803   }
804 
805   /* If the service is a driver, map it. */
806   if (rpub->dev_nr > 0 || rpub->nr_domain > 0) {
807       /* The purpose of non-blocking forks is to avoid involving VFS in the
808        * forking process, because VFS may be blocked on a ipc_sendrec() to a MFS
809        * that is waiting for a endpoint update for a dead driver. We have just
810        * published that update, but VFS may still be blocked. As a result, VFS
811        * may not yet have received PM's fork message. Hence, if we call
812        * mapdriver() immediately, VFS may not know about the process and thus
813        * refuse to add the driver entry. The following temporary hack works
814        * around this by forcing blocking communication from PM to VFS. Once VFS
815        * has been made non-blocking towards MFS instances, this hack and the
816        * big part of srv_fork() can go.
817        */
818       setuid(0);
819 
820       if ((r = mapdriver(rpub->label, rpub->dev_nr, rpub->domain,
821         rpub->nr_domain)) != OK) {
822           return kill_service(rp, "couldn't map driver", r);
823       }
824   }
825 
826 #if USE_PCI
827   /* If PCI properties are set, inform the PCI driver about the new service. */
828   if(rpub->pci_acl.rsp_nr_device || rpub->pci_acl.rsp_nr_class) {
829       pci_acl = rpub->pci_acl;
830       strcpy(pci_acl.rsp_label, rpub->label);
831       pci_acl.rsp_endpoint= rpub->endpoint;
832 
833       r = pci_set_acl(&pci_acl);
834       if (r != OK) {
835           return kill_service(rp, "pci_set_acl call failed", r);
836       }
837   }
838 #endif /* USE_PCI */
839 
840   if (rpub->devman_id != 0) {
841 	  r = ds_retrieve_label_endpt("devman",&ep);
842 
843 	  if (r != OK) {
844 		return kill_service(rp, "devman not running?", r);
845 	  }
846 	  m.m_type = DEVMAN_BIND;
847 	  m.DEVMAN_ENDPOINT  = rpub->endpoint;
848 	  m.DEVMAN_DEVICE_ID = rpub->devman_id;
849 	  r = ipc_sendrec(ep, &m);
850 	  if (r != OK || m.DEVMAN_RESULT != OK) {
851 		 return kill_service(rp, "devman bind device failed", r);
852 	  }
853   }
854 
855   if(rs_verbose)
856       printf("RS: %s published\n", srv_to_string(rp));
857 
858   return OK;
859 }
860 
861 /*===========================================================================*
862  *			      unpublish_service				     *
863  *===========================================================================*/
864 int unpublish_service(rp)
865 struct rproc *rp;				/* pointer to service slot */
866 {
867 /* Unpublish a service. */
868   struct rprocpub *rpub;
869   int r, result;
870   message m;
871   endpoint_t ep;
872 
873 
874   rpub = rp->r_pub;
875   result = OK;
876 
877   /* Unregister label with DS. */
878   r = ds_delete_label(rpub->label);
879   if (r != OK && !shutting_down) {
880      printf("RS: ds_delete_label call failed (error %d)\n", r);
881      result = r;
882   }
883 
884   /* No need to inform VFS and VM, cleanup is done on exit automatically. */
885 
886 #if USE_PCI
887   /* If PCI properties are set, inform the PCI driver. */
888   if(rpub->pci_acl.rsp_nr_device || rpub->pci_acl.rsp_nr_class) {
889       r = pci_del_acl(rpub->endpoint);
890       if (r != OK && !shutting_down) {
891           printf("RS: pci_del_acl call failed (error %d)\n", r);
892           result = r;
893       }
894   }
895 #endif /* USE_PCI */
896 
897   if (rpub->devman_id != 0) {
898 	  r = ds_retrieve_label_endpt("devman",&ep);
899 
900 	  if (r != OK) {
901 		   printf("RS: devman not running?");
902 	  } else {
903 		m.m_type = DEVMAN_UNBIND;
904 		m.DEVMAN_ENDPOINT  = rpub->endpoint;
905 		m.DEVMAN_DEVICE_ID = rpub->devman_id;
906 		r = ipc_sendrec(ep, &m);
907 
908 		if (r != OK || m.DEVMAN_RESULT != OK) {
909 			 printf("RS: devman unbind device failed");
910 		}
911 	  }
912   }
913 
914   if(rs_verbose)
915       printf("RS: %s unpublished\n", srv_to_string(rp));
916 
917   return result;
918 }
919 
920 /*===========================================================================*
921  *				run_service				     *
922  *===========================================================================*/
923 int run_service(struct rproc *rp, int init_type, int init_flags)
924 {
925 /* Let a newly created service run. */
926   struct rprocpub *rpub;
927   int s;
928 
929   rpub = rp->r_pub;
930 
931   /* Allow the service to run. */
932   if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
933       return kill_service(rp, "unable to allow the service to run",s);
934   }
935 
936   /* Initialize service. */
937   if((s = init_service(rp, init_type, init_flags)) != OK) {
938       return kill_service(rp, "unable to initialize service", s);
939   }
940 
941   if(rs_verbose)
942       printf("RS: %s allowed to run\n", srv_to_string(rp));
943 
944   return OK;
945 }
946 
947 /*===========================================================================*
948  *				start_service				     *
949  *===========================================================================*/
950 int start_service(struct rproc *rp, int init_flags)
951 {
952 /* Start a system service. */
953   int r;
954   struct rprocpub *rpub;
955 
956   rpub = rp->r_pub;
957 
958   /* Create and make active. */
959   rp->r_priv.s_init_flags |= init_flags;
960   r = create_service(rp);
961   if(r != OK) {
962       return r;
963   }
964   activate_service(rp, NULL);
965 
966   /* Publish service properties. */
967   r = publish_service(rp);
968   if (r != OK) {
969       return r;
970   }
971 
972   /* Run. */
973   r = run_service(rp, SEF_INIT_FRESH, init_flags);
974   if(r != OK) {
975       return r;
976   }
977 
978   if(rs_verbose)
979       printf("RS: %s started with major %d\n", srv_to_string(rp),
980           rpub->dev_nr);
981 
982   return OK;
983 }
984 
985 /*===========================================================================*
986  *				stop_service				     *
987  *===========================================================================*/
988 void stop_service(struct rproc *rp,int how)
989 {
990   struct rprocpub *rpub;
991   int signo;
992 
993   rpub = rp->r_pub;
994 
995   /* Try to stop the system service. First send a SIGTERM signal to ask the
996    * system service to terminate. If the service didn't install a signal
997    * handler, it will be killed. If it did and ignores the signal, we'll
998    * find out because we record the time here and send a SIGKILL.
999    */
1000   if(rs_verbose)
1001       printf("RS: %s signaled with SIGTERM\n", srv_to_string(rp));
1002 
1003   signo = rpub->endpoint != RS_PROC_NR ? SIGTERM : SIGHUP; /* SIGHUP for RS. */
1004 
1005   rp->r_flags |= how;				/* what to on exit? */
1006   sys_kill(rpub->endpoint, signo);		/* first try friendly */
1007   rp->r_stop_tm = getticks(); 			/* record current time */
1008 }
1009 
1010 /*===========================================================================*
1011  *			      activate_service				     *
1012  *===========================================================================*/
1013 void activate_service(struct rproc *rp, struct rproc *ex_rp)
1014 {
1015 /* Activate a service instance and deactivate another one if requested. */
1016 
1017   if(ex_rp && (ex_rp->r_flags & RS_ACTIVE) ) {
1018       ex_rp->r_flags &= ~RS_ACTIVE;
1019       if(rs_verbose)
1020           printf("RS: %s becomes inactive\n", srv_to_string(ex_rp));
1021   }
1022 
1023   if(! (rp->r_flags & RS_ACTIVE) ) {
1024       rp->r_flags |= RS_ACTIVE;
1025       if(rs_verbose)
1026           printf("RS: %s becomes active\n", srv_to_string(rp));
1027   }
1028 }
1029 
1030 /*===========================================================================*
1031  *			      reincarnate_service			     *
1032  *===========================================================================*/
1033 void reincarnate_service(struct rproc *old_rp)
1034 {
1035 /* Restart a service as if it were never started before. */
1036   struct rproc *rp;
1037   int r, restarts;
1038 
1039   if ((r = clone_slot(old_rp, &rp)) != OK) {
1040       printf("RS: Failed to clone the slot: %d\n", r);
1041       return;
1042   }
1043 
1044   rp->r_flags = RS_IN_USE;
1045   rproc_ptr[_ENDPOINT_P(rp->r_pub->endpoint)] = NULL;
1046 
1047   restarts = rp->r_restarts;
1048   start_service(rp, SEF_INIT_FRESH);
1049   rp->r_restarts = restarts + 1;
1050 }
1051 
1052 /*===========================================================================*
1053  *			      terminate_service				     *
1054  *===========================================================================*/
1055 void terminate_service(struct rproc *rp)
1056 {
1057 /* Handle a termination event for a system service. */
1058   struct rproc **rps;
1059   struct rprocpub *rpub;
1060   int nr_rps, norestart;
1061   int i, r;
1062 
1063   rpub = rp->r_pub;
1064 
1065   if(rs_verbose)
1066      printf("RS: %s terminated\n", srv_to_string(rp));
1067 
1068   /* Deal with failures during initialization. */
1069   if(rp->r_flags & RS_INITIALIZING) {
1070       /* If updating, rollback. */
1071       if(SRV_IS_UPDATING(rp)) {
1072           printf("RS: update failed: state transfer failed. Rolling back...\n");
1073           end_update(rp->r_init_err, RS_REPLY);
1074           rp->r_init_err = ERESTART;
1075           return;
1076       }
1077 
1078       if (rpub->sys_flags & SF_NO_BIN_EXP) {
1079           /* If service was deliberately started with binary exponential offset
1080 	   * disabled, we're going to assume we want to refresh a service upon
1081 	   * failure.
1082 	   */
1083           if(rs_verbose)
1084               printf("RS: service '%s' exited during initialization; "
1085 		     "refreshing\n", rpub->label);
1086           rp->r_flags |= RS_REFRESHING; /* restart initialization. */
1087       } else {
1088           if(rs_verbose)
1089               printf("RS: service '%s' exited during initialization; "
1090                      "exiting\n", rpub->label);
1091           rp->r_flags |= RS_EXITING; /* don't restart. */
1092       }
1093   }
1094 
1095   /* If an update process is in progress, end it before doing anything else.
1096    * This is to be on the safe side, since there may be some weird dependencies
1097    * with services under update, while we perform recovery actions.
1098    */
1099   if(RUPDATE_IS_UPDATING()) {
1100       printf("RS: aborting the update after a crash...\n");
1101       abort_update_proc(ERESTART);
1102   }
1103 
1104   /* Force exit when no restart is requested. */
1105   norestart = !(rp->r_flags & RS_EXITING) && (rp->r_pub->sys_flags & SF_NORESTART);
1106   if(norestart) {
1107       rp->r_flags |= RS_EXITING;
1108       if((rp->r_pub->sys_flags & SF_DET_RESTART)
1109           && (rp->r_restarts < MAX_DET_RESTART)) {
1110           /* Detach at cleanup time. */
1111           rp->r_flags |= RS_CLEANUP_DETACH;
1112       }
1113       if(rp->r_script[0] != '\0') {
1114           /* Run script at cleanup time. */
1115           rp->r_flags |= RS_CLEANUP_SCRIPT;
1116       }
1117   }
1118 
1119   if (rp->r_flags & RS_EXITING) {
1120       /* If a core system service is exiting, we are in trouble. */
1121       if ((rp->r_pub->sys_flags & SF_CORE_SRV) && !shutting_down) {
1122           printf("core system service died: %s\n", srv_to_string(rp));
1123 	  _exit(1);
1124       }
1125 
1126       /* If this service was scheduled for the update, abort the update now. */
1127       if(SRV_IS_UPD_SCHEDULED(rp)) {
1128           printf("RS: aborting the scheduled update, one of the services part of it is exiting...\n");
1129           abort_update_proc(EDEADSRCDST);
1130       }
1131 
1132       /* See if a late reply has to be sent. */
1133       r = (rp->r_caller_request == RS_DOWN
1134           || (rp->r_caller_request == RS_REFRESH && norestart) ? OK : EDEADEPT);
1135       late_reply(rp, r);
1136 
1137       /* Unpublish the service. */
1138       unpublish_service(rp);
1139 
1140       /* Cleanup all the instances of the service. */
1141       get_service_instances(rp, &rps, &nr_rps);
1142       for(i=0;i<nr_rps;i++) {
1143           cleanup_service(rps[i]);
1144       }
1145 
1146       /* If the service is reincarnating, its slot has not been cleaned up.
1147        * Check for this flag now, and attempt to start the service again.
1148        * If this fails, start_service() itself will perform cleanup.
1149        */
1150       if (rp->r_flags & RS_REINCARNATE) {
1151           rp->r_flags &= ~RS_REINCARNATE;
1152           reincarnate_service(rp);
1153       }
1154   }
1155   else if(rp->r_flags & RS_REFRESHING) {
1156       /* Restart service. */
1157       restart_service(rp);
1158   }
1159   else {
1160       /* Determine what to do. If this is the first unexpected
1161        * exit, immediately restart this service. Otherwise use
1162        * a binary exponential backoff.
1163        */
1164       if (rp->r_restarts > 0) {
1165           if (!(rpub->sys_flags & SF_NO_BIN_EXP)) {
1166               rp->r_backoff = 1 << MIN(rp->r_restarts,(BACKOFF_BITS-2));
1167               rp->r_backoff = MIN(rp->r_backoff,MAX_BACKOFF);
1168               if ((rpub->sys_flags & SF_USE_COPY) && rp->r_backoff > 1)
1169                   rp->r_backoff= 1;
1170 	  }
1171 	  else {
1172               rp->r_backoff = 1;
1173 	  }
1174           return;
1175       }
1176 
1177       /* Restart service. */
1178       restart_service(rp);
1179   }
1180 }
1181 
1182 /*===========================================================================*
1183  *				run_script				     *
1184  *===========================================================================*/
1185 static int run_script(struct rproc *rp)
1186 {
1187 	int r, endpoint;
1188 	pid_t pid;
1189 	char *reason;
1190 	char incarnation_str[20];	/* Enough for a counter? */
1191 	char *envp[1] = { NULL };
1192 	struct rprocpub *rpub;
1193 
1194 	rpub = rp->r_pub;
1195 	if (rp->r_flags & RS_REFRESHING)
1196 		reason= "restart";
1197 	else if (rp->r_flags & RS_NOPINGREPLY)
1198 		reason= "no-heartbeat";
1199 	else reason= "terminated";
1200 	snprintf(incarnation_str, sizeof(incarnation_str), "%d", rp->r_restarts);
1201 
1202  	if(rs_verbose) {
1203 		printf("RS: %s:\n", srv_to_string(rp));
1204 		printf("RS:     calling script '%s'\n", rp->r_script);
1205 		printf("RS:     reason: '%s'\n", reason);
1206 		printf("RS:     incarnation: '%s'\n", incarnation_str);
1207 	}
1208 
1209 	pid= fork();
1210 	switch(pid)
1211 	{
1212 	case -1:
1213 		return errno;
1214 	case 0:
1215 		execle(_PATH_BSHELL, "sh", rp->r_script, rpub->label, reason,
1216 			incarnation_str, (char*) NULL, envp);
1217 		printf("RS: run_script: execl '%s' failed: %s\n",
1218 			rp->r_script, strerror(errno));
1219 		exit(1);
1220 	default:
1221 		/* Set the privilege structure for the child process. */
1222 		if ((r = getprocnr(pid, &endpoint)) != 0)
1223 			panic("unable to get child endpoint: %d", r);
1224 		if ((r = sys_privctl(endpoint, SYS_PRIV_SET_USER, NULL))
1225 			!= OK) {
1226 			return kill_service(rp,"can't set script privileges",r);
1227 		}
1228 		/* Set the script's privileges on other servers. */
1229 		vm_set_priv(endpoint, NULL, FALSE);
1230 		if ((r = vm_set_priv(endpoint, NULL, FALSE)) != OK) {
1231 			return kill_service(rp,"can't set script VM privs",r);
1232 		}
1233 		/* Allow the script to run. */
1234 		if ((r = sys_privctl(endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
1235 			return kill_service(rp,"can't let the script run",r);
1236 		}
1237 		/* Pin RS memory again after fork()ing. */
1238 		vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN, 0, 0);
1239 	}
1240 	return OK;
1241 }
1242 
1243 /*===========================================================================*
1244  *			      restart_service				     *
1245  *===========================================================================*/
1246 void restart_service(struct rproc *rp)
1247 {
1248 /* Restart service via a recovery script or directly. */
1249   struct rproc *replica_rp;
1250   int r;
1251 
1252   /* See if a late reply has to be sent. */
1253   late_reply(rp, OK);
1254 
1255   /* Run a recovery script if available. */
1256   if (rp->r_script[0] != '\0') {
1257       r = run_script(rp);
1258       if(r != OK) {
1259           kill_service(rp, "unable to run script", errno);
1260       }
1261       return;
1262   }
1263 
1264   /* Restart directly. We need a replica if not already available. */
1265   if(rp->r_next_rp == NULL) {
1266       /* Create the replica. */
1267       r = clone_service(rp, RST_SYS_PROC, 0);
1268       if(r != OK) {
1269           kill_service(rp, "unable to clone service", r);
1270           return;
1271       }
1272   }
1273   replica_rp = rp->r_next_rp;
1274 
1275   /* Update the service into the replica. */
1276   r = update_service(&rp, &replica_rp, RS_SWAP, 0);
1277   if(r != OK) {
1278       kill_service(rp, "unable to update into new replica", r);
1279       return;
1280   }
1281 
1282   /* Let the new replica run. */
1283   r = run_service(replica_rp, SEF_INIT_RESTART, 0);
1284   if(r != OK) {
1285       kill_service(rp, "unable to let the replica run", r);
1286       return;
1287   }
1288 
1289   /* See if the old version needs to be detached. */
1290   if((rp->r_pub->sys_flags & SF_DET_RESTART)
1291       && (rp->r_restarts < MAX_DET_RESTART)) {
1292       rp->r_flags |= RS_CLEANUP_DETACH;
1293   }
1294 
1295   if(rs_verbose)
1296       printf("RS: %s restarted into %s\n",
1297           srv_to_string(rp), srv_to_string(replica_rp));
1298 }
1299 
1300 /*===========================================================================*
1301  *		         inherit_service_defaults			     *
1302  *===========================================================================*/
1303 void inherit_service_defaults(def_rp, rp)
1304 struct rproc *def_rp;
1305 struct rproc *rp;
1306 {
1307   struct rprocpub *def_rpub;
1308   struct rprocpub *rpub;
1309   int i;
1310 
1311   def_rpub = def_rp->r_pub;
1312   rpub = rp->r_pub;
1313 
1314   /* Device, domain, and PCI settings. These properties cannot change. */
1315   rpub->dev_nr = def_rpub->dev_nr;
1316   rpub->nr_domain = def_rpub->nr_domain;
1317   for (i = 0; i < def_rpub->nr_domain; i++)
1318 	rpub->domain[i] = def_rpub->domain[i];
1319   rpub->pci_acl = def_rpub->pci_acl;
1320 
1321   /* Immutable system and privilege flags. */
1322   rpub->sys_flags &= ~IMM_SF;
1323   rpub->sys_flags |= (def_rpub->sys_flags & IMM_SF);
1324   rp->r_priv.s_flags &= ~IMM_F;
1325   rp->r_priv.s_flags |= (def_rp->r_priv.s_flags & IMM_F);
1326 
1327   /* Allowed traps. They cannot change. */
1328   rp->r_priv.s_trap_mask = def_rp->r_priv.s_trap_mask;
1329 }
1330 
1331 /*===========================================================================*
1332  *		           get_service_instances			     *
1333  *===========================================================================*/
1334 void get_service_instances(rp, rps, length)
1335 struct rproc *rp;
1336 struct rproc ***rps;
1337 int *length;
1338 {
1339 /* Retrieve all the service instances of a given service. */
1340   static struct rproc *instances[5];
1341   int nr_instances;
1342 
1343   nr_instances = 0;
1344   instances[nr_instances++] = rp;
1345   if(rp->r_prev_rp) instances[nr_instances++] = rp->r_prev_rp;
1346   if(rp->r_next_rp) instances[nr_instances++] = rp->r_next_rp;
1347   if(rp->r_old_rp) instances[nr_instances++] = rp->r_old_rp;
1348   if(rp->r_new_rp) instances[nr_instances++] = rp->r_new_rp;
1349 
1350   *rps = instances;
1351   *length = nr_instances;
1352 }
1353 
1354 /*===========================================================================*
1355  *				share_exec				     *
1356  *===========================================================================*/
1357 void share_exec(rp_dst, rp_src)
1358 struct rproc *rp_dst, *rp_src;
1359 {
1360   if(rs_verbose)
1361       printf("RS: %s shares exec image with %s\n",
1362           srv_to_string(rp_dst), srv_to_string(rp_src));
1363 
1364   /* Share exec image from rp_src to rp_dst. */
1365   rp_dst->r_exec_len = rp_src->r_exec_len;
1366   rp_dst->r_exec = rp_src->r_exec;
1367 }
1368 
1369 /*===========================================================================*
1370  *				read_exec				     *
1371  *===========================================================================*/
1372 int read_exec(rp)
1373 struct rproc *rp;
1374 {
1375   int e, r, fd;
1376   char *e_name;
1377   struct stat sb;
1378 
1379   e_name= rp->r_argv[0];
1380   if(rs_verbose)
1381       printf("RS: service '%s' reads exec image from: %s\n", rp->r_pub->label,
1382           e_name);
1383 
1384   r= stat(e_name, &sb);
1385   if (r != 0)
1386       return -errno;
1387 
1388   if (sb.st_size < sizeof(Elf_Ehdr))
1389       return ENOEXEC;
1390 
1391   fd= open(e_name, O_RDONLY);
1392   if (fd == -1)
1393       return -errno;
1394 
1395   rp->r_exec_len= sb.st_size;
1396   rp->r_exec= malloc(rp->r_exec_len);
1397   if (rp->r_exec == NULL)
1398   {
1399       printf("RS: read_exec: unable to allocate %zu bytes\n",
1400           rp->r_exec_len);
1401       close(fd);
1402       return ENOMEM;
1403   }
1404 
1405   r= read(fd, rp->r_exec, rp->r_exec_len);
1406   e= errno;
1407   close(fd);
1408   if (r == rp->r_exec_len)
1409       return OK;
1410 
1411   printf("RS: read_exec: read failed %d, errno %d\n", r, e);
1412 
1413   free_exec(rp);
1414 
1415   if (r >= 0)
1416       return EIO;
1417   else
1418       return -e;
1419 }
1420 
1421 /*===========================================================================*
1422  *				free_exec				     *
1423  *===========================================================================*/
1424 void free_exec(rp)
1425 struct rproc *rp;
1426 {
1427 /* Free an exec image. */
1428   int slot_nr, has_shared_exec;
1429   struct rproc *other_rp;
1430 
1431   /* Search for some other slot sharing the same exec image. */
1432   has_shared_exec = FALSE;
1433   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1434       other_rp = &rproc[slot_nr];		/* get pointer to slot */
1435       if (other_rp->r_flags & RS_IN_USE && other_rp != rp
1436           && other_rp->r_exec == rp->r_exec) {  /* found! */
1437           has_shared_exec = TRUE;
1438           break;
1439       }
1440   }
1441 
1442   /* If nobody uses our copy of the exec image, we can try to get rid of it. */
1443   if(!has_shared_exec) {
1444       if(rs_verbose)
1445           printf("RS: %s frees exec image\n", srv_to_string(rp));
1446       free(rp->r_exec);
1447   }
1448   else {
1449       if(rs_verbose)
1450           printf("RS: %s no longer sharing exec image with %s\n",
1451               srv_to_string(rp), srv_to_string(other_rp));
1452   }
1453   rp->r_exec = NULL;
1454   rp->r_exec_len = 0;
1455 }
1456 
1457 /*===========================================================================*
1458  *				 edit_slot				     *
1459  *===========================================================================*/
1460 int edit_slot(rp, rs_start, source)
1461 struct rproc *rp;
1462 struct rs_start *rs_start;
1463 endpoint_t source;
1464 {
1465 /* Edit a given slot to override existing settings. */
1466   struct rprocpub *rpub;
1467   char *label;
1468   int len;
1469   int s, i;
1470   int basic_kc[] =  { SYS_BASIC_CALLS, NULL_C };
1471   int basic_vmc[] =  { VM_BASIC_CALLS, NULL_C };
1472 
1473   rpub = rp->r_pub;
1474 
1475   /* Update IPC target list. */
1476   if (rs_start->rss_ipclen==0 || rs_start->rss_ipclen+1>sizeof(rp->r_ipc_list)){
1477       printf("RS: edit_slot: ipc list empty or long for '%s'\n", rpub->label);
1478       return EINVAL;
1479   }
1480   s=sys_datacopy(source, (vir_bytes) rs_start->rss_ipc,
1481       SELF, (vir_bytes) rp->r_ipc_list, rs_start->rss_ipclen);
1482   if (s != OK) return(s);
1483   rp->r_ipc_list[rs_start->rss_ipclen]= '\0';
1484 
1485   /* Update IRQs. */
1486   if(rs_start->rss_nr_irq == RSS_IRQ_ALL) {
1487       rs_start->rss_nr_irq = 0;
1488   }
1489   else {
1490       rp->r_priv.s_flags |= CHECK_IRQ;
1491   }
1492   if (rs_start->rss_nr_irq > NR_IRQ) {
1493       printf("RS: edit_slot: too many IRQs requested\n");
1494       return EINVAL;
1495   }
1496   rp->r_nr_irq= rp->r_priv.s_nr_irq= rs_start->rss_nr_irq;
1497   for (i= 0; i<rp->r_priv.s_nr_irq; i++) {
1498       rp->r_irq_tab[i]= rp->r_priv.s_irq_tab[i]= rs_start->rss_irq[i];
1499       if(rs_verbose)
1500           printf("RS: edit_slot: IRQ %d\n", rp->r_priv.s_irq_tab[i]);
1501   }
1502 
1503   /* Update I/O ranges. */
1504   if(rs_start->rss_nr_io == RSS_IO_ALL) {
1505       rs_start->rss_nr_io = 0;
1506   }
1507   else {
1508       rp->r_priv.s_flags |= CHECK_IO_PORT;
1509   }
1510   if (rs_start->rss_nr_io > NR_IO_RANGE) {
1511       printf("RS: edit_slot: too many I/O ranges requested\n");
1512       return EINVAL;
1513   }
1514   rp->r_nr_io_range= rp->r_priv.s_nr_io_range= rs_start->rss_nr_io;
1515   for (i= 0; i<rp->r_priv.s_nr_io_range; i++) {
1516       rp->r_priv.s_io_tab[i].ior_base= rs_start->rss_io[i].base;
1517       rp->r_priv.s_io_tab[i].ior_limit=
1518           rs_start->rss_io[i].base+rs_start->rss_io[i].len-1;
1519       rp->r_io_tab[i] = rp->r_priv.s_io_tab[i];
1520       if(rs_verbose)
1521           printf("RS: edit_slot: I/O [%x..%x]\n",
1522               rp->r_priv.s_io_tab[i].ior_base,
1523               rp->r_priv.s_io_tab[i].ior_limit);
1524   }
1525 
1526   /* Update kernel call mask. Inherit basic kernel calls when asked to. */
1527   memcpy(rp->r_priv.s_k_call_mask, rs_start->rss_system,
1528       sizeof(rp->r_priv.s_k_call_mask));
1529   if(rs_start->rss_flags & RSS_SYS_BASIC_CALLS) {
1530       fill_call_mask(basic_kc, NR_SYS_CALLS,
1531           rp->r_priv.s_k_call_mask, KERNEL_CALL, FALSE);
1532   }
1533 
1534   /* Update VM call mask. Inherit basic VM calls. */
1535   memcpy(rpub->vm_call_mask, rs_start->rss_vm,
1536       sizeof(rpub->vm_call_mask));
1537   if(rs_start->rss_flags & RSS_VM_BASIC_CALLS) {
1538       fill_call_mask(basic_vmc, NR_VM_CALLS,
1539           rpub->vm_call_mask, VM_RQ_BASE, FALSE);
1540   }
1541 
1542   /* Update control labels. */
1543   if(rs_start->rss_nr_control > 0) {
1544       int i, s;
1545       if (rs_start->rss_nr_control > RS_NR_CONTROL) {
1546           printf("RS: edit_slot: too many control labels\n");
1547           return EINVAL;
1548       }
1549       for (i=0; i<rs_start->rss_nr_control; i++) {
1550           s = copy_label(source, rs_start->rss_control[i].l_addr,
1551               rs_start->rss_control[i].l_len, rp->r_control[i],
1552               sizeof(rp->r_control[i]));
1553           if(s != OK)
1554               return s;
1555       }
1556       rp->r_nr_control = rs_start->rss_nr_control;
1557 
1558       if (rs_verbose) {
1559           printf("RS: edit_slot: control labels:");
1560           for (i=0; i<rp->r_nr_control; i++)
1561               printf(" %s", rp->r_control[i]);
1562           printf("\n");
1563       }
1564   }
1565 
1566   /* Update signal manager. */
1567   rp->r_priv.s_sig_mgr = rs_start->rss_sigmgr;
1568 
1569   /* Update scheduling properties if possible. */
1570   if(rp->r_scheduler != NONE) {
1571       rp->r_scheduler = rs_start->rss_scheduler;
1572       rp->r_priority = rs_start->rss_priority;
1573       rp->r_quantum = rs_start->rss_quantum;
1574       rp->r_cpu = rs_start->rss_cpu;
1575   }
1576 
1577   /* Update command and arguments. */
1578   if (rs_start->rss_cmdlen > MAX_COMMAND_LEN-1) return(E2BIG);
1579   s=sys_datacopy(source, (vir_bytes) rs_start->rss_cmd,
1580       SELF, (vir_bytes) rp->r_cmd, rs_start->rss_cmdlen);
1581   if (s != OK) return(s);
1582   rp->r_cmd[rs_start->rss_cmdlen] = '\0';	/* ensure it is terminated */
1583   if (rp->r_cmd[0] != '/') return(EINVAL);	/* insist on absolute path */
1584 
1585   /* Build cmd dependencies (argv). */
1586   build_cmd_dep(rp);
1587 
1588   /* Copy in the program name. */
1589   if (rs_start->rss_prognamelen > sizeof(rpub->proc_name)-1) return(E2BIG);
1590   s=sys_datacopy(source, (vir_bytes) rs_start->rss_progname,
1591       SELF, (vir_bytes) rpub->proc_name, rs_start->rss_prognamelen);
1592   if (s != OK) return(s);
1593   rpub->proc_name[rs_start->rss_prognamelen] = '\0';
1594 
1595   /* Update label if not already set. */
1596   if(!strcmp(rpub->label, "")) {
1597       if(rs_start->rss_label.l_len > 0) {
1598           /* RS_UP caller has supplied a custom label for this service. */
1599           int s = copy_label(source, rs_start->rss_label.l_addr,
1600               rs_start->rss_label.l_len, rpub->label, sizeof(rpub->label));
1601           if(s != OK)
1602               return s;
1603           if(rs_verbose)
1604               printf("RS: edit_slot: using label (custom) '%s'\n", rpub->label);
1605       } else {
1606           /* Default label for the service. */
1607           label = rpub->proc_name;
1608           len= strlen(label);
1609           memcpy(rpub->label, label, len);
1610           rpub->label[len]= '\0';
1611           if(rs_verbose)
1612               printf("RS: edit_slot: using label (from proc_name) '%s'\n",
1613                   rpub->label);
1614       }
1615   }
1616 
1617   /* Update recovery script. */
1618   if (rs_start->rss_scriptlen > MAX_SCRIPT_LEN-1) return(E2BIG);
1619   if (rs_start->rss_script != NULL && rs_start->rss_scriptlen > 0
1620       && !(rpub->sys_flags & SF_CORE_SRV)) {
1621       s=sys_datacopy(source, (vir_bytes) rs_start->rss_script,
1622           SELF, (vir_bytes) rp->r_script, rs_start->rss_scriptlen);
1623       if (s != OK) return(s);
1624       rp->r_script[rs_start->rss_scriptlen] = '\0';
1625       rpub->sys_flags |= SF_USE_SCRIPT;
1626   }
1627 
1628   /* Update system flags and in-memory copy. */
1629   if ((rs_start->rss_flags & RSS_COPY) && !(rpub->sys_flags & SF_USE_COPY)) {
1630       int exst_cpy;
1631       struct rproc *rp2;
1632       struct rprocpub *rpub2;
1633       exst_cpy = 0;
1634 
1635       if(rs_start->rss_flags & RSS_REUSE) {
1636           int i;
1637 
1638           for(i = 0; i < NR_SYS_PROCS; i++) {
1639               rp2 = &rproc[i];
1640               rpub2 = rproc[i].r_pub;
1641               if(strcmp(rpub->proc_name, rpub2->proc_name) == 0 &&
1642                   (rpub2->sys_flags & SF_USE_COPY)) {
1643                   /* We have found the same binary that's
1644                    * already been copied */
1645                   exst_cpy = 1;
1646                   break;
1647               }
1648           }
1649       }
1650 
1651       s = OK;
1652       if(!exst_cpy)
1653           s = read_exec(rp);
1654       else
1655           share_exec(rp, rp2);
1656 
1657       if (s != OK)
1658           return s;
1659 
1660       rpub->sys_flags |= SF_USE_COPY;
1661   }
1662   if (rs_start->rss_flags & RSS_REPLICA) {
1663       rpub->sys_flags |= SF_USE_REPL;
1664   }
1665   if (rs_start->rss_flags & RSS_NO_BIN_EXP) {
1666       rpub->sys_flags |= SF_NO_BIN_EXP;
1667   }
1668   if (rs_start->rss_flags & RSS_DETACH) {
1669       rpub->sys_flags |= SF_DET_RESTART;
1670   }
1671   else {
1672       rpub->sys_flags &= ~SF_DET_RESTART;
1673   }
1674   if (rs_start->rss_flags & RSS_NORESTART) {
1675       if(rpub->sys_flags & SF_CORE_SRV) {
1676           return EPERM;
1677       }
1678       rpub->sys_flags |= SF_NORESTART;
1679   }
1680   else {
1681       rpub->sys_flags &= ~SF_NORESTART;
1682   }
1683 
1684   /* Update period. */
1685   if(rpub->endpoint != RS_PROC_NR) {
1686       rp->r_period = rs_start->rss_period;
1687   }
1688 
1689   /* Update restarts. */
1690   if(rs_start->rss_restarts) {
1691       rp->r_restarts = rs_start->rss_restarts;
1692   }
1693 
1694   /* Update number of ASR live updates. */
1695   if(rs_start->rss_asr_count >= 0) {
1696       rp->r_asr_count = rs_start->rss_asr_count;
1697   }
1698 
1699   /* (Re)initialize privilege settings. */
1700   init_privs(rp, &rp->r_priv);
1701 
1702   return OK;
1703 }
1704 
1705 /*===========================================================================*
1706  *				 init_slot				     *
1707  *===========================================================================*/
1708 int init_slot(rp, rs_start, source)
1709 struct rproc *rp;
1710 struct rs_start *rs_start;
1711 endpoint_t source;
1712 {
1713 /* Initialize a slot as requested by the client. */
1714   struct rprocpub *rpub;
1715   int i;
1716 
1717   rpub = rp->r_pub;
1718 
1719   /* All dynamically created services get the same sys and privilege flags, and
1720    * allowed traps. Other privilege settings can be specified at runtime. The
1721    * privilege id is dynamically allocated by the kernel.
1722    */
1723   rpub->sys_flags = DSRV_SF;             /* system flags */
1724   rp->r_priv.s_flags = DSRV_F;           /* privilege flags */
1725   rp->r_priv.s_init_flags = DSRV_I;      /* init flags */
1726   rp->r_priv.s_trap_mask = DSRV_T;       /* allowed traps */
1727   rp->r_priv.s_bak_sig_mgr = NONE;       /* backup signal manager */
1728 
1729   /* Initialize uid. */
1730   rp->r_uid= rs_start->rss_uid;
1731 
1732   /* Initialize device driver settings. */
1733   if (rs_start->rss_nr_domain < 0 || rs_start->rss_nr_domain > NR_DOMAIN) {
1734       printf("RS: init_slot: too many domains\n");
1735       return EINVAL;
1736   }
1737 
1738   rpub->dev_nr = rs_start->rss_major;
1739   rpub->nr_domain = rs_start->rss_nr_domain;
1740   for (i = 0; i < rs_start->rss_nr_domain; i++)
1741 	rpub->domain[i] = rs_start->rss_domain[i];
1742   rpub->devman_id = rs_start->devman_id;
1743 
1744   /* Initialize pci settings. */
1745   if (rs_start->rss_nr_pci_id > RS_NR_PCI_DEVICE) {
1746       printf("RS: init_slot: too many PCI device IDs\n");
1747       return EINVAL;
1748   }
1749   rpub->pci_acl.rsp_nr_device = rs_start->rss_nr_pci_id;
1750   for (i= 0; i<rpub->pci_acl.rsp_nr_device; i++) {
1751       rpub->pci_acl.rsp_device[i].vid= rs_start->rss_pci_id[i].vid;
1752       rpub->pci_acl.rsp_device[i].did= rs_start->rss_pci_id[i].did;
1753       rpub->pci_acl.rsp_device[i].sub_vid= rs_start->rss_pci_id[i].sub_vid;
1754       rpub->pci_acl.rsp_device[i].sub_did= rs_start->rss_pci_id[i].sub_did;
1755       if(rs_verbose)
1756           printf("RS: init_slot: PCI %04x/%04x (sub %04x:%04x)\n",
1757               rpub->pci_acl.rsp_device[i].vid,
1758               rpub->pci_acl.rsp_device[i].did,
1759               rpub->pci_acl.rsp_device[i].sub_vid,
1760               rpub->pci_acl.rsp_device[i].sub_did);
1761   }
1762   if (rs_start->rss_nr_pci_class > RS_NR_PCI_CLASS) {
1763       printf("RS: init_slot: too many PCI class IDs\n");
1764       return EINVAL;
1765   }
1766   rpub->pci_acl.rsp_nr_class= rs_start->rss_nr_pci_class;
1767   for (i= 0; i<rpub->pci_acl.rsp_nr_class; i++) {
1768       rpub->pci_acl.rsp_class[i].pciclass=rs_start->rss_pci_class[i].pciclass;
1769       rpub->pci_acl.rsp_class[i].mask= rs_start->rss_pci_class[i].mask;
1770       if(rs_verbose)
1771           printf("RS: init_slot: PCI class %06x mask %06x\n",
1772               (unsigned int) rpub->pci_acl.rsp_class[i].pciclass,
1773               (unsigned int) rpub->pci_acl.rsp_class[i].mask);
1774   }
1775 
1776   /* Initialize some fields. */
1777   rp->r_asr_count = 0;				/* no ASR updates yet */
1778   rp->r_restarts = 0; 				/* no restarts yet */
1779   rp->r_old_rp = NULL;			        /* no old version yet */
1780   rp->r_new_rp = NULL;			        /* no new version yet */
1781   rp->r_prev_rp = NULL;			        /* no prev replica yet */
1782   rp->r_next_rp = NULL;			        /* no next replica yet */
1783   rp->r_exec = NULL;                            /* no in-memory copy yet */
1784   rp->r_exec_len = 0;
1785   rp->r_script[0]= '\0';                        /* no recovery script yet */
1786   rpub->label[0]= '\0';                         /* no label yet */
1787   rp->r_scheduler = -1;                         /* no scheduler yet */
1788   rp->r_priv.s_sig_mgr = -1;                    /* no signal manager yet */
1789   rp->r_map_prealloc_addr = 0;                  /* no preallocated memory */
1790   rp->r_map_prealloc_len = 0;
1791   rp->r_init_err = ERESTART;                    /* default init error */
1792 
1793   /* Initialize editable slot settings. */
1794   return edit_slot(rp, rs_start, source);
1795 }
1796 
1797 /*===========================================================================*
1798  *				clone_slot				     *
1799  *===========================================================================*/
1800 int clone_slot(rp, clone_rpp)
1801 struct rproc *rp;
1802 struct rproc **clone_rpp;
1803 {
1804   int r;
1805   struct rproc *clone_rp;
1806   struct rprocpub *rpub, *clone_rpub;
1807 
1808   /* Allocate a system service slot for the clone. */
1809   r = alloc_slot(&clone_rp);
1810   if(r != OK) {
1811       printf("RS: clone_slot: unable to allocate a new slot: %d\n", r);
1812       return r;
1813   }
1814 
1815   rpub = rp->r_pub;
1816   clone_rpub = clone_rp->r_pub;
1817 
1818   /* Synch the privilege structure of the source with the kernel. */
1819   if ((r = sys_getpriv(&(rp->r_priv), rpub->endpoint)) != OK) {
1820       panic("unable to synch privilege structure: %d", r);
1821   }
1822 
1823   /* Shallow copy. */
1824   *clone_rp = *rp;
1825   *clone_rpub = *rpub;
1826 
1827   /* Deep copy. */
1828   clone_rp->r_init_err = ERESTART; /* default init error */
1829   clone_rp->r_flags &= ~RS_ACTIVE; /* the clone is not active yet */
1830   clone_rp->r_pid = -1;            /* no pid yet */
1831   clone_rpub->endpoint = -1;       /* no endpoint yet */
1832   clone_rp->r_pub = clone_rpub;    /* restore pointer to public entry */
1833   build_cmd_dep(clone_rp);         /* rebuild cmd dependencies */
1834   if(clone_rpub->sys_flags & SF_USE_COPY) {
1835       share_exec(clone_rp, rp);        /* share exec image */
1836   }
1837   clone_rp->r_old_rp = NULL;	   /* no old version yet */
1838   clone_rp->r_new_rp = NULL;	   /* no new version yet */
1839   clone_rp->r_prev_rp = NULL;	   /* no prev replica yet */
1840   clone_rp->r_next_rp = NULL;	   /* no next replica yet */
1841 
1842   /* Force dynamic privilege id. */
1843   clone_rp->r_priv.s_flags |= DYN_PRIV_ID;
1844 
1845   /* Clear instance flags. */
1846   clone_rp->r_priv.s_flags &= ~(LU_SYS_PROC | RST_SYS_PROC);
1847   clone_rp->r_priv.s_init_flags = 0;
1848 
1849   *clone_rpp = clone_rp;
1850   return OK;
1851 }
1852 
1853 /*===========================================================================*
1854  *			    swap_slot_pointer				     *
1855  *===========================================================================*/
static void swap_slot_pointer(struct rproc **rpp, struct rproc *src_rp,
    struct rproc *dst_rp)
{
/* If *rpp refers to one of the two given slots, redirect it to the other
 * one. Any other value stored in *rpp is left untouched.
 */
  struct rproc *current = *rpp;

  if (current == src_rp) {
      *rpp = dst_rp;
      return;
  }

  if (current == dst_rp) {
      *rpp = src_rp;
  }
}
1866 
1867 /*===========================================================================*
1868  *				swap_slot				     *
1869  *===========================================================================*/
1870 void swap_slot(src_rpp, dst_rpp)
1871 struct rproc **src_rpp;
1872 struct rproc **dst_rpp;
1873 {
1874 /* Swap two service slots. */
1875   struct rproc *src_rp, *dst_rp;
1876   struct rprocpub *src_rpub, *dst_rpub;
1877   struct rproc orig_src_rproc, orig_dst_rproc;
1878   struct rprocpub orig_src_rprocpub, orig_dst_rprocpub;
1879   struct rprocupd *prev_rpupd, *rpupd;
1880 
1881   src_rp = *src_rpp;
1882   dst_rp = *dst_rpp;
1883   src_rpub = src_rp->r_pub;
1884   dst_rpub = dst_rp->r_pub;
1885 
1886   /* Save existing data first. */
1887   orig_src_rproc = *src_rp;
1888   orig_src_rprocpub = *src_rpub;
1889   orig_dst_rproc = *dst_rp;
1890   orig_dst_rprocpub = *dst_rpub;
1891 
1892   /* Swap slots. */
1893   *src_rp = orig_dst_rproc;
1894   *src_rpub = orig_dst_rprocpub;
1895   *dst_rp = orig_src_rproc;
1896   *dst_rpub = orig_src_rprocpub;
1897 
1898   /* Restore public entries and update descriptors. */
1899   src_rp->r_pub = orig_src_rproc.r_pub;
1900   dst_rp->r_pub = orig_dst_rproc.r_pub;
1901   src_rp->r_upd = orig_src_rproc.r_upd;
1902   dst_rp->r_upd = orig_dst_rproc.r_upd;
1903 
1904   /* Rebuild command dependencies. */
1905   build_cmd_dep(src_rp);
1906   build_cmd_dep(dst_rp);
1907 
1908   /* Swap local slot pointers. */
1909   swap_slot_pointer(&src_rp->r_prev_rp, src_rp, dst_rp);
1910   swap_slot_pointer(&src_rp->r_next_rp, src_rp, dst_rp);
1911   swap_slot_pointer(&src_rp->r_old_rp, src_rp, dst_rp);
1912   swap_slot_pointer(&src_rp->r_new_rp, src_rp, dst_rp);
1913   swap_slot_pointer(&dst_rp->r_prev_rp, src_rp, dst_rp);
1914   swap_slot_pointer(&dst_rp->r_next_rp, src_rp, dst_rp);
1915   swap_slot_pointer(&dst_rp->r_old_rp, src_rp, dst_rp);
1916   swap_slot_pointer(&dst_rp->r_new_rp, src_rp, dst_rp);
1917 
1918   /* Swap global slot pointers. */
1919   RUPDATE_ITER(rupdate.first_rpupd, prev_rpupd, rpupd,
1920       swap_slot_pointer(&rpupd->rp, src_rp, dst_rp);
1921   );
1922   swap_slot_pointer(&rproc_ptr[_ENDPOINT_P(src_rp->r_pub->endpoint)],
1923       src_rp, dst_rp);
1924   swap_slot_pointer(&rproc_ptr[_ENDPOINT_P(dst_rp->r_pub->endpoint)],
1925       src_rp, dst_rp);
1926 
1927   /* Adjust input pointers. */
1928   *src_rpp = dst_rp;
1929   *dst_rpp = src_rp;
1930 }
1931 
1932 /*===========================================================================*
1933  *			   lookup_slot_by_label				     *
1934  *===========================================================================*/
1935 struct rproc* lookup_slot_by_label(char *label)
1936 {
1937 /* Lookup a service slot matching the given label. */
1938   int slot_nr;
1939   struct rproc *rp;
1940   struct rprocpub *rpub;
1941 
1942   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1943       rp = &rproc[slot_nr];
1944       if (!(rp->r_flags & RS_ACTIVE)) {
1945           continue;
1946       }
1947       rpub = rp->r_pub;
1948       if (strcmp(rpub->label, label) == 0) {
1949           return rp;
1950       }
1951   }
1952 
1953   return NULL;
1954 }
1955 
1956 /*===========================================================================*
1957  *			   lookup_slot_by_pid				     *
1958  *===========================================================================*/
1959 struct rproc* lookup_slot_by_pid(pid_t pid)
1960 {
1961 /* Lookup a service slot matching the given pid. */
1962   int slot_nr;
1963   struct rproc *rp;
1964 
1965   if(pid < 0) {
1966       return NULL;
1967   }
1968 
1969   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1970       rp = &rproc[slot_nr];
1971       if (!(rp->r_flags & RS_IN_USE)) {
1972           continue;
1973       }
1974       if (rp->r_pid == pid) {
1975           return rp;
1976       }
1977   }
1978 
1979   return NULL;
1980 }
1981 
1982 /*===========================================================================*
1983  *			   lookup_slot_by_dev_nr			     *
1984  *===========================================================================*/
1985 struct rproc* lookup_slot_by_dev_nr(dev_t dev_nr)
1986 {
1987 /* Lookup a service slot matching the given device number. */
1988   int slot_nr;
1989   struct rproc *rp;
1990   struct rprocpub *rpub;
1991 
1992   if(dev_nr <= 0) {
1993       return NULL;
1994   }
1995 
1996   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
1997       rp = &rproc[slot_nr];
1998       rpub = rp->r_pub;
1999       if (!(rp->r_flags & RS_IN_USE)) {
2000           continue;
2001       }
2002       if (rpub->dev_nr == dev_nr) {
2003           return rp;
2004       }
2005   }
2006 
2007   return NULL;
2008 }
2009 
2010 /*===========================================================================*
2011  *			   lookup_slot_by_domain			     *
2012  *===========================================================================*/
2013 struct rproc* lookup_slot_by_domain(int domain)
2014 {
2015 /* Lookup a service slot matching the given protocol family. */
2016   int i, slot_nr;
2017   struct rproc *rp;
2018   struct rprocpub *rpub;
2019 
2020   if (domain <= 0) {
2021       return NULL;
2022   }
2023 
2024   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
2025       rp = &rproc[slot_nr];
2026       rpub = rp->r_pub;
2027       if (!(rp->r_flags & RS_IN_USE)) {
2028           continue;
2029       }
2030       for (i = 0; i < rpub->nr_domain; i++)
2031 	  if (rpub->domain[i] == domain)
2032 	      return rp;
2033   }
2034 
2035   return NULL;
2036 }
2037 
2038 /*===========================================================================*
2039  *			   lookup_slot_by_flags				     *
2040  *===========================================================================*/
2041 struct rproc* lookup_slot_by_flags(int flags)
2042 {
2043 /* Lookup a service slot matching the given flags. */
2044   int slot_nr;
2045   struct rproc *rp;
2046 
2047   if(!flags) {
2048       return NULL;
2049   }
2050 
2051   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
2052       rp = &rproc[slot_nr];
2053       if (!(rp->r_flags & RS_IN_USE)) {
2054           continue;
2055       }
2056       if (rp->r_flags & flags) {
2057           return rp;
2058       }
2059   }
2060 
2061   return NULL;
2062 }
2063 
2064 /*===========================================================================*
2065  *				alloc_slot				     *
2066  *===========================================================================*/
2067 int alloc_slot(rpp)
2068 struct rproc **rpp;
2069 {
2070 /* Alloc a new system service slot. */
2071   int slot_nr;
2072 
2073   for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) {
2074       *rpp = &rproc[slot_nr];			/* get pointer to slot */
2075       if (!((*rpp)->r_flags & RS_IN_USE)) 	/* check if available */
2076 	  break;
2077   }
2078   if (slot_nr >= NR_SYS_PROCS) {
2079 	return ENOMEM;
2080   }
2081 
2082   return OK;
2083 }
2084 
2085 /*===========================================================================*
2086  *				free_slot				     *
2087  *===========================================================================*/
2088 void free_slot(rp)
2089 struct rproc *rp;
2090 {
2091 /* Free a system service slot. */
2092   struct rprocpub *rpub;
2093 
2094   rpub = rp->r_pub;
2095 
2096   /* Send a late reply if there is any pending. */
2097   late_reply(rp, OK);
2098 
2099   /* Free memory if necessary. */
2100   if(rpub->sys_flags & SF_USE_COPY) {
2101       free_exec(rp);
2102   }
2103 
2104   /* Mark slot as no longer in use.. */
2105   rp->r_flags = 0;
2106   rp->r_pid = -1;
2107   rpub->in_use = FALSE;
2108   rproc_ptr[_ENDPOINT_P(rpub->endpoint)] = NULL;
2109 }
2110 
2111 
2112 /*===========================================================================*
2113  *				get_next_name				     *
2114  *===========================================================================*/
2115 static char *get_next_name(ptr, name, caller_label)
2116 char *ptr;
2117 char *name;
2118 char *caller_label;
2119 {
2120 	/* Get the next name from the list of (IPC) program names.
2121 	 */
2122 	char *p, *q;
2123 	size_t len;
2124 
2125 	for (p= ptr; p[0] != '\0'; p= q)
2126 	{
2127 		/* Skip leading space */
2128 		while (p[0] != '\0' && isspace((unsigned char)p[0]))
2129 			p++;
2130 
2131 		/* Find start of next word */
2132 		q= p;
2133 		while (q[0] != '\0' && !isspace((unsigned char)q[0]))
2134 			q++;
2135 		if (q == p)
2136 			continue;
2137 		len= q-p;
2138 		if (len > RS_MAX_LABEL_LEN)
2139 		{
2140 			printf(
2141 	"rs:get_next_name: bad ipc list entry '%.*s' for %s: too long\n",
2142 				(int) len, p, caller_label);
2143 			continue;
2144 		}
2145 		memcpy(name, p, len);
2146 		name[len]= '\0';
2147 
2148 		return q; /* found another */
2149 	}
2150 
2151 	return NULL; /* done */
2152 }
2153 
2154 /*===========================================================================*
2155  *				add_forward_ipc				     *
2156  *===========================================================================*/
2157 void add_forward_ipc(rp, privp)
2158 struct rproc *rp;
2159 struct priv *privp;
2160 {
2161 	/* Add IPC send permissions to a process based on that process's IPC
2162 	 * list.
2163 	 */
2164 	char name[RS_MAX_LABEL_LEN+1], *p;
2165 	struct rproc *rrp;
2166 	endpoint_t endpoint;
2167 	int r;
2168 	int priv_id;
2169 	struct priv priv;
2170 	struct rprocpub *rpub;
2171 
2172 	rpub = rp->r_pub;
2173 	p = rp->r_ipc_list;
2174 
2175 	while ((p = get_next_name(p, name, rpub->label)) != NULL) {
2176 
2177 		if (strcmp(name, "SYSTEM") == 0)
2178 			endpoint= SYSTEM;
2179 		else if (strcmp(name, "USER") == 0)
2180 			endpoint= INIT_PROC_NR; /* all user procs */
2181 		else
2182 		{
2183 			/* Set a privilege bit for every process matching the
2184 			 * given process name. It is perfectly fine if this
2185 			 * loop does not find any matches, as the target
2186 			 * process(es) may not have been started yet. See
2187 			 * add_backward_ipc() below.
2188 			 */
2189 			for (rrp=BEG_RPROC_ADDR; rrp<END_RPROC_ADDR; rrp++) {
2190 				if (!(rrp->r_flags & RS_IN_USE))
2191 					continue;
2192 
2193 				if (!strcmp(rrp->r_pub->proc_name, name)) {
2194 #if PRIV_DEBUG
2195 					printf("  RS: add_forward_ipc: setting"
2196 						" sendto bit for %d...\n",
2197 						rrp->r_pub->endpoint);
2198 #endif
2199 
2200 					priv_id= rrp->r_priv.s_id;
2201 					set_sys_bit(privp->s_ipc_to, priv_id);
2202 				}
2203 			}
2204 
2205 			continue;
2206 		}
2207 
2208 		/* This code only applies to the exception cases. */
2209 		if ((r = sys_getpriv(&priv, endpoint)) < 0)
2210 		{
2211 			printf(
2212 		"add_forward_ipc: unable to get priv_id for '%s': %d\n",
2213 				name, r);
2214 			continue;
2215 		}
2216 
2217 #if PRIV_DEBUG
2218 		printf("  RS: add_forward_ipc: setting sendto bit for %d...\n",
2219 			endpoint);
2220 #endif
2221 		priv_id= priv.s_id;
2222 		set_sys_bit(privp->s_ipc_to, priv_id);
2223 	}
2224 }
2225 
2226 
2227 /*===========================================================================*
2228  *				add_backward_ipc			     *
2229  *===========================================================================*/
2230 void add_backward_ipc(rp, privp)
2231 struct rproc *rp;
2232 struct priv *privp;
2233 {
2234 	/* Add IPC send permissions to a process based on other processes' IPC
2235 	 * lists. This is enough to allow each such two processes to talk to
2236 	 * each other, as the kernel guarantees send mask symmetry. We need to
2237 	 * add these permissions now because the current process may not yet
2238 	 * have existed at the time that the other process was initialized.
2239 	 */
2240 	char name[RS_MAX_LABEL_LEN+1], *p;
2241 	struct rproc *rrp;
2242 	struct rprocpub *rrpub;
2243 	char *proc_name;
2244 	int priv_id, is_ipc_all, is_ipc_all_sys;
2245 
2246 	proc_name = rp->r_pub->proc_name;
2247 
2248 	for (rrp=BEG_RPROC_ADDR; rrp<END_RPROC_ADDR; rrp++) {
2249 		if (!(rrp->r_flags & RS_IN_USE))
2250 			continue;
2251 
2252 		if (!rrp->r_ipc_list[0])
2253 			continue;
2254 
2255 		/* If the process being checked is set to allow IPC to all
2256 		 * other processes, or for all other system processes and the
2257 		 * target process is a system process, add a permission bit.
2258 		 */
2259 		rrpub = rrp->r_pub;
2260 
2261 		is_ipc_all = !strcmp(rrp->r_ipc_list, RSS_IPC_ALL);
2262 		is_ipc_all_sys = !strcmp(rrp->r_ipc_list, RSS_IPC_ALL_SYS);
2263 
2264 		if (is_ipc_all ||
2265 			(is_ipc_all_sys && (privp->s_flags & SYS_PROC))) {
2266 #if PRIV_DEBUG
2267 			printf("  RS: add_backward_ipc: setting sendto bit "
2268 				"for %d...\n", rrpub->endpoint);
2269 #endif
2270 			priv_id= rrp->r_priv.s_id;
2271 			set_sys_bit(privp->s_ipc_to, priv_id);
2272 
2273 			continue;
2274 		}
2275 
2276 		/* An IPC target list was provided for the process being
2277 		 * checked here. Make sure that the name of the new process
2278 		 * is in that process's list. There may be multiple matches.
2279 		 */
2280 		p = rrp->r_ipc_list;
2281 
2282 		while ((p = get_next_name(p, name, rrpub->label)) != NULL) {
2283 			if (!strcmp(proc_name, name)) {
2284 #if PRIV_DEBUG
2285 				printf("  RS: add_backward_ipc: setting sendto"
2286 					" bit for %d...\n",
2287 					rrpub->endpoint);
2288 #endif
2289 				priv_id= rrp->r_priv.s_id;
2290 				set_sys_bit(privp->s_ipc_to, priv_id);
2291 			}
2292 		}
2293 	}
2294 }
2295 
2296 
2297 /*===========================================================================*
2298  *				init_privs				     *
2299  *===========================================================================*/
2300 void init_privs(rp, privp)
2301 struct rproc *rp;
2302 struct priv *privp;
2303 {
2304 	int i;
2305 	int is_ipc_all, is_ipc_all_sys;
2306 
2307 	/* Clear s_ipc_to */
2308 	fill_send_mask(&privp->s_ipc_to, FALSE);
2309 
2310 	is_ipc_all = !strcmp(rp->r_ipc_list, RSS_IPC_ALL);
2311 	is_ipc_all_sys = !strcmp(rp->r_ipc_list, RSS_IPC_ALL_SYS);
2312 
2313 #if PRIV_DEBUG
2314 	printf("  RS: init_privs: ipc list is '%s'...\n", rp->r_ipc_list);
2315 #endif
2316 
2317 	if (!is_ipc_all && !is_ipc_all_sys)
2318 	{
2319 		add_forward_ipc(rp, privp);
2320 		add_backward_ipc(rp, privp);
2321 
2322 	}
2323 	else
2324 	{
2325 		for (i= 0; i<NR_SYS_PROCS; i++)
2326 		{
2327 			if (is_ipc_all || i != USER_PRIV_ID)
2328 				set_sys_bit(privp->s_ipc_to, i);
2329 		}
2330 	}
2331 }
2332 
2333