1 /*
2 * (c) Copyright 1992 by Panagiotis Tsirigotis
3 * (c) Sections Copyright 1998-2001 by Rob Braun
4 * All rights reserved. The file named COPYRIGHT specifies the terms
5 * and conditions for redistribution.
6 */
7
8
9 #include "config.h"
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include <signal.h>
13 #include <time.h>
14 #include <fcntl.h>
15 #include <syslog.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 #include <sys/time.h>
19
20 #include "sio.h"
21 #include "internals.h"
22 #include "msg.h"
23 #include "sconf.h"
24 #include "state.h"
25 #include "main.h"
26 #include "xconfig.h"
27 #include "xtimer.h"
28 #include "options.h"
29
/*
 * Forward declarations of the file-local helpers that are referenced
 * before their definitions.  The parameter lists are spelled exactly as
 * in the definitions further down (no `register' storage class, which
 * has no effect in a prototype and is absent from the definitions).
 */
static unsigned thread_check( struct service *sp,
                              unsigned running_servers,
                              unsigned retry_servers ) ;
static unsigned refcount_check( struct service *sp,
                                unsigned *running_servers,
                                unsigned *retry_servers ) ;
static unsigned service_count_check( struct service *sp,
                                     unsigned running_servers,
                                     unsigned retry_servers ) ;
static void periodic_check(void) ;
34
35
dump_services(int fd)36 static void dump_services( int fd )
37 {
38 unsigned u ;
39
40 /*
41 * Dump the current configuration (services + defaults)
42 */
43 Sprint( fd, "Services + defaults:\n" ) ;
44 sc_dump( DEFAULTS( ps ), fd, 0, TRUE ) ;
45
46 for ( u = 0 ; u < pset_count( SERVICES( ps ) ) ; u++ )
47 svc_dump( SP( pset_pointer( SERVICES( ps ), u ) ), fd ) ;
48 }
49
dump_internal_state(void)50 void dump_internal_state(void)
51 {
52 int dump_fd ;
53 const char *dump_file = DUMP_FILE ;
54 time_t current_time ;
55 int fd ;
56 unsigned u ;
57 const char *func = "dump_internal_state" ;
58
59 if ( debug.on )
60 msg( LOG_DEBUG, func, "Dumping State" ) ;
61
62 dump_fd = open( dump_file, O_WRONLY | O_CREAT | O_APPEND, DUMP_FILE_MODE);
63 if ( dump_fd == -1 )
64 {
65 msg( LOG_ERR, func, "failed to open %s: %m", dump_file ) ;
66 return ;
67 }
68
69 if (Sbuftype( dump_fd, SIO_LINEBUF ) == SIO_ERR )
70 {
71 /*
72 * If the above function failed, Sprint will most likely
73 * fail, too. Output a message for troubleshooting and quit.
74 */
75 msg( LOG_ERR, func,
76 "failed setting up sio buffering: %m fd:%d", dump_fd ) ;
77 Sclose(dump_fd);
78 return;
79 }
80
81 /*
82 * Print the program name, version, and timestamp.
83 * Note that the program_version variable contains the program name.
84 */
85 (void) time( ¤t_time ) ;
86 Sprint( dump_fd, "INTERNAL STATE DUMP: %s\n", program_version ) ;
87 Sprint( dump_fd, "Current time: %s\n", ctime( ¤t_time ) ) ;
88
89 dump_services( dump_fd ) ;
90
91 /*
92 * Dump the server table
93 */
94 Sprint( dump_fd, "Server table dump:\n" ) ;
95 for ( u = 0 ; u < pset_count( SERVERS( ps ) ) ; u++ )
96 server_dump( SERP( pset_pointer( SERVERS( ps ), u ) ), dump_fd ) ;
97 Sputchar( dump_fd, '\n' ) ;
98
99 /*
100 * Dump the retry_table
101 */
102 Sprint( dump_fd, "Retry table dump:\n" ) ;
103 for ( u = 0 ; u < pset_count( RETRIES( ps ) ) ; u++ )
104 server_dump( SERP( pset_pointer( RETRIES( ps ), u ) ), dump_fd ) ;
105 Sputchar( dump_fd, '\n' ) ;
106
107 /*
108 * Dump the socket mask
109 */
110 Sprint( dump_fd, "Socket mask:" ) ;
111 for ( fd = 0 ; (unsigned)fd < ps.ros.max_descriptors ; fd++ )
112 if ( FD_ISSET( fd, &ps.rws.socket_mask ) )
113 Sprint( dump_fd, " %d", fd ) ;
114 Sputchar( dump_fd, '\n' ) ;
115 Sprint( dump_fd, "mask_max = %d\n", ps.rws.mask_max ) ;
116
117 /*
118 * Dump the descriptors that are open and are *not* in the socket mask
119 */
120 Sprint( dump_fd, "Open descriptors (not in socket mask):" ) ;
121 for ( fd = 0 ; (unsigned)fd < ps.ros.max_descriptors ; fd++ )
122 {
123 struct stat st ;
124
125 if ( FD_ISSET( fd, &ps.rws.socket_mask ) )
126 continue ;
127 if ( fstat( fd, &st ) == -1 )
128 continue ;
129 Sprint( dump_fd, " %d", fd ) ;
130 }
131 Sputchar( dump_fd, '\n' ) ;
132 Sputchar( dump_fd, '\n' ) ;
133
134 Sprint( dump_fd, "active_services = %d\n", ps.rws.active_services ) ;
135 Sprint( dump_fd, "available_services = %d\n", ps.rws.available_services ) ;
136 Sprint( dump_fd, "descriptors_free = %d\n", ps.rws.descriptors_free ) ;
137 Sprint( dump_fd, "running_servers = %d\n", pset_count( SERVERS( ps ) ) ) ;
138 Sprint( dump_fd, "Logging service = %s\n",
139 LOG_SERVICE( ps ) != NULL ? "enabled" : "not enabled" ) ;
140 Sputchar( dump_fd, '\n' ) ;
141
142 Sprint( dump_fd, "max_descriptors = %d\n", (int)ps.ros.max_descriptors ) ;
143 Sprint( dump_fd, "process_limit = %d\n", (int)ps.ros.process_limit ) ;
144 Sprint( dump_fd, "config_file = %s\n", ps.ros.config_file ) ;
145 if ( debug.on )
146 Sprint( dump_fd, "debug_fd = %d\n", debug.fd ) ;
147 Sputchar( dump_fd, '\n' ) ;
148
149 Sprint( dump_fd, "END OF DUMP\n\n" ) ;
150 Sclose( dump_fd );
151
152 msg( LOG_INFO, func, "generated state dump in file %s", dump_file ) ;
153 }
154
155
/*
 * Reason a consistency check is being run.  The check itself is the
 * same in both cases; the type only affects reporting and rescheduling.
 */
enum check_type
{
   PERIODIC,         /* timer driven; the check re-arms its timer afterwards */
   USER_REQUESTED    /* run on demand; a clean result is always logged       */
} ;
160
161
consistency_check(enum check_type type)162 static void consistency_check( enum check_type type )
163 {
164 int fd ;
165 fd_set socket_mask_copy ;
166 unsigned u ;
167 int errors ;
168 unsigned total_running_servers = 0 ;
169 unsigned total_retry_servers = 0 ;
170 unsigned error_count = 0 ;
171 bool_int service_count_check_failed = FALSE ;
172 const char *func = "consistency_check" ;
173
174 socket_mask_copy = ps.rws.socket_mask ;
175
176 for ( u = 0 ; u < pset_count( SERVICES( ps ) ) ; u++ )
177 {
178 register struct service *sp = SP( pset_pointer( SERVICES( ps ), u ) ) ;
179 char *sid = SVC_ID( sp ) ;
180 unsigned running_servers ;
181 unsigned retry_servers ;
182
183 error_count += refcount_check( sp, &running_servers, &retry_servers ) ;
184
185 if ( SVC_IS_AVAILABLE( sp ) || SVC_IS_DISABLED ( sp ) )
186 {
187 /*
188 * In this case, there may be some servers running
189 */
190 if ( FD_ISSET( SVC_FD( sp ), &socket_mask_copy ) )
191 {
192 if ( SVC_IS_DISABLED( sp ) )
193 {
194 msg( LOG_ERR, func,
195 "fd of disabled service %s still in socket mask", sid ) ;
196 error_count++ ;
197 }
198 FD_CLR( SVC_FD( sp ), &socket_mask_copy ) ;
199 }
200 error_count += thread_check( sp, running_servers, retry_servers ) ;
201
202 errors = service_count_check( sp, running_servers, retry_servers ) ;
203 if ( ! errors && ! service_count_check_failed )
204 {
205 total_retry_servers += retry_servers ;
206 total_running_servers += running_servers ;
207 }
208 if ( errors )
209 {
210 service_count_check_failed = TRUE ;
211 error_count += errors ;
212 }
213
214 if ( SVC_IS_DISABLED( sp ) && SVC_RUNNING_SERVERS( sp ) == 0 )
215 {
216 msg( LOG_ERR, func,
217 "disabled service %s has 0 running servers\n", sid ) ;
218 error_count++ ;
219 continue ;
220 }
221 }
222 /* TCPMUX client programs are always stopped until they run. */
223 else if ( ! SVC_IS_MUXCLIENT( sp ) )
224 {
225 msg( LOG_ERR, func, "service %s not started", SVC_ID( sp ) ) ;
226 error_count++ ;
227 }
228 }
229
230 if ( ! service_count_check_failed )
231 {
232 if ( total_running_servers != pset_count( SERVERS( ps ) ) )
233 {
234 msg( LOG_ERR, func,
235 "total running servers (%d) != number of running servers (%d)",
236 total_running_servers, pset_count( SERVERS( ps ) ) ) ;
237 error_count++ ;
238 }
239 if ( total_retry_servers != pset_count( RETRIES( ps ) ) )
240 {
241 msg( LOG_ERR, func,
242 "total retry servers (%d) != number of retry servers (%d)",
243 total_retry_servers, pset_count( RETRIES( ps ) ) ) ;
244 error_count++ ;
245 }
246 }
247
248 /*
249 * Check if there are any descriptors set in socket_mask_copy
250 */
251 for ( fd = 0 ; (unsigned)fd < ps.ros.max_descriptors ; fd++ )
252 if ( FD_ISSET( fd, &socket_mask_copy ) && ((fd != signals_pending[0]) && fd != signals_pending[1]))
253 {
254 msg( LOG_ERR, func,
255 "descriptor %d set in socket mask but there is no service for it",
256 fd ) ;
257 error_count++ ;
258 }
259
260 if ( error_count != 0 )
261 msg( LOG_WARNING, func,
262 "Consistency check detected %d errors", error_count ) ;
263 else
264 if ( type == USER_REQUESTED || debug.on )
265 msg( LOG_INFO, func, "Consistency check passed" ) ;
266
267 if( type == PERIODIC )
268 if ( xtimer_add( periodic_check, ps.ros.cc_interval ) == -1 )
269 msg( LOG_ERR, func, "Failed to start consistency timer" ) ;
270 }
271
272
273 /*
274 * Check that the counts of running and retry servers stored in struct service
275 * are accurate
276 */
service_count_check(struct service * sp,unsigned running_servers,unsigned retry_servers)277 static unsigned service_count_check( struct service *sp,
278 unsigned running_servers,
279 unsigned retry_servers )
280 {
281 char *sid = SVC_ID( sp ) ;
282 int error_count = 0 ;
283 const char *func = "service_count_check" ;
284
285 if ( SVC_RUNNING_SERVERS( sp ) != running_servers )
286 {
287 msg( LOG_ERR, func,
288 "service %s: actual running servers = %d, known running servers = %d",
289 sid, running_servers, SVC_RUNNING_SERVERS( sp ) ) ;
290 error_count++ ;
291 }
292 if ( SVC_RETRIES( sp ) != retry_servers )
293 {
294 msg( LOG_ERR, func,
295 "service %s: actual retry servers = %d, known retry servers = %d",
296 sid, retry_servers, SVC_RETRIES( sp ) ) ;
297 error_count++ ;
298 }
299
300 if ( error_count && debug.on )
301 msg( LOG_DEBUG, func, "%s: %d errors detected", sid, error_count ) ;
302
303 return( error_count ) ;
304 }
305
306
307
308 /*
309 * If the service is single-threaded:
310 * if the descriptor is set in the socket mask, there must
311 * be a server running (or to be retried)
312 * If the service is multi-threaded:
313 * the descriptor must be always set
314 */
thread_check(struct service * sp,unsigned running_servers,unsigned retry_servers)315 static unsigned thread_check( struct service *sp,
316 unsigned running_servers,
317 unsigned retry_servers )
318 {
319 unsigned error_count = 0 ;
320 int sd = SVC_FD( sp ) ;
321 char *sid = SVC_ID( sp ) ;
322 const char *func = "thread_check" ;
323
324 if ( SVC_WAITS( sp ) )
325 {
326 bool_int has_servers = ( running_servers + retry_servers != 0 ) ;
327
328 if ( has_servers && FD_ISSET( sd, &ps.rws.socket_mask ) )
329 {
330 msg( LOG_ERR, func,
331 "Active single-threaded service %s: server running, descriptor set", sid ) ;
332 error_count++ ;
333 }
334 if ( !has_servers && !FD_ISSET( sd, &ps.rws.socket_mask ) )
335 {
336 msg( LOG_ERR, func,
337 "Active single-threaded service %s: no server running, descriptor not set",
338 sid ) ;
339 error_count++ ;
340 }
341 }
342 else
343 if ( ! FD_ISSET( sd, &ps.rws.socket_mask ) )
344 {
345 msg( LOG_ERR, func,
346 "Active multi-threaded service %s: descriptor not set", sid ) ;
347 error_count++ ;
348 }
349
350 if ( error_count && debug.on )
351 msg( LOG_DEBUG, func, "%s: %d errors detected", sid, error_count ) ;
352
353 return( error_count ) ;
354 }
355
356
357
358 /*
359 * Count the number of references to the specified service contained
360 * in the specified table of servers; put the number of servers
361 * in *countp
362 */
count_refs(struct service * sp,pset_h servers,unsigned * countp)363 static int count_refs( struct service *sp, pset_h servers, unsigned *countp )
364 {
365 unsigned u ;
366 struct server *serp ;
367 int refs = 0 ;
368 unsigned count = 0 ;
369
370 for ( u = 0 ; u < pset_count( servers ) ; u++ )
371 {
372 serp = SERP( pset_pointer( SERVERS( ps ), u ) ) ;
373 if ( SERVER_SERVICE( serp ) == sp )
374 {
375 refs++ ;
376 count++ ;
377 }
378 if ( SERVER_CONNSERVICE( serp ) == sp )
379 refs++ ;
380 /*
381 * XXX: in the future we may want to check if the given service
382 * is any of the alternative services (currently only SPECIAL
383 * services can be alternative services and SPECIAL services
384 * are not included in the service table)
385 */
386 }
387 *countp = count ;
388 return( refs ) ;
389 }
390
391
392 /*
393 * Check for reference counting errors.
394 * Returns number of errors found.
395 * Always set the number of running and retry servers.
396 */
refcount_check(struct service * sp,unsigned * running_servers,unsigned * retry_servers)397 static unsigned refcount_check( struct service *sp,
398 unsigned *running_servers,
399 unsigned *retry_servers )
400 {
401 char *sid = SVC_ID( sp ) ;
402 unsigned errors = 0 ;
403 int refs ;
404 int refcount = SVC_REFCOUNT( sp ) ;
405 const char *func = "refcount_check" ;
406
407 if ( refcount <= 0 )
408 {
409 msg( LOG_ERR, func, "%s service has bad refcount: %d",
410 sid, refcount ) ;
411 errors++ ;
412 }
413
414 /*
415 * The service table holds a reference to the service. The remaining
416 * references must be from servers and connections.
417 */
418 refcount-- ;
419
420 refs = count_refs( sp, SERVERS( ps ), running_servers ) ;
421 if ( ! errors && refs > refcount )
422 {
423 msg( LOG_ERR, func,
424 "running servers: too many references for %s (%d with max=%d)",
425 sid, refs, refcount ) ;
426 errors++ ;
427 }
428
429 refs = count_refs( sp, RETRIES( ps ), retry_servers ) ;
430 if ( ! errors && refs > refcount )
431 {
432 msg( LOG_ERR, func,
433 "retry servers: too many references for %s (%d with max=%d)",
434 sid, refs, refcount ) ;
435 errors++ ;
436 }
437
438 if ( errors && debug.on )
439 msg( LOG_DEBUG, func, "%s: %d errors detected", sid, errors ) ;
440
441 return( errors ) ;
442 }
443
444
user_requested_check(void)445 void user_requested_check(void)
446 {
447 consistency_check( USER_REQUESTED ) ;
448 }
449
450
periodic_check(void)451 static void periodic_check(void)
452 {
453 consistency_check( PERIODIC ) ;
454 }
455
456 /* This actually gets called during initialization, so be careful what
457 * gets put in here.
458 */
enable_periodic_check(unsigned interval)459 void enable_periodic_check( unsigned interval )
460 {
461 const char *func = "enable_periodic_check" ;
462
463 if ( xtimer_add( periodic_check, interval ) == -1 )
464 {
465 msg( LOG_ERR, func, "Failed to start consistency timer" ) ;
466 return ;
467 }
468 }
469