1 /*
2  * (c) Copyright 1992 by Panagiotis Tsirigotis
3  * (c) Sections Copyright 1998-2001 by Rob Braun
4  * All rights reserved.  The file named COPYRIGHT specifies the terms
5  * and conditions for redistribution.
6  */
7 
8 
9 #include "config.h"
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include <signal.h>
13 #include <time.h>
14 #include <fcntl.h>
15 #include <syslog.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 #include <sys/time.h>
19 
20 #include "sio.h"
21 #include "internals.h"
22 #include "msg.h"
23 #include "sconf.h"
24 #include "state.h"
25 #include "main.h"
26 #include "xconfig.h"
27 #include "xtimer.h"
28 #include "options.h"
29 
/*
 * Forward declarations for the consistency-check helpers that are used
 * before their definitions further down in this file.
 */
static unsigned thread_check(struct service *sp,
                             unsigned running_servers,
                             unsigned retry_servers);
static unsigned refcount_check(struct service *sp,
                               unsigned *running_servers,
                               unsigned *retry_servers);
static unsigned service_count_check(struct service *sp,
                                    unsigned running_servers,
                                    unsigned retry_servers);
static void periodic_check(void);
34 
35 
dump_services(int fd)36 static void dump_services( int fd )
37 {
38    unsigned u ;
39 
40    /*
41     * Dump the current configuration (services + defaults)
42     */
43    Sprint( fd, "Services + defaults:\n" ) ;
44    sc_dump( DEFAULTS( ps ), fd, 0, TRUE ) ;
45 
46    for ( u = 0 ; u < pset_count( SERVICES( ps ) ) ; u++ )
47       svc_dump( SP( pset_pointer( SERVICES( ps ), u ) ), fd ) ;
48 }
49 
dump_internal_state(void)50 void dump_internal_state(void)
51 {
52    int dump_fd ;
53    const char *dump_file = DUMP_FILE ;
54    time_t current_time ;
55    int fd ;
56    unsigned u ;
57    const char *func = "dump_internal_state" ;
58 
59    if ( debug.on )
60       msg( LOG_DEBUG, func, "Dumping State" ) ;
61 
62    dump_fd = open( dump_file, O_WRONLY | O_CREAT | O_APPEND, DUMP_FILE_MODE);
63    if ( dump_fd == -1 )
64    {
65       msg( LOG_ERR, func, "failed to open %s: %m", dump_file ) ;
66       return ;
67    }
68 
69    if (Sbuftype( dump_fd, SIO_LINEBUF ) == SIO_ERR )
70    {
71        /*
72 	* If the above function failed, Sprint will most likely
73 	* fail, too. Output a message for troubleshooting and quit.
74 	*/
75        msg( LOG_ERR, func,
76             "failed setting up sio buffering: %m fd:%d", dump_fd ) ;
77        Sclose(dump_fd);
78        return;
79    }
80 
81    /*
82     * Print the program name, version, and timestamp.
83     * Note that the program_version variable contains the program name.
84     */
85    (void) time( &current_time ) ;
86    Sprint( dump_fd, "INTERNAL STATE DUMP: %s\n", program_version ) ;
87    Sprint( dump_fd, "Current time: %s\n", ctime( &current_time ) ) ;
88 
89    dump_services( dump_fd ) ;
90 
91    /*
92     * Dump the server table
93     */
94    Sprint( dump_fd, "Server table dump:\n" ) ;
95    for ( u = 0 ; u < pset_count( SERVERS( ps ) ) ; u++ )
96       server_dump( SERP( pset_pointer( SERVERS( ps ), u ) ), dump_fd ) ;
97    Sputchar( dump_fd, '\n' ) ;
98 
99    /*
100     * Dump the retry_table
101     */
102    Sprint( dump_fd, "Retry table dump:\n" ) ;
103    for ( u = 0 ; u < pset_count( RETRIES( ps ) ) ; u++ )
104       server_dump( SERP( pset_pointer( RETRIES( ps ), u ) ), dump_fd ) ;
105    Sputchar( dump_fd, '\n' ) ;
106 
107    /*
108     * Dump the socket mask
109     */
110    Sprint( dump_fd, "Socket mask:" ) ;
111    for ( fd = 0 ; (unsigned)fd < ps.ros.max_descriptors ; fd++ )
112       if ( FD_ISSET( fd, &ps.rws.socket_mask ) )
113          Sprint( dump_fd, " %d", fd ) ;
114    Sputchar( dump_fd, '\n' ) ;
115    Sprint( dump_fd, "mask_max = %d\n", ps.rws.mask_max ) ;
116 
117    /*
118     * Dump the descriptors that are open and are *not* in the socket mask
119     */
120    Sprint( dump_fd, "Open descriptors (not in socket mask):" ) ;
121    for ( fd = 0 ; (unsigned)fd < ps.ros.max_descriptors ; fd++ )
122    {
123       struct stat st ;
124 
125       if ( FD_ISSET( fd, &ps.rws.socket_mask ) )
126          continue ;
127       if ( fstat( fd, &st ) == -1 )
128          continue ;
129       Sprint( dump_fd, " %d", fd ) ;
130    }
131    Sputchar( dump_fd, '\n' ) ;
132    Sputchar( dump_fd, '\n' ) ;
133 
134    Sprint( dump_fd, "active_services = %d\n", ps.rws.active_services ) ;
135    Sprint( dump_fd, "available_services = %d\n", ps.rws.available_services ) ;
136    Sprint( dump_fd, "descriptors_free = %d\n", ps.rws.descriptors_free ) ;
137    Sprint( dump_fd, "running_servers = %d\n", pset_count( SERVERS( ps ) ) ) ;
138    Sprint( dump_fd, "Logging service = %s\n",
139             LOG_SERVICE( ps ) != NULL ? "enabled" : "not enabled" ) ;
140    Sputchar( dump_fd, '\n' ) ;
141 
142    Sprint( dump_fd, "max_descriptors = %d\n", (int)ps.ros.max_descriptors ) ;
143    Sprint( dump_fd, "process_limit = %d\n", (int)ps.ros.process_limit ) ;
144    Sprint( dump_fd, "config_file = %s\n", ps.ros.config_file ) ;
145    if ( debug.on )
146       Sprint( dump_fd, "debug_fd = %d\n", debug.fd ) ;
147    Sputchar( dump_fd, '\n' ) ;
148 
149    Sprint( dump_fd, "END OF DUMP\n\n" ) ;
150    Sclose( dump_fd );
151 
152    msg( LOG_INFO, func, "generated state dump in file %s", dump_file ) ;
153 }
154 
155 
/*
 * The two ways a consistency check can be triggered.  consistency_check()
 * re-arms its timer only for PERIODIC, and logs a clean result
 * unconditionally only for USER_REQUESTED.
 */
enum check_type
{
   PERIODIC,         /* started from the consistency timer */
   USER_REQUESTED    /* started through user_requested_check() */
};
160 
161 
consistency_check(enum check_type type)162 static void consistency_check( enum check_type type )
163 {
164    int         fd ;
165    fd_set      socket_mask_copy ;
166    unsigned    u ;
167    int         errors ;
168    unsigned    total_running_servers        = 0 ;
169    unsigned    total_retry_servers          = 0 ;
170    unsigned    error_count                  = 0 ;
171    bool_int    service_count_check_failed   = FALSE ;
172    const char  *func                        = "consistency_check" ;
173 
174    socket_mask_copy = ps.rws.socket_mask ;
175 
176    for ( u = 0 ; u < pset_count( SERVICES( ps ) ) ; u++ )
177    {
178       register struct service *sp = SP( pset_pointer( SERVICES( ps ), u ) ) ;
179       char *sid = SVC_ID( sp ) ;
180       unsigned   running_servers ;
181       unsigned   retry_servers ;
182 
183       error_count += refcount_check( sp, &running_servers, &retry_servers ) ;
184 
185       if ( SVC_IS_AVAILABLE( sp ) || SVC_IS_DISABLED ( sp ) )
186       {
187          /*
188           * In this case, there may be some servers running
189           */
190          if ( FD_ISSET( SVC_FD( sp ), &socket_mask_copy ) )
191          {
192             if ( SVC_IS_DISABLED( sp ) )
193             {
194                msg( LOG_ERR, func,
195                   "fd of disabled service %s still in socket mask", sid ) ;
196                error_count++ ;
197             }
198             FD_CLR( SVC_FD( sp ), &socket_mask_copy ) ;
199          }
200          error_count += thread_check( sp, running_servers, retry_servers ) ;
201 
202          errors = service_count_check( sp, running_servers, retry_servers ) ;
203          if ( ! errors && ! service_count_check_failed )
204          {
205             total_retry_servers += retry_servers ;
206             total_running_servers += running_servers ;
207          }
208          if ( errors )
209          {
210             service_count_check_failed = TRUE ;
211             error_count += errors ;
212          }
213 
214          if ( SVC_IS_DISABLED( sp ) && SVC_RUNNING_SERVERS( sp ) == 0 )
215          {
216             msg( LOG_ERR, func,
217                "disabled service %s has 0 running servers\n", sid ) ;
218             error_count++ ;
219             continue ;
220          }
221       }
222       /* TCPMUX client programs are always stopped until they run. */
223       else if ( ! SVC_IS_MUXCLIENT( sp ) )
224       {
225          msg( LOG_ERR, func, "service %s not started", SVC_ID( sp ) ) ;
226          error_count++ ;
227       }
228    }
229 
230    if ( ! service_count_check_failed )
231    {
232       if ( total_running_servers != pset_count( SERVERS( ps ) ) )
233       {
234          msg( LOG_ERR, func,
235             "total running servers (%d) != number of running servers (%d)",
236                total_running_servers, pset_count( SERVERS( ps ) ) ) ;
237          error_count++ ;
238       }
239       if ( total_retry_servers != pset_count( RETRIES( ps ) ) )
240       {
241          msg( LOG_ERR, func,
242             "total retry servers (%d) != number of retry servers (%d)",
243                total_retry_servers, pset_count( RETRIES( ps ) ) ) ;
244          error_count++ ;
245       }
246    }
247 
248    /*
249     * Check if there are any descriptors set in socket_mask_copy
250     */
251    for ( fd = 0 ; (unsigned)fd < ps.ros.max_descriptors ; fd++ )
252       if ( FD_ISSET( fd, &socket_mask_copy ) && ((fd != signals_pending[0]) && fd != signals_pending[1]))
253       {
254          msg( LOG_ERR, func,
255             "descriptor %d set in socket mask but there is no service for it",
256                fd ) ;
257          error_count++ ;
258       }
259 
260    if ( error_count != 0 )
261       msg( LOG_WARNING, func,
262             "Consistency check detected %d errors", error_count ) ;
263    else
264       if ( type == USER_REQUESTED || debug.on )
265          msg( LOG_INFO, func, "Consistency check passed" ) ;
266 
267    if( type == PERIODIC )
268       if ( xtimer_add( periodic_check, ps.ros.cc_interval ) == -1 )
269          msg( LOG_ERR, func, "Failed to start consistency timer" ) ;
270 }
271 
272 
273 /*
274  * Check that the counts of running and retry servers stored in struct service
275  * are accurate
276  */
service_count_check(struct service * sp,unsigned running_servers,unsigned retry_servers)277 static unsigned service_count_check( struct service *sp,
278                                       unsigned running_servers,
279                                       unsigned retry_servers )
280 {
281    char *sid = SVC_ID( sp ) ;
282    int error_count = 0 ;
283    const char *func = "service_count_check" ;
284 
285    if ( SVC_RUNNING_SERVERS( sp ) != running_servers )
286    {
287       msg( LOG_ERR, func,
288          "service %s: actual running servers = %d, known running servers = %d",
289             sid, running_servers, SVC_RUNNING_SERVERS( sp ) ) ;
290       error_count++ ;
291    }
292    if ( SVC_RETRIES( sp ) != retry_servers )
293    {
294       msg( LOG_ERR, func,
295          "service %s: actual retry servers = %d, known retry servers = %d",
296             sid, retry_servers, SVC_RETRIES( sp ) ) ;
297       error_count++ ;
298    }
299 
300    if ( error_count && debug.on )
301       msg( LOG_DEBUG, func, "%s: %d errors detected", sid, error_count ) ;
302 
303    return( error_count ) ;
304 }
305 
306 
307 
308 /*
309  * If the service is single-threaded:
310  *         if the descriptor is set in the socket mask, there must
311  *         be a server running (or to be retried)
312  *   If the service is multi-threaded:
313  *         the descriptor must be always set
314  */
thread_check(struct service * sp,unsigned running_servers,unsigned retry_servers)315 static unsigned thread_check( struct service *sp,
316                                unsigned running_servers,
317                                unsigned retry_servers )
318 {
319    unsigned error_count = 0 ;
320    int sd = SVC_FD( sp ) ;
321    char *sid = SVC_ID( sp ) ;
322    const char *func = "thread_check" ;
323 
324    if ( SVC_WAITS( sp ) )
325    {
326       bool_int has_servers = ( running_servers + retry_servers != 0 ) ;
327 
328       if ( has_servers && FD_ISSET( sd, &ps.rws.socket_mask ) )
329       {
330          msg( LOG_ERR, func,
331 "Active single-threaded service %s: server running, descriptor set", sid ) ;
332          error_count++ ;
333       }
334       if ( !has_servers && !FD_ISSET( sd, &ps.rws.socket_mask ) )
335       {
336          msg( LOG_ERR, func,
337 "Active single-threaded service %s: no server running, descriptor not set",
338             sid ) ;
339          error_count++ ;
340       }
341    }
342    else
343       if ( ! FD_ISSET( sd, &ps.rws.socket_mask ) )
344       {
345          msg( LOG_ERR, func,
346             "Active multi-threaded service %s: descriptor not set", sid ) ;
347          error_count++ ;
348       }
349 
350    if ( error_count && debug.on )
351       msg( LOG_DEBUG, func, "%s: %d errors detected", sid, error_count ) ;
352 
353    return( error_count ) ;
354 }
355 
356 
357 
358 /*
359  * Count the number of references to the specified service contained
360  * in the specified table of servers; put the number of servers
361  * in *countp
362  */
count_refs(struct service * sp,pset_h servers,unsigned * countp)363 static int count_refs( struct service *sp, pset_h servers, unsigned *countp )
364 {
365    unsigned u ;
366    struct server *serp ;
367    int refs = 0 ;
368    unsigned count = 0 ;
369 
370    for ( u = 0 ; u < pset_count( servers ) ; u++ )
371    {
372       serp = SERP( pset_pointer( SERVERS( ps ), u ) ) ;
373       if ( SERVER_SERVICE( serp ) == sp )
374       {
375          refs++ ;
376          count++ ;
377       }
378       if ( SERVER_CONNSERVICE( serp ) == sp )
379          refs++ ;
380       /*
381        * XXX:   in the future we may want to check if the given service
382        *         is any of the alternative services (currently only SPECIAL
383        *         services can be alternative services and SPECIAL services
384        *         are not included in the service table)
385        */
386    }
387    *countp = count ;
388    return( refs ) ;
389 }
390 
391 
392 /*
393  * Check for reference counting errors.
394  * Returns number of errors found.
395  * Always set the number of running and retry servers.
396  */
refcount_check(struct service * sp,unsigned * running_servers,unsigned * retry_servers)397 static unsigned refcount_check( struct service *sp,
398                                  unsigned *running_servers,
399                                  unsigned *retry_servers )
400 {
401    char *sid = SVC_ID( sp ) ;
402    unsigned errors = 0 ;
403    int refs ;
404    int refcount = SVC_REFCOUNT( sp ) ;
405    const char *func = "refcount_check" ;
406 
407    if ( refcount <= 0 )
408    {
409       msg( LOG_ERR, func, "%s service has bad refcount: %d",
410                sid, refcount ) ;
411       errors++ ;
412    }
413 
414    /*
415     * The service table holds a reference to the service. The remaining
416     * references must be from servers and connections.
417     */
418    refcount-- ;
419 
420    refs = count_refs( sp, SERVERS( ps ), running_servers ) ;
421    if ( ! errors && refs > refcount )
422    {
423       msg( LOG_ERR, func,
424          "running servers: too many references for %s (%d with max=%d)",
425             sid, refs, refcount ) ;
426       errors++ ;
427    }
428 
429    refs = count_refs( sp, RETRIES( ps ), retry_servers ) ;
430    if ( ! errors && refs > refcount )
431    {
432       msg( LOG_ERR, func,
433          "retry servers: too many references for %s (%d with max=%d)",
434             sid, refs, refcount ) ;
435       errors++ ;
436    }
437 
438    if ( errors && debug.on )
439       msg( LOG_DEBUG, func, "%s: %d errors detected", sid, errors ) ;
440 
441    return( errors ) ;
442 }
443 
444 
user_requested_check(void)445 void user_requested_check(void)
446 {
447    consistency_check( USER_REQUESTED ) ;
448 }
449 
450 
periodic_check(void)451 static void periodic_check(void)
452 {
453    consistency_check( PERIODIC ) ;
454 }
455 
456 /* This actually gets called during initialization, so be careful what
457  * gets put in here.
458  */
enable_periodic_check(unsigned interval)459 void enable_periodic_check( unsigned interval )
460 {
461    const char *func = "enable_periodic_check" ;
462 
463    if ( xtimer_add( periodic_check, interval ) == -1 )
464    {
465       msg( LOG_ERR, func, "Failed to start consistency timer" ) ;
466       return ;
467    }
468 }
469