1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
17 
18 // The BOINC API and runtime system.
19 //
20 // Notes:
21 // 1) Thread structure:
22 //  Sequential apps
23 //    Unix
24 //      Suspend/resume have to be done in the worker thread,
25 //      so we use a 10 Hz SIGALRM signal handler.
26 //      Also get CPU time (getrusage()) in the signal handler.
27 //      Note: many library functions and system calls
28 //      are not "asynch signal safe": see, e.g.
29 //      http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html#tag_02_04_03
30 //      (e.g. sprintf() in a signal handler hangs Mac OS X).
31 //      Can't do floating-point math because FP regs not saved.
32 //      So we do as little as possible in the signal handler,
33 //      and do the rest in a separate "timer thread".
34 //          - send status and graphics messages to client
35 //          - handle messages from client
36 //          - set ready-to-checkpoint flag
37 //          - check heartbeat
38 //          - call app-defined timer callback function
39 //    Mac: similar to Linux,
40 //          but getrusage() in the worker signal handler causes crashes,
41 //          so do it in the timer thread (GETRUSAGE_IN_TIMER_THREAD)
42 //          TODO: why not do this on Linux too?
43 //    Android: similar to Linux,
44 //          but setitimer() causes crashes on some Android versions,
45 //          so instead of using a periodic signal,
46 //          have the timer thread send SIGALRM signals to the worker thread
47 //          every .1 sec.
48 //          TODO: for uniformity should we do this on Linux as well?
49 //    Win
50 //      the timer thread does everything
51 //  Multi-thread apps:
52 //    Unix:
53 //      fork
54 //      original process runs timer loop:
55 //        handle suspend/resume/quit, heartbeat (use signals)
//      new process calls boinc_init_options() with flags to
57 //        send status messages and handle checkpoint stuff,
58 //        and returns from boinc_init_parallel()
59 //      NOTE: THIS DOESN'T RESPECT CRITICAL SECTIONS.
60 //      NEED TO MASK SIGNALS IN CHILD DURING CRITICAL SECTIONS
61 //    Win:
62 //      like sequential case, except suspend/resume must enumerate
63 //      all threads (except timer) and suspend/resume them all
64 //
65 // 2) All variables that are accessed by two threads (i.e. worker and timer)
66 //  MUST be declared volatile.
67 //
68 // 3) For compatibility with C, we use int instead of bool various places
69 //
70 // 4) We must periodically check that the client is still alive and exit if not.
71 //      Originally this was done using heartbeat msgs from client.
72 //      This is unreliable, e.g. if the client is blocked for a long time.
73 //      As of Oct 11 2012 we use a different mechanism:
74 //      the client passes its PID and we periodically check whether it exists.
75 //      But we need to support the heartbeat mechanism also for compatibility.
76 //
77 // Terminology:
78 // The processing of a result can be divided
79 // into multiple "episodes" (executions of the app),
80 // each of which resumes from the checkpointed state of the previous episode.
81 // Unless otherwise noted, "CPU time" refers to the sum over all episodes
82 // (not counting the part after the last checkpoint in an episode).
83 
84 
85 #if defined(_WIN32) && !defined(__STDWX_H__) && !defined(_BOINC_WIN_) && !defined(_AFX_STDAFX_H_)
86 #include "boinc_win.h"
87 #endif
88 
89 #ifdef _WIN32
90 #include "version.h"
91 #include "win_util.h"
92 #else
93 #include "config.h"
94 #include <cstdlib>
95 #include <cstring>
96 #include <cstdio>
97 #include <cstdarg>
98 #include <sys/types.h>
99 #include <errno.h>
100 #include <unistd.h>
101 #include <sys/time.h>
102 #include <sys/resource.h>
103 #include <sys/wait.h>
104 #include <pthread.h>
105 #include <vector>
106 #ifndef __EMX__
107 #include <sched.h>
108 #endif
109 #endif
110 
111 #include "app_ipc.h"
112 #include "common_defs.h"
113 #include "diagnostics.h"
114 #include "error_numbers.h"
115 #include "filesys.h"
116 #include "mem_usage.h"
117 #include "parse.h"
118 #include "proc_control.h"
119 #include "shmem.h"
120 #include "str_replace.h"
121 #include "str_util.h"
122 #include "util.h"
123 
124 #include "boinc_api.h"
125 
126 using std::vector;
127 
128 //#define VERBOSE
129     // enable a bunch of fprintfs to stderr
130 
131 //#define MSGS_FROM_FILE
132     // get messages from a file "msgs.txt" instead of shared mem
133     // write messages to a file "out_msgs.txt" instead of shared mem
134 
135 //#define ANDROID
136     // use the Android thread/signal logic, which works on Linux too
137 
138 #ifdef __APPLE__
139 #include "mac_backtrace.h"
140 #endif
141 #if defined(__APPLE__) || defined(ANDROID)
142 #define GETRUSAGE_IN_TIMER_THREAD
143     // call getrusage() in the timer thread,
144     // rather than in the worker thread's signal handler
145     // (which can cause crashes on Mac)
146     // If you want, you can set this for Linux too:
147     // CPPFLAGS=-DGETRUSAGE_IN_TIMER_THREAD
148 #endif
149 
// File-scope state of the API runtime.
// Per the notes at the top of this file, variables accessed by both the
// worker and timer threads are declared volatile.
//
const char* api_version = "API_VERSION_" PACKAGE_VERSION;
static APP_INIT_DATA aid;
    // init data; cleared and re-parsed by boinc_parse_init_data_file()
static FILE_LOCK file_lock;
    // lock on LOCKFILE, taken in boinc_init_options_general()
    // when options.main_program is set; released in boinc_exit()
APP_CLIENT_SHM* app_client_shm = 0;
    // message channels shared with the client;
    // allocated by setup_shared_mem(), reset to NULL if attaching fails
static volatile int time_until_checkpoint;
    // time until enable checkpoint
static volatile double fraction_done;
    // set to -1 at init and 1 by boinc_finish_message();
    // reported to the client only when >= 0
static volatile double last_checkpoint_cpu_time;
    // initialized from aid.wu_cpu_time
static volatile bool ready_to_checkpoint = false;
static volatile int in_critical_section = 0;
static volatile double last_wu_cpu_time;
    // initialized from aid.wu_cpu_time
static volatile bool standalone = false;
    // set if the init data file can't be parsed
    // or shared memory can't be set up
static volatile double initial_wu_cpu_time;
    // copy of aid.wu_cpu_time taken at init,
    // since aid may be reread later
static volatile bool have_new_trickle_up = false;
    // cleared by send_trickle_up_msg() once the client has been notified
static volatile bool have_trickle_down = true;
    // set if the client notified us of a trickle-down.
    // init to true so the first call to boinc_receive_trickle_down()
    // will scan the slot dir for old trickle-down files
static volatile bool handle_trickle_downs = false;
    // whether we should check for notifications of trickle_downs
    // and file upload status.
    // set by boinc_receive_trickle_down() and boinc_upload_file().
static volatile int heartbeat_giveup_count;
    // interrupt count value at which to give up on client
#ifdef _WIN32
static volatile int nrunning_ticks = 0;
    // fallback CPU time source (ticks * TIMER_PERIOD)
    // when boinc_process_cpu_time() fails
#endif
static volatile int interrupt_count = 0;
    // number of timer interrupts
    // used to measure elapsed time in a way that's
    // not affected by user changing system clock,
    // and doesn't have big jump after hibernation
static volatile int running_interrupt_count = 0;
    // number of timer interrupts while not suspended.
    // Used to compute elapsed time
static volatile bool finishing;
    // used for worker/timer synch during boinc_finish();
static int want_network = 0;
    // if nonzero, update_app_progress() sends <want_network>1</want_network>
static int have_network = 1;
static double bytes_sent = 0;
static double bytes_received = 0;
    // set by boinc_network_usage();
    // reported by update_app_progress() when nonzero
bool boinc_disable_timer_thread = false;
    // simulate unresponsive app by setting to true (debugging)
    // also set by boinc_finish_message() after the final messages are sent
static FUNC_PTR timer_callback = 0;
char web_graphics_url[256];
bool send_web_graphics_url = false;
char remote_desktop_addr[256];
bool send_remote_desktop_addr = false;
int app_min_checkpoint_period = 0;
    // min checkpoint period requested by app
199     // min checkpoint period requested by app
200 
#define TIMER_PERIOD 0.1
    // Sleep interval for timer thread;
    // determines max rate of handling messages from client.
    // Unix: period of worker-thread timer interrupts.
#define TIMERS_PER_SEC 10
    // reciprocal of TIMER_PERIOD
    // This determines the resolution of fraction done and CPU time reporting
    // to the client, and of checkpoint enabling.
#define HEARTBEAT_GIVEUP_SECS 30
    // when no client PID is known, client_dead() gives up on the client
    // after this long without a heartbeat
#define HEARTBEAT_GIVEUP_COUNT ((int)(HEARTBEAT_GIVEUP_SECS/TIMER_PERIOD))
    // quit if no heartbeat from client in this #interrupts
#define LOCKFILE_TIMEOUT_PERIOD 35
    // quit if we cannot acquire slot lock file in this #secs after startup
214 
// Platform-specific thread and shared-memory handles
//
#ifdef _WIN32
static HANDLE hSharedMem;
    // handle returned by attach_shmem() in setup_shared_mem()
HANDLE worker_thread_handle;
    // used to suspend worker thread, and to measure its CPU time
DWORD timer_thread_id;
#else
static volatile bool worker_thread_exit_flag = false;
static volatile int worker_thread_exit_status;
    // the above are used by the timer thread to tell
    // the worker thread to exit
static pthread_t worker_thread_handle;
static pthread_t timer_thread_handle;
#ifndef GETRUSAGE_IN_TIMER_THREAD
static struct rusage worker_thread_ru;
    // read by boinc_worker_thread_cpu_time();
    // presumably filled by the worker's signal handler
    // (see notes at top of file) - TODO confirm
#endif
#endif

static BOINC_OPTIONS options;
    // options in effect; copied from the caller's struct
    // by boinc_init_options_general()
volatile BOINC_STATUS boinc_status;
    // status reported to the app by boinc_get_status()

#ifdef MSGS_FROM_FILE
static FILE* fout;
    // "out_msgs.txt", opened by boinc_init_options_general();
    // may be NULL if the open failed
#endif
238 
// vars related to intermediate file upload
//
// One entry per uploaded file: its name and a status code.
// NOTE(review): the meaning of the status values is not visible here.
struct UPLOAD_FILE_STATUS {
    std::string name;
    int status;
};
static bool have_new_upload_file;
    // tells send_trickle_up_msg() to send <have_new_upload_file/>;
    // cleared there once the message has been sent
static std::vector<UPLOAD_FILE_STATUS> upload_file_status;
246 
// Forward declarations of functions referenced before their definitions
//
static int resume_activities();
static void boinc_exit(int);
static void block_sigalrm();
static int start_worker_signals();
251 
boinc_msg_prefix(char * sbuf,int len)252 char* boinc_msg_prefix(char* sbuf, int len) {
253 #ifdef ANDROID
254     // the time stuff crashes on Android if in a signal handler
255     //
256     sbuf[0] = 0;
257 #else
258     char buf[256];
259     struct tm tm;
260     struct tm *tmp = &tm;
261     int n;
262 
263     time_t x = time(0);
264     if (x == -1) {
265         strlcpy(sbuf, "time() failed", len);
266         return sbuf;
267     }
268 #ifdef _WIN32
269 #ifdef __MINGW32__
270     if ((tmp = localtime(&x)) == NULL) {
271 #else
272     if (localtime_s(&tm, &x) == EINVAL) {
273 #endif
274 #else
275     if (localtime_r(&x, &tm) == NULL) {
276 #endif
277         strlcpy(sbuf, "localtime() failed", len);
278         return sbuf;
279     }
280     if (strftime(buf, sizeof(buf)-1, "%H:%M:%S", tmp) == 0) {
281         strlcpy(sbuf, "strftime() failed", len);
282         return sbuf;
283     }
284 #ifdef _WIN32
285     n = _snprintf(sbuf, len, "%s (%d):", buf, GetCurrentProcessId());
286 #else
287     n = snprintf(sbuf, len, "%s (%d):", buf, getpid());
288 #endif
289     if (n < 0) {
290         strlcpy(sbuf, "sprintf() failed", len);
291         return sbuf;
292     }
293     sbuf[len-1] = 0;    // just in case
294 #endif  // ANDROID
295     return sbuf;
296 }
297 
298 #ifndef MSGS_FROM_FILE
299 
300 static int setup_shared_mem() {
301     char buf[256];
302     if (standalone) {
303         fprintf(stderr,
304             "%s Standalone mode, so not using shared memory.\n",
305             boinc_msg_prefix(buf, sizeof(buf))
306         );
307         return 0;
308     }
309     app_client_shm = new APP_CLIENT_SHM;
310 
311 #ifdef _WIN32
312     sprintf(buf, "%s%s", SHM_PREFIX, aid.shmem_seg_name);
313     hSharedMem = attach_shmem(buf, (void**)&app_client_shm->shm);
314     if (hSharedMem == NULL) {
315         delete app_client_shm;
316         app_client_shm = NULL;
317     }
318 #else
319 #ifdef __EMX__
320     if (attach_shmem(aid.shmem_seg_name, (void**)&app_client_shm->shm)) {
321         delete app_client_shm;
322         app_client_shm = NULL;
323     }
324 #else
325     if (aid.shmem_seg_name == -1) {
326         // Version 6 Unix/Linux/Mac client
327         if (attach_shmem_mmap(MMAPPED_FILE_NAME, (void**)&app_client_shm->shm)) {
328             delete app_client_shm;
329             app_client_shm = NULL;
330         }
331     } else {
332         // version 5 Unix/Linux/Mac client
333         if (attach_shmem(aid.shmem_seg_name, (void**)&app_client_shm->shm)) {
334             delete app_client_shm;
335             app_client_shm = NULL;
336         }
337     }
338 #endif
339 #endif  // ! _WIN32
340     if (app_client_shm == NULL) return -1;
341     return 0;
342 }
343 #endif      // MSGS_FROM_FILE
344 
// a mutex for data structures shared between timer and worker threads
346 //
// The mutex object itself: a Win32 mutex handle created by init_mutex(),
// or a statically initialized pthread mutex elsewhere.
//
#ifdef _WIN32
static HANDLE mutex;
#else
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

// Create the mutex (Windows only; a no-op on other platforms).
//
static void init_mutex() {
#ifdef _WIN32
    mutex = CreateMutex(NULL, FALSE, NULL);
#endif
}

// Block until the mutex is held by the calling thread.
//
static inline void acquire_mutex() {
#ifdef _WIN32
    WaitForSingleObject(mutex, INFINITE);
#else
#ifdef VERBOSE
    char prefix[256];
    fprintf(stderr, "%s acquiring mutex\n",
        boinc_msg_prefix(prefix, sizeof(prefix))
    );
#endif
    pthread_mutex_lock(&mutex);
#endif
}

// Release the mutex.
//
static inline void release_mutex() {
#ifdef _WIN32
    ReleaseMutex(mutex);
#else
#ifdef VERBOSE
    char prefix[256];
    fprintf(stderr, "%s releasing mutex\n",
        boinc_msg_prefix(prefix, sizeof(prefix))
    );
#endif
    pthread_mutex_unlock(&mutex);
#endif
}
380 
381 // Return CPU time of process.
382 //
383 double boinc_worker_thread_cpu_time() {
384     double cpu;
385 #ifdef _WIN32
386     int retval;
387     retval = boinc_process_cpu_time(GetCurrentProcess(), cpu);
388     if (retval) {
389         cpu = nrunning_ticks * TIMER_PERIOD;   // for Win9x
390     }
391 #else
392 #ifdef GETRUSAGE_IN_TIMER_THREAD
393     struct rusage worker_thread_ru;
394     getrusage(RUSAGE_SELF, &worker_thread_ru);
395 #endif
396     cpu = (double)worker_thread_ru.ru_utime.tv_sec
397       + (((double)worker_thread_ru.ru_utime.tv_usec)/1000000.0);
398     cpu += (double)worker_thread_ru.ru_stime.tv_sec
399       + (((double)worker_thread_ru.ru_stime.tv_usec)/1000000.0);
400 #endif
401 
402     return cpu;
403 }
404 
405 // Communicate to the client (via shared mem)
406 // the current CPU time and fraction done.
407 // NOTE: various bugs could cause some of these FP numbers to be enormous,
408 // possibly overflowing the buffer.
409 // So use strlcat() instead of strcat()
410 //
411 // This is called only from the timer thread (so no need for synch)
412 //
413 static bool update_app_progress(double cpu_t, double cp_cpu_t) {
414     char msg_buf[MSG_CHANNEL_SIZE], buf[256];
415 
416     if (standalone) return true;
417 
418     sprintf(msg_buf,
419         "<current_cpu_time>%e</current_cpu_time>\n"
420         "<checkpoint_cpu_time>%e</checkpoint_cpu_time>\n",
421         cpu_t, cp_cpu_t
422     );
423     if (want_network) {
424         strlcat(msg_buf, "<want_network>1</want_network>\n", sizeof(msg_buf));
425     }
426     if (fraction_done >= 0) {
427         double range = aid.fraction_done_end - aid.fraction_done_start;
428         double fdone = aid.fraction_done_start + fraction_done*range;
429         sprintf(buf, "<fraction_done>%e</fraction_done>\n", fdone);
430         strlcat(msg_buf, buf, sizeof(msg_buf));
431     }
432     if (bytes_sent) {
433         sprintf(buf, "<bytes_sent>%f</bytes_sent>\n", bytes_sent);
434         strlcat(msg_buf, buf, sizeof(msg_buf));
435     }
436     if (bytes_received) {
437         sprintf(buf, "<bytes_received>%f</bytes_received>\n", bytes_received);
438         strlcat(msg_buf, buf, sizeof(msg_buf));
439     }
440 #ifdef MSGS_FROM_FILE
441     if (fout) {
442         fputs(msg_buf, fout);
443     }
444     return 0;
445 #else
446     return app_client_shm->shm->app_status.send_msg(msg_buf);
447 #endif
448 }
449 
450 // called in timer thread
451 //
452 static void handle_heartbeat_msg() {
453     char buf[MSG_CHANNEL_SIZE];
454     double dtemp;
455     bool btemp;
456 
457     if (!app_client_shm->shm->heartbeat.get_msg(buf)) {
458         return;
459     }
460     boinc_status.network_suspended = false;
461     if (match_tag(buf, "<heartbeat/>")) {
462         heartbeat_giveup_count = interrupt_count + HEARTBEAT_GIVEUP_COUNT;
463     }
464     if (parse_double(buf, "<wss>", dtemp)) {
465         boinc_status.working_set_size = dtemp;
466     }
467     if (parse_double(buf, "<max_wss>", dtemp)) {
468         boinc_status.max_working_set_size = dtemp;
469     }
470     if (parse_bool(buf, "suspend_network", btemp)) {
471         boinc_status.network_suspended = btemp;
472     }
473 }
474 
475 // called in timer thread
476 //
477 static bool client_dead() {
478     char buf[256];
479     bool dead;
480     if (aid.client_pid) {
481         // check every 10 sec
482         //
483         if (interrupt_count%(TIMERS_PER_SEC*10)) return false;
484 #ifdef _WIN32
485         HANDLE h = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, aid.client_pid);
486         // If the process exists but is running under a different user account (boinc_master)
487         // then the handle returned is NULL and GetLastError() returns ERROR_ACCESS_DENIED.
488         //
489         if ((h == NULL) && (GetLastError() != ERROR_ACCESS_DENIED)) {
490             dead = true;
491         } else {
492             if (h) CloseHandle(h);
493             dead = false;
494         }
495 #else
496         int retval = kill(aid.client_pid, 0);
497         dead = (retval == -1 && errno == ESRCH);
498 #endif
499     } else {
500         dead = (interrupt_count > heartbeat_giveup_count);
501     }
502     if (dead) {
503         boinc_msg_prefix(buf, sizeof(buf));
504         fputs(buf, stderr);     // don't use fprintf() here
505         if (aid.client_pid) {
506             fputs(" BOINC client no longer exists - exiting\n", stderr);
507         } else {
508             fputs(" No heartbeat from client for 30 sec - exiting\n", stderr);
509         }
510         return true;
511     }
512     return false;
513 }
514 
515 #ifndef _WIN32
516 // For multithread apps on Unix, the main process executes the following.
517 //
518 static void parallel_master(int child_pid) {
519     char buf[MSG_CHANNEL_SIZE];
520     int exit_status;
521     while (1) {
522         boinc_sleep(TIMER_PERIOD);
523         interrupt_count++;
524         if (app_client_shm) {
525             handle_heartbeat_msg();
526             if (app_client_shm->shm->process_control_request.get_msg(buf)) {
527                 if (match_tag(buf, "<suspend/>")) {
528                     kill(child_pid, SIGSTOP);
529                 } else if (match_tag(buf, "<resume/>")) {
530                     kill(child_pid, SIGCONT);
531                 } else if (match_tag(buf, "<quit/>")) {
532                     kill(child_pid, SIGKILL);
533                     exit(0);
534                 } else if (match_tag(buf, "<abort/>")) {
535                     kill(child_pid, SIGKILL);
536                     exit(EXIT_ABORTED_BY_CLIENT);
537                 }
538             }
539 
540             if (client_dead()) {
541                 kill(child_pid, SIGKILL);
542                 exit(0);
543             }
544         }
545         if (interrupt_count % TIMERS_PER_SEC) continue;
546         if (waitpid(child_pid, &exit_status, WNOHANG) == child_pid) break;
547     }
548     boinc_finish(exit_status);
549 }
550 #endif
551 
552 int boinc_init() {
553 #ifndef MSGS_FROM_FILE
554     int retval;
555     if (!diagnostics_is_initialized()) {
556         retval = boinc_init_diagnostics(BOINC_DIAG_DEFAULTS);
557         if (retval) return retval;
558     }
559 #endif
560     boinc_options_defaults(options);
561     return boinc_init_options(&options);
562 }
563 
564 int boinc_init_options(BOINC_OPTIONS* opt) {
565     int retval;
566 #ifndef _WIN32
567     if (options.multi_thread) {
568         int child_pid = fork();
569         if (child_pid) {
570             // original process - master
571             //
572             options.send_status_msgs = false;
573             retval = boinc_init_options_general(options);
574             if (retval) {
575                 kill(child_pid, SIGKILL);
576                 return retval;
577             }
578             parallel_master(child_pid);
579         }
580         // new process - slave
581         //
582         options.main_program = false;
583         options.check_heartbeat = false;
584         options.handle_process_control = false;
585         options.multi_thread = false;
586         options.multi_process = false;
587         return boinc_init_options(&options);
588     }
589 #endif
590     retval = boinc_init_options_general(*opt);
591     if (retval) return retval;
592     retval = start_timer_thread();
593     if (retval) return retval;
594 #ifndef _WIN32
595     retval = start_worker_signals();
596     if (retval) return retval;
597 #endif
598     return 0;
599 }
600 
601 int boinc_init_parallel() {
602     BOINC_OPTIONS _options;
603     boinc_options_defaults(_options);
604     _options.multi_thread = true;
605     return boinc_init_options(&_options);
606 }
607 
608 static int min_checkpoint_period() {
609     int x = (int)aid.checkpoint_period;
610     if (app_min_checkpoint_period > x) {
611         x = app_min_checkpoint_period;
612     }
613     if (x == 0) x = DEFAULT_CHECKPOINT_PERIOD;
614     return x;
615 }
616 
617 int boinc_set_min_checkpoint_period(int x) {
618     app_min_checkpoint_period = x;
619     if (x > time_until_checkpoint) {
620         time_until_checkpoint = x;
621     }
622     return 0;
623 }
624 
625 int boinc_init_options_general(BOINC_OPTIONS& opt) {
626     options = opt;
627 
628 #ifndef MSGS_FROM_FILE
629     int retval;
630     if (!diagnostics_is_initialized()) {
631         retval = boinc_init_diagnostics(BOINC_DIAG_DEFAULTS);
632         if (retval) return retval;
633     }
634 #endif
635 
636     boinc_status.no_heartbeat = false;
637     boinc_status.suspended = false;
638     boinc_status.quit_request = false;
639     boinc_status.abort_request = false;
640 
641 #ifdef MSGS_FROM_FILE
642     fout = fopen("out_msgs.txt", "w");
643     if (!fout) {
644         fprintf(stderr, "Can't open out_msgs.txt\n");
645     }
646     options.check_heartbeat = false;
647 #else
648     char buf[256];
649     if (options.main_program) {
650         // make sure we're the only app running in this slot
651         //
652         retval = file_lock.lock(LOCKFILE);
653         if (retval) {
654             // give any previous occupant a chance to timeout and exit
655             //
656             fprintf(stderr, "%s Can't acquire lockfile (%d) - waiting %ds\n",
657                 boinc_msg_prefix(buf, sizeof(buf)),
658                 retval, LOCKFILE_TIMEOUT_PERIOD
659             );
660             boinc_sleep(LOCKFILE_TIMEOUT_PERIOD);
661             retval = file_lock.lock(LOCKFILE);
662         }
663         if (retval) {
664             fprintf(stderr, "%s Can't acquire lockfile (%d) - exiting\n",
665                 boinc_msg_prefix(buf, sizeof(buf)),
666                 retval
667             );
668 #ifdef _WIN32
669             char buf2[256];
670             windows_format_error_string(GetLastError(), buf2, 256);
671             fprintf(stderr, "%s Error: %s\n", boinc_msg_prefix(buf, sizeof(buf)), buf2);
672 #endif
673             // if we can't acquire the lock file there must be
674             // another app instance running in this slot.
675             // If we exit(0), the client will keep restarting us.
676             // Instead, tell the client not to restart us for 10 min.
677             //
678             boinc_temporary_exit(600,
679                 "Waiting to acquire slot directory lock.  Another instance may be running."
680             );
681         }
682     }
683 
684     retval = boinc_parse_init_data_file();
685     if (retval) {
686         standalone = true;
687     } else {
688         retval = setup_shared_mem();
689         if (retval) {
690             fprintf(stderr,
691                 "%s Can't set up shared mem: %d. Will run in standalone mode.\n",
692                 boinc_msg_prefix(buf, sizeof(buf)), retval
693             );
694             standalone = true;
695         }
696     }
697 #endif      // MSGS_FROM_FILE
698 
699     // copy the WU CPU time to a separate var,
700     // since we may reread the structure again later.
701     //
702     initial_wu_cpu_time = aid.wu_cpu_time;
703 
704     fraction_done = -1;
705     time_until_checkpoint = min_checkpoint_period();
706     last_checkpoint_cpu_time = aid.wu_cpu_time;
707     last_wu_cpu_time = aid.wu_cpu_time;
708 
709     if (standalone) {
710         options.check_heartbeat = false;
711     }
712     heartbeat_giveup_count = interrupt_count + HEARTBEAT_GIVEUP_COUNT;
713 
714     init_mutex();
715 
716     return 0;
717 }
718 
719 int boinc_get_status(BOINC_STATUS *s) {
720     s->no_heartbeat = boinc_status.no_heartbeat;
721     s->suspended = boinc_status.suspended;
722     s->quit_request = boinc_status.quit_request;
723     s->reread_init_data_file = boinc_status.reread_init_data_file;
724     s->abort_request = boinc_status.abort_request;
725     s->working_set_size = boinc_status.working_set_size;
726     s->max_working_set_size = boinc_status.max_working_set_size;
727     s->network_suspended = boinc_status.network_suspended;
728     return 0;
729 }
730 
731 // if we have any new trickle-ups or file upload requests,
732 // send a message describing them
733 //
734 static void send_trickle_up_msg() {
735     char buf[MSG_CHANNEL_SIZE];
736     if (standalone) return;
737     safe_strcpy(buf, "");
738     if (have_new_trickle_up) {
739         safe_strcat(buf, "<have_new_trickle_up/>\n");
740     }
741     if (have_new_upload_file) {
742         safe_strcat(buf, "<have_new_upload_file/>\n");
743     }
744     if (strlen(buf)) {
745         BOINCINFO("Sending Trickle Up Message");
746         if (app_client_shm->shm->trickle_up.send_msg(buf)) {
747             have_new_trickle_up = false;
748             have_new_upload_file = false;
749         }
750     }
751 }
752 
753 // NOTE: a non-zero status tells the client that we're exiting with
754 // an "unrecoverable error", which will be reported back to server.
755 // A zero exit-status tells the client we've successfully finished the result.
756 //
757 int boinc_finish_message(int status, const char* msg, bool is_notice) {
758     char buf[256];
759     fraction_done = 1;
760     fprintf(stderr,
761         "%s called boinc_finish(%d)\n",
762         boinc_msg_prefix(buf, sizeof(buf)), status
763     );
764     finishing = true;
765     boinc_sleep(2.0);   // let the timer thread send final messages
766     boinc_disable_timer_thread = true;     // then disable it
767 
768     if (options.main_program) {
769         FILE* f = fopen(BOINC_FINISH_CALLED_FILE, "w");
770         if (f) {
771             fprintf(f, "%d\n", status);
772             if (msg) {
773                 fprintf(f, "%s\n%s\n", msg, is_notice?"notice":"");
774             }
775             fclose(f);
776         }
777     }
778 
779     boinc_exit(status);
780 
781     return 0;   // never reached
782 }
783 
784 int boinc_finish(int status) {
785     return boinc_finish_message(status, NULL, false);
786 }
787 
788 int boinc_temporary_exit(int delay, const char* reason, bool is_notice) {
789     FILE* f = fopen(TEMPORARY_EXIT_FILE, "w");
790     if (!f) {
791         return ERR_FOPEN;
792     }
793     fprintf(f, "%d\n", delay);
794     if (reason) {
795         fprintf(f, "%s\n", reason);
796         if (is_notice) {
797             fprintf(f, "notice\n");
798         }
799     }
800     fclose(f);
801     boinc_exit(0);
802     return 0;
803 }
804 
805 // unlock the lockfile and call the appropriate exit function
806 // Unix: called only from the worker thread.
807 // Win: called from the worker or timer thread.
808 //
809 // make static eventually
810 //
811 void boinc_exit(int status) {
812     int retval;
813     char buf[256];
814 
815     if (options.main_program && file_lock.locked) {
816         retval = file_lock.unlock(LOCKFILE);
817         if (retval) {
818 #ifdef _WIN32
819             windows_format_error_string(GetLastError(), buf, 256);
820             fprintf(stderr,
821                 "%s Can't unlock lockfile (%d): %s\n",
822                 boinc_msg_prefix(buf, sizeof(buf)), retval, buf
823             );
824 #else
825             fprintf(stderr,
826                 "%s Can't unlock lockfile (%d)\n",
827                 boinc_msg_prefix(buf, sizeof(buf)), retval
828             );
829             perror("file unlock failed");
830 #endif
831         }
832     }
833 
834     // kill any processes the app may have created
835     //
836     if (options.multi_process) {
837         kill_descendants();
838     }
839 
840     boinc_finish_diag();
841 
842     // various platforms have problems shutting down a process
843     // while other threads are still executing,
844     // or triggering endless exit()/atexit() loops.
845     //
846     BOINCINFO("Exit Status: %d", status);
847     fflush(NULL);
848 
849 #if defined(_WIN32)
850     // Halt all the threads and clean up.
851     TerminateProcess(GetCurrentProcess(), status);
852     // note: the above CAN return!
853     Sleep(1000);
854     DebugBreak();
855 #elif defined(__APPLE_CC__)
856     // stops endless exit()/atexit() loops.
857     _exit(status);
858 #else
859     // arrange to exit with given status even if errors happen
860     // in atexit() functions
861     //
862     set_signal_exit_code(status);
863     exit(status);
864 #endif
865 }
866 
867 void boinc_network_usage(double sent, double received) {
868     bytes_sent = sent;
869     bytes_received = received;
870 }
871 
872 int boinc_is_standalone() {
873     if (standalone) return 1;
874     return 0;
875 }
876 
877 // called from the timer thread if we need to exit,
878 // e.g. quit message from client, or client has gone away
879 //
880 // On Linux we can't exit directly from the timer thread.
881 // Set a flag telling the worker thread to exit.
882 //
883 static void exit_from_timer_thread(int status) {
884 #ifdef VERBOSE
885     char buf[256];
886     fprintf(stderr, "%s exit_from_timer_thread(%d) called\n",
887         boinc_msg_prefix(buf, sizeof(buf)), status
888     );
889 #endif
890 #ifdef _WIN32
891     // TerminateProcess() doesn't work if there are suspended threads?
892     if (boinc_status.suspended) {
893         resume_activities();
894     }
895     // this seems to work OK on Windows
896     //
897     boinc_exit(status);
898 #else
899     // but on Unix there are synchronization problems if we exit here;
900     // set a flag telling the worker thread to exit
901     //
902     worker_thread_exit_status = status;
903     worker_thread_exit_flag = true;
904 #ifdef ANDROID
905     // trigger the worker signal handler, which will call boinc_exit()
906     //
907     pthread_kill(worker_thread_handle, SIGALRM);
908 
909     // the exit should happen more or less instantly.
910     // But if we're still here after 5 sec, exit directly
911     //
912     sleep(5.0);
913     boinc_exit(status);
914 #else
915     pthread_exit(NULL);
916 #endif
917 #endif
918 }
919 
920 // parse the init data file.
921 // This is done at startup, and also if a "reread prefs" message is received
922 //
923 int boinc_parse_init_data_file() {
924     FILE* f;
925     int retval;
926     char buf[256];
927 
928     if (aid.project_preferences) {
929         free(aid.project_preferences);
930         aid.project_preferences = NULL;
931     }
932     aid.clear();
933     aid.checkpoint_period = DEFAULT_CHECKPOINT_PERIOD;
934 
935     if (!boinc_file_exists(INIT_DATA_FILE)) {
936         fprintf(stderr,
937             "%s Can't open init data file - running in standalone mode\n",
938             boinc_msg_prefix(buf, sizeof(buf))
939         );
940         return ERR_FOPEN;
941     }
942     f = boinc_fopen(INIT_DATA_FILE, "r");
943     retval = parse_init_data_file(f, aid);
944     fclose(f);
945     if (retval) {
946         fprintf(stderr,
947             "%s Can't parse init data file - running in standalone mode\n",
948             boinc_msg_prefix(buf, sizeof(buf))
949         );
950         return retval;
951     }
952     return 0;
953 }
954 
955 // used by wrappers
956 //
957 int boinc_report_app_status_aux(
958     double cpu_time,
959     double checkpoint_cpu_time,
960     double _fraction_done,
961     int other_pid,
962     double _bytes_sent,
963     double _bytes_received
964 ) {
965     char msg_buf[MSG_CHANNEL_SIZE], buf[1024];
966     if (standalone) return 0;
967 
968     sprintf(msg_buf,
969         "<current_cpu_time>%e</current_cpu_time>\n"
970         "<checkpoint_cpu_time>%e</checkpoint_cpu_time>\n"
971         "<fraction_done>%e</fraction_done>\n",
972         cpu_time,
973         checkpoint_cpu_time,
974         _fraction_done
975     );
976     if (other_pid) {
977         sprintf(buf, "<other_pid>%d</other_pid>\n", other_pid);
978         safe_strcat(msg_buf, buf);
979     }
980     if (_bytes_sent) {
981         sprintf(buf, "<bytes_sent>%f</bytes_sent>\n", _bytes_sent);
982         safe_strcat(msg_buf, buf);
983     }
984     if (_bytes_received) {
985         sprintf(buf, "<bytes_received>%f</bytes_received>\n", _bytes_received);
986         safe_strcat(msg_buf, buf);
987     }
988 #ifdef MSGS_FROM_FILE
989     if (fout) {
990         fputs(msg_buf, fout);
991     }
992     return 0;
993 #else
994     if (app_client_shm->shm->app_status.send_msg(msg_buf)) {
995         return 0;
996     }
997     return ERR_WRITE;
998 #endif
999 }
1000 
1001 int boinc_report_app_status(
1002     double cpu_time,
1003     double checkpoint_cpu_time,
1004     double _fraction_done
1005 ){
1006     return boinc_report_app_status_aux(
1007         cpu_time, checkpoint_cpu_time, _fraction_done, 0, 0, 0
1008     );
1009 }
1010 
1011 int boinc_get_init_data_p(APP_INIT_DATA* app_init_data) {
1012     *app_init_data = aid;
1013     return 0;
1014 }
1015 
1016 int boinc_get_init_data(APP_INIT_DATA& app_init_data) {
1017     app_init_data = aid;
1018     return 0;
1019 }
1020 
1021 int boinc_wu_cpu_time(double& cpu_t) {
1022     cpu_t = last_wu_cpu_time;
1023     return 0;
1024 }
1025 
1026 // Suspend this job.
1027 // Can be called from either timer or worker thread.
1028 //
1029 static int suspend_activities(bool called_from_worker) {
1030 #ifdef VERBOSE
1031     char log_buf[256];
1032     fprintf(stderr, "%s suspend_activities() called from %s\n",
1033         boinc_msg_prefix(log_buf, sizeof(log_buf)),
1034         called_from_worker?"worker thread":"timer thread"
1035     );
1036 #endif
1037 #ifdef _WIN32
1038     static vector<int> pids;
1039     if (options.multi_thread) {
1040         if (pids.size() == 0) {
1041             pids.push_back(GetCurrentProcessId());
1042         }
1043         suspend_or_resume_threads(pids, timer_thread_id, false, true);
1044     } else {
1045         SuspendThread(worker_thread_handle);
1046     }
1047 #else
1048     if (options.multi_process) {
1049         suspend_or_resume_descendants(false);
1050     }
1051     // if called from worker thread, sleep until suspension is over
1052     // if called from time thread, don't need to do anything;
1053     // suspension is done by signal handler in worker thread
1054     //
1055     if (called_from_worker) {
1056         while (boinc_status.suspended) {
1057             sleep(1);
1058         }
1059     }
1060 #endif
1061     return 0;
1062 }
1063 
1064 int resume_activities() {
1065 #ifdef VERBOSE
1066     char log_buf[256];
1067     fprintf(stderr, "%s resume_activities()\n",
1068         boinc_msg_prefix(log_buf, sizeof(log_buf))
1069     );
1070 #endif
1071 #ifdef _WIN32
1072     static vector<int> pids;
1073     if (options.multi_thread) {
1074         if (pids.size() == 0) pids.push_back(GetCurrentProcessId());
1075         suspend_or_resume_threads(pids, timer_thread_id, true, true);
1076     } else {
1077         ResumeThread(worker_thread_handle);
1078     }
1079 #else
1080     if (options.multi_process) {
1081         suspend_or_resume_descendants(true);
1082     }
1083 #endif
1084     return 0;
1085 }
1086 
1087 #ifndef MSGS_FROM_FILE
1088 static void handle_upload_file_status() {
1089     char path[MAXPATHLEN], buf[256], log_name[256], *p, log_buf[256];
1090     std::string filename;
1091     int status;
1092 
1093     relative_to_absolute("", path);
1094     DirScanner dirscan(path);
1095     while (dirscan.scan(filename)) {
1096         strlcpy(buf, filename.c_str(), sizeof(buf));
1097         if (strstr(buf, UPLOAD_FILE_STATUS_PREFIX) != buf) continue;
1098         strlcpy(log_name, buf+strlen(UPLOAD_FILE_STATUS_PREFIX), sizeof(log_name));
1099         FILE* f = boinc_fopen(filename.c_str(), "r");
1100         if (!f) {
1101             fprintf(stderr,
1102                 "%s handle_file_upload_status: can't open %s\n",
1103                 boinc_msg_prefix(buf, sizeof(buf)), filename.c_str()
1104             );
1105             continue;
1106         }
1107         p = fgets(buf, sizeof(buf), f);
1108         fclose(f);
1109         if (p && parse_int(buf, "<status>", status)) {
1110             UPLOAD_FILE_STATUS uf;
1111             uf.name = std::string(log_name);
1112             uf.status = status;
1113             upload_file_status.push_back(uf);
1114         } else {
1115             fprintf(stderr, "%s handle_upload_file_status: can't parse %s\n",
1116                 boinc_msg_prefix(log_buf, sizeof(log_buf)), buf
1117             );
1118         }
1119     }
1120 }
1121 
1122 // handle trickle and file upload status messages
1123 //
1124 static void handle_trickle_down_msg() {
1125     char buf[MSG_CHANNEL_SIZE];
1126     if (app_client_shm->shm->trickle_down.get_msg(buf)) {
1127         BOINCINFO("Received Trickle Down Message");
1128         if (match_tag(buf, "<have_trickle_down/>")) {
1129             have_trickle_down = true;
1130         }
1131         if (match_tag(buf, "<upload_file_status/>")) {
1132             handle_upload_file_status();
1133         }
1134     }
1135 }
1136 #endif
1137 
// This flag is set if we get a suspend request while in a critical section,
// and options.direct_process_action is set.
// As soon as we're not in the critical section we'll do the suspend.
//
static bool suspend_request = false;
1143 
// runs in timer thread
//
// Fetch one process-control message (from msgs.txt if MSGS_FROM_FILE
// is defined, otherwise from the process_control_request channel)
// and act on the tags it contains:
// suspend, resume, quit, abort, reread_app_info, network_available.
// Returns without doing anything if there is no message.
// The message is processed while holding the mutex.
//
static void handle_process_control_msg() {
    char buf[MSG_CHANNEL_SIZE];
#ifdef MSGS_FROM_FILE
    // start with an empty message so that a missing file
    // leads to the early return below
    strcpy(buf, "");
    if (boinc_file_exists("msgs.txt")) {
        FILE* f = fopen("msgs.txt", "r");
        if (!f) {
            fprintf(stderr, "msgs.txt exists but can't open it\n");
            return;
        }
        // only the first line is read; the file is then deleted
        fgets(buf, sizeof(buf), f);
        fclose(f);
        unlink("msgs.txt");
    }
    if (!strlen(buf)) {
        return;
    }
#else
    if (!app_client_shm->shm->process_control_request.get_msg(buf)) {
        return;
    }
#endif

    // here if we have a message to process

    acquire_mutex();
#ifdef VERBOSE
    char log_buf[256];
    fprintf(stderr, "%s got process control msg %s\n",
        boinc_msg_prefix(log_buf, sizeof(log_buf)), buf
    );
#endif
    if (match_tag(buf, "<suspend/>")) {
        BOINCINFO("Received suspend message");
        if (options.direct_process_action) {
            if (in_critical_section) {
                // defer: boinc_end_critical_section() acts on this flag
                suspend_request = true;
            } else {
                boinc_status.suspended = true;
                suspend_request = false;
                suspend_activities(false);
            }
        } else {
            // not acting directly: only record the state
            boinc_status.suspended = true;
        }
    }

    if (match_tag(buf, "<resume/>")) {
        BOINCINFO("Received resume message");
        if (options.direct_process_action) {
            if (boinc_status.suspended) {
                resume_activities();
            } else if (suspend_request) {
                // cancel a suspend that was deferred and not yet performed
                suspend_request = false;
            }
        }
        boinc_status.suspended = false;
    }

    // quit_request is sticky: once set, this branch is taken again
    // on every later message until we're able to exit
    //
    if (boinc_status.quit_request || match_tag(buf, "<quit/>")) {
        BOINCINFO("Received quit message");
        boinc_status.quit_request = true;
        if (!in_critical_section && options.direct_process_action) {
            release_mutex();
                // we hold mutex, and it's possible that worker
                // is waiting on it, so release it
            // NOTE(review): presumably this call does not return;
            // otherwise the mutex would be released a second time below.
            exit_from_timer_thread(0);
        }
    }
    // abort_request is sticky in the same way as quit_request
    //
    if (boinc_status.abort_request || match_tag(buf, "<abort/>")) {
        BOINCINFO("Received abort message");
        boinc_status.abort_request = true;
        if (!in_critical_section && options.direct_process_action) {
            diagnostics_set_aborted_via_gui();
#if   defined(_WIN32)
            // Cause a controlled assert and dump the callstacks.
            DebugBreak();
#elif defined(__APPLE__)
            PrintBacktrace();
#endif
            // release the mutex before exiting, as in the quit case
            release_mutex();
            exit_from_timer_thread(EXIT_ABORTED_BY_CLIENT);
        }
    }
    if (match_tag(buf, "<reread_app_info/>")) {
        boinc_status.reread_init_data_file = true;
    }
    if (match_tag(buf, "<network_available/>")) {
        have_network = 1;
    }
#ifdef ANDROID
    // Trigger call to worker_signal_handler() in the worker thread
    //
    pthread_kill(worker_thread_handle, SIGALRM);
#endif
    release_mutex();
}
1243 
// timer handler; called every 0.1 sec in the timer thread
//
// On every call: count the tick and handle messages from the client.
// Once every TIMERS_PER_SEC calls, also do the "slow" actions:
// checkpoint countdown, heartbeat check, status report,
// trickle-up notification, app timer callback, graphics messages.
// Does nothing if boinc_disable_timer_thread is set.
//
static void timer_handler() {
    char buf[512];
//#ifdef VERBOSE
#if 0
    fprintf(stderr,
        "%s timer handler: disabled %s; in critical section %s; finishing %s\n",
        boinc_msg_prefix(buf, sizeof(buf)),
        boinc_disable_timer_thread?"yes":"no",
        in_critical_section?"yes":"no",
        finishing?"yes":"no"
    );
#endif
    if (boinc_disable_timer_thread) {
        return;
    }
    // The app is finishing: send one last progress update
    // (if status messages are enabled), then disable further timer work.
    //
    if (finishing) {
        if (options.send_status_msgs) {
            double cur_cpu = boinc_worker_thread_cpu_time();
            last_wu_cpu_time = cur_cpu + initial_wu_cpu_time;
            update_app_progress(last_wu_cpu_time, last_checkpoint_cpu_time);
        }
        boinc_disable_timer_thread = true;
        return;
    }
    // interrupt_count counts every tick;
    // running_interrupt_count counts only ticks while not suspended
    // (it is the basis of boinc_elapsed_time())
    //
    interrupt_count++;
    if (!boinc_status.suspended) {
        running_interrupt_count++;
    }
    // handle messages from the client
    //
#ifdef MSGS_FROM_FILE
    handle_process_control_msg();
#else
    if (app_client_shm) {
        if (options.check_heartbeat) {
            handle_heartbeat_msg();
        }
        if (handle_trickle_downs) {
            handle_trickle_down_msg();
        }
        if (options.handle_process_control) {
            handle_process_control_msg();
        }
    }
#endif
    // the rest runs only on ticks that are a multiple of TIMERS_PER_SEC
    //
    if (interrupt_count % TIMERS_PER_SEC) return;

#ifdef VERBOSE
    fprintf(stderr, "%s 1 sec elapsed - doing slow actions\n", boinc_msg_prefix(buf, sizeof(buf)));
#endif

    // here if we're at a one-second boundary; do slow stuff
    //

    // count down to the next checkpoint opportunity
    //
    if (!ready_to_checkpoint) {
        time_until_checkpoint -= 1;
        if (time_until_checkpoint <= 0) {
            ready_to_checkpoint = true;
        }
    }

    // see if the client has died, which means we need to die too
    // (unless we're in a critical section)
    //
    if (options.check_heartbeat) {
        if (client_dead()) {
            fprintf(stderr, "%s timer handler: client dead, exiting\n",
                boinc_msg_prefix(buf, sizeof(buf))
            );
            if (options.direct_process_action && !in_critical_section) {
                exit_from_timer_thread(0);
            } else {
                // can't exit now; boinc_end_critical_section()
                // checks this flag when direct_process_action is set
                boinc_status.no_heartbeat = true;
            }
        }
    }

    // don't bother reporting CPU time etc. if we're suspended
    //
    if (options.send_status_msgs && !boinc_status.suspended) {
        double cur_cpu = boinc_worker_thread_cpu_time();
        last_wu_cpu_time = cur_cpu + initial_wu_cpu_time;
        update_app_progress(last_wu_cpu_time, last_checkpoint_cpu_time);
    }

    if (have_new_trickle_up || have_new_upload_file) {
        send_trickle_up_msg();
    }
    if (timer_callback) {
        timer_callback();
    }

    // send graphics-related messages
    // Each is sent only when the graphics_reply channel has no pending
    // message; otherwise the flag stays set and we retry on a later call.
    // NOTE(review): app_client_shm is dereferenced here without the NULL
    // check used above; the flags are set only when not standalone -
    // confirm that this implies app_client_shm is non-NULL.
    // NOTE(review): sprintf() into buf[512]; the sizes of web_graphics_url
    // and remote_desktop_addr are not visible here - confirm they fit.
    //
    if (send_web_graphics_url && !app_client_shm->shm->graphics_reply.has_msg()) {
        sprintf(buf,
            "<web_graphics_url>%s</web_graphics_url>",
            web_graphics_url
        );
        app_client_shm->shm->graphics_reply.send_msg(buf);
        send_web_graphics_url = false;
    }
    if (send_remote_desktop_addr && !app_client_shm->shm->graphics_reply.has_msg()) {
        sprintf(buf,
            "<remote_desktop_addr>%s</remote_desktop_addr>",
            remote_desktop_addr
        );
        app_client_shm->shm->graphics_reply.send_msg(buf);
        send_remote_desktop_addr = false;
    }
}
1357 
1358 #ifdef _WIN32
1359 
1360 DWORD WINAPI timer_thread(void *) {
1361     while (1) {
1362         Sleep((int)(TIMER_PERIOD*1000));
1363         timer_handler();
1364 
1365         // poor man's CPU time accounting for Win9x
1366         //
1367         if (!boinc_status.suspended) {
1368             nrunning_ticks++;
1369         }
1370     }
1371     return 0;
1372 }
1373 
1374 #else
1375 
1376 static void* timer_thread(void*) {
1377     block_sigalrm();
1378     while(1) {
1379         boinc_sleep(TIMER_PERIOD);
1380         timer_handler();
1381     }
1382     return 0;
1383 }
1384 
// This SIGALRM handler gets handled only by the worker thread.
// It gets CPU time and implements sleeping.
// It must call only signal-safe functions, and must not do FP math
//
// It also performs the exit requested by exit_from_timer_thread()
// (via worker_thread_exit_flag / worker_thread_exit_status).
//
static void worker_signal_handler(int) {
#ifdef ANDROID
    // per-thread signal masking doesn't work on pre-4.1 Android.
    // If we're handling this signal in the timer thread,
    // send signal explicitly to worker thread.
    //
    if (pthread_self() != worker_thread_handle) {
#ifdef VERBOSE
        fprintf(stderr, "worker signal handler: called in timer thread; forwarding to worker\n");
#endif
        pthread_kill(worker_thread_handle, SIGALRM);
        return;
    }
#endif
#ifndef GETRUSAGE_IN_TIMER_THREAD
    // sample resource usage here unless the build does it in the timer thread
    getrusage(RUSAGE_SELF, &worker_thread_ru);
#endif
    // the timer thread asked us to exit
    if (worker_thread_exit_flag) {
#ifdef VERBOSE
        fprintf(stderr, "worker signal handler: exiting\n");
#endif
        boinc_exit(worker_thread_exit_status);
    }
    // Implement suspension: keep the worker thread inside this handler,
    // waking once a second, until it's no longer suspended.
    // Suspension is not enforced while in a critical section.
    //
    if (options.direct_process_action) {
        while (boinc_status.suspended && in_critical_section==0) {
#ifdef VERBOSE
            fprintf(stderr, "worker signal handler: sleeping\n");
#endif
            sleep(1);   // don't use boinc_sleep() because it does FP math
        }
    }
}
1421 
1422 #endif
1423 
1424 
1425 // Called from the worker thread; create the timer thread
1426 //
1427 int start_timer_thread() {
1428     char buf[256];
1429 
1430 #ifdef _WIN32
1431 
1432     // get the worker thread handle
1433     //
1434     DuplicateHandle(
1435         GetCurrentProcess(),
1436         GetCurrentThread(),
1437         GetCurrentProcess(),
1438         &worker_thread_handle,
1439         0,
1440         FALSE,
1441         DUPLICATE_SAME_ACCESS
1442     );
1443 
1444     // Create the timer thread
1445     //
1446     if (!CreateThread(NULL, 0, timer_thread, 0, 0, &timer_thread_id)) {
1447         fprintf(stderr,
1448             "%s start_timer_thread(): CreateThread() failed, errno %d\n",
1449             boinc_msg_prefix(buf, sizeof(buf)), errno
1450         );
1451         return errno;
1452     }
1453 
1454     if (!options.normal_thread_priority) {
1455         // lower our (worker thread) priority
1456         //
1457         SetThreadPriority(worker_thread_handle, THREAD_PRIORITY_IDLE);
1458     }
1459 #else
1460     worker_thread_handle = pthread_self();
1461     pthread_attr_t thread_attrs;
1462     pthread_attr_init(&thread_attrs);
1463     pthread_attr_setstacksize(&thread_attrs, 32768);
1464     int retval = pthread_create(&timer_thread_handle, &thread_attrs, timer_thread, NULL);
1465     if (retval) {
1466         fprintf(stderr,
1467             "%s start_timer_thread(): pthread_create(): %d",
1468             boinc_msg_prefix(buf, sizeof(buf)), retval
1469         );
1470         return retval;
1471     }
1472 #endif
1473     return 0;
1474 }
1475 
1476 #ifndef _WIN32
1477 
1478 // called in the worker thread.
1479 // set up a handler for SIGALRM.
1480 // If Android, we'll get signals from the time thread.
1481 // otherwise, set an interval timer to deliver signals
1482 //
1483 static int start_worker_signals() {
1484     int retval;
1485     struct sigaction sa;
1486     memset(&sa, 0, sizeof(sa));
1487     sa.sa_handler = worker_signal_handler;
1488     sa.sa_flags = SA_RESTART;
1489     sigemptyset(&sa.sa_mask);
1490     retval = sigaction(SIGALRM, &sa, NULL);
1491     if (retval) {
1492         perror("boinc start_worker_signals(): sigaction failed");
1493         return retval;
1494     }
1495 #ifndef ANDROID
1496     itimerval value;
1497     value.it_value.tv_sec = 0;
1498     value.it_value.tv_usec = (int)(TIMER_PERIOD*1e6);
1499     value.it_interval = value.it_value;
1500     retval = setitimer(ITIMER_REAL, &value, NULL);
1501     if (retval) {
1502         perror("boinc start_worker_thread(): setitimer failed");
1503         return retval;
1504     }
1505 #endif
1506     return 0;
1507 }
1508 #endif
1509 
1510 int boinc_send_trickle_up(char* variety, char* p) {
1511     FILE* f = boinc_fopen(TRICKLE_UP_FILENAME, "wb");
1512     if (!f) return ERR_FOPEN;
1513     fprintf(f, "<variety>%s</variety>\n", variety);
1514     size_t n = 1;
1515     if (strlen(p)) {
1516         n = fwrite(p, strlen(p), 1, f);
1517     }
1518     fclose(f);
1519     if (n != 1) return ERR_WRITE;
1520     have_new_trickle_up = true;
1521     return 0;
1522 }
1523 
1524 int boinc_time_to_checkpoint() {
1525     if (ready_to_checkpoint) {
1526         boinc_begin_critical_section();
1527         return 1;
1528     }
1529     return 0;
1530 }
1531 
1532 int boinc_checkpoint_completed() {
1533     double cur_cpu;
1534     cur_cpu = boinc_worker_thread_cpu_time();
1535     last_wu_cpu_time = cur_cpu + aid.wu_cpu_time;
1536     last_checkpoint_cpu_time = last_wu_cpu_time;
1537     time_until_checkpoint = min_checkpoint_period();
1538     boinc_end_critical_section();
1539     ready_to_checkpoint = false;
1540 
1541     return 0;
1542 }
1543 
1544 void boinc_begin_critical_section() {
1545 #ifdef VERBOSE
1546     char buf[256];
1547     fprintf(stderr,
1548         "%s begin_critical_section\n",
1549         boinc_msg_prefix(buf, sizeof(buf))
1550     );
1551 #endif
1552     in_critical_section++;
1553 }
1554 
// Leave a critical section (decrement the nesting counter).
// When the outermost section ends and options.direct_process_action
// is set, carry out any exit or suspend request that had to be
// deferred while we were in the critical section.
//
void boinc_end_critical_section() {
#ifdef VERBOSE
    char buf[256];
    fprintf(stderr,
        "%s end_critical_section\n",
        boinc_msg_prefix(buf, sizeof(buf))
    );
#endif
    in_critical_section--;
    if (in_critical_section < 0) {
        in_critical_section = 0;        // just in case
    }

    // still nested inside another critical section: nothing more to do
    if (in_critical_section) return;

    // We're out of the critical section.
    // See if we got suspend/quit/abort while in critical section,
    // and handle them here.
    //
    if (options.direct_process_action) {
        // client heartbeat lost, or quit requested: exit with status 0
        if (boinc_status.no_heartbeat) {
            boinc_exit(0);
        }
        if (boinc_status.quit_request) {
            boinc_exit(0);
        }
        if (boinc_status.abort_request) {
            boinc_exit(EXIT_ABORTED_BY_CLIENT);
        }
        // suspend_request is also written by the timer thread
        // (handle_process_control_msg), so check it under the mutex
        //
        acquire_mutex();
        if (suspend_request) {
            suspend_request = false;
            boinc_status.suspended = true;
            // release the mutex before suspending:
            // on non-Windows, suspend_activities(true) sleeps
            // until the suspension is over
            release_mutex();
            suspend_activities(true);
        } else {
            release_mutex();
        }
    }
}
1595 
1596 int boinc_fraction_done(double x) {
1597     fraction_done = x;
1598     return 0;
1599 }
1600 
1601 int boinc_receive_trickle_down(char* buf, int len) {
1602     std::string filename;
1603     char path[MAXPATHLEN];
1604 
1605     handle_trickle_downs = true;
1606 
1607     if (have_trickle_down) {
1608         relative_to_absolute("", path);
1609         DirScanner dirscan(path);
1610         while (dirscan.scan(filename)) {
1611             if (strstr(filename.c_str(), "trickle_down")) {
1612                 strlcpy(buf, filename.c_str(), len);
1613                 return true;
1614             }
1615         }
1616         have_trickle_down = false;
1617     }
1618     return false;
1619 }
1620 
1621 int boinc_upload_file(std::string& name) {
1622     char buf[256];
1623     std::string pname;
1624     int retval;
1625 
1626     retval = boinc_resolve_filename_s(name.c_str(), pname);
1627     if (retval) return retval;
1628     sprintf(buf, "%s%s", UPLOAD_FILE_REQ_PREFIX, name.c_str());
1629     FILE* f = boinc_fopen(buf, "w");
1630     if (!f) return ERR_FOPEN;
1631     have_new_upload_file = true;
1632     fclose(f);
1633 
1634     // file upload status messages are on same channel as
1635     // trickle down messages, so listen to that channel
1636     //
1637     handle_trickle_downs = true;
1638 
1639     return 0;
1640 }
1641 
1642 int boinc_upload_status(std::string& name) {
1643     for (unsigned int i=0; i<upload_file_status.size(); i++) {
1644         UPLOAD_FILE_STATUS& ufs = upload_file_status[i];
1645         if (ufs.name == name) {
1646             return ufs.status;
1647         }
1648     }
1649     return ERR_NOT_FOUND;
1650 }
1651 
1652 void boinc_need_network() {
1653     want_network = 1;
1654     have_network = 0;
1655 }
1656 
1657 int boinc_network_poll() {
1658     return have_network?0:1;
1659 }
1660 
1661 void boinc_network_done() {
1662     want_network = 0;
1663 }
1664 
1665 #ifndef _WIN32
// Block SIGALRM in the calling thread,
// so that the worker thread will be forced to handle it.
// Other signals' blocked/unblocked state is left unchanged.
//
static void block_sigalrm() {
    sigset_t alarm_only;
    sigemptyset(&alarm_only);
    sigaddset(&alarm_only, SIGALRM);
    pthread_sigmask(SIG_BLOCK, &alarm_only, NULL);
}
1674 #endif
1675 
1676 void boinc_register_timer_callback(FUNC_PTR p) {
1677     timer_callback = p;
1678 }
1679 
1680 double boinc_get_fraction_done() {
1681     return fraction_done;
1682 }
1683 
1684 double boinc_elapsed_time() {
1685     return running_interrupt_count*TIMER_PERIOD;
1686 }
1687 
1688 void boinc_web_graphics_url(char* url) {
1689     if (standalone) return;
1690     strlcpy(web_graphics_url, url, sizeof(web_graphics_url));
1691     send_web_graphics_url = true;
1692 }
1693 
1694 void boinc_remote_desktop_addr(char* addr) {
1695     if (standalone) return;
1696     strlcpy(remote_desktop_addr, addr, sizeof(remote_desktop_addr));
1697     send_remote_desktop_addr = true;
1698 }
1699