1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
17
18 // The BOINC API and runtime system.
19 //
20 // Notes:
21 // 1) Thread structure:
22 // Sequential apps
23 // Unix
24 // Suspend/resume have to be done in the worker thread,
25 // so we use a 10 Hz SIGALRM signal handler.
26 // Also get CPU time (getrusage()) in the signal handler.
27 // Note: many library functions and system calls
28 // are not "asynch signal safe": see, e.g.
29 // http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html#tag_02_04_03
30 // (e.g. sprintf() in a signal handler hangs Mac OS X).
31 // Can't do floating-point math because FP regs not saved.
32 // So we do as little as possible in the signal handler,
33 // and do the rest in a separate "timer thread".
34 // - send status and graphics messages to client
35 // - handle messages from client
36 // - set ready-to-checkpoint flag
37 // - check heartbeat
38 // - call app-defined timer callback function
39 // Mac: similar to Linux,
40 // but getrusage() in the worker signal handler causes crashes,
41 // so do it in the timer thread (GETRUSAGE_IN_TIMER_THREAD)
42 // TODO: why not do this on Linux too?
43 // Android: similar to Linux,
44 // but setitimer() causes crashes on some Android versions,
45 // so instead of using a periodic signal,
46 // have the timer thread send SIGALRM signals to the worker thread
47 // every .1 sec.
48 // TODO: for uniformity should we do this on Linux as well?
49 // Win
50 // the timer thread does everything
51 // Multi-thread apps:
52 // Unix:
53 // fork
54 // original process runs timer loop:
55 // handle suspend/resume/quit, heartbeat (use signals)
56 // new process call boinc_init_options() with flags to
57 // send status messages and handle checkpoint stuff,
58 // and returns from boinc_init_parallel()
59 // NOTE: THIS DOESN'T RESPECT CRITICAL SECTIONS.
60 // NEED TO MASK SIGNALS IN CHILD DURING CRITICAL SECTIONS
61 // Win:
62 // like sequential case, except suspend/resume must enumerate
63 // all threads (except timer) and suspend/resume them all
64 //
65 // 2) All variables that are accessed by two threads (i.e. worker and timer)
66 // MUST be declared volatile.
67 //
68 // 3) For compatibility with C, we use int instead of bool various places
69 //
70 // 4) We must periodically check that the client is still alive and exit if not.
71 // Originally this was done using heartbeat msgs from client.
72 // This is unreliable, e.g. if the client is blocked for a long time.
73 // As of Oct 11 2012 we use a different mechanism:
74 // the client passes its PID and we periodically check whether it exists.
75 // But we need to support the heartbeat mechanism also for compatibility.
76 //
77 // Terminology:
78 // The processing of a result can be divided
79 // into multiple "episodes" (executions of the app),
80 // each of which resumes from the checkpointed state of the previous episode.
81 // Unless otherwise noted, "CPU time" refers to the sum over all episodes
82 // (not counting the part after the last checkpoint in an episode).
83
84
85 #if defined(_WIN32) && !defined(__STDWX_H__) && !defined(_BOINC_WIN_) && !defined(_AFX_STDAFX_H_)
86 #include "boinc_win.h"
87 #endif
88
89 #ifdef _WIN32
90 #include "version.h"
91 #include "win_util.h"
92 #else
93 #include "config.h"
94 #include <cstdlib>
95 #include <cstring>
96 #include <cstdio>
97 #include <cstdarg>
98 #include <sys/types.h>
99 #include <errno.h>
100 #include <unistd.h>
101 #include <sys/time.h>
102 #include <sys/resource.h>
103 #include <sys/wait.h>
104 #include <pthread.h>
105 #include <vector>
106 #ifndef __EMX__
107 #include <sched.h>
108 #endif
109 #endif
110
111 #include "app_ipc.h"
112 #include "common_defs.h"
113 #include "diagnostics.h"
114 #include "error_numbers.h"
115 #include "filesys.h"
116 #include "mem_usage.h"
117 #include "parse.h"
118 #include "proc_control.h"
119 #include "shmem.h"
120 #include "str_replace.h"
121 #include "str_util.h"
122 #include "util.h"
123
124 #include "boinc_api.h"
125
126 using std::vector;
127
128 //#define VERBOSE
129 // enable a bunch of fprintfs to stderr
130
131 //#define MSGS_FROM_FILE
132 // get messages from a file "msgs.txt" instead of shared mem
133 // write messages to a file "out_msgs.txt" instead of shared mem
134
135 //#define ANDROID
136 // use the Android thread/signal logic, which works on Linux too
137
138 #ifdef __APPLE__
139 #include "mac_backtrace.h"
140 #endif
141 #if defined(__APPLE__) || defined(ANDROID)
142 #define GETRUSAGE_IN_TIMER_THREAD
143 // call getrusage() in the timer thread,
144 // rather than in the worker thread's signal handler
145 // (which can cause crashes on Mac)
146 // If you want, you can set this for Linux too:
147 // CPPFLAGS=-DGETRUSAGE_IN_TIMER_THREAD
148 #endif
149
150 const char* api_version = "API_VERSION_" PACKAGE_VERSION;
151 static APP_INIT_DATA aid;
152 static FILE_LOCK file_lock;
153 APP_CLIENT_SHM* app_client_shm = 0;
154 static volatile int time_until_checkpoint;
155 // time until enable checkpoint
156 static volatile double fraction_done;
157 static volatile double last_checkpoint_cpu_time;
158 static volatile bool ready_to_checkpoint = false;
159 static volatile int in_critical_section = 0;
160 static volatile double last_wu_cpu_time;
161 static volatile bool standalone = false;
162 static volatile double initial_wu_cpu_time;
163 static volatile bool have_new_trickle_up = false;
164 static volatile bool have_trickle_down = true;
165 // set if the client notified us of a trickle-down.
166 // init to true so the first call to boinc_receive_trickle_down()
167 // will scan the slot dir for old trickle-down files
168 static volatile bool handle_trickle_downs = false;
169 // whether we should check for notifications of trickle_downs
170 // and file upload status.
171 // set by boinc_receive_trickle_down() and boinc_upload_file().
172 static volatile int heartbeat_giveup_count;
173 // interrupt count value at which to give up on client
174 #ifdef _WIN32
175 static volatile int nrunning_ticks = 0;
176 #endif
177 static volatile int interrupt_count = 0;
178 // number of timer interrupts
179 // used to measure elapsed time in a way that's
180 // not affected by user changing system clock,
181 // and doesn't have big jump after hibernation
182 static volatile int running_interrupt_count = 0;
183 // number of timer interrupts while not suspended.
184 // Used to compute elapsed time
185 static volatile bool finishing;
186 // used for worker/timer synch during boinc_finish();
187 static int want_network = 0;
188 static int have_network = 1;
189 static double bytes_sent = 0;
190 static double bytes_received = 0;
191 bool boinc_disable_timer_thread = false;
192 // simulate unresponsive app by setting to true (debugging)
193 static FUNC_PTR timer_callback = 0;
194 char web_graphics_url[256];
195 bool send_web_graphics_url = false;
196 char remote_desktop_addr[256];
197 bool send_remote_desktop_addr = false;
198 int app_min_checkpoint_period = 0;
199 // min checkpoint period requested by app
200
// Sleep interval of the timer thread, in seconds.
// Bounds the rate at which messages from the client are handled.
// Unix: also the period of worker-thread timer interrupts.
#define TIMER_PERIOD 0.1

// Reciprocal of TIMER_PERIOD.
// Determines the resolution of fraction done and CPU time reporting
// to the client, and of checkpoint enabling.
#define TIMERS_PER_SEC 10

// Quit if no heartbeat arrives from the client for this long:
// expressed in seconds, and as a number of timer interrupts.
#define HEARTBEAT_GIVEUP_SECS 30
#define HEARTBEAT_GIVEUP_COUNT (HEARTBEAT_GIVEUP_SECS*TIMERS_PER_SEC)

// Quit if we cannot acquire the slot lock file
// within this many seconds after startup.
#define LOCKFILE_TIMEOUT_PERIOD 35
214
// Per-platform handles for the worker and timer threads.
#ifdef _WIN32
static HANDLE hSharedMem;
DWORD timer_thread_id;
HANDLE worker_thread_handle;
    // used to suspend worker thread, and to measure its CPU time
#else
// The timer thread uses the following pair to tell
// the worker thread to exit (and with what status).
static volatile bool worker_thread_exit_flag = false;
static volatile int worker_thread_exit_status;
static pthread_t timer_thread_handle;
static pthread_t worker_thread_handle;
#ifndef GETRUSAGE_IN_TIMER_THREAD
static struct rusage worker_thread_ru;
#endif
#endif
231
232 static BOINC_OPTIONS options;
233 volatile BOINC_STATUS boinc_status;
234
235 #ifdef MSGS_FROM_FILE
236 static FILE* fout;
237 #endif
238
239 // vars related to intermediate file upload
240 struct UPLOAD_FILE_STATUS {
241 std::string name;
242 int status;
243 };
244 static bool have_new_upload_file;
245 static std::vector<UPLOAD_FILE_STATUS> upload_file_status;
246
247 static int resume_activities();
248 static void boinc_exit(int);
249 static void block_sigalrm();
250 static int start_worker_signals();
251
boinc_msg_prefix(char * sbuf,int len)252 char* boinc_msg_prefix(char* sbuf, int len) {
253 #ifdef ANDROID
254 // the time stuff crashes on Android if in a signal handler
255 //
256 sbuf[0] = 0;
257 #else
258 char buf[256];
259 struct tm tm;
260 struct tm *tmp = &tm;
261 int n;
262
263 time_t x = time(0);
264 if (x == -1) {
265 strlcpy(sbuf, "time() failed", len);
266 return sbuf;
267 }
268 #ifdef _WIN32
269 #ifdef __MINGW32__
270 if ((tmp = localtime(&x)) == NULL) {
271 #else
272 if (localtime_s(&tm, &x) == EINVAL) {
273 #endif
274 #else
275 if (localtime_r(&x, &tm) == NULL) {
276 #endif
277 strlcpy(sbuf, "localtime() failed", len);
278 return sbuf;
279 }
280 if (strftime(buf, sizeof(buf)-1, "%H:%M:%S", tmp) == 0) {
281 strlcpy(sbuf, "strftime() failed", len);
282 return sbuf;
283 }
284 #ifdef _WIN32
285 n = _snprintf(sbuf, len, "%s (%d):", buf, GetCurrentProcessId());
286 #else
287 n = snprintf(sbuf, len, "%s (%d):", buf, getpid());
288 #endif
289 if (n < 0) {
290 strlcpy(sbuf, "sprintf() failed", len);
291 return sbuf;
292 }
293 sbuf[len-1] = 0; // just in case
294 #endif // ANDROID
295 return sbuf;
296 }
297
298 #ifndef MSGS_FROM_FILE
299
300 static int setup_shared_mem() {
301 char buf[256];
302 if (standalone) {
303 fprintf(stderr,
304 "%s Standalone mode, so not using shared memory.\n",
305 boinc_msg_prefix(buf, sizeof(buf))
306 );
307 return 0;
308 }
309 app_client_shm = new APP_CLIENT_SHM;
310
311 #ifdef _WIN32
312 sprintf(buf, "%s%s", SHM_PREFIX, aid.shmem_seg_name);
313 hSharedMem = attach_shmem(buf, (void**)&app_client_shm->shm);
314 if (hSharedMem == NULL) {
315 delete app_client_shm;
316 app_client_shm = NULL;
317 }
318 #else
319 #ifdef __EMX__
320 if (attach_shmem(aid.shmem_seg_name, (void**)&app_client_shm->shm)) {
321 delete app_client_shm;
322 app_client_shm = NULL;
323 }
324 #else
325 if (aid.shmem_seg_name == -1) {
326 // Version 6 Unix/Linux/Mac client
327 if (attach_shmem_mmap(MMAPPED_FILE_NAME, (void**)&app_client_shm->shm)) {
328 delete app_client_shm;
329 app_client_shm = NULL;
330 }
331 } else {
332 // version 5 Unix/Linux/Mac client
333 if (attach_shmem(aid.shmem_seg_name, (void**)&app_client_shm->shm)) {
334 delete app_client_shm;
335 app_client_shm = NULL;
336 }
337 }
338 #endif
339 #endif // ! _WIN32
340 if (app_client_shm == NULL) return -1;
341 return 0;
342 }
343 #endif // MSGS_FROM_FILE
344
// a mutex for data structures shared between timer and worker threads
//
#ifdef _WIN32
static HANDLE mutex;
#else
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

// Create the mutex (Windows); nothing to do elsewhere because
// the pthread mutex is statically initialized.
//
static void init_mutex() {
#ifdef _WIN32
    mutex = CreateMutex(NULL, FALSE, NULL);
#endif
}

// Block until the mutex is held by the caller.
//
static inline void acquire_mutex() {
#ifdef _WIN32
    WaitForSingleObject(mutex, INFINITE);
#else
#ifdef VERBOSE
    char buf[256];
    fprintf(stderr, "%s acquiring mutex\n",
        boinc_msg_prefix(buf, sizeof(buf))
    );
#endif
    pthread_mutex_lock(&mutex);
#endif
}

// Release the mutex.
//
static inline void release_mutex() {
#ifdef _WIN32
    ReleaseMutex(mutex);
#else
#ifdef VERBOSE
    char buf[256];
    fprintf(stderr, "%s releasing mutex\n",
        boinc_msg_prefix(buf, sizeof(buf))
    );
#endif
    pthread_mutex_unlock(&mutex);
#endif
}
380
381 // Return CPU time of process.
382 //
383 double boinc_worker_thread_cpu_time() {
384 double cpu;
385 #ifdef _WIN32
386 int retval;
387 retval = boinc_process_cpu_time(GetCurrentProcess(), cpu);
388 if (retval) {
389 cpu = nrunning_ticks * TIMER_PERIOD; // for Win9x
390 }
391 #else
392 #ifdef GETRUSAGE_IN_TIMER_THREAD
393 struct rusage worker_thread_ru;
394 getrusage(RUSAGE_SELF, &worker_thread_ru);
395 #endif
396 cpu = (double)worker_thread_ru.ru_utime.tv_sec
397 + (((double)worker_thread_ru.ru_utime.tv_usec)/1000000.0);
398 cpu += (double)worker_thread_ru.ru_stime.tv_sec
399 + (((double)worker_thread_ru.ru_stime.tv_usec)/1000000.0);
400 #endif
401
402 return cpu;
403 }
404
405 // Communicate to the client (via shared mem)
406 // the current CPU time and fraction done.
407 // NOTE: various bugs could cause some of these FP numbers to be enormous,
408 // possibly overflowing the buffer.
409 // So use strlcat() instead of strcat()
410 //
411 // This is called only from the timer thread (so no need for synch)
412 //
413 static bool update_app_progress(double cpu_t, double cp_cpu_t) {
414 char msg_buf[MSG_CHANNEL_SIZE], buf[256];
415
416 if (standalone) return true;
417
418 sprintf(msg_buf,
419 "<current_cpu_time>%e</current_cpu_time>\n"
420 "<checkpoint_cpu_time>%e</checkpoint_cpu_time>\n",
421 cpu_t, cp_cpu_t
422 );
423 if (want_network) {
424 strlcat(msg_buf, "<want_network>1</want_network>\n", sizeof(msg_buf));
425 }
426 if (fraction_done >= 0) {
427 double range = aid.fraction_done_end - aid.fraction_done_start;
428 double fdone = aid.fraction_done_start + fraction_done*range;
429 sprintf(buf, "<fraction_done>%e</fraction_done>\n", fdone);
430 strlcat(msg_buf, buf, sizeof(msg_buf));
431 }
432 if (bytes_sent) {
433 sprintf(buf, "<bytes_sent>%f</bytes_sent>\n", bytes_sent);
434 strlcat(msg_buf, buf, sizeof(msg_buf));
435 }
436 if (bytes_received) {
437 sprintf(buf, "<bytes_received>%f</bytes_received>\n", bytes_received);
438 strlcat(msg_buf, buf, sizeof(msg_buf));
439 }
440 #ifdef MSGS_FROM_FILE
441 if (fout) {
442 fputs(msg_buf, fout);
443 }
444 return 0;
445 #else
446 return app_client_shm->shm->app_status.send_msg(msg_buf);
447 #endif
448 }
449
450 // called in timer thread
451 //
452 static void handle_heartbeat_msg() {
453 char buf[MSG_CHANNEL_SIZE];
454 double dtemp;
455 bool btemp;
456
457 if (!app_client_shm->shm->heartbeat.get_msg(buf)) {
458 return;
459 }
460 boinc_status.network_suspended = false;
461 if (match_tag(buf, "<heartbeat/>")) {
462 heartbeat_giveup_count = interrupt_count + HEARTBEAT_GIVEUP_COUNT;
463 }
464 if (parse_double(buf, "<wss>", dtemp)) {
465 boinc_status.working_set_size = dtemp;
466 }
467 if (parse_double(buf, "<max_wss>", dtemp)) {
468 boinc_status.max_working_set_size = dtemp;
469 }
470 if (parse_bool(buf, "suspend_network", btemp)) {
471 boinc_status.network_suspended = btemp;
472 }
473 }
474
475 // called in timer thread
476 //
477 static bool client_dead() {
478 char buf[256];
479 bool dead;
480 if (aid.client_pid) {
481 // check every 10 sec
482 //
483 if (interrupt_count%(TIMERS_PER_SEC*10)) return false;
484 #ifdef _WIN32
485 HANDLE h = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, aid.client_pid);
486 // If the process exists but is running under a different user account (boinc_master)
487 // then the handle returned is NULL and GetLastError() returns ERROR_ACCESS_DENIED.
488 //
489 if ((h == NULL) && (GetLastError() != ERROR_ACCESS_DENIED)) {
490 dead = true;
491 } else {
492 if (h) CloseHandle(h);
493 dead = false;
494 }
495 #else
496 int retval = kill(aid.client_pid, 0);
497 dead = (retval == -1 && errno == ESRCH);
498 #endif
499 } else {
500 dead = (interrupt_count > heartbeat_giveup_count);
501 }
502 if (dead) {
503 boinc_msg_prefix(buf, sizeof(buf));
504 fputs(buf, stderr); // don't use fprintf() here
505 if (aid.client_pid) {
506 fputs(" BOINC client no longer exists - exiting\n", stderr);
507 } else {
508 fputs(" No heartbeat from client for 30 sec - exiting\n", stderr);
509 }
510 return true;
511 }
512 return false;
513 }
514
515 #ifndef _WIN32
516 // For multithread apps on Unix, the main process executes the following.
517 //
518 static void parallel_master(int child_pid) {
519 char buf[MSG_CHANNEL_SIZE];
520 int exit_status;
521 while (1) {
522 boinc_sleep(TIMER_PERIOD);
523 interrupt_count++;
524 if (app_client_shm) {
525 handle_heartbeat_msg();
526 if (app_client_shm->shm->process_control_request.get_msg(buf)) {
527 if (match_tag(buf, "<suspend/>")) {
528 kill(child_pid, SIGSTOP);
529 } else if (match_tag(buf, "<resume/>")) {
530 kill(child_pid, SIGCONT);
531 } else if (match_tag(buf, "<quit/>")) {
532 kill(child_pid, SIGKILL);
533 exit(0);
534 } else if (match_tag(buf, "<abort/>")) {
535 kill(child_pid, SIGKILL);
536 exit(EXIT_ABORTED_BY_CLIENT);
537 }
538 }
539
540 if (client_dead()) {
541 kill(child_pid, SIGKILL);
542 exit(0);
543 }
544 }
545 if (interrupt_count % TIMERS_PER_SEC) continue;
546 if (waitpid(child_pid, &exit_status, WNOHANG) == child_pid) break;
547 }
548 boinc_finish(exit_status);
549 }
550 #endif
551
552 int boinc_init() {
553 #ifndef MSGS_FROM_FILE
554 int retval;
555 if (!diagnostics_is_initialized()) {
556 retval = boinc_init_diagnostics(BOINC_DIAG_DEFAULTS);
557 if (retval) return retval;
558 }
559 #endif
560 boinc_options_defaults(options);
561 return boinc_init_options(&options);
562 }
563
564 int boinc_init_options(BOINC_OPTIONS* opt) {
565 int retval;
566 #ifndef _WIN32
567 if (options.multi_thread) {
568 int child_pid = fork();
569 if (child_pid) {
570 // original process - master
571 //
572 options.send_status_msgs = false;
573 retval = boinc_init_options_general(options);
574 if (retval) {
575 kill(child_pid, SIGKILL);
576 return retval;
577 }
578 parallel_master(child_pid);
579 }
580 // new process - slave
581 //
582 options.main_program = false;
583 options.check_heartbeat = false;
584 options.handle_process_control = false;
585 options.multi_thread = false;
586 options.multi_process = false;
587 return boinc_init_options(&options);
588 }
589 #endif
590 retval = boinc_init_options_general(*opt);
591 if (retval) return retval;
592 retval = start_timer_thread();
593 if (retval) return retval;
594 #ifndef _WIN32
595 retval = start_worker_signals();
596 if (retval) return retval;
597 #endif
598 return 0;
599 }
600
601 int boinc_init_parallel() {
602 BOINC_OPTIONS _options;
603 boinc_options_defaults(_options);
604 _options.multi_thread = true;
605 return boinc_init_options(&_options);
606 }
607
608 static int min_checkpoint_period() {
609 int x = (int)aid.checkpoint_period;
610 if (app_min_checkpoint_period > x) {
611 x = app_min_checkpoint_period;
612 }
613 if (x == 0) x = DEFAULT_CHECKPOINT_PERIOD;
614 return x;
615 }
616
617 int boinc_set_min_checkpoint_period(int x) {
618 app_min_checkpoint_period = x;
619 if (x > time_until_checkpoint) {
620 time_until_checkpoint = x;
621 }
622 return 0;
623 }
624
625 int boinc_init_options_general(BOINC_OPTIONS& opt) {
626 options = opt;
627
628 #ifndef MSGS_FROM_FILE
629 int retval;
630 if (!diagnostics_is_initialized()) {
631 retval = boinc_init_diagnostics(BOINC_DIAG_DEFAULTS);
632 if (retval) return retval;
633 }
634 #endif
635
636 boinc_status.no_heartbeat = false;
637 boinc_status.suspended = false;
638 boinc_status.quit_request = false;
639 boinc_status.abort_request = false;
640
641 #ifdef MSGS_FROM_FILE
642 fout = fopen("out_msgs.txt", "w");
643 if (!fout) {
644 fprintf(stderr, "Can't open out_msgs.txt\n");
645 }
646 options.check_heartbeat = false;
647 #else
648 char buf[256];
649 if (options.main_program) {
650 // make sure we're the only app running in this slot
651 //
652 retval = file_lock.lock(LOCKFILE);
653 if (retval) {
654 // give any previous occupant a chance to timeout and exit
655 //
656 fprintf(stderr, "%s Can't acquire lockfile (%d) - waiting %ds\n",
657 boinc_msg_prefix(buf, sizeof(buf)),
658 retval, LOCKFILE_TIMEOUT_PERIOD
659 );
660 boinc_sleep(LOCKFILE_TIMEOUT_PERIOD);
661 retval = file_lock.lock(LOCKFILE);
662 }
663 if (retval) {
664 fprintf(stderr, "%s Can't acquire lockfile (%d) - exiting\n",
665 boinc_msg_prefix(buf, sizeof(buf)),
666 retval
667 );
668 #ifdef _WIN32
669 char buf2[256];
670 windows_format_error_string(GetLastError(), buf2, 256);
671 fprintf(stderr, "%s Error: %s\n", boinc_msg_prefix(buf, sizeof(buf)), buf2);
672 #endif
673 // if we can't acquire the lock file there must be
674 // another app instance running in this slot.
675 // If we exit(0), the client will keep restarting us.
676 // Instead, tell the client not to restart us for 10 min.
677 //
678 boinc_temporary_exit(600,
679 "Waiting to acquire slot directory lock. Another instance may be running."
680 );
681 }
682 }
683
684 retval = boinc_parse_init_data_file();
685 if (retval) {
686 standalone = true;
687 } else {
688 retval = setup_shared_mem();
689 if (retval) {
690 fprintf(stderr,
691 "%s Can't set up shared mem: %d. Will run in standalone mode.\n",
692 boinc_msg_prefix(buf, sizeof(buf)), retval
693 );
694 standalone = true;
695 }
696 }
697 #endif // MSGS_FROM_FILE
698
699 // copy the WU CPU time to a separate var,
700 // since we may reread the structure again later.
701 //
702 initial_wu_cpu_time = aid.wu_cpu_time;
703
704 fraction_done = -1;
705 time_until_checkpoint = min_checkpoint_period();
706 last_checkpoint_cpu_time = aid.wu_cpu_time;
707 last_wu_cpu_time = aid.wu_cpu_time;
708
709 if (standalone) {
710 options.check_heartbeat = false;
711 }
712 heartbeat_giveup_count = interrupt_count + HEARTBEAT_GIVEUP_COUNT;
713
714 init_mutex();
715
716 return 0;
717 }
718
719 int boinc_get_status(BOINC_STATUS *s) {
720 s->no_heartbeat = boinc_status.no_heartbeat;
721 s->suspended = boinc_status.suspended;
722 s->quit_request = boinc_status.quit_request;
723 s->reread_init_data_file = boinc_status.reread_init_data_file;
724 s->abort_request = boinc_status.abort_request;
725 s->working_set_size = boinc_status.working_set_size;
726 s->max_working_set_size = boinc_status.max_working_set_size;
727 s->network_suspended = boinc_status.network_suspended;
728 return 0;
729 }
730
731 // if we have any new trickle-ups or file upload requests,
732 // send a message describing them
733 //
734 static void send_trickle_up_msg() {
735 char buf[MSG_CHANNEL_SIZE];
736 if (standalone) return;
737 safe_strcpy(buf, "");
738 if (have_new_trickle_up) {
739 safe_strcat(buf, "<have_new_trickle_up/>\n");
740 }
741 if (have_new_upload_file) {
742 safe_strcat(buf, "<have_new_upload_file/>\n");
743 }
744 if (strlen(buf)) {
745 BOINCINFO("Sending Trickle Up Message");
746 if (app_client_shm->shm->trickle_up.send_msg(buf)) {
747 have_new_trickle_up = false;
748 have_new_upload_file = false;
749 }
750 }
751 }
752
753 // NOTE: a non-zero status tells the client that we're exiting with
754 // an "unrecoverable error", which will be reported back to server.
755 // A zero exit-status tells the client we've successfully finished the result.
756 //
757 int boinc_finish_message(int status, const char* msg, bool is_notice) {
758 char buf[256];
759 fraction_done = 1;
760 fprintf(stderr,
761 "%s called boinc_finish(%d)\n",
762 boinc_msg_prefix(buf, sizeof(buf)), status
763 );
764 finishing = true;
765 boinc_sleep(2.0); // let the timer thread send final messages
766 boinc_disable_timer_thread = true; // then disable it
767
768 if (options.main_program) {
769 FILE* f = fopen(BOINC_FINISH_CALLED_FILE, "w");
770 if (f) {
771 fprintf(f, "%d\n", status);
772 if (msg) {
773 fprintf(f, "%s\n%s\n", msg, is_notice?"notice":"");
774 }
775 fclose(f);
776 }
777 }
778
779 boinc_exit(status);
780
781 return 0; // never reached
782 }
783
784 int boinc_finish(int status) {
785 return boinc_finish_message(status, NULL, false);
786 }
787
788 int boinc_temporary_exit(int delay, const char* reason, bool is_notice) {
789 FILE* f = fopen(TEMPORARY_EXIT_FILE, "w");
790 if (!f) {
791 return ERR_FOPEN;
792 }
793 fprintf(f, "%d\n", delay);
794 if (reason) {
795 fprintf(f, "%s\n", reason);
796 if (is_notice) {
797 fprintf(f, "notice\n");
798 }
799 }
800 fclose(f);
801 boinc_exit(0);
802 return 0;
803 }
804
805 // unlock the lockfile and call the appropriate exit function
806 // Unix: called only from the worker thread.
807 // Win: called from the worker or timer thread.
808 //
809 // make static eventually
810 //
811 void boinc_exit(int status) {
812 int retval;
813 char buf[256];
814
815 if (options.main_program && file_lock.locked) {
816 retval = file_lock.unlock(LOCKFILE);
817 if (retval) {
818 #ifdef _WIN32
819 windows_format_error_string(GetLastError(), buf, 256);
820 fprintf(stderr,
821 "%s Can't unlock lockfile (%d): %s\n",
822 boinc_msg_prefix(buf, sizeof(buf)), retval, buf
823 );
824 #else
825 fprintf(stderr,
826 "%s Can't unlock lockfile (%d)\n",
827 boinc_msg_prefix(buf, sizeof(buf)), retval
828 );
829 perror("file unlock failed");
830 #endif
831 }
832 }
833
834 // kill any processes the app may have created
835 //
836 if (options.multi_process) {
837 kill_descendants();
838 }
839
840 boinc_finish_diag();
841
842 // various platforms have problems shutting down a process
843 // while other threads are still executing,
844 // or triggering endless exit()/atexit() loops.
845 //
846 BOINCINFO("Exit Status: %d", status);
847 fflush(NULL);
848
849 #if defined(_WIN32)
850 // Halt all the threads and clean up.
851 TerminateProcess(GetCurrentProcess(), status);
852 // note: the above CAN return!
853 Sleep(1000);
854 DebugBreak();
855 #elif defined(__APPLE_CC__)
856 // stops endless exit()/atexit() loops.
857 _exit(status);
858 #else
859 // arrange to exit with given status even if errors happen
860 // in atexit() functions
861 //
862 set_signal_exit_code(status);
863 exit(status);
864 #endif
865 }
866
867 void boinc_network_usage(double sent, double received) {
868 bytes_sent = sent;
869 bytes_received = received;
870 }
871
872 int boinc_is_standalone() {
873 if (standalone) return 1;
874 return 0;
875 }
876
877 // called from the timer thread if we need to exit,
878 // e.g. quit message from client, or client has gone away
879 //
880 // On Linux we can't exit directly from the timer thread.
881 // Set a flag telling the worker thread to exit.
882 //
883 static void exit_from_timer_thread(int status) {
884 #ifdef VERBOSE
885 char buf[256];
886 fprintf(stderr, "%s exit_from_timer_thread(%d) called\n",
887 boinc_msg_prefix(buf, sizeof(buf)), status
888 );
889 #endif
890 #ifdef _WIN32
891 // TerminateProcess() doesn't work if there are suspended threads?
892 if (boinc_status.suspended) {
893 resume_activities();
894 }
895 // this seems to work OK on Windows
896 //
897 boinc_exit(status);
898 #else
899 // but on Unix there are synchronization problems if we exit here;
900 // set a flag telling the worker thread to exit
901 //
902 worker_thread_exit_status = status;
903 worker_thread_exit_flag = true;
904 #ifdef ANDROID
905 // trigger the worker signal handler, which will call boinc_exit()
906 //
907 pthread_kill(worker_thread_handle, SIGALRM);
908
909 // the exit should happen more or less instantly.
910 // But if we're still here after 5 sec, exit directly
911 //
912 sleep(5.0);
913 boinc_exit(status);
914 #else
915 pthread_exit(NULL);
916 #endif
917 #endif
918 }
919
920 // parse the init data file.
921 // This is done at startup, and also if a "reread prefs" message is received
922 //
923 int boinc_parse_init_data_file() {
924 FILE* f;
925 int retval;
926 char buf[256];
927
928 if (aid.project_preferences) {
929 free(aid.project_preferences);
930 aid.project_preferences = NULL;
931 }
932 aid.clear();
933 aid.checkpoint_period = DEFAULT_CHECKPOINT_PERIOD;
934
935 if (!boinc_file_exists(INIT_DATA_FILE)) {
936 fprintf(stderr,
937 "%s Can't open init data file - running in standalone mode\n",
938 boinc_msg_prefix(buf, sizeof(buf))
939 );
940 return ERR_FOPEN;
941 }
942 f = boinc_fopen(INIT_DATA_FILE, "r");
943 retval = parse_init_data_file(f, aid);
944 fclose(f);
945 if (retval) {
946 fprintf(stderr,
947 "%s Can't parse init data file - running in standalone mode\n",
948 boinc_msg_prefix(buf, sizeof(buf))
949 );
950 return retval;
951 }
952 return 0;
953 }
954
955 // used by wrappers
956 //
957 int boinc_report_app_status_aux(
958 double cpu_time,
959 double checkpoint_cpu_time,
960 double _fraction_done,
961 int other_pid,
962 double _bytes_sent,
963 double _bytes_received
964 ) {
965 char msg_buf[MSG_CHANNEL_SIZE], buf[1024];
966 if (standalone) return 0;
967
968 sprintf(msg_buf,
969 "<current_cpu_time>%e</current_cpu_time>\n"
970 "<checkpoint_cpu_time>%e</checkpoint_cpu_time>\n"
971 "<fraction_done>%e</fraction_done>\n",
972 cpu_time,
973 checkpoint_cpu_time,
974 _fraction_done
975 );
976 if (other_pid) {
977 sprintf(buf, "<other_pid>%d</other_pid>\n", other_pid);
978 safe_strcat(msg_buf, buf);
979 }
980 if (_bytes_sent) {
981 sprintf(buf, "<bytes_sent>%f</bytes_sent>\n", _bytes_sent);
982 safe_strcat(msg_buf, buf);
983 }
984 if (_bytes_received) {
985 sprintf(buf, "<bytes_received>%f</bytes_received>\n", _bytes_received);
986 safe_strcat(msg_buf, buf);
987 }
988 #ifdef MSGS_FROM_FILE
989 if (fout) {
990 fputs(msg_buf, fout);
991 }
992 return 0;
993 #else
994 if (app_client_shm->shm->app_status.send_msg(msg_buf)) {
995 return 0;
996 }
997 return ERR_WRITE;
998 #endif
999 }
1000
1001 int boinc_report_app_status(
1002 double cpu_time,
1003 double checkpoint_cpu_time,
1004 double _fraction_done
1005 ){
1006 return boinc_report_app_status_aux(
1007 cpu_time, checkpoint_cpu_time, _fraction_done, 0, 0, 0
1008 );
1009 }
1010
1011 int boinc_get_init_data_p(APP_INIT_DATA* app_init_data) {
1012 *app_init_data = aid;
1013 return 0;
1014 }
1015
1016 int boinc_get_init_data(APP_INIT_DATA& app_init_data) {
1017 app_init_data = aid;
1018 return 0;
1019 }
1020
1021 int boinc_wu_cpu_time(double& cpu_t) {
1022 cpu_t = last_wu_cpu_time;
1023 return 0;
1024 }
1025
1026 // Suspend this job.
1027 // Can be called from either timer or worker thread.
1028 //
1029 static int suspend_activities(bool called_from_worker) {
1030 #ifdef VERBOSE
1031 char log_buf[256];
1032 fprintf(stderr, "%s suspend_activities() called from %s\n",
1033 boinc_msg_prefix(log_buf, sizeof(log_buf)),
1034 called_from_worker?"worker thread":"timer thread"
1035 );
1036 #endif
1037 #ifdef _WIN32
1038 static vector<int> pids;
1039 if (options.multi_thread) {
1040 if (pids.size() == 0) {
1041 pids.push_back(GetCurrentProcessId());
1042 }
1043 suspend_or_resume_threads(pids, timer_thread_id, false, true);
1044 } else {
1045 SuspendThread(worker_thread_handle);
1046 }
1047 #else
1048 if (options.multi_process) {
1049 suspend_or_resume_descendants(false);
1050 }
1051 // if called from worker thread, sleep until suspension is over
1052 // if called from time thread, don't need to do anything;
1053 // suspension is done by signal handler in worker thread
1054 //
1055 if (called_from_worker) {
1056 while (boinc_status.suspended) {
1057 sleep(1);
1058 }
1059 }
1060 #endif
1061 return 0;
1062 }
1063
1064 int resume_activities() {
1065 #ifdef VERBOSE
1066 char log_buf[256];
1067 fprintf(stderr, "%s resume_activities()\n",
1068 boinc_msg_prefix(log_buf, sizeof(log_buf))
1069 );
1070 #endif
1071 #ifdef _WIN32
1072 static vector<int> pids;
1073 if (options.multi_thread) {
1074 if (pids.size() == 0) pids.push_back(GetCurrentProcessId());
1075 suspend_or_resume_threads(pids, timer_thread_id, true, true);
1076 } else {
1077 ResumeThread(worker_thread_handle);
1078 }
1079 #else
1080 if (options.multi_process) {
1081 suspend_or_resume_descendants(true);
1082 }
1083 #endif
1084 return 0;
1085 }
1086
1087 #ifndef MSGS_FROM_FILE
1088 static void handle_upload_file_status() {
1089 char path[MAXPATHLEN], buf[256], log_name[256], *p, log_buf[256];
1090 std::string filename;
1091 int status;
1092
1093 relative_to_absolute("", path);
1094 DirScanner dirscan(path);
1095 while (dirscan.scan(filename)) {
1096 strlcpy(buf, filename.c_str(), sizeof(buf));
1097 if (strstr(buf, UPLOAD_FILE_STATUS_PREFIX) != buf) continue;
1098 strlcpy(log_name, buf+strlen(UPLOAD_FILE_STATUS_PREFIX), sizeof(log_name));
1099 FILE* f = boinc_fopen(filename.c_str(), "r");
1100 if (!f) {
1101 fprintf(stderr,
1102 "%s handle_file_upload_status: can't open %s\n",
1103 boinc_msg_prefix(buf, sizeof(buf)), filename.c_str()
1104 );
1105 continue;
1106 }
1107 p = fgets(buf, sizeof(buf), f);
1108 fclose(f);
1109 if (p && parse_int(buf, "<status>", status)) {
1110 UPLOAD_FILE_STATUS uf;
1111 uf.name = std::string(log_name);
1112 uf.status = status;
1113 upload_file_status.push_back(uf);
1114 } else {
1115 fprintf(stderr, "%s handle_upload_file_status: can't parse %s\n",
1116 boinc_msg_prefix(log_buf, sizeof(log_buf)), buf
1117 );
1118 }
1119 }
1120 }
1121
1122 // handle trickle and file upload status messages
1123 //
1124 static void handle_trickle_down_msg() {
1125 char buf[MSG_CHANNEL_SIZE];
1126 if (app_client_shm->shm->trickle_down.get_msg(buf)) {
1127 BOINCINFO("Received Trickle Down Message");
1128 if (match_tag(buf, "<have_trickle_down/>")) {
1129 have_trickle_down = true;
1130 }
1131 if (match_tag(buf, "<upload_file_status/>")) {
1132 handle_upload_file_status();
1133 }
1134 }
1135 }
1136 #endif
1137
// This flag is set if we get a suspend request while in a critical section,
// and options.direct_process_action is set.
// As soon as we're not in the critical section we'll do the suspend.
//
static bool suspend_request = false;
1143
// Handle one process-control message from the client, if one is pending.
// Runs in the timer thread.
//
// The message is checked independently for each of the tags
// <suspend/>, <resume/>, <quit/>, <abort/>,
// <reread_app_info/> and <network_available/>.
// The mutex is held while the message is processed.
//
static void handle_process_control_msg() {
    char buf[MSG_CHANNEL_SIZE];
#ifdef MSGS_FROM_FILE
    // Messages come from the file "msgs.txt" rather than shared memory:
    // read at most one line from it, then delete the file.
    //
    strcpy(buf, "");
    if (boinc_file_exists("msgs.txt")) {
        FILE* f = fopen("msgs.txt", "r");
        if (!f) {
            fprintf(stderr, "msgs.txt exists but can't open it\n");
            return;
        }
        fgets(buf, sizeof(buf), f);
        fclose(f);
        unlink("msgs.txt");
    }
    // buf is still empty if there was no file (or nothing was read)
    //
    if (!strlen(buf)) {
        return;
    }
#else
    if (!app_client_shm->shm->process_control_request.get_msg(buf)) {
        return;
    }
#endif

    // here if we have a message to process

    acquire_mutex();
#ifdef VERBOSE
    char log_buf[256];
    fprintf(stderr, "%s got process control msg %s\n",
        boinc_msg_prefix(log_buf, sizeof(log_buf)), buf
    );
#endif
    // Suspend.
    // With direct_process_action: suspend now, or, if we're in a
    // critical section, just record the request in suspend_request.
    // Without it: only set the boinc_status.suspended flag.
    //
    if (match_tag(buf, "<suspend/>")) {
        BOINCINFO("Received suspend message");
        if (options.direct_process_action) {
            if (in_critical_section) {
                suspend_request = true;
            } else {
                boinc_status.suspended = true;
                suspend_request = false;
                suspend_activities(false);
            }
        } else {
            boinc_status.suspended = true;
        }
    }

    // Resume.
    // With direct_process_action: resume if we're actually suspended;
    // otherwise cancel a suspend request that is still pending.
    // In all cases clear the suspended flag.
    //
    if (match_tag(buf, "<resume/>")) {
        BOINCINFO("Received resume message");
        if (options.direct_process_action) {
            if (boinc_status.suspended) {
                resume_activities();
            } else if (suspend_request) {
                suspend_request = false;
            }
        }
        boinc_status.suspended = false;
    }

    // Quit.
    // This branch is also taken if quit_request was already set
    // when this message arrived.
    //
    if (boinc_status.quit_request || match_tag(buf, "<quit/>")) {
        BOINCINFO("Received quit message");
        boinc_status.quit_request = true;
        if (!in_critical_section && options.direct_process_action) {
            // we hold mutex, and it's possible that worker
            // is waiting on it, so release it
            release_mutex();
            exit_from_timer_thread(0);
        }
    }

    // Abort.
    // As with quit, this branch is also taken if abort_request
    // was already set when this message arrived.
    //
    if (boinc_status.abort_request || match_tag(buf, "<abort/>")) {
        BOINCINFO("Received abort message");
        boinc_status.abort_request = true;
        if (!in_critical_section && options.direct_process_action) {
            diagnostics_set_aborted_via_gui();
#if defined(_WIN32)
            // Cause a controlled assert and dump the callstacks.
            DebugBreak();
#elif defined(__APPLE__)
            PrintBacktrace();
#endif
            // release the mutex before exiting, as in the quit case
            release_mutex();
            exit_from_timer_thread(EXIT_ABORTED_BY_CLIENT);
        }
    }
    if (match_tag(buf, "<reread_app_info/>")) {
        boinc_status.reread_init_data_file = true;
    }
    if (match_tag(buf, "<network_available/>")) {
        have_network = 1;
    }
#ifdef ANDROID
    // Trigger call to worker_signal_handler() in the worker thread
    //
    pthread_kill(worker_thread_handle, SIGALRM);
#endif
    release_mutex();
}
1243
// timer handler; called every 0.1 sec in the timer thread
//
// On every call: count the tick and handle messages from the client.
// On ticks where interrupt_count is a multiple of TIMERS_PER_SEC,
// additionally do the "slow" work: checkpoint countdown, heartbeat check,
// status reporting, trickle-ups, the app's timer callback,
// and graphics-related messages.
//
static void timer_handler() {
    char buf[512];
//#ifdef VERBOSE
#if 0
    fprintf(stderr,
        "%s timer handler: disabled %s; in critical section %s; finishing %s\n",
        boinc_msg_prefix(buf, sizeof(buf)),
        boinc_disable_timer_thread?"yes":"no",
        in_critical_section?"yes":"no",
        finishing?"yes":"no"
    );
#endif
    if (boinc_disable_timer_thread) {
        return;
    }

    // If the app is finishing, send one last status update
    // (when status messages are enabled),
    // then disable further timer handling.
    //
    if (finishing) {
        if (options.send_status_msgs) {
            double cur_cpu = boinc_worker_thread_cpu_time();
            last_wu_cpu_time = cur_cpu + initial_wu_cpu_time;
            update_app_progress(last_wu_cpu_time, last_checkpoint_cpu_time);
        }
        boinc_disable_timer_thread = true;
        return;
    }

    // interrupt_count counts every tick;
    // running_interrupt_count counts only ticks while not suspended
    //
    interrupt_count++;
    if (!boinc_status.suspended) {
        running_interrupt_count++;
    }
    // handle messages from the client
    //
#ifdef MSGS_FROM_FILE
    handle_process_control_msg();
#else
    if (app_client_shm) {
        if (options.check_heartbeat) {
            handle_heartbeat_msg();
        }
        if (handle_trickle_downs) {
            handle_trickle_down_msg();
        }
        if (options.handle_process_control) {
            handle_process_control_msg();
        }
    }
#endif
    // everything below runs only once every TIMERS_PER_SEC ticks
    //
    if (interrupt_count % TIMERS_PER_SEC) return;

#ifdef VERBOSE
    fprintf(stderr, "%s 1 sec elapsed - doing slow actions\n", boinc_msg_prefix(buf, sizeof(buf)));
#endif

    // here if we're at a one-second boundary; do slow stuff
    //

    // count down to the next checkpoint opportunity
    //
    if (!ready_to_checkpoint) {
        time_until_checkpoint -= 1;
        if (time_until_checkpoint <= 0) {
            ready_to_checkpoint = true;
        }
    }

    // see if the client has died, which means we need to die too
    // (unless we're in a critical section)
    //
    if (options.check_heartbeat) {
        if (client_dead()) {
            fprintf(stderr, "%s timer handler: client dead, exiting\n",
                boinc_msg_prefix(buf, sizeof(buf))
            );
            if (options.direct_process_action && !in_critical_section) {
                exit_from_timer_thread(0);
            } else {
                // can't exit here; just record the condition
                boinc_status.no_heartbeat = true;
            }
        }
    }

    // don't bother reporting CPU time etc. if we're suspended
    //
    if (options.send_status_msgs && !boinc_status.suspended) {
        double cur_cpu = boinc_worker_thread_cpu_time();
        last_wu_cpu_time = cur_cpu + initial_wu_cpu_time;
        update_app_progress(last_wu_cpu_time, last_checkpoint_cpu_time);
    }

    if (have_new_trickle_up || have_new_upload_file) {
        send_trickle_up_msg();
    }
    // call the app-registered timer callback, if any
    //
    if (timer_callback) {
        timer_callback();
    }

    // send graphics-related messages
    // (each is sent only when the graphics_reply channel
    // has no message pending; otherwise it's retried on a later pass)
    //
    if (send_web_graphics_url && !app_client_shm->shm->graphics_reply.has_msg()) {
        sprintf(buf,
            "<web_graphics_url>%s</web_graphics_url>",
            web_graphics_url
        );
        app_client_shm->shm->graphics_reply.send_msg(buf);
        send_web_graphics_url = false;
    }
    if (send_remote_desktop_addr && !app_client_shm->shm->graphics_reply.has_msg()) {
        sprintf(buf,
            "<remote_desktop_addr>%s</remote_desktop_addr>",
            remote_desktop_addr
        );
        app_client_shm->shm->graphics_reply.send_msg(buf);
        send_remote_desktop_addr = false;
    }
}
1357
1358 #ifdef _WIN32
1359
1360 DWORD WINAPI timer_thread(void *) {
1361 while (1) {
1362 Sleep((int)(TIMER_PERIOD*1000));
1363 timer_handler();
1364
1365 // poor man's CPU time accounting for Win9x
1366 //
1367 if (!boinc_status.suspended) {
1368 nrunning_ticks++;
1369 }
1370 }
1371 return 0;
1372 }
1373
1374 #else
1375
1376 static void* timer_thread(void*) {
1377 block_sigalrm();
1378 while(1) {
1379 boinc_sleep(TIMER_PERIOD);
1380 timer_handler();
1381 }
1382 return 0;
1383 }
1384
// This SIGALRM handler gets handled only by the worker thread.
// It gets CPU time and implements sleeping.
// It must call only signal-safe functions, and must not do FP math
//
// In order, it:
// - records resource usage in worker_thread_ru
//   (unless GETRUSAGE_IN_TIMER_THREAD is defined)
// - exits via boinc_exit() if worker_thread_exit_flag is set
// - if options.direct_process_action is set, sleeps in 1-second steps
//   for as long as we're suspended and not in a critical section
//
static void worker_signal_handler(int) {
#ifdef ANDROID
    // per-thread signal masking doesn't work on pre-4.1 Android.
    // If we're handling this signal in the timer thread,
    // send signal explicitly to worker thread.
    //
    if (pthread_self() != worker_thread_handle) {
#ifdef VERBOSE
        fprintf(stderr, "worker signal handler: called in timer thread; forwarding to worker\n");
#endif
        pthread_kill(worker_thread_handle, SIGALRM);
        return;
    }
#endif
#ifndef GETRUSAGE_IN_TIMER_THREAD
    getrusage(RUSAGE_SELF, &worker_thread_ru);
#endif
    // exit here, with the recorded status, if the exit flag is set
    //
    if (worker_thread_exit_flag) {
#ifdef VERBOSE
        fprintf(stderr, "worker signal handler: exiting\n");
#endif
        boinc_exit(worker_thread_exit_status);
    }
    // the loop condition is re-checked after each 1-second sleep
    //
    if (options.direct_process_action) {
        while (boinc_status.suspended && in_critical_section==0) {
#ifdef VERBOSE
            fprintf(stderr, "worker signal handler: sleeping\n");
#endif
            sleep(1);   // don't use boinc_sleep() because it does FP math
        }
    }
}
1421
1422 #endif
1423
1424
1425 // Called from the worker thread; create the timer thread
1426 //
1427 int start_timer_thread() {
1428 char buf[256];
1429
1430 #ifdef _WIN32
1431
1432 // get the worker thread handle
1433 //
1434 DuplicateHandle(
1435 GetCurrentProcess(),
1436 GetCurrentThread(),
1437 GetCurrentProcess(),
1438 &worker_thread_handle,
1439 0,
1440 FALSE,
1441 DUPLICATE_SAME_ACCESS
1442 );
1443
1444 // Create the timer thread
1445 //
1446 if (!CreateThread(NULL, 0, timer_thread, 0, 0, &timer_thread_id)) {
1447 fprintf(stderr,
1448 "%s start_timer_thread(): CreateThread() failed, errno %d\n",
1449 boinc_msg_prefix(buf, sizeof(buf)), errno
1450 );
1451 return errno;
1452 }
1453
1454 if (!options.normal_thread_priority) {
1455 // lower our (worker thread) priority
1456 //
1457 SetThreadPriority(worker_thread_handle, THREAD_PRIORITY_IDLE);
1458 }
1459 #else
1460 worker_thread_handle = pthread_self();
1461 pthread_attr_t thread_attrs;
1462 pthread_attr_init(&thread_attrs);
1463 pthread_attr_setstacksize(&thread_attrs, 32768);
1464 int retval = pthread_create(&timer_thread_handle, &thread_attrs, timer_thread, NULL);
1465 if (retval) {
1466 fprintf(stderr,
1467 "%s start_timer_thread(): pthread_create(): %d",
1468 boinc_msg_prefix(buf, sizeof(buf)), retval
1469 );
1470 return retval;
1471 }
1472 #endif
1473 return 0;
1474 }
1475
1476 #ifndef _WIN32
1477
1478 // called in the worker thread.
1479 // set up a handler for SIGALRM.
1480 // If Android, we'll get signals from the time thread.
1481 // otherwise, set an interval timer to deliver signals
1482 //
1483 static int start_worker_signals() {
1484 int retval;
1485 struct sigaction sa;
1486 memset(&sa, 0, sizeof(sa));
1487 sa.sa_handler = worker_signal_handler;
1488 sa.sa_flags = SA_RESTART;
1489 sigemptyset(&sa.sa_mask);
1490 retval = sigaction(SIGALRM, &sa, NULL);
1491 if (retval) {
1492 perror("boinc start_worker_signals(): sigaction failed");
1493 return retval;
1494 }
1495 #ifndef ANDROID
1496 itimerval value;
1497 value.it_value.tv_sec = 0;
1498 value.it_value.tv_usec = (int)(TIMER_PERIOD*1e6);
1499 value.it_interval = value.it_value;
1500 retval = setitimer(ITIMER_REAL, &value, NULL);
1501 if (retval) {
1502 perror("boinc start_worker_thread(): setitimer failed");
1503 return retval;
1504 }
1505 #endif
1506 return 0;
1507 }
1508 #endif
1509
1510 int boinc_send_trickle_up(char* variety, char* p) {
1511 FILE* f = boinc_fopen(TRICKLE_UP_FILENAME, "wb");
1512 if (!f) return ERR_FOPEN;
1513 fprintf(f, "<variety>%s</variety>\n", variety);
1514 size_t n = 1;
1515 if (strlen(p)) {
1516 n = fwrite(p, strlen(p), 1, f);
1517 }
1518 fclose(f);
1519 if (n != 1) return ERR_WRITE;
1520 have_new_trickle_up = true;
1521 return 0;
1522 }
1523
1524 int boinc_time_to_checkpoint() {
1525 if (ready_to_checkpoint) {
1526 boinc_begin_critical_section();
1527 return 1;
1528 }
1529 return 0;
1530 }
1531
1532 int boinc_checkpoint_completed() {
1533 double cur_cpu;
1534 cur_cpu = boinc_worker_thread_cpu_time();
1535 last_wu_cpu_time = cur_cpu + aid.wu_cpu_time;
1536 last_checkpoint_cpu_time = last_wu_cpu_time;
1537 time_until_checkpoint = min_checkpoint_period();
1538 boinc_end_critical_section();
1539 ready_to_checkpoint = false;
1540
1541 return 0;
1542 }
1543
1544 void boinc_begin_critical_section() {
1545 #ifdef VERBOSE
1546 char buf[256];
1547 fprintf(stderr,
1548 "%s begin_critical_section\n",
1549 boinc_msg_prefix(buf, sizeof(buf))
1550 );
1551 #endif
1552 in_critical_section++;
1553 }
1554
1555 void boinc_end_critical_section() {
1556 #ifdef VERBOSE
1557 char buf[256];
1558 fprintf(stderr,
1559 "%s end_critical_section\n",
1560 boinc_msg_prefix(buf, sizeof(buf))
1561 );
1562 #endif
1563 in_critical_section--;
1564 if (in_critical_section < 0) {
1565 in_critical_section = 0; // just in case
1566 }
1567
1568 if (in_critical_section) return;
1569
1570 // We're out of the critical section.
1571 // See if we got suspend/quit/abort while in critical section,
1572 // and handle them here.
1573 //
1574 if (options.direct_process_action) {
1575 if (boinc_status.no_heartbeat) {
1576 boinc_exit(0);
1577 }
1578 if (boinc_status.quit_request) {
1579 boinc_exit(0);
1580 }
1581 if (boinc_status.abort_request) {
1582 boinc_exit(EXIT_ABORTED_BY_CLIENT);
1583 }
1584 acquire_mutex();
1585 if (suspend_request) {
1586 suspend_request = false;
1587 boinc_status.suspended = true;
1588 release_mutex();
1589 suspend_activities(true);
1590 } else {
1591 release_mutex();
1592 }
1593 }
1594 }
1595
1596 int boinc_fraction_done(double x) {
1597 fraction_done = x;
1598 return 0;
1599 }
1600
1601 int boinc_receive_trickle_down(char* buf, int len) {
1602 std::string filename;
1603 char path[MAXPATHLEN];
1604
1605 handle_trickle_downs = true;
1606
1607 if (have_trickle_down) {
1608 relative_to_absolute("", path);
1609 DirScanner dirscan(path);
1610 while (dirscan.scan(filename)) {
1611 if (strstr(filename.c_str(), "trickle_down")) {
1612 strlcpy(buf, filename.c_str(), len);
1613 return true;
1614 }
1615 }
1616 have_trickle_down = false;
1617 }
1618 return false;
1619 }
1620
1621 int boinc_upload_file(std::string& name) {
1622 char buf[256];
1623 std::string pname;
1624 int retval;
1625
1626 retval = boinc_resolve_filename_s(name.c_str(), pname);
1627 if (retval) return retval;
1628 sprintf(buf, "%s%s", UPLOAD_FILE_REQ_PREFIX, name.c_str());
1629 FILE* f = boinc_fopen(buf, "w");
1630 if (!f) return ERR_FOPEN;
1631 have_new_upload_file = true;
1632 fclose(f);
1633
1634 // file upload status messages are on same channel as
1635 // trickle down messages, so listen to that channel
1636 //
1637 handle_trickle_downs = true;
1638
1639 return 0;
1640 }
1641
1642 int boinc_upload_status(std::string& name) {
1643 for (unsigned int i=0; i<upload_file_status.size(); i++) {
1644 UPLOAD_FILE_STATUS& ufs = upload_file_status[i];
1645 if (ufs.name == name) {
1646 return ufs.status;
1647 }
1648 }
1649 return ERR_NOT_FOUND;
1650 }
1651
1652 void boinc_need_network() {
1653 want_network = 1;
1654 have_network = 0;
1655 }
1656
1657 int boinc_network_poll() {
1658 return have_network?0:1;
1659 }
1660
1661 void boinc_network_done() {
1662 want_network = 0;
1663 }
1664
1665 #ifndef _WIN32
// Block SIGALRM in the calling thread,
// so that the worker thread will be forced to handle it.
//
static void block_sigalrm() {
    sigset_t alarm_only;
    sigemptyset(&alarm_only);
    sigaddset(&alarm_only, SIGALRM);
    pthread_sigmask(SIG_BLOCK, &alarm_only, NULL);
}
1674 #endif
1675
1676 void boinc_register_timer_callback(FUNC_PTR p) {
1677 timer_callback = p;
1678 }
1679
1680 double boinc_get_fraction_done() {
1681 return fraction_done;
1682 }
1683
1684 double boinc_elapsed_time() {
1685 return running_interrupt_count*TIMER_PERIOD;
1686 }
1687
1688 void boinc_web_graphics_url(char* url) {
1689 if (standalone) return;
1690 strlcpy(web_graphics_url, url, sizeof(web_graphics_url));
1691 send_web_graphics_url = true;
1692 }
1693
1694 void boinc_remote_desktop_addr(char* addr) {
1695 if (standalone) return;
1696 strlcpy(remote_desktop_addr, addr, sizeof(remote_desktop_addr));
1697 send_remote_desktop_addr = true;
1698 }
1699