1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
17
18 #ifdef __APPLE__
19 #include <Carbon/Carbon.h>
20 #endif
21
22 #ifdef _WIN32
23 #include "boinc_win.h"
24 #else
25 #include "config.h"
26 #include <unistd.h>
27 #include <csignal>
28 #include <cstdio>
29 #include <cstdlib>
30 #include <ctime>
31 #include <cstdarg>
32 #include <cstring>
33 #include <cmath>
34 #if HAVE_SYS_SOCKET_H
35 #include <sys/socket.h>
36 #endif
37 #endif
38
39 #ifdef _MSC_VER
40 #define snprintf _snprintf
41 #endif
42
43 #ifdef __EMX__
44 #define INCL_DOS
45 #include <os2.h>
46 #endif
47
48 #include "cpp.h"
49 #include "error_numbers.h"
50 #include "filesys.h"
51 #include "parse.h"
52 #include "str_replace.h"
53 #include "str_util.h"
54 #include "util.h"
55 #ifdef _WIN32
56 #include "run_app_windows.h"
57 #endif
58
59 #include "app_config.h"
60 #include "async_file.h"
61 #include "client_msgs.h"
62 #include "cs_notice.h"
63 #include "cs_proxy.h"
64 #include "cs_trickle.h"
65 #include "file_names.h"
66 #include "hostinfo.h"
67 #include "http_curl.h"
68 #include "network.h"
69 #include "project.h"
70 #include "result.h"
71 #include "sandbox.h"
72 #include "shmem.h"
73
74 #include "client_state.h"
75
76 using std::max;
77
78 CLIENT_STATE gstate;
79 COPROCS coprocs;
80
81 #ifndef SIM
82 #ifdef NEW_CPU_THROTTLE
83 THREAD_LOCK client_mutex;
84 THREAD throttle_thread;
85 #endif
86 #endif
87
CLIENT_STATE()88 CLIENT_STATE::CLIENT_STATE()
89 : lookup_website_op(&gui_http),
90 get_current_version_op(&gui_http),
91 get_project_list_op(&gui_http),
92 acct_mgr_op(&gui_http)
93 {
94 http_ops = new HTTP_OP_SET();
95 file_xfers = new FILE_XFER_SET(http_ops);
96 pers_file_xfers = new PERS_FILE_XFER_SET(file_xfers);
97 #ifndef SIM
98 scheduler_op = new SCHEDULER_OP(http_ops);
99 #endif
100 time_stats.init();
101 client_state_dirty = false;
102 old_major_version = 0;
103 old_minor_version = 0;
104 old_release = 0;
105 clock_change = false;
106 check_all_logins = false;
107 user_active = false;
108 cmdline_gui_rpc_port = 0;
109 run_cpu_benchmarks = false;
110 file_xfer_giveup_period = PERS_GIVEUP;
111 had_or_requested_work = false;
112 tasks_suspended = false;
113 tasks_throttled = false;
114 network_suspended = false;
115 file_xfers_suspended = false;
116 suspend_reason = 0;
117 network_suspend_reason = 0;
118 core_client_version.major = BOINC_MAJOR_VERSION;
119 core_client_version.minor = BOINC_MINOR_VERSION;
120 core_client_version.release = BOINC_RELEASE;
121 #ifdef BOINC_PRERELEASE
122 core_client_version.prerelease = true;
123 #else
124 core_client_version.prerelease = false;
125 #endif
126 safe_strcpy(language, "");
127 safe_strcpy(client_brand, "");
128 exit_after_app_start_secs = 0;
129 app_started = 0;
130 exit_before_upload = false;
131 run_test_app = false;
132 #ifndef _WIN32
133 boinc_project_gid = 0;
134 #endif
135 show_projects = false;
136 safe_strcpy(detach_project_url, "");
137 safe_strcpy(reset_project_url, "");
138 safe_strcpy(update_prefs_url, "");
139 safe_strcpy(main_host_venue, "");
140 safe_strcpy(attach_project_url, "");
141 safe_strcpy(attach_project_auth, "");
142 cpu_run_mode.set(RUN_MODE_AUTO, 0);
143 gpu_run_mode.set(RUN_MODE_AUTO, 0);
144 network_run_mode.set(RUN_MODE_AUTO, 0);
145 started_by_screensaver = false;
146 requested_exit = false;
147 os_requested_suspend = false;
148 os_requested_suspend_time = 0;
149 cleanup_completed = false;
150 in_abort_sequence = false;
151 master_fetch_period = MASTER_FETCH_PERIOD;
152 retry_cap = RETRY_CAP;
153 master_fetch_retry_cap = MASTER_FETCH_RETRY_CAP;
154 master_fetch_interval = MASTER_FETCH_INTERVAL;
155 sched_retry_delay_min = SCHED_RETRY_DELAY_MIN;
156 sched_retry_delay_max = SCHED_RETRY_DELAY_MAX;
157 pers_retry_delay_min = PERS_RETRY_DELAY_MIN;
158 pers_retry_delay_max = PERS_RETRY_DELAY_MAX;
159 pers_giveup = PERS_GIVEUP;
160 executing_as_daemon = false;
161 redirect_io = false;
162 disable_graphics = false;
163 cant_write_state_file = false;
164 ncpus = 1;
165 benchmarks_running = false;
166 client_disk_usage = 0.0;
167 total_disk_usage = 0.0;
168 device_status_time = 0;
169
170 rec_interval_start = 0;
171 total_cpu_time_this_rec_interval = 0.0;
172 must_enforce_cpu_schedule = false;
173 must_schedule_cpus = true;
174 must_check_work_fetch = true;
175 retry_shmem_time = 0;
176 no_gui_rpc = false;
177 gui_rpc_unix_domain = false;
178 new_version_check_time = 0;
179 all_projects_list_check_time = 0;
180 detach_console = false;
181 #ifdef SANDBOX
182 g_use_sandbox = true; // User can override with -insecure command-line arg
183 #endif
184 launched_by_manager = false;
185 run_by_updater = false;
186 now = 0.0;
187 initialized = false;
188 last_wakeup_time = dtime();
189 device_status_time = 0;
190 #ifdef _WIN32
191 have_sysmon_msg = false;
192 #endif
193 }
194
show_host_info()195 void CLIENT_STATE::show_host_info() {
196 char buf[256], buf2[256];
197
198 msg_printf(NULL, MSG_INFO,
199 "Host name: %s",
200 host_info.domain_name
201 );
202 nbytes_to_string(host_info.m_cache, 0, buf, sizeof(buf));
203 msg_printf(NULL, MSG_INFO,
204 "Processor: %d %s %s",
205 host_info.p_ncpus, host_info.p_vendor, host_info.p_model
206 );
207 if (ncpus != host_info.p_ncpus) {
208 msg_printf(NULL, MSG_INFO, "Using %d CPUs", ncpus);
209 }
210 #if 0
211 if (host_info.m_cache > 0) {
212 msg_printf(NULL, MSG_INFO,
213 "Processor: %s cache",
214 buf
215 );
216 }
217 #endif
218 msg_printf(NULL, MSG_INFO,
219 "Processor features: %s", host_info.p_features
220 );
221 #ifdef __APPLE__
222 buf[0] = '\0';
223 FILE *f = popen("sw_vers -productVersion", "r");
224 fgets(buf, sizeof(buf), f);
225 strip_whitespace(buf);
226 pclose(f);
227 msg_printf(NULL, MSG_INFO,
228 "OS: Mac OS X %s (%s %s)", buf,
229 host_info.os_name, host_info.os_version
230 );
231 #else
232 msg_printf(NULL, MSG_INFO,
233 "OS: %s: %s", host_info.os_name, host_info.os_version
234 );
235 #endif
236
237 nbytes_to_string(host_info.m_nbytes, 0, buf, sizeof(buf));
238 nbytes_to_string(host_info.m_swap, 0, buf2, sizeof(buf2));
239 msg_printf(NULL, MSG_INFO,
240 "Memory: %s physical, %s virtual",
241 buf, buf2
242 );
243
244 nbytes_to_string(host_info.d_total, 0, buf, sizeof(buf));
245 nbytes_to_string(host_info.d_free, 0, buf2, sizeof(buf2));
246 msg_printf(NULL, MSG_INFO, "Disk: %s total, %s free", buf, buf2);
247 int tz = host_info.timezone/3600;
248 msg_printf(0, MSG_INFO, "Local time is UTC %s%d hours",
249 tz<0?"":"+", tz
250 );
251
252 if (strlen(host_info.virtualbox_version)) {
253 msg_printf(NULL, MSG_INFO,
254 "VirtualBox version: %s",
255 host_info.virtualbox_version
256 );
257 } else {
258 #if defined (_WIN32) && !defined(_WIN64)
259 if (!strcmp(get_primary_platform(), "windows_x86_64")) {
260 msg_printf(NULL, MSG_USER_ALERT,
261 "Can't detect VirtualBox because this is a 32-bit version of BOINC; to fix, please install a 64-bit version."
262 );
263 }
264 #endif
265 }
266 }
267
rsc_index(const char * name)268 int rsc_index(const char* name) {
269 const char* nm = strcmp(name, "CUDA")?name:GPU_TYPE_NVIDIA;
270 // handle old state files
271 for (int i=0; i<coprocs.n_rsc; i++) {
272 if (!strcmp(nm, coprocs.coprocs[i].type)) {
273 return i;
274 }
275 }
276 return -1;
277 }
278
279 // used in XML and COPROC::type
280 //
rsc_name(int i)281 const char* rsc_name(int i) {
282 return coprocs.coprocs[i].type;
283 }
284
285 // user-friendly version
286 //
rsc_name_long(int i)287 const char* rsc_name_long(int i) {
288 int num = coproc_type_name_to_num(coprocs.coprocs[i].type);
289 if (num >= 0) return proc_type_name(num); // CPU, NVIDIA GPU, AMD GPU or Intel GPU
290 return coprocs.coprocs[i].type; // Some other type
291 }
292
293 #ifndef SIM
294 // alert user if any jobs need more RAM than available
295 // (based on RAM estimate, not measured size)
296 //
check_too_large_jobs()297 static void check_too_large_jobs() {
298 unsigned int i, j;
299 double m = gstate.max_available_ram();
300 for (i=0; i<gstate.projects.size(); i++) {
301 PROJECT* p = gstate.projects[i];
302 bool found = false;
303 for (j=0; j<gstate.results.size(); j++) {
304 RESULT* rp = gstate.results[j];
305 if (rp->project == p && rp->wup->rsc_memory_bound > m) {
306 found = true;
307 break;
308 }
309 }
310 if (found) {
311 msg_printf(p, MSG_USER_ALERT,
312 _("Some tasks need more memory than allowed by your preferences. Please check the preferences.")
313 );
314 }
315 }
316 }
317 #endif
318
319 // Something has failed N times.
320 // Calculate an exponential backoff between MIN and MAX
321 //
calculate_exponential_backoff(int n,double MIN,double MAX)322 double calculate_exponential_backoff(int n, double MIN, double MAX) {
323 double x = pow(2, (double)n);
324 x *= MIN;
325 if (x > MAX) x = MAX;
326 x *= (.5 + .5*drand());
327 return x;
328 }
329
330 #ifndef SIM
331
set_now()332 void CLIENT_STATE::set_now() {
333 double x = dtime();
334
335 // if time went backward significantly, clear delays
336 //
337 clock_change = false;
338 if (x < (now-60)) {
339 clock_change = true;
340 msg_printf(NULL, MSG_INFO,
341 "New system time (%.0f) < old system time (%.0f); clearing timeouts",
342 x, now
343 );
344 clear_absolute_times();
345 }
346
347 #ifdef _WIN32
348 // On Win, check for evidence that we're awake after a suspension
349 // (in case we missed the event announcing this)
350 //
351 if (os_requested_suspend) {
352 if (x > now+10) {
353 msg_printf(0, MSG_INFO, "Resuming after OS suspension");
354 os_requested_suspend = false;
355 } else if (x > os_requested_suspend_time + 300) {
356 msg_printf(0, MSG_INFO, "Resuming after OS suspension");
357 os_requested_suspend = false;
358 }
359 }
360 #endif
361 now = x;
362 }
363
364 // Check if version or platform has changed;
365 // if so we're running a different client than before.
366 //
is_new_client()367 bool CLIENT_STATE::is_new_client() {
368 bool new_client = false;
369 if ((core_client_version.major != old_major_version)
370 || (core_client_version.minor != old_minor_version)
371 || (core_client_version.release != old_release)
372 ) {
373 msg_printf(NULL, MSG_INFO,
374 "Version change (%d.%d.%d -> %d.%d.%d)",
375 old_major_version, old_minor_version, old_release,
376 core_client_version.major,
377 core_client_version.minor,
378 core_client_version.release
379 );
380 new_client = true;
381 }
382 if (statefile_platform_name.size() && strcmp(get_primary_platform(), statefile_platform_name.c_str())) {
383 msg_printf(NULL, MSG_INFO,
384 "Platform changed from %s to %s",
385 statefile_platform_name.c_str(), get_primary_platform()
386 );
387 new_client = true;
388 }
389 return new_client;
390 }
391
392 #ifdef _WIN32
393 typedef DWORD (WINAPI *STP)(HANDLE, DWORD);
394 #endif
395
set_client_priority()396 static void set_client_priority() {
397 #ifdef _WIN32
398 STP stp = (STP) GetProcAddress(GetModuleHandle(_T("kernel32.dll")), "SetThreadPriority");
399 if (!stp) return;
400 if (stp(GetCurrentThread(), THREAD_MODE_BACKGROUND_BEGIN)) {
401 msg_printf(NULL, MSG_INFO, "Running at background priority");
402 } else {
403 msg_printf(NULL, MSG_INFO, "Failed to set background priority");
404 }
405 #endif
406 #ifdef __linux__
407 char buf[1024];
408 snprintf(buf, sizeof(buf), "ionice -c 3 -p %d", getpid());
409 system(buf);
410 #endif
411 }
412
init()413 int CLIENT_STATE::init() {
414 int retval;
415 unsigned int i;
416 char buf[256];
417 PROJECT* p;
418
419 srand((unsigned int)time(0));
420 now = dtime();
421 #ifdef ANDROID
422 device_status_time = dtime();
423 #endif
424 scheduler_op->url_random = drand();
425
426 notices.init();
427 daily_xfer_history.init();
428 time_stats.init();
429
430 detect_platforms();
431 time_stats.start();
432
433 msg_printf(
434 NULL, MSG_INFO, "Starting BOINC client version %d.%d.%d for %s%s",
435 core_client_version.major,
436 core_client_version.minor,
437 core_client_version.release,
438 get_primary_platform(),
439 #ifdef _DEBUG
440 " (DEBUG)"
441 #else
442 ""
443 #endif
444 );
445
446 if (core_client_version.prerelease) {
447 msg_printf(NULL, MSG_INFO,
448 "This a development version of BOINC and may not function properly"
449 );
450 }
451
452 log_flags.show();
453
454 msg_printf(NULL, MSG_INFO, "Libraries: %s", curl_version());
455
456 if (cc_config.lower_client_priority) {
457 set_client_priority();
458 }
459
460 if (executing_as_daemon) {
461 #ifdef _WIN32
462 msg_printf(NULL, MSG_INFO, "Running as a daemon (GPU computing disabled)");
463 #else
464 msg_printf(NULL, MSG_INFO, "Running as a daemon");
465 #endif
466 }
467
468 relative_to_absolute("", buf);
469 msg_printf(NULL, MSG_INFO, "Data directory: %s", buf);
470
471 #ifdef _WIN32
472 DWORD buf_size = sizeof(buf);
473 LPTSTR pbuf = buf;
474
475 GetUserName(pbuf, &buf_size);
476 msg_printf(NULL, MSG_INFO, "Running under account %s", pbuf);
477 #endif
478
479 FILE* f = fopen(CLIENT_BRAND_FILENAME, "r");
480 if (f) {
481 fgets(client_brand, sizeof(client_brand), f);
482 strip_whitespace(client_brand);
483 msg_printf(NULL, MSG_INFO, "Client brand: %s", client_brand);
484 fclose(f);
485 }
486
487 parse_account_files();
488 parse_statistics_files();
489
490 // check for GPUs.
491 //
492 coprocs.bound_counts(); // show GPUs described in cc_config.xml
493 if (!cc_config.no_gpus
494 #ifdef _WIN32
495 && !executing_as_daemon
496 #endif
497 ) {
498 vector<string> descs;
499 vector<string> warnings;
500 coprocs.get(
501 cc_config.use_all_gpus, descs, warnings, cc_config.ignore_gpu_instance
502 );
503 for (i=0; i<descs.size(); i++) {
504 msg_printf(NULL, MSG_INFO, "%s", descs[i].c_str());
505 }
506 if (log_flags.coproc_debug) {
507 for (i=0; i<warnings.size(); i++) {
508 msg_printf(NULL, MSG_INFO, "[coproc] %s", warnings[i].c_str());
509 }
510 }
511 #if 0
512 msg_printf(NULL, MSG_INFO, "Faking an NVIDIA GPU");
513 coprocs.nvidia.fake(18000, 512*MEGA, 490*MEGA, 2);
514 #endif
515 #if 0
516 msg_printf(NULL, MSG_INFO, "Faking an ATI GPU");
517 coprocs.ati.fake(512*MEGA, 256*MEGA, 2);
518 #endif
519 #if 0
520 msg_printf(NULL, MSG_INFO, "Faking an Intel GPU");
521 coprocs.intel_gpu.fake(512*MEGA, 256*MEGA, 2);
522 #endif
523 #if 0
524 fake_opencl_gpu("Mali-T628");
525 #endif
526 }
527
528 if (coprocs.have_nvidia()) {
529 if (rsc_index(GPU_TYPE_NVIDIA)>0) {
530 msg_printf(NULL, MSG_INFO, "NVIDIA GPU info taken from cc_config.xml");
531 } else {
532 coprocs.add(coprocs.nvidia);
533 }
534 }
535 if (coprocs.have_ati()) {
536 if (rsc_index(GPU_TYPE_ATI)>0) {
537 msg_printf(NULL, MSG_INFO, "ATI GPU info taken from cc_config.xml");
538 } else {
539 coprocs.add(coprocs.ati);
540 }
541 }
542 if (coprocs.have_intel_gpu()) {
543 if (rsc_index(GPU_TYPE_INTEL)>0) {
544 msg_printf(NULL, MSG_INFO, "INTEL GPU info taken from cc_config.xml");
545 } else {
546 coprocs.add(coprocs.intel_gpu);
547 }
548 }
549 coprocs.add_other_coproc_types();
550
551 host_info.coprocs = coprocs;
552
553 if (coprocs.none() ) {
554 msg_printf(NULL, MSG_INFO, "No usable GPUs found");
555 }
556
557 set_no_rsc_config();
558
559 // check for app_info.xml file in project dirs.
560 // If find, read app info from there, set project.anonymous_platform
561 // - this must follow coproc.get() (need to know if GPUs are present)
562 // - this is being done before CPU speed has been read from state file,
563 // so we'll need to patch up avp->flops later;
564 //
565 check_anonymous();
566
567 // first time, set p_fpops nonzero to avoid div by zero
568 //
569 cpu_benchmarks_set_defaults();
570
571 // Parse the client state file,
572 // ignoring any <project> tags (and associated stuff)
573 // for projects with no account file
574 //
575 parse_state_file();
576
577 bool new_client = is_new_client();
578
579 // this follows parse_state_file() since we need to have read
580 // domain_name for Android
581 //
582 host_info.get_host_info(true);
583 set_ncpus();
584 show_host_info();
585
586 // this follows parse_state_file() because that's where we read project names
587 //
588 sort_projects_by_name();
589
590 // check for app_config.xml files in project dirs
591 //
592 check_app_config();
593
594 // this needs to go after parse_state_file() because
595 // GPU exclusions refer to projects
596 //
597 cc_config.show();
598
599 // inform the user if there's a newer version of client
600 //
601 newer_version_startup_check();
602
603 // parse account files again,
604 // now that we know the host's venue on each project
605 //
606 parse_account_files_venue();
607
608 // fill in p->no_X_apps for anon platform projects,
609 // and check no_rsc_apps for others
610 //
611 for (i=0; i<projects.size(); i++) {
612 p = projects[i];
613 if (p->anonymous_platform) {
614 p->check_no_apps();
615 } else {
616 p->check_no_rsc_apps();
617 }
618 }
619
620 // fill in avp->flops for anonymous platform projects
621 //
622 for (i=0; i<app_versions.size(); i++) {
623 APP_VERSION* avp = app_versions[i];
624 if (!avp->flops) {
625 if (!avp->avg_ncpus) {
626 avp->avg_ncpus = 1;
627 }
628 avp->flops = avp->avg_ncpus * host_info.p_fpops;
629
630 // for GPU apps, use conservative estimate:
631 // assume GPU runs at 10X peak CPU speed
632 //
633 if (avp->gpu_usage.rsc_type) {
634 avp->flops += avp->gpu_usage.usage * 10 * host_info.p_fpops;
635 }
636 }
637 }
638
639 process_gpu_exclusions();
640
641 check_clock_reset();
642
643 // Check to see if we can write the state file.
644 //
645 retval = write_state_file();
646 if (retval) {
647 msg_printf_notice(NULL, false,
648 "http://boinc.berkeley.edu/manager_links.php?target=notice&controlid=statefile",
649 _("Couldn't write state file; check directory permissions")
650 );
651 cant_write_state_file = true;
652 }
653
654 // scan user prefs; create file records
655 //
656 parse_preferences_for_user_files();
657
658 if (log_flags.state_debug) {
659 print_summary();
660 }
661 do_cmdline_actions();
662
663 // if new version of client,
664 // - run CPU benchmarks
665 // - get new project list
666 // - contact reference site (or some project) to trigger firewall alert
667 //
668 if (new_client) {
669 run_cpu_benchmarks = true;
670 all_projects_list_check_time = 0;
671 if (cc_config.dont_contact_ref_site) {
672 if (projects.size() > 0) {
673 projects[0]->master_url_fetch_pending = true;
674 }
675 } else {
676 net_status.need_to_contact_reference_site = true;
677 }
678 }
679
680 check_if_need_benchmarks();
681
682 log_show_projects();
683
684 read_global_prefs();
685
686 // do CPU scheduler and work fetch
687 //
688 request_schedule_cpus("Startup");
689 request_work_fetch("Startup");
690 work_fetch.init();
691 rec_interval_start = now;
692
693 // set up the project and slot directories
694 //
695 delete_old_slot_dirs();
696 retval = make_project_dirs();
697 if (retval) return retval;
698
699 active_tasks.init();
700 active_tasks.report_overdue();
701 active_tasks.handle_upload_files();
702 had_or_requested_work = (active_tasks.active_tasks.size() > 0);
703
704 // Just to be on the safe side; something may have been modified
705 //
706 set_client_state_dirty("init");
707
708 // check for initialization files
709 //
710 acct_mgr_info.init();
711 project_init.init();
712
713 // set up for handling GUI RPCs
714 //
715 if (!no_gui_rpc) {
716 if (gui_rpc_unix_domain) {
717 retval = gui_rpcs.init_unix_domain();
718 } else {
719 // When we're running at boot time,
720 // it may be a few seconds before we can socket/bind/listen.
721 // So retry a few times.
722 //
723 for (i=0; i<30; i++) {
724 bool last_time = (i==29);
725 retval = gui_rpcs.init_tcp(last_time);
726 if (!retval) break;
727 boinc_sleep(1.0);
728 }
729 }
730 if (retval) return retval;
731 }
732
733 if (g_use_sandbox) get_project_gid();
734 #ifdef _WIN32
735 get_sandbox_account_service_token();
736 if (sandbox_account_service_token != NULL) g_use_sandbox = true;
737 #endif
738
739 check_file_existence();
740 if (!boinc_file_exists(ALL_PROJECTS_LIST_FILENAME)) {
741 all_projects_list_check_time = 0;
742 }
743
744 #ifdef ENABLE_AUTO_UPDATE
745 auto_update.init();
746 #endif
747
748 http_ops->cleanup_temp_files();
749
750 // must parse env vars after parsing state file
751 // otherwise items will get overwritten with state file info
752 //
753 parse_env_vars();
754
755 // do this after parsing env vars
756 //
757 proxy_info_startup();
758
759 if (gstate.projects.size() == 0) {
760 msg_printf(NULL, MSG_INFO,
761 "This computer is not attached to any projects"
762 );
763 msg_printf(NULL, MSG_INFO,
764 "Visit http://boinc.berkeley.edu for instructions"
765 );
766 }
767
768 // get list of BOINC projects occasionally,
769 // and initialize notice RSS feeds
770 //
771 if (!cc_config.no_info_fetch) {
772 all_projects_list_check();
773 notices.init_rss();
774 }
775
776 // warn user if some jobs need more memory than available
777 //
778 check_too_large_jobs();
779
780 // initialize project priorities (for the GUI, in case we're suspended)
781 //
782 project_priority_init(false);
783
784 #ifdef NEW_CPU_THROTTLE
785 client_mutex.lock();
786 throttle_thread.run(throttler, NULL);
787 #endif
788 initialized = true;
789 return 0;
790 }
791
// Convert a time in seconds (x) to a timeval (t).
// tv_sec gets x truncated to an int;
// tv_usec gets the remaining fraction in microseconds, truncated.
//
static void double_to_timeval(double x, timeval& t) {
    const int whole_secs = (int)x;
    t.tv_sec = whole_secs;
    t.tv_usec = (int)(1000000*(x - whole_secs));
}
796
797 FDSET_GROUP curl_fds;
798 FDSET_GROUP gui_rpc_fds;
799 FDSET_GROUP all_fds;
800
801 // Spend x seconds either doing I/O (if possible) or sleeping.
802 //
do_io_or_sleep(double max_time)803 void CLIENT_STATE::do_io_or_sleep(double max_time) {
804 int n;
805 struct timeval tv;
806 set_now();
807 double end_time = now + max_time;
808 double time_remaining = max_time;
809
810 while (1) {
811 curl_fds.zero();
812 gui_rpc_fds.zero();
813 http_ops->get_fdset(curl_fds);
814 all_fds = curl_fds;
815 gui_rpcs.get_fdset(gui_rpc_fds, all_fds);
816
817 bool have_async = have_async_file_op();
818
819 // prioritize network (including GUI RPC) over async file ops.
820 // if there's a pending asynch file op, do the select with zero timeout;
821 // otherwise do it for the remaining amount of time.
822
823 double_to_timeval(have_async?0:time_remaining, tv);
824 #ifdef NEW_CPU_THROTTLE
825 client_mutex.unlock();
826 #endif
827 n = select(
828 all_fds.max_fd+1,
829 &all_fds.read_fds, &all_fds.write_fds, &all_fds.exc_fds,
830 &tv
831 );
832 //printf("select in %d out %d\n", all_fds.max_fd, n);
833 #ifdef NEW_CPU_THROTTLE
834 client_mutex.lock();
835 #endif
836
837 // Note: curl apparently likes to have curl_multi_perform()
838 // (called from net_xfers->got_select())
839 // called pretty often, even if no descriptors are enabled.
840 // So do the "if (n==0) break" AFTER the got_selects().
841
842 http_ops->got_select(all_fds, time_remaining);
843 gui_rpcs.got_select(all_fds);
844
845 if (have_async) {
846 // do the async file op only if no network activity
847 //
848 if (n == 0) {
849 do_async_file_op();
850 }
851 } else {
852 if (n == 0) {
853 break;
854 }
855 }
856
857 set_now();
858 if (now > end_time) break;
859 time_remaining = end_time - now;
860 }
861 }
862
// Call func(); if it returns nonzero, increment the local variable
// "actions" and, if log_flags.poll_debug is set, log "name".
// Must be used where a variable named "actions" is in scope.
//
#define POLL_ACTION(name, func) \
    do { \
        if (!func()) break; \
        ++actions; \
        if (log_flags.poll_debug) { \
            msg_printf(0, MSG_INFO, "[poll] CLIENT_STATE::poll_slow_events(): " #name "\n"); \
        } \
    } while(0)
870
871 // Poll the client's finite-state machines
872 // possibly triggering state transitions.
873 // Returns true if something happened
874 // (in which case should call this again immediately)
875 //
poll_slow_events()876 bool CLIENT_STATE::poll_slow_events() {
877 int actions = 0, retval;
878 static int last_suspend_reason=0;
879 static bool tasks_restarted = false;
880 static bool first=true;
881 double old_now = now;
882 #ifdef __APPLE__
883 double idletime;
884 #endif
885
886 set_now();
887
888 if (cant_write_state_file) {
889 return false;
890 }
891
892 if (now - old_now > POLL_INTERVAL*10) {
893 if (log_flags.network_status_debug) {
894 msg_printf(0, MSG_INFO,
895 "[network_status] woke up after %f seconds",
896 now - old_now
897 );
898 }
899 last_wakeup_time = now;
900 }
901
902 if (run_cpu_benchmarks && can_run_cpu_benchmarks()) {
903 run_cpu_benchmarks = false;
904 start_cpu_benchmarks();
905 }
906
907 #ifdef _WIN32
908 if (have_sysmon_msg) {
909 msg_printf(NULL, MSG_INFO, sysmon_msg);
910 have_sysmon_msg = false;
911 }
912 #endif
913
914 bool old_user_active = user_active;
915 #ifdef ANDROID
916 user_active = device_status.user_active;
917 #else
918 user_active = !host_info.users_idle(
919 check_all_logins, global_prefs.idle_time_to_run
920 #ifdef __APPLE__
921 , &idletime
922 #endif
923 );
924 #endif
925
926 if (user_active != old_user_active) {
927 request_schedule_cpus(user_active?"Not idle":"Idle");
928 }
929
930 #if 0
931 // NVIDIA provides an interface for finding if a GPU is
932 // running a graphics app. ATI doesn't as far as I know
933 //
934 if (host_info.have_nvidia() && user_active && !global_prefs.run_gpu_if_user_active) {
935 if (host_info.coprocs.nvidia.check_running_graphics_app()) {
936 request_schedule_cpus("GPU state change");
937 }
938 }
939 #endif
940
941 #ifdef __APPLE__
942 // Mac screensaver launches client if not already running.
943 // OS X quits screensaver when energy saver puts display to sleep,
944 // but we want to keep crunching.
945 // Also, user can start Mac screensaver by putting cursor in "hot corner"
946 // so idletime may be very small initially.
947 // If screensaver started client, this code tells client
948 // to exit when user becomes active, accounting for all these factors.
949 //
950 if (started_by_screensaver && (idletime < 30) && (getppid() == 1)) {
951 // pid is 1 if parent has exited
952 requested_exit = true;
953 }
954
955 // Exit if we were launched by Manager and it crashed.
956 //
957 if (launched_by_manager && (getppid() == 1)) {
958 gstate.requested_exit = true;
959 }
960 #endif
961
962 // active_tasks.get_memory_usage() sets variables needed by
963 // check_suspend_processing(), so it must be called first.
964 //
965 active_tasks.get_memory_usage();
966 suspend_reason = check_suspend_processing();
967
968 // suspend or resume activities (but only if already did startup)
969 //
970 if (tasks_restarted) {
971 if (suspend_reason) {
972 if (!tasks_suspended) {
973 show_suspend_tasks_message(suspend_reason);
974 active_tasks.suspend_all(suspend_reason);
975 }
976 last_suspend_reason = suspend_reason;
977 } else {
978 if (tasks_suspended && !tasks_throttled) {
979 resume_tasks(last_suspend_reason);
980 }
981 }
982 } else if (first) {
983 // if suspended, show message the first time around
984 //
985 first = false;
986 if (suspend_reason) {
987 show_suspend_tasks_message(suspend_reason);
988 }
989 }
990 tasks_suspended = (suspend_reason != 0);
991
992 if (benchmarks_running) {
993 cpu_benchmarks_poll();
994 }
995
996 int old_network_suspend_reason = network_suspend_reason;
997 bool old_network_suspended = network_suspended;
998 check_suspend_network();
999 if (network_suspend_reason) {
1000 if (!old_network_suspend_reason) {
1001 char buf[256];
1002 if (network_suspended) {
1003 snprintf(buf, sizeof(buf),
1004 "Suspending network activity - %s",
1005 suspend_reason_string(network_suspend_reason)
1006 );
1007 request_schedule_cpus("network suspended");
1008 // in case any "needs_network" jobs are running
1009 } else {
1010 snprintf(buf, sizeof(buf),
1011 "Suspending file transfers - %s",
1012 suspend_reason_string(network_suspend_reason)
1013 );
1014 }
1015 msg_printf(NULL, MSG_INFO, "%s", buf);
1016 pers_file_xfers->suspend();
1017 }
1018 } else {
1019 if (old_network_suspend_reason) {
1020 if (old_network_suspended) {
1021 msg_printf(NULL, MSG_INFO, "Resuming network activity");
1022 } else {
1023 msg_printf(NULL, MSG_INFO, "Resuming file transfers");
1024 }
1025 request_schedule_cpus("network resumed");
1026 }
1027
1028 // if we're emerging from a bandwidth quota suspension,
1029 // add a random delay to avoid DDOS effect
1030 //
1031 if (
1032 old_network_suspend_reason == SUSPEND_REASON_NETWORK_QUOTA_EXCEEDED
1033 && network_run_mode.get_current() == RUN_MODE_AUTO
1034 ) {
1035 pers_file_xfers->add_random_delay(3600);
1036 }
1037 }
1038
1039 // NOTE:
1040 // The order of calls in the following lists generally doesn't matter,
1041 // except for the following:
1042 // must have:
1043 // active_tasks_poll
1044 // handle_finished_apps
1045 // schedule_cpus
1046 // in that order (active_tasks_poll() sets must_schedule_cpus,
1047 // and handle_finished_apps() must be done before schedule_cpus()
1048
1049 check_project_timeout();
1050 #ifdef ENABLE_AUTO_UPDATE
1051 auto_update.poll();
1052 #endif
1053 POLL_ACTION(active_tasks , active_tasks.poll );
1054 POLL_ACTION(garbage_collect , garbage_collect );
1055 // remove PERS_FILE_XFERs (and associated FILE_XFERs and HTTP_OPs)
1056 // for unreferenced files
1057 POLL_ACTION(gui_http , gui_http.poll );
1058 POLL_ACTION(gui_rpc_http , gui_rpcs.poll );
1059 POLL_ACTION(trickle_up_ops, trickle_up_poll);
1060 // scan FILE_INFOS and create PERS_FILE_XFERs
1061 // for PERS_FILE_XFERS that are done, delete them
1062
1063 if (!network_suspended && suspend_reason != SUSPEND_REASON_BENCHMARKS) {
1064 // don't initiate network activity if we're doing CPU benchmarks
1065 net_status.poll();
1066 daily_xfer_history.poll();
1067 POLL_ACTION(acct_mgr , acct_mgr_info.poll );
1068 POLL_ACTION(file_xfers , file_xfers->poll );
1069 // check for file xfer completion; don't delete anything
1070 POLL_ACTION(pers_file_xfers , pers_file_xfers->poll );
1071 // poll PERS_FILE_XFERS
1072 // if we need to start xfer, creat FILE_XFER and init
1073 // if FILE_XFER is complete
1074 // handle transient and permanent failures
1075 // delete the FILE_XFER
1076
1077 if (!cc_config.no_info_fetch) {
1078 POLL_ACTION(rss_feed_op , rss_feed_op.poll );
1079 }
1080 }
1081 POLL_ACTION(create_and_delete_pers_file_xfers ,
1082 create_and_delete_pers_file_xfers
1083 );
1084 POLL_ACTION(handle_finished_apps , handle_finished_apps );
1085 POLL_ACTION(update_results , update_results );
1086 if (!tasks_suspended) {
1087 POLL_ACTION(schedule_cpus, schedule_cpus );
1088 tasks_restarted = true;
1089 }
1090 if (!network_suspended) {
1091 POLL_ACTION(scheduler_rpc , scheduler_rpc_poll );
1092 }
1093 retval = write_state_file_if_needed();
1094 if (retval) {
1095 msg_printf(NULL, MSG_INTERNAL_ERROR,
1096 "Couldn't write state file: %s; giving up", boincerror(retval)
1097 );
1098 exit(EXIT_STATEFILE_WRITE);
1099 }
1100 if (log_flags.poll_debug) {
1101 msg_printf(0, MSG_INFO,
1102 "[poll] CLIENT_STATE::do_something(): End poll: %d tasks active\n", actions
1103 );
1104 }
1105 if (actions > 0) {
1106 return true;
1107 } else {
1108 time_stats.update(suspend_reason, gpu_suspend_reason);
1109
1110 // on some systems, DNS resolution only starts working
1111 // a few minutes after system boot.
1112 // If it didn't work before, try it again.
1113 //
1114 if (!strlen(host_info.domain_name)) {
1115 host_info.get_local_network_info();
1116 }
1117 return false;
1118 }
1119 }
1120
1121 #endif // ifndef SIM
1122
1123 // See if the project specified by master_url already exists
1124 // in the client state record. Ignore any trailing "/" characters
1125 //
lookup_project(const char * master_url)1126 PROJECT* CLIENT_STATE::lookup_project(const char* master_url) {
1127 int len1, len2;
1128 char *mu;
1129
1130 len1 = (int)strlen(master_url);
1131 if (master_url[strlen(master_url)-1] == '/') len1--;
1132
1133 for (unsigned int i=0; i<projects.size(); i++) {
1134 mu = projects[i]->master_url;
1135 len2 = (int)strlen(mu);
1136 if (mu[strlen(mu)-1] == '/') len2--;
1137 if (!strncmp(master_url, projects[i]->master_url, max(len1,len2))) {
1138 return projects[i];
1139 }
1140 }
1141 return 0;
1142 }
1143
lookup_app(PROJECT * p,const char * name)1144 APP* CLIENT_STATE::lookup_app(PROJECT* p, const char* name) {
1145 for (unsigned int i=0; i<apps.size(); i++) {
1146 APP* app = apps[i];
1147 if (app->project == p && !strcmp(name, app->name)) return app;
1148 }
1149 return 0;
1150 }
1151
lookup_result(PROJECT * p,const char * name)1152 RESULT* CLIENT_STATE::lookup_result(PROJECT* p, const char* name) {
1153 for (unsigned int i=0; i<results.size(); i++) {
1154 RESULT* rp = results[i];
1155 if (rp->project == p && !strcmp(name, rp->name)) return rp;
1156 }
1157 return 0;
1158 }
1159
lookup_workunit(PROJECT * p,const char * name)1160 WORKUNIT* CLIENT_STATE::lookup_workunit(PROJECT* p, const char* name) {
1161 for (unsigned int i=0; i<workunits.size(); i++) {
1162 WORKUNIT* wup = workunits[i];
1163 if (wup->project == p && !strcmp(name, wup->name)) return wup;
1164 }
1165 return 0;
1166 }
1167
lookup_app_version(APP * app,char * platform,int version_num,char * plan_class)1168 APP_VERSION* CLIENT_STATE::lookup_app_version(
1169 APP* app, char* platform, int version_num, char* plan_class
1170 ) {
1171 for (unsigned int i=0; i<app_versions.size(); i++) {
1172 APP_VERSION* avp = app_versions[i];
1173 if (avp->app != app) continue;
1174 if (version_num != avp->version_num) continue;
1175 if (strcmp(avp->platform, platform)) continue;
1176 if (strcmp(avp->plan_class, plan_class)) continue;
1177 return avp;
1178 }
1179 return 0;
1180 }
1181
lookup_file_info(PROJECT * p,const char * name)1182 FILE_INFO* CLIENT_STATE::lookup_file_info(PROJECT* p, const char* name) {
1183 for (unsigned int i=0; i<file_infos.size(); i++) {
1184 FILE_INFO* fip = file_infos[i];
1185 if (fip->project == p && !strcmp(fip->name, name)) {
1186 return fip;
1187 }
1188 }
1189 return 0;
1190 }
1191
1192 // functions to create links between state objects
1193 // (which, in their XML form, reference one another by name)
1194 // Return nonzero if already in client state.
1195 //
link_app(PROJECT * p,APP * app)1196 int CLIENT_STATE::link_app(PROJECT* p, APP* app) {
1197 if (lookup_app(p, app->name)) return ERR_NOT_UNIQUE;
1198 app->project = p;
1199 return 0;
1200 }
1201
link_file_info(PROJECT * p,FILE_INFO * fip)1202 int CLIENT_STATE::link_file_info(PROJECT* p, FILE_INFO* fip) {
1203 if (lookup_file_info(p, fip->name)) return ERR_NOT_UNIQUE;
1204 fip->project = p;
1205 return 0;
1206 }
1207
link_app_version(PROJECT * p,APP_VERSION * avp)1208 int CLIENT_STATE::link_app_version(PROJECT* p, APP_VERSION* avp) {
1209 APP* app;
1210
1211 avp->project = p;
1212 app = lookup_app(p, avp->app_name);
1213 if (!app) {
1214 msg_printf(p, MSG_INTERNAL_ERROR,
1215 "State file error: bad application name %s",
1216 avp->app_name
1217 );
1218 return ERR_NOT_FOUND;
1219 }
1220 avp->app = app;
1221
1222 if (lookup_app_version(app, avp->platform, avp->version_num, avp->plan_class)) {
1223 #ifndef SIM
1224 msg_printf(p, MSG_INTERNAL_ERROR,
1225 "State file error: duplicate app version: %s %s %d %s",
1226 avp->app_name, avp->platform, avp->version_num, avp->plan_class
1227 );
1228 #endif
1229 return ERR_NOT_UNIQUE;
1230 }
1231
1232 #ifndef SIM
1233
1234 safe_strcpy(avp->graphics_exec_path, "");
1235 safe_strcpy(avp->graphics_exec_file, "");
1236
1237 for (unsigned int i=0; i<avp->app_files.size(); i++) {
1238 FILE_REF& file_ref = avp->app_files[i];
1239 FILE_INFO* fip = lookup_file_info(p, file_ref.file_name);
1240 if (!fip) {
1241 msg_printf(p, MSG_INTERNAL_ERROR,
1242 "State file error: missing application file %s",
1243 file_ref.file_name
1244 );
1245 return ERR_NOT_FOUND;
1246 }
1247
1248 if (!strcmp(file_ref.open_name, GRAPHICS_APP_FILENAME)) {
1249 char relpath[MAXPATHLEN], path[MAXPATHLEN];
1250 get_pathname(fip, relpath, sizeof(relpath));
1251 relative_to_absolute(relpath, path);
1252 safe_strcpy(avp->graphics_exec_path, path);
1253 safe_strcpy(avp->graphics_exec_file, fip->name);
1254 }
1255
1256 // any file associated with an app version must be signed
1257 //
1258 if (!cc_config.unsigned_apps_ok) {
1259 fip->signature_required = true;
1260 }
1261
1262 file_ref.file_info = fip;
1263 }
1264 #endif
1265 return 0;
1266 }
1267
link_file_ref(PROJECT * p,FILE_REF * file_refp)1268 int CLIENT_STATE::link_file_ref(PROJECT* p, FILE_REF* file_refp) {
1269 FILE_INFO* fip;
1270
1271 fip = lookup_file_info(p, file_refp->file_name);
1272 if (!fip) {
1273 msg_printf(p, MSG_INTERNAL_ERROR,
1274 "State file error: missing file %s",
1275 file_refp->file_name
1276 );
1277 return ERR_NOT_FOUND;
1278 }
1279 file_refp->file_info = fip;
1280 return 0;
1281 }
1282
link_workunit(PROJECT * p,WORKUNIT * wup)1283 int CLIENT_STATE::link_workunit(PROJECT* p, WORKUNIT* wup) {
1284 APP* app;
1285 unsigned int i;
1286 int retval;
1287
1288 app = lookup_app(p, wup->app_name);
1289 if (!app) {
1290 msg_printf(p, MSG_INTERNAL_ERROR,
1291 "State file error: missing application %s",
1292 wup->app_name
1293 );
1294 return ERR_NOT_FOUND;
1295 }
1296 wup->project = p;
1297 wup->app = app;
1298 for (i=0; i<wup->input_files.size(); i++) {
1299 retval = link_file_ref(p, &wup->input_files[i]);
1300 if (retval) {
1301 msg_printf(p, MSG_INTERNAL_ERROR,
1302 "State file error: missing input file %s\n",
1303 wup->input_files[i].file_name
1304 );
1305 return retval;
1306 }
1307 }
1308 return 0;
1309 }
1310
link_result(PROJECT * p,RESULT * rp)1311 int CLIENT_STATE::link_result(PROJECT* p, RESULT* rp) {
1312 WORKUNIT* wup;
1313 unsigned int i;
1314 int retval;
1315
1316 wup = lookup_workunit(p, rp->wu_name);
1317 if (!wup) {
1318 msg_printf(p, MSG_INTERNAL_ERROR,
1319 "State file error: missing task %s\n", rp->wu_name
1320 );
1321 return ERR_NOT_FOUND;
1322 }
1323 rp->project = p;
1324 rp->wup = wup;
1325 rp->app = wup->app;
1326 for (i=0; i<rp->output_files.size(); i++) {
1327 retval = link_file_ref(p, &rp->output_files[i]);
1328 if (retval) return retval;
1329 }
1330 return 0;
1331 }
1332
1333 // Print debugging information about how many projects/files/etc
1334 // are currently in the client state record
1335 //
print_summary()1336 void CLIENT_STATE::print_summary() {
1337 unsigned int i;
1338 double t;
1339
1340 msg_printf(0, MSG_INFO, "[state] Client state summary:");
1341 msg_printf(0, MSG_INFO, "%d projects:", (int)projects.size());
1342 for (i=0; i<projects.size(); i++) {
1343 t = projects[i]->min_rpc_time;
1344 if (t) {
1345 msg_printf(0, MSG_INFO, " %s min RPC %f.0 seconds from now", projects[i]->master_url, t-now);
1346 } else {
1347 msg_printf(0, MSG_INFO, " %s", projects[i]->master_url);
1348 }
1349 }
1350 msg_printf(0, MSG_INFO, "%d file_infos:", (int)file_infos.size());
1351 for (i=0; i<file_infos.size(); i++) {
1352 msg_printf(0, MSG_INFO, " %s status:%d %s", file_infos[i]->name, file_infos[i]->status, file_infos[i]->pers_file_xfer?"active":"inactive");
1353 }
1354 msg_printf(0, MSG_INFO, "%d app_versions", (int)app_versions.size());
1355 for (i=0; i<app_versions.size(); i++) {
1356 msg_printf(0, MSG_INFO, " %s %d", app_versions[i]->app_name, app_versions[i]->version_num);
1357 }
1358 msg_printf(0, MSG_INFO, "%d workunits", (int)workunits.size());
1359 for (i=0; i<workunits.size(); i++) {
1360 msg_printf(0, MSG_INFO, " %s", workunits[i]->name);
1361 }
1362 msg_printf(0, MSG_INFO, "%d results", (int)results.size());
1363 for (i=0; i<results.size(); i++) {
1364 msg_printf(0, MSG_INFO, " %s state:%d", results[i]->name, results[i]->state());
1365 }
1366 msg_printf(0, MSG_INFO, "%d persistent file xfers", (int)pers_file_xfers->pers_file_xfers.size());
1367 for (i=0; i<pers_file_xfers->pers_file_xfers.size(); i++) {
1368 msg_printf(0, MSG_INFO, " %s http op state: %d", pers_file_xfers->pers_file_xfers[i]->fip->name, (pers_file_xfers->pers_file_xfers[i]->fxp?pers_file_xfers->pers_file_xfers[i]->fxp->http_op_state:-1));
1369 }
1370 msg_printf(0, MSG_INFO, "%d active tasks", (int)active_tasks.active_tasks.size());
1371 for (i=0; i<active_tasks.active_tasks.size(); i++) {
1372 msg_printf(0, MSG_INFO, " %s", active_tasks.active_tasks[i]->result->name);
1373 }
1374 }
1375
nresults_for_project(PROJECT * p)1376 int CLIENT_STATE::nresults_for_project(PROJECT* p) {
1377 int n=0;
1378 for (unsigned int i=0; i<results.size(); i++) {
1379 if (results[i]->project == p) n++;
1380 }
1381 return n;
1382 }
1383
abort_unstarted_late_jobs()1384 bool CLIENT_STATE::abort_unstarted_late_jobs() {
1385 bool action = false;
1386 if (now < 1235668593) return false; // skip if user reset system clock
1387 for (unsigned int i=0; i<results.size(); i++) {
1388 RESULT* rp = results[i];
1389 if (!rp->is_not_started()) continue;
1390 if (rp->report_deadline > now) continue;
1391 msg_printf(rp->project, MSG_INFO,
1392 "Aborting task %s; not started and deadline has passed",
1393 rp->name
1394 );
1395 rp->abort_inactive(EXIT_UNSTARTED_LATE);
1396 action = true;
1397 }
1398 return action;
1399 }
1400
garbage_collect()1401 bool CLIENT_STATE::garbage_collect() {
1402 bool action;
1403 static double last_time=0;
1404 if (!clock_change && now - last_time < GARBAGE_COLLECT_PERIOD) return false;
1405 last_time = gstate.now;
1406
1407 action = abort_unstarted_late_jobs();
1408 if (action) return true;
1409 action = garbage_collect_always();
1410 if (action) return true;
1411
1412 #ifndef SIM
1413 // Detach projects that are marked for detach when done
1414 // and are in fact done (have no results).
1415 // This is done here (not in garbage_collect_always())
1416 // because detach_project() calls garbage_collect_always(),
1417 // and we need to avoid infinite recursion
1418 //
1419 while (1) {
1420 bool found = false;
1421 for (unsigned i=0; i<projects.size(); i++) {
1422 PROJECT* p = projects[i];
1423 if (p->detach_when_done && !nresults_for_project(p)) {
1424 // If we're using an AM,
1425 // wait until the next successful RPC to detach project,
1426 // so the AM will be informed of its work done.
1427 //
1428 if (!p->attached_via_acct_mgr) {
1429 msg_printf(p, MSG_INFO, "Detaching - no more tasks");
1430 detach_project(p);
1431 action = true;
1432 found = true;
1433 }
1434 }
1435 }
1436 if (!found) break;
1437 }
1438 #endif
1439 return action;
1440 }
1441
1442 // delete unneeded records and files
1443 //
garbage_collect_always()1444 bool CLIENT_STATE::garbage_collect_always() {
1445 unsigned int i, j;
1446 int failnum;
1447 FILE_INFO* fip;
1448 RESULT* rp;
1449 WORKUNIT* wup;
1450 APP_VERSION* avp, *avp2;
1451 vector<RESULT*>::iterator result_iter;
1452 vector<WORKUNIT*>::iterator wu_iter;
1453 vector<FILE_INFO*>::iterator fi_iter;
1454 vector<APP_VERSION*>::iterator avp_iter;
1455 bool action = false, found;
1456 string error_msgs;
1457 PROJECT* project;
1458
1459 // zero references counts on WUs, FILE_INFOs and APP_VERSIONs
1460
1461 for (i=0; i<workunits.size(); i++) {
1462 wup = workunits[i];
1463 wup->ref_cnt = 0;
1464 }
1465 for (i=0; i<file_infos.size(); i++) {
1466 fip = file_infos[i];
1467 fip->ref_cnt = 0;
1468 }
1469 for (i=0; i<app_versions.size(); i++) {
1470 avp = app_versions[i];
1471 avp->ref_cnt = 0;
1472 }
1473
1474 // reference-count user and project files
1475 //
1476 for (i=0; i<projects.size(); i++) {
1477 project = projects[i];
1478 for (j=0; j<project->user_files.size(); j++) {
1479 project->user_files[j].file_info->ref_cnt++;
1480 }
1481 for (j=0; j<project->project_files.size(); j++) {
1482 project->project_files[j].file_info->ref_cnt++;
1483 }
1484 }
1485
1486 #ifdef ENABLE_AUTO_UPDATE
1487 // reference-count auto update files
1488 //
1489 if (auto_update.present) {
1490 for (i=0; i<auto_update.file_refs.size(); i++) {
1491 auto_update.file_refs[i].file_info->ref_cnt++;
1492 }
1493 }
1494 #endif
1495
1496 // Scan through RESULTs.
1497 // delete RESULTs that have been reported and acked.
1498 // Check for results whose WUs had download failures
1499 // Check for results that had upload failures
1500 // Reference-count output files
1501 // Reference-count WUs
1502 //
1503 result_iter = results.begin();
1504 while (result_iter != results.end()) {
1505 rp = *result_iter;
1506 #ifndef SIM
1507 if (rp->got_server_ack) {
1508 // see if - for some reason - there's an active task
1509 // for this result. don't want to create dangling ptr.
1510 //
1511 ACTIVE_TASK* atp = active_tasks.lookup_result(rp);
1512 if (atp) {
1513 msg_printf(rp->project, MSG_INTERNAL_ERROR,
1514 "garbage_collect(); still have active task for acked result %s; state %d",
1515 rp->name, atp->task_state()
1516 );
1517 atp->abort_task(
1518 EXIT_ABORTED_BY_CLIENT,
1519 "Got ack for job that's still active"
1520 );
1521 } else {
1522 if (log_flags.state_debug) {
1523 msg_printf(0, MSG_INFO,
1524 "[state] garbage_collect: deleting result %s\n",
1525 rp->name
1526 );
1527 }
1528 add_old_result(*rp);
1529 delete rp;
1530 result_iter = results.erase(result_iter);
1531 action = true;
1532 continue;
1533 }
1534 }
1535 #endif
1536 // See if the files for this result's workunit had
1537 // any errors (download failure, MD5, RSA, etc)
1538 // and we don't already have an error for this result
1539 //
1540 if (!rp->ready_to_report) {
1541 wup = rp->wup;
1542 if (wup->had_download_failure(failnum)) {
1543 wup->get_file_errors(error_msgs);
1544 string err_msg = "WU download error: " + error_msgs;
1545 report_result_error(*rp, err_msg.c_str());
1546 } else if (rp->avp && rp->avp->had_download_failure(failnum)) {
1547 rp->avp->get_file_errors(error_msgs);
1548 string err_msg = "app_version download error: " + error_msgs;
1549 report_result_error(*rp, err_msg.c_str());
1550 }
1551 }
1552 bool found_error = false;
1553 string error_str;
1554 for (i=0; i<rp->output_files.size(); i++) {
1555 // If one of the output files had an upload failure,
1556 // mark the result as done and report the error.
1557 //
1558 if (!rp->ready_to_report) {
1559 fip = rp->output_files[i].file_info;
1560 if (fip->had_failure(failnum)) {
1561 string msg;
1562 fip->failure_message(msg);
1563 found_error = true;
1564 error_str += msg;
1565 }
1566 }
1567 rp->output_files[i].file_info->ref_cnt++;
1568 }
1569 #ifndef SIM
1570 if (found_error) {
1571 // check for process still running; this can happen
1572 // e.g. if an intermediate upload fails
1573 //
1574 ACTIVE_TASK* atp = active_tasks.lookup_result(rp);
1575 if (atp) {
1576 switch (atp->task_state()) {
1577 case PROCESS_EXECUTING:
1578 case PROCESS_SUSPENDED:
1579 atp->abort_task(ERR_RESULT_UPLOAD, "upload failure");
1580 }
1581 }
1582 string err_msg = "upload failure: " + error_str;
1583 report_result_error(*rp, err_msg.c_str());
1584 }
1585 #endif
1586 rp->avp->ref_cnt++;
1587 rp->wup->ref_cnt++;
1588 ++result_iter;
1589 }
1590
1591 // delete WORKUNITs not referenced by any in-progress result;
1592 // reference-count files and APP_VERSIONs referred to by other WUs
1593 //
1594 wu_iter = workunits.begin();
1595 while (wu_iter != workunits.end()) {
1596 wup = *wu_iter;
1597 if (wup->ref_cnt == 0) {
1598 if (log_flags.state_debug) {
1599 msg_printf(0, MSG_INFO,
1600 "[state] CLIENT_STATE::garbage_collect(): deleting workunit %s\n",
1601 wup->name
1602 );
1603 }
1604 delete wup;
1605 wu_iter = workunits.erase(wu_iter);
1606 action = true;
1607 } else {
1608 for (i=0; i<wup->input_files.size(); i++) {
1609 wup->input_files[i].file_info->ref_cnt++;
1610 }
1611 ++wu_iter;
1612 }
1613 }
1614
1615 // go through APP_VERSIONs;
1616 // delete any not referenced by any WORKUNIT
1617 // and superceded by a more recent version
1618 // for the same platform and plan class
1619 //
1620 avp_iter = app_versions.begin();
1621 while (avp_iter != app_versions.end()) {
1622 avp = *avp_iter;
1623 if (avp->ref_cnt == 0) {
1624 found = false;
1625 for (j=0; j<app_versions.size(); j++) {
1626 avp2 = app_versions[j];
1627 if (avp2->app == avp->app
1628 && avp2->version_num > avp->version_num
1629 && (!strcmp(avp2->plan_class, avp->plan_class))
1630 && (!strcmp(avp2->platform, avp->platform))
1631 ) {
1632 found = true;
1633 break;
1634 }
1635 }
1636 if (found) {
1637 delete avp;
1638 avp_iter = app_versions.erase(avp_iter);
1639 action = true;
1640 } else {
1641 ++avp_iter;
1642 }
1643 } else {
1644 ++avp_iter;
1645 }
1646 }
1647
1648 // Then go through remaining APP_VERSIONs,
1649 // bumping refcnt of associated files.
1650 //
1651 for (i=0; i<app_versions.size(); i++) {
1652 avp = app_versions[i];
1653 for (j=0; j<avp->app_files.size(); j++) {
1654 avp->app_files[j].file_info->ref_cnt++;
1655 }
1656 }
1657
1658 // reference-count sticky files not marked for deletion
1659 //
1660
1661 for (fi_iter = file_infos.begin(); fi_iter!=file_infos.end(); ++fi_iter) {
1662 fip = *fi_iter;
1663 if (fip->sticky_expire_time && now > fip->sticky_expire_time) {
1664 fip->sticky = false;
1665 fip->sticky_expire_time = 0;
1666 }
1667 if (!fip->sticky) continue;
1668 if (fip->status < 0) continue;
1669 fip->ref_cnt++;
1670 }
1671
1672 // remove PERS_FILE_XFERs (and associated FILE_XFERs and HTTP_OPs)
1673 // for unreferenced files
1674 //
1675 vector<PERS_FILE_XFER*>::iterator pfx_iter;
1676 pfx_iter = pers_file_xfers->pers_file_xfers.begin();
1677 while (pfx_iter != pers_file_xfers->pers_file_xfers.end()) {
1678 PERS_FILE_XFER* pfx = *pfx_iter;
1679 if (pfx->fip->ref_cnt == 0) {
1680 pfx->suspend();
1681 delete pfx;
1682 pfx_iter = pers_file_xfers->pers_file_xfers.erase(pfx_iter);
1683 } else {
1684 ++pfx_iter;
1685 }
1686 }
1687
1688 // delete FILE_INFOs (and corresponding files) that are not referenced
1689 //
1690 fi_iter = file_infos.begin();
1691 while (fi_iter != file_infos.end()) {
1692 fip = *fi_iter;
1693 if (fip->ref_cnt==0) {
1694 fip->delete_file();
1695 if (log_flags.state_debug) {
1696 msg_printf(0, MSG_INFO,
1697 "[state] CLIENT_STATE::garbage_collect(): deleting file %s\n",
1698 fip->name
1699 );
1700 }
1701 delete fip;
1702 fi_iter = file_infos.erase(fi_iter);
1703 action = true;
1704 } else {
1705 ++fi_iter;
1706 }
1707 }
1708
1709 if (action && log_flags.state_debug) {
1710 print_summary();
1711 }
1712
1713 return action;
1714 }
1715
1716 // For results that are waiting for file transfer,
1717 // check if the transfer is done,
1718 // and if so switch to new state and take other actions.
1719 // Also set some fields for newly-aborted results.
1720 //
update_results()1721 bool CLIENT_STATE::update_results() {
1722 RESULT* rp;
1723 vector<RESULT*>::iterator result_iter;
1724 bool action = false;
1725 static double last_time=0;
1726
1727 if (!clock_change && now - last_time < UPDATE_RESULTS_PERIOD) return false;
1728 last_time = now;
1729
1730 result_iter = results.begin();
1731 while (result_iter != results.end()) {
1732 rp = *result_iter;
1733
1734 switch (rp->state()) {
1735 case RESULT_NEW:
1736 rp->set_state(RESULT_FILES_DOWNLOADING, "CS::update_results");
1737 action = true;
1738 break;
1739 #ifndef SIM
1740 case RESULT_FILES_DOWNLOADING:
1741 if (input_files_available(rp, false) == 0) {
1742 if (rp->avp->app_files.size()==0) {
1743 // if this is a file-transfer app, start the upload phase
1744 //
1745 rp->set_state(RESULT_FILES_UPLOADING, "CS::update_results");
1746 rp->clear_uploaded_flags();
1747 } else {
1748 // else try to start the computation
1749 //
1750 rp->set_state(RESULT_FILES_DOWNLOADED, "CS::update_results");
1751 request_schedule_cpus("files downloaded");
1752 }
1753 action = true;
1754 }
1755 break;
1756 #endif
1757 case RESULT_FILES_UPLOADING:
1758 if (rp->is_upload_done()) {
1759 rp->set_ready_to_report();
1760 rp->completed_time = gstate.now;
1761 rp->set_state(RESULT_FILES_UPLOADED, "CS::update_results");
1762
1763 // clear backoffs for app's resources;
1764 // this addresses the situation where the project has a
1765 // "max # jobs in progress" limit,
1766 // and we're backed off because of that
1767 //
1768 work_fetch.clear_backoffs(*rp->avp);
1769 action = true;
1770 }
1771 break;
1772 case RESULT_FILES_UPLOADED:
1773 break;
1774 case RESULT_ABORTED:
1775 if (!rp->ready_to_report) {
1776 rp->set_ready_to_report();
1777 rp->completed_time = now;
1778 action = true;
1779 }
1780 break;
1781 }
1782 ++result_iter;
1783 }
1784 return action;
1785 }
1786
1787 // Returns true if client should exit for various reasons
1788 //
time_to_exit()1789 bool CLIENT_STATE::time_to_exit() {
1790 if (exit_after_app_start_secs
1791 && (app_started>0)
1792 && ((now - app_started) >= exit_after_app_start_secs)
1793 ) {
1794 msg_printf(NULL, MSG_INFO,
1795 "Exiting because %d elapsed since started task",
1796 exit_after_app_start_secs
1797 );
1798 return true;
1799 }
1800 if (cc_config.exit_when_idle
1801 && (results.size() == 0)
1802 && had_or_requested_work
1803 ) {
1804 msg_printf(NULL, MSG_INFO, "exiting because no more results");
1805 return true;
1806 }
1807 if (cant_write_state_file) {
1808 static bool first = true;
1809 double t = now - last_wakeup_time;
1810 if (first && t > 50) {
1811 first = false;
1812 msg_printf(NULL, MSG_INFO,
1813 "Can't write state file, exiting in 10 seconds"
1814 );
1815 }
1816 if (t > 60) {
1817 msg_printf(NULL, MSG_INFO,
1818 "Can't write state file, exiting now"
1819 );
1820 return true;
1821 }
1822 }
1823 return false;
1824 }
1825
1826 // Call this when a result has a nonrecoverable error.
1827 // - back off on contacting the project's scheduler
1828 // (so don't crash over and over)
1829 // - Append a description of the error to result.stderr_out
1830 // - If result state is FILES_DOWNLOADED, change it to COMPUTE_ERROR
1831 // so that we don't try to run it again.
1832 //
report_result_error(RESULT & res,const char * err_msg)1833 int CLIENT_STATE::report_result_error(RESULT& res, const char* err_msg) {
1834 char buf[1024];
1835 unsigned int i;
1836 int failnum;
1837
1838 // only do this once per result
1839 //
1840 if (res.ready_to_report) {
1841 return 0;
1842 }
1843
1844 res.set_ready_to_report();
1845 res.completed_time = now;
1846
1847 sprintf(buf, "Unrecoverable error for task %s", res.name);
1848 #ifndef SIM
1849 scheduler_op->project_rpc_backoff(res.project, buf);
1850 #endif
1851
1852 res.stderr_out.append("<message>\n");
1853 res.stderr_out.append(err_msg);
1854 res.stderr_out.append("</message>\n");
1855
1856 switch(res.state()) {
1857 case RESULT_NEW:
1858 case RESULT_FILES_DOWNLOADING:
1859 // called from:
1860 // CLIENT_STATE::garbage_collect()
1861 // if WU or app_version had a download failure
1862 //
1863 if (!res.exit_status) {
1864 res.exit_status = ERR_RESULT_DOWNLOAD;
1865 }
1866 break;
1867
1868 case RESULT_FILES_DOWNLOADED:
1869 // called from:
1870 // ACTIVE_TASK::start (if couldn't start app)
1871 // ACTIVE_TASK::restart (if files missing)
1872 // ACITVE_TASK_SET::restart_tasks (catch other error returns)
1873 // ACTIVE_TASK::handle_exited_app (on nonzero exit or signal)
1874 // ACTIVE_TASK::abort_task (if exceeded resource limit)
1875 // CLIENT_STATE::schedule_cpus (catch-all for resume/start errors)
1876 //
1877 res.set_state(RESULT_COMPUTE_ERROR, "CS::report_result_error");
1878 if (!res.exit_status) {
1879 res.exit_status = ERR_RESULT_START;
1880 }
1881 break;
1882
1883 case RESULT_FILES_UPLOADING:
1884 // called from
1885 // CLIENT_STATE::garbage_collect() if result had an upload error
1886 //
1887 for (i=0; i<res.output_files.size(); i++) {
1888 if (res.output_files[i].file_info->had_failure(failnum)) {
1889 sprintf(buf,
1890 "<upload_error>\n"
1891 " <file_name>%s</file_name>\n"
1892 " <error_code>%d</error_code>\n"
1893 "</upload_error>\n",
1894 res.output_files[i].file_info->name, failnum
1895 );
1896 res.stderr_out.append(buf);
1897 }
1898 }
1899 if (!res.exit_status) {
1900 res.exit_status = ERR_RESULT_UPLOAD;
1901 }
1902 res.set_state(RESULT_UPLOAD_FAILED, "CS::report_result_error");
1903 break;
1904 case RESULT_FILES_UPLOADED:
1905 msg_printf(res.project, MSG_INTERNAL_ERROR,
1906 "Error reported for completed task %s", res.name
1907 );
1908 break;
1909 }
1910
1911 res.stderr_out = res.stderr_out.substr(0, MAX_STDERR_LEN);
1912 return 0;
1913 }
1914
1915 #ifndef SIM
1916
1917 // "Reset" a project: (clear error conditions)
1918 // - stop all active tasks
1919 // - stop all file transfers
1920 // - stop scheduler RPC if any
1921 // - delete workunits and results
1922 // - delete apps and app_versions
1923 // - garbage collect to delete unneeded files
1924 // - clear backoffs
1925 //
1926 // does not delete project dir
1927 //
reset_project(PROJECT * project,bool detaching)1928 int CLIENT_STATE::reset_project(PROJECT* project, bool detaching) {
1929 unsigned int i;
1930 APP_VERSION* avp;
1931 APP* app;
1932 vector<APP*>::iterator app_iter;
1933 vector<APP_VERSION*>::iterator avp_iter;
1934 RESULT* rp;
1935 PERS_FILE_XFER* pxp;
1936
1937 msg_printf(project, MSG_INFO, "Resetting project");
1938 active_tasks.abort_project(project);
1939
1940 // stop and remove file transfers
1941 //
1942 for (i=0; i<pers_file_xfers->pers_file_xfers.size(); i++) {
1943 pxp = pers_file_xfers->pers_file_xfers[i];
1944 if (pxp->fip->project == project) {
1945 if (pxp->fxp) {
1946 file_xfers->remove(pxp->fxp);
1947 delete pxp->fxp;
1948 }
1949 pers_file_xfers->remove(pxp);
1950 delete pxp;
1951 i--;
1952 }
1953 }
1954
1955 // if we're in the middle of a scheduler op to the project, abort it
1956 //
1957 scheduler_op->abort(project);
1958
1959 // abort other HTTP operations
1960 //
1961 //http_ops.abort_project_ops(project);
1962
1963 // mark results as server-acked.
1964 // This will cause garbage_collect to delete them,
1965 // and in turn their WUs will be deleted
1966 //
1967 for (i=0; i<results.size(); i++) {
1968 rp = results[i];
1969 if (rp->project == project) {
1970 rp->got_server_ack = true;
1971 }
1972 }
1973
1974 project->user_files.clear();
1975 project->project_files.clear();
1976
1977 // clear flags so that sticky files get deleted
1978 //
1979 for (i=0; i<file_infos.size(); i++) {
1980 FILE_INFO* fip = file_infos[i];
1981 if (fip->project == project) {
1982 fip->sticky = false;
1983 }
1984 }
1985
1986 garbage_collect_always();
1987
1988 // remove apps and app_versions (but not if anonymous platform)
1989 //
1990 if (!project->anonymous_platform || detaching) {
1991 avp_iter = app_versions.begin();
1992 while (avp_iter != app_versions.end()) {
1993 avp = *avp_iter;
1994 if (avp->project == project) {
1995 avp_iter = app_versions.erase(avp_iter);
1996 delete avp;
1997 } else {
1998 ++avp_iter;
1999 }
2000 }
2001
2002 app_iter = apps.begin();
2003 while (app_iter != apps.end()) {
2004 app = *app_iter;
2005 if (app->project == project) {
2006 app_iter = apps.erase(app_iter);
2007 delete app;
2008 } else {
2009 ++app_iter;
2010 }
2011 }
2012 garbage_collect_always();
2013 }
2014
2015 // if not anonymous platform, clean out the project dir
2016 // except for app_config.xml
2017 //
2018 if (!project->anonymous_platform) {
2019 client_clean_out_dir(
2020 project->project_dir(),
2021 "reset project",
2022 "app_config.xml"
2023 );
2024 }
2025
2026 // force refresh of scheduler URLs
2027 //
2028 project->scheduler_urls.clear();
2029
2030 project->duration_correction_factor = 1;
2031 project->ams_resource_share = -1;
2032 project->min_rpc_time = 0;
2033 project->pwf.reset(project);
2034 for (int j=0; j<coprocs.n_rsc; j++) {
2035 project->rsc_pwf[j].reset();
2036 }
2037 write_state_file();
2038 return 0;
2039 }
2040
2041 // "Detach" a project:
2042 // - Reset (see above)
2043 // - delete all file infos
2044 // - delete account file
2045 // - delete project directory
2046 // - delete various per-project files
2047 //
detach_project(PROJECT * project)2048 int CLIENT_STATE::detach_project(PROJECT* project) {
2049 vector<PROJECT*>::iterator project_iter;
2050 vector<FILE_INFO*>::iterator fi_iter;
2051 FILE_INFO* fip;
2052 PROJECT* p;
2053 char path[MAXPATHLEN];
2054 int retval;
2055
2056 reset_project(project, true);
2057
2058 msg_printf(project, MSG_INFO, "Detaching from project");
2059
2060 // delete all FILE_INFOs associated with this project
2061 //
2062 fi_iter = file_infos.begin();
2063 while (fi_iter != file_infos.end()) {
2064 fip = *fi_iter;
2065 if (fip->project == project) {
2066 fi_iter = file_infos.erase(fi_iter);
2067 delete fip;
2068 } else {
2069 ++fi_iter;
2070 }
2071 }
2072
2073 // find project and remove it from the vector
2074 //
2075 for (project_iter = projects.begin(); project_iter != projects.end(); ++project_iter) {
2076 p = *project_iter;
2077 if (p == project) {
2078 project_iter = projects.erase(project_iter);
2079 break;
2080 }
2081 }
2082
2083 // delete statistics file
2084 //
2085 get_statistics_filename(project->master_url, path, sizeof(path));
2086 retval = boinc_delete_file(path);
2087 if (retval) {
2088 msg_printf(project, MSG_INTERNAL_ERROR,
2089 "Can't delete statistics file: %s", boincerror(retval)
2090 );
2091 }
2092
2093 // delete account file
2094 //
2095 get_account_filename(project->master_url, path, sizeof(path));
2096 retval = boinc_delete_file(path);
2097 if (retval) {
2098 msg_printf(project, MSG_INTERNAL_ERROR,
2099 "Can't delete account file: %s", boincerror(retval)
2100 );
2101 }
2102
2103 get_sched_request_filename(*project, path, sizeof(path));
2104 retval = boinc_delete_file(path);
2105
2106 get_sched_reply_filename(*project, path, sizeof(path));
2107 retval = boinc_delete_file(path);
2108
2109 get_master_filename(*project, path, sizeof(path));
2110 retval = boinc_delete_file(path);
2111
2112 // remove project directory and its contents
2113 //
2114 retval = remove_project_dir(*project);
2115 if (retval) {
2116 msg_printf(project, MSG_INTERNAL_ERROR,
2117 "Can't delete project directory: %s", boincerror(retval)
2118 );
2119 }
2120
2121 // remove miscellaneous per-project files
2122 //
2123 //job_log_filename(*project, path, sizeof(path));
2124 //boinc_delete_file(path);
2125 delete_project_notice_files(project);
2126
2127 rss_feeds.update_feed_list();
2128
2129 delete project;
2130 write_state_file();
2131
2132 adjust_rec();
2133 request_schedule_cpus("Detach");
2134 request_work_fetch("Detach");
2135 return 0;
2136 }
2137
2138 // Quit running applications, quit benchmarks,
2139 // write the client_state.xml file
2140 // (in principle we could also terminate net_xfers here,
2141 // e.g. flush buffers, but why bother)
2142 //
quit_activities()2143 int CLIENT_STATE::quit_activities() {
2144 // calculate REC (for state file)
2145 //
2146 adjust_rec();
2147
2148 daily_xfer_history.write_file();
2149 write_state_file();
2150 gui_rpcs.close();
2151 abort_cpu_benchmarks();
2152 time_stats.quit();
2153
2154 // stop jobs.
2155 // Do this last because it could take a long time,
2156 // and the OS might kill us in the middle
2157 //
2158 int retval = active_tasks.exit_tasks();
2159 if (retval) {
2160 msg_printf(NULL, MSG_INTERNAL_ERROR,
2161 "Couldn't exit tasks: %s", boincerror(retval)
2162 );
2163 }
2164
2165 return 0;
2166 }
2167
2168 #endif
2169
2170 // Called at startup to see if a timestamp in the client state file
2171 // is later than the current time.
2172 // If so, the user must have decremented the system clock.
2173 //
check_clock_reset()2174 void CLIENT_STATE::check_clock_reset() {
2175 if (!time_stats.last_update) return;
2176 if (time_stats.last_update <= now) return;
2177 msg_printf(NULL, MSG_INFO,
2178 "System clock (%.0f) < state file timestamp (%.0f); clearing timeouts",
2179 now, time_stats.last_update
2180 );
2181 clear_absolute_times();
2182 }
2183
2184 // The system clock seems to have been set back,
2185 // possibly by a large amount (years).
2186 // Clear various "wait until X" absolute times.
2187 //
2188 // Note: there are other absolute times (like job deadlines)
2189 // that we could try to patch up, but it's not clear how.
2190 //
clear_absolute_times()2191 void CLIENT_STATE::clear_absolute_times() {
2192 exclusive_app_running = 0;
2193 exclusive_gpu_app_running = 0;
2194 new_version_check_time = now;
2195 all_projects_list_check_time = now;
2196 retry_shmem_time = 0;
2197 cpu_run_mode.temp_timeout = 0;
2198 gpu_run_mode.temp_timeout = 0;
2199 network_run_mode.temp_timeout = 0;
2200 time_stats.last_update = now;
2201
2202 unsigned int i;
2203 for (i=0; i<projects.size(); i++) {
2204 PROJECT* p = projects[i];
2205 p->min_rpc_time = 0;
2206 if (p->next_rpc_time) {
2207 p->next_rpc_time = now;
2208 }
2209 p->download_backoff.next_xfer_time = 0;
2210 p->upload_backoff.next_xfer_time = 0;
2211 for (int j=0; j<coprocs.n_rsc; j++) {
2212 p->rsc_pwf[j].clear_backoff();
2213 }
2214 //#ifdef USE_REC
2215 p->pwf.rec_time = now;
2216 //#endif
2217 }
2218 for (i=0; i<pers_file_xfers->pers_file_xfers.size(); i++) {
2219 PERS_FILE_XFER* pfx = pers_file_xfers->pers_file_xfers[i];
2220 pfx->next_request_time = 0;
2221 }
2222
2223 for (i=0; i<results.size(); i++) {
2224 RESULT* rp = results[i];
2225 rp->schedule_backoff = 0;
2226 }
2227 }
2228
log_show_projects()2229 void CLIENT_STATE::log_show_projects() {
2230 char buf[256];
2231 for (unsigned int i=0; i<projects.size(); i++) {
2232 PROJECT* p = projects[i];
2233 if (p->hostid) {
2234 sprintf(buf, "%d", p->hostid);
2235 } else {
2236 safe_strcpy(buf, "not assigned yet");
2237 }
2238 msg_printf(p, MSG_INFO,
2239 "URL %s; Computer ID %s; resource share %.0f",
2240 p->master_url, buf, p->resource_share
2241 );
2242 if (p->ended) {
2243 msg_printf(p, MSG_INFO, "Project has ended - OK to detach");
2244 }
2245 p->show_no_work_notice();
2246 }
2247 }
2248
2249 #ifndef SIM
2250
2251 // the following is done on client exit if the
2252 // "abort_jobs_on_exit" flag is present.
2253 // Abort jobs, and arrange to tell projects about it.
2254 //
start_abort_sequence()2255 void CLIENT_STATE::start_abort_sequence() {
2256 unsigned int i;
2257
2258 in_abort_sequence = true;
2259
2260 for (i=0; i<results.size(); i++) {
2261 RESULT* rp = results[i];
2262 rp->project->sched_rpc_pending = RPC_REASON_USER_REQ;
2263 if (rp->computing_done()) continue;
2264 ACTIVE_TASK* atp = lookup_active_task_by_result(rp);
2265 if (atp) {
2266 atp->abort_task(EXIT_CLIENT_EXITING, "aborting on client exit");
2267 } else {
2268 rp->abort_inactive(EXIT_CLIENT_EXITING);
2269 }
2270 }
2271 for (i=0; i<projects.size(); i++) {
2272 PROJECT* p = projects[i];
2273 p->min_rpc_time = 0;
2274 p->dont_request_more_work = true;
2275 }
2276 }
2277
2278 // The second part of the above; check if RPCs are done
2279 //
abort_sequence_done()2280 bool CLIENT_STATE::abort_sequence_done() {
2281 unsigned int i;
2282 for (i=0; i<projects.size(); i++) {
2283 PROJECT* p = projects[i];
2284 if (p->sched_rpc_pending == RPC_REASON_USER_REQ) return false;
2285 }
2286 return true;
2287 }
2288
2289 #endif
2290