1 // This file is part of BOINC. 2 // http://boinc.berkeley.edu 3 // Copyright (C) 2008 University of California 4 // 5 // BOINC is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU Lesser General Public License 7 // as published by the Free Software Foundation, 8 // either version 3 of the License, or (at your option) any later version. 9 // 10 // BOINC is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 13 // See the GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>. 17 18 #ifndef BOINC_CLIENT_STATE_H 19 #define BOINC_CLIENT_STATE_H 20 21 #define NEW_CPU_THROTTLE 22 // do CPU throttling using a separate thread. 23 // This makes it possible to throttle faster than the client's 1-sec poll period 24 25 #ifndef _WIN32 26 #include <string> 27 #include <vector> 28 #include <ctime> 29 #endif 30 31 using std::string; 32 using std::vector; 33 34 #include "coproc.h" 35 #include "util.h" 36 #ifdef NEW_CPU_THROTTLE 37 #include "thread.h" 38 #endif 39 40 #include "acct_mgr.h" 41 #include "acct_setup.h" 42 #include "app.h" 43 #include "client_types.h" 44 #include "current_version.h" 45 #include "file_xfer.h" 46 #include "file_names.h" 47 #include "gui_rpc_server.h" 48 #include "gui_http.h" 49 #include "project_init.h" 50 #include "hostinfo.h" 51 #include "miofile.h" 52 #include "net_stats.h" 53 #include "pers_file_xfer.h" 54 #include "prefs.h" 55 #include "scheduler_op.h" 56 #include "time_stats.h" 57 58 #ifdef SIM 59 #include "../sched/edf_sim.h" 60 #endif 61 62 #define WORK_FETCH_DONT_NEED 0 63 // project: suspended, deferred, or no new work (can't ask for more work) 64 // overall: not work_fetch_ok (from CPU policy) 65 #define WORK_FETCH_OK 1 66 // project: has more than min queue * share, not suspended/def/nonewwork 67 // overall: at least min queue, work fetch OK 68 #define WORK_FETCH_NEED 2 69 // project: less than min queue * resource share of DL/runnable results 70 // overall: less than min queue 71 #define WORK_FETCH_NEED_IMMEDIATELY 3 72 // project: no downloading or runnable results 73 // overall: at least one idle CPU 74 75 // encapsulates the global variables of the core client. 76 // If you add anything here, initialize it in the constructor 77 // 78 struct CLIENT_STATE { 79 vector<PLATFORM> platforms; 80 vector<PROJECT*> projects; 81 // in alphabetical order, to improve display 82 vector<APP*> apps; 83 vector<FILE_INFO*> file_infos; 84 vector<APP_VERSION*> app_versions; 85 vector<WORKUNIT*> workunits; 86 vector<RESULT*> results; 87 // list of jobs, ordered by increasing arrival time 88 89 PERS_FILE_XFER_SET* pers_file_xfers; 90 HTTP_OP_SET* http_ops; 91 FILE_XFER_SET* file_xfers; 92 #ifndef SIM 93 GUI_RPC_CONN_SET gui_rpcs; 94 #endif 95 GUI_HTTP gui_http; 96 #ifdef ENABLE_AUTO_UPDATE 97 AUTO_UPDATE auto_update; 98 #endif 99 LOOKUP_WEBSITE_OP lookup_website_op; 100 GET_CURRENT_VERSION_OP get_current_version_op; 101 GET_PROJECT_LIST_OP get_project_list_op; 102 ACCT_MGR_OP acct_mgr_op; 103 104 CLIENT_TIME_STATS time_stats; 105 GLOBAL_PREFS global_prefs; 106 NET_STATS net_stats; 107 ACTIVE_TASK_SET active_tasks; 108 HOST_INFO host_info; 109 110 // the following used only on Android 111 DEVICE_STATUS device_status; 112 double device_status_time; 113 114 char language[16]; // ISO language code reported by GUI 115 char client_brand[256]; 116 // contents of client_brand.txt, e.g. "HTC Power to Give" 117 // reported to scheduler 118 VERSION_INFO core_client_version; 119 string statefile_platform_name; 120 int file_xfer_giveup_period; 121 RUN_MODE cpu_run_mode; 122 RUN_MODE gpu_run_mode; 123 RUN_MODE network_run_mode; 124 bool started_by_screensaver; 125 bool check_all_logins; 126 bool user_active; // there has been recent mouse/kbd input 127 int cmdline_gui_rpc_port; 128 bool show_projects; 129 bool requested_exit; 130 // we should exit now. Set when 131 // - got a "quit" GUI RPC 132 // - (Unix) got a HUP, INT, QUIT, TERM, or PWR signal 133 // - (Win) got CTRL_LOGOFF, CTRL_C, CTRL_BREAK, etc. event 134 // - (Mac) client was started from screensaver, 135 // which has since exited 136 bool os_requested_suspend; 137 // we should suspend for OS reasonts (used on Win only). 138 // Set when 139 // - got BATTERY_LOW, SUSPEND, SERVICE_CONTROL_PAUSE 140 double os_requested_suspend_time; 141 bool cleanup_completed; 142 bool in_abort_sequence; 143 // Determine when it is safe to leave the quit_client() handler 144 // and to finish cleaning up. 145 char detach_project_url[256]; 146 // stores URL for --detach_project option 147 char reset_project_url[256]; 148 // stores URL for --reset_project option 149 char update_prefs_url[256]; 150 // stores URL for --update_prefs option 151 char main_host_venue[256]; 152 // venue from project or AMS that gave us general prefs 153 char attach_project_url[256]; 154 char attach_project_auth[256]; 155 bool exit_before_upload; 156 // exit when about to upload a file 157 bool run_test_app; 158 // API test mode 159 #ifndef _WIN32 160 gid_t boinc_project_gid; 161 #endif 162 #ifdef _WIN32 163 // vars so that the sysmon thread can write messages 164 // 165 bool have_sysmon_msg; 166 char sysmon_msg[256]; 167 #endif 168 169 // backoff-related variables 170 // 171 int master_fetch_period; 172 // fetch project's master URL (and stop doing scheduler RPCs) 173 // if get this many successive RPC failures (default 10) 174 int retry_cap; 175 // cap project->nrpc_failures at this number 176 int master_fetch_retry_cap; 177 // after this many master-fetch failures, 178 // move into a state in which we retry master fetch 179 // at the frequency below 180 int master_fetch_interval; 181 // see above 182 183 int sched_retry_delay_min; 184 int sched_retry_delay_max; 185 int pers_retry_delay_min; 186 int pers_retry_delay_max; 187 int pers_giveup; 188 189 bool tasks_suspended; 190 // Computing suspended for reason other than throttling 191 int suspend_reason; 192 bool tasks_throttled; 193 // Computing suspended because of throttling 194 195 bool network_suspended; 196 // Don't use network. 197 bool file_xfers_suspended; 198 // Don't do file xfers (but allow other network activity). 199 int network_suspend_reason; 200 201 bool executing_as_daemon; 202 // true if --daemon is on the commandline 203 // this means we are running as a daemon on unix, 204 // or as a service on Windows 205 bool redirect_io; 206 // redirect stdout, stderr to log files 207 bool disable_graphics; 208 // a condition has occurred in which we know graphics will 209 // not be displayable, so GUIs shouldn't offer graphics. 210 bool detach_console; 211 bool launched_by_manager; 212 bool run_by_updater; 213 double now; 214 bool clock_change; // system clock was recently decreased 215 double last_wakeup_time; 216 bool initialized; 217 bool cant_write_state_file; 218 // failed to write state file. 219 // In this case we continue to run for 1 minute, 220 // handling GUI RPCs but doing nothing else, 221 // so that the Manager can tell the user what the problem is 222 223 bool client_state_dirty; 224 int old_major_version; 225 int old_minor_version; 226 int old_release; 227 bool run_cpu_benchmarks; 228 // if set, run benchmarks when possible 229 230 int exit_after_app_start_secs; 231 // if nonzero, exit this many seconds after starting an app 232 double app_started; 233 // when the most recent app was started 234 235 // --------------- acct_mgr.cpp: 236 ACCT_MGR_INFO acct_mgr_info; 237 238 // --------------- acct_setup.cpp: 239 PROJECT_INIT project_init; 240 PROJECT_ATTACH project_attach; 241 void new_version_check(bool force = false); 242 void all_projects_list_check(); 243 double new_version_check_time; 244 double all_projects_list_check_time; 245 // the time we last successfully fetched the project list 246 string newer_version; 247 248 // --------------- client_state.cpp: 249 CLIENT_STATE(); 250 void show_host_info(); 251 bool is_new_client(); 252 int init(); 253 bool poll_slow_events(); 254 // Never blocks. 255 // Returns true if it actually did something, 256 // in which case it should be called again immediately. 257 void do_io_or_sleep(double dt); 258 bool time_to_exit(); 259 PROJECT* lookup_project(const char*); 260 APP* lookup_app(PROJECT*, const char*); 261 FILE_INFO* lookup_file_info(PROJECT*, const char* name); 262 RESULT* lookup_result(PROJECT*, const char*); 263 WORKUNIT* lookup_workunit(PROJECT*, const char*); 264 APP_VERSION* lookup_app_version( 265 APP*, char* platform, int ver, char* plan_class 266 ); 267 int detach_project(PROJECT*); 268 int report_result_error(RESULT&, const char* err_msg); 269 int reset_project(PROJECT*, bool detaching); 270 bool no_gui_rpc; 271 bool gui_rpc_unix_domain; 272 // do GUI RPC over Unix-domain sockets rather than TCP 273 void start_abort_sequence(); 274 bool abort_sequence_done(); 275 int quit_activities(); 276 277 int link_app(PROJECT*, APP*); 278 int link_file_info(PROJECT*, FILE_INFO*); 279 int link_file_ref(PROJECT*, FILE_REF*); 280 int link_app_version(PROJECT*, APP_VERSION*); 281 int link_workunit(PROJECT*, WORKUNIT*); 282 int link_result(PROJECT*, RESULT*); 283 void print_summary(); 284 bool abort_unstarted_late_jobs(); 285 bool garbage_collect(); 286 bool garbage_collect_always(); 287 bool update_results(); 288 int nresults_for_project(PROJECT*); 289 void check_clock_reset(); 290 void clear_absolute_times(); 291 void set_now(); 292 void log_show_projects(); 293 294 // --------------- cpu_sched.cpp: 295 double total_resource_share(); 296 double potentially_runnable_resource_share(); 297 double nearly_runnable_resource_share(); 298 double fetchable_resource_share(); 299 double rec_interval_start; 300 double total_cpu_time_this_rec_interval; 301 bool must_enforce_cpu_schedule; 302 bool must_schedule_cpus; 303 bool must_check_work_fetch; 304 void assign_results_to_projects(); 305 RESULT* highest_prio_project_best_result(); 306 void reset_rec_accounting(); 307 bool schedule_cpus(); 308 void make_run_list(vector<RESULT*>&); 309 bool enforce_run_list(vector<RESULT*>&); 310 void append_unfinished_time_slice(vector<RESULT*>&); 311 312 double runnable_resource_share(int); 313 void adjust_rec(); 314 double retry_shmem_time; 315 // if we fail to start a task due to no shared-mem segments, 316 // wait until at least this time to try running 317 // another task that needs a shared-mem seg work_buf_minCLIENT_STATE318 inline double work_buf_min() { 319 double x = global_prefs.work_buf_min_days * 86400; 320 if (x < 180) x = 180; 321 return x; 322 } work_buf_additionalCLIENT_STATE323 inline double work_buf_additional() { 324 return global_prefs.work_buf_additional_days *86400; 325 } work_buf_totalCLIENT_STATE326 inline double work_buf_total() { 327 double x = work_buf_min() + work_buf_additional(); 328 if (x < 1) x = 1; 329 return x; 330 } 331 332 void request_schedule_cpus(const char*); 333 // Reschedule CPUs ASAP. 334 // Called when: 335 // - core client starts (CS::init()) 336 // - an app exits (ATS::check_app_exited()) 337 // - Tasks are killed (ATS::exit_tasks()) 338 // - a result's input files finish downloading (CS::update_results()) 339 // - an app fails to start (CS::schedule_cpus()) 340 // - any project op is done via RPC (suspend/resume) 341 // - any result op is done via RPC (suspend/resume) 342 void set_ncpus(); 343 344 // --------------- cs_account.cpp: 345 int add_project( 346 const char* master_url, const char* authenticator, 347 const char* project_name, bool attached_via_acct_mgr 348 ); 349 350 int parse_account_files(); 351 int parse_account_files_venue(); 352 int parse_preferences_for_user_files(); 353 int parse_statistics_files(); 354 // should be move to a new file, but this will do it for testing 355 356 // --------------- cs_apps.cpp: 357 double get_fraction_done(RESULT* result); 358 int input_files_available(RESULT*, bool, FILE_INFO** f=0); 359 ACTIVE_TASK* lookup_active_task_by_result(RESULT*); 360 int ncpus; 361 // Act like there are this many CPUs. 362 // By default this is the # of physical CPUs, 363 // but it can be changed in two ways: 364 // - type <ncpus>N</ncpus> in the config file 365 // - type the max_ncpus_pct pref 366 367 int latest_version(APP*, char*); 368 int app_finished(ACTIVE_TASK&); 369 bool start_apps(); 370 bool handle_finished_apps(); 371 372 ACTIVE_TASK* get_task(RESULT*); 373 374 // --------------- cs_benchmark.cpp: 375 bool benchmarks_running; 376 377 void check_if_need_benchmarks(); 378 bool can_run_cpu_benchmarks(); 379 void start_cpu_benchmarks(); 380 bool cpu_benchmarks_poll(); 381 void abort_cpu_benchmarks(); 382 bool cpu_benchmarks_done(); 383 void cpu_benchmarks_set_defaults(); 384 void print_benchmark_results(); 385 386 // --------------- cs_cmdline.cpp: 387 void parse_cmdline(int argc, char** argv); 388 void parse_env_vars(); 389 void do_cmdline_actions(); 390 391 // --------------- cs_files.cpp: 392 void check_file_existence(); 393 bool start_new_file_xfer(PERS_FILE_XFER&); 394 395 int make_project_dirs(); 396 bool create_and_delete_pers_file_xfers(); 397 398 // --------------- cs_platforms.cpp: 399 const char* get_primary_platform(); 400 void add_platform(const char*); 401 void detect_platforms(); 402 void write_platforms(PROJECT*, MIOFILE&); 403 bool is_supported_platform(const char*); 404 405 // --------------- cs_prefs.cpp: 406 double client_disk_usage; 407 // disk usage not counting projects 408 // computed by get_disk_usages() 409 double total_disk_usage; 410 // client plus projects 411 int get_disk_usages(); 412 void get_disk_shares(); 413 double allowed_disk_usage(double boinc_total); 414 int allowed_project_disk_usage(double&); 415 void show_suspend_tasks_message(int reason); 416 int resume_tasks(int reason=0); 417 void read_global_prefs( 418 const char* fname = GLOBAL_PREFS_FILE_NAME, 419 const char* override_fname = GLOBAL_PREFS_OVERRIDE_FILE 420 ); 421 int save_global_prefs(char* prefs, char* url, char* sched); 422 double available_ram(); 423 double max_available_ram(); 424 int check_suspend_processing(); 425 void check_suspend_network(); 426 void install_global_prefs(); 427 PROJECT* global_prefs_source_project(); 428 void show_global_prefs_source(bool); 429 430 // --------------- cs_scheduler.cpp: 431 void request_work_fetch(const char*); 432 // Called when: 433 // - core client starts (CS::init()) 434 // - task is completed or fails 435 // - tasks are killed 436 // - an RPC completes 437 // - project suspend/detch/attach/reset GUI RPC 438 // - result suspend/abort GUI RPC 439 int make_scheduler_request(PROJECT*); 440 int handle_scheduler_reply(PROJECT*, char* scheduler_url); 441 SCHEDULER_OP* scheduler_op; 442 PROJECT* next_project_master_pending(); 443 PROJECT* next_project_sched_rpc_pending(); 444 PROJECT* next_project_trickle_up_pending(); 445 PROJECT* find_project_with_overdue_results(bool network_suspend_soon); 446 bool had_or_requested_work; 447 bool scheduler_rpc_poll(); 448 449 // --------------- cs_statefile.cpp: 450 void set_client_state_dirty(const char*); 451 int parse_state_file(); 452 int parse_state_file_aux(const char*); 453 int write_state(MIOFILE&); 454 int write_state_file(); 455 int write_state_file_if_needed(); 456 void check_anonymous(); 457 int parse_app_info(PROJECT*, FILE*); 458 int write_state_gui(MIOFILE&); 459 int write_file_transfers_gui(MIOFILE&); 460 int write_tasks_gui(MIOFILE&, bool); 461 void sort_results(); 462 void sort_projects_by_name(); 463 464 // --------------- cs_trickle.cpp: 465 int read_trickle_files(PROJECT*, FILE*); 466 int remove_trickle_files(PROJECT*); 467 int handle_trickle_down(PROJECT*, FILE*); 468 469 // --------------- check_state.cpp: 470 // stuff related to data-structure integrity checking 471 // 472 void check_project_pointer(PROJECT*); 473 void check_app_pointer(APP*); 474 void check_file_info_pointer(FILE_INFO*); 475 void check_app_version_pointer(APP_VERSION*); 476 void check_workunit_pointer(WORKUNIT*); 477 void check_result_pointer(RESULT*); 478 void check_pers_file_xfer_pointer(PERS_FILE_XFER*); 479 void check_file_xfer_pointer(FILE_XFER*); 480 481 void check_app(APP&); 482 void check_file_info(FILE_INFO&); 483 void check_file_ref(FILE_REF&); 484 void check_app_version(APP_VERSION&); 485 void check_workunit(WORKUNIT&); 486 void check_result(RESULT&); 487 void check_active_task(ACTIVE_TASK&); 488 void check_pers_file_xfer(PERS_FILE_XFER&); 489 void check_file_xfer(FILE_XFER&); 490 491 void check_all(); 492 void free_mem(); 493 494 // --------------- work_fetch.cpp: 495 int proj_min_results(PROJECT*, double); 496 void check_project_timeout(); 497 double overall_cpu_frac(); 498 double overall_cpu_and_network_frac(); 499 double overall_gpu_frac(); 500 double time_until_work_done(PROJECT*, int, double); 501 bool compute_work_requests(); 502 void scale_duration_correction_factors(double); 503 void generate_new_host_cpid(); 504 void compute_nuploading_results(); 505 506 #ifdef SIM 507 double share_violation(); 508 double monotony(); 509 510 void handle_completed_results(PROJECT*); 511 void get_workload(vector<IP_RESULT>&); 512 bool simulate_rpc(PROJECT*); 513 #endif 514 }; 515 516 extern CLIENT_STATE gstate; 517 518 extern bool gpus_usable; 519 // set to false if GPUs not usable because of remote desktop 520 // or login situation (Windows) 521 522 // return a random double in the range [MIN,min(e^n,MAX)) 523 524 extern double calculate_exponential_backoff( 525 int n, double MIN, double MAX 526 ); 527 528 #ifdef NEW_CPU_THROTTLE 529 extern THREAD_LOCK client_mutex; 530 extern THREAD throttle_thread; 531 #endif 532 533 //////// TIME-RELATED CONSTANTS //////////// 534 535 //////// CLIENT INTERNAL 536 537 #define POLL_INTERVAL 1.0 538 // the client will handle I/O (including GUI RPCs) 539 // for up to POLL_INTERVAL seconds before calling poll_slow_events() 540 // to call the polling functions 541 542 #define GARBAGE_COLLECT_PERIOD 10 543 // how often to garbage collect 544 545 #define TASK_POLL_PERIOD 1.0 546 547 #define UPDATE_RESULTS_PERIOD 1.0 548 549 #define HANDLE_FINISHED_APPS_PERIOD 1.0 550 551 #define BENCHMARK_POLL_PERIOD 1.0 552 553 #define PERS_FILE_XFER_START_PERIOD 1.0 554 #define PERS_FILE_XFER_POLL_PERIOD 1.0 555 556 #define SCHEDULER_RPC_POLL_PERIOD 5.0 557 558 #define FILE_XFER_POLL_PERIOD 1.0 559 560 #define GUI_HTTP_POLL_PERIOD 1.0 561 562 #define MEMORY_USAGE_PERIOD 10 563 // computer memory usage and check for exclusive apps this often 564 565 //////// WORK FETCH 566 567 #define WORK_FETCH_PERIOD 60 568 // see if we need to fetch work at least this often 569 #define WF_MIN_BACKOFF_INTERVAL 600 570 #define WF_MAX_BACKOFF_INTERVAL 86400 571 // if we ask a project for work for a resource and don't get it, 572 // we do exponential backoff. 573 // This constant is an upper bound for this. 574 // E.g., if we need GPU work, we'll end up asking once a day, 575 // so if the project develops a GPU app, 576 // we'll find out about it within a day. 577 578 #define WF_UPLOAD_DEFER_INTERVAL 300 579 // if a project is uploading, 580 // and the last upload started within this interval, 581 // don't fetch work from it. 582 // This allows the work fetch to be merged with the reporting of the 583 // jobs that are currently uploading. 584 585 #define RESULT_REPORT_IF_AT_LEAST_N 64 586 // If a project has at least this many ready-to-report tasks, report them. 587 588 //////// CPU SCHEDULING 589 590 #define CPU_SCHED_PERIOD 60 591 // do CPU schedule at least this often 592 593 #define REC_ADJUST_PERIOD CPU_SCHED_PERIOD 594 // REC is adjusted at least this often, 595 // since adjust_rec() is called from enforce_schedule() 596 597 #define DEADLINE_CUSHION 0 598 // try to finish jobs this much in advance of their deadline 599 600 /////// JOB CONTROL 601 602 #define ABORT_TIMEOUT 60 603 // if we send app <abort> request, wait this long before killing it. 604 // This gives it time to download symbol files (which can be several MB) 605 // and write stack trace to stderr 606 607 #define QUIT_TIMEOUT 60 608 // Same, for <quit>. 609 // Should be large enough that apps can finalize 610 // (e.g. write checkpoint file) in that time. 611 // In Nov 2015 we increased it from 15 to 60 612 // because CERN's VBox apps take a long time to save state. 613 614 #define MAX_STARTUP_TIME 10 615 // if app startup takes longer than this, quit loop 616 617 //////// NETWORK 618 619 #define CONNECT_ERROR_PERIOD 600.0 620 621 #define ALLOW_NETWORK_IF_RECENT_RPC_PERIOD 300 622 // if there has been a GUI RPC within this period 623 // that requires network access (e.g. attach to project) 624 // allow it even if setting is "no access" 625 626 //////// MISC 627 628 #define EXCLUSIVE_APP_WAIT 5 629 // if "exclusive app" feature used, 630 // wait this long after app exits before restarting jobs 631 632 #define DAILY_XFER_HISTORY_PERIOD 60 633 634 #define ACCT_MGR_MIN_BACKOFF 600 635 #define ACCT_MGR_MAX_BACKOFF 86400 636 // min/max account manager RPC backoff 637 638 #define ANDROID_KEEPALIVE_TIMEOUT 30 639 // Android: if don't get a report_device_status() RPC from the GUI 640 // in this interval, exit. 641 // We rely on the GUI to report battery status. 642 643 #ifndef ANDROID 644 #define USE_NET_PREFS 645 // use preferences obtained over the network 646 // (i.e. through scheduler replies) 647 // Don't do this on Android 648 #endif 649 650 #define NEED_NETWORK_MSG _("BOINC can't access Internet - check network connection or proxy configuration.") 651 #define NO_WORK_MSG _("Your current settings do not allow tasks from this project.") 652 653 #endif 654