1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
17 
18 #ifndef BOINC_CLIENT_STATE_H
19 #define BOINC_CLIENT_STATE_H
20 
21 #define NEW_CPU_THROTTLE
22 // do CPU throttling using a separate thread.
23 // This makes it possible to throttle faster than the client's 1-sec poll period
24 
25 #ifndef _WIN32
26 #include <string>
27 #include <vector>
28 #include <ctime>
29 #endif
30 
31 using std::string;
32 using std::vector;
33 
34 #include "coproc.h"
35 #include "util.h"
36 #ifdef NEW_CPU_THROTTLE
37 #include "thread.h"
38 #endif
39 
40 #include "acct_mgr.h"
41 #include "acct_setup.h"
42 #include "app.h"
43 #include "client_types.h"
44 #include "current_version.h"
45 #include "file_xfer.h"
46 #include "file_names.h"
47 #include "gui_rpc_server.h"
48 #include "gui_http.h"
49 #include "project_init.h"
50 #include "hostinfo.h"
51 #include "miofile.h"
52 #include "net_stats.h"
53 #include "pers_file_xfer.h"
54 #include "prefs.h"
55 #include "scheduler_op.h"
56 #include "time_stats.h"
57 
58 #ifdef SIM
59 #include "../sched/edf_sim.h"
60 #endif
61 
62 #define WORK_FETCH_DONT_NEED 0
63     // project: suspended, deferred, or no new work (can't ask for more work)
64     // overall: not work_fetch_ok (from CPU policy)
65 #define WORK_FETCH_OK        1
66     // project: has more than min queue * share, not suspended/def/nonewwork
67     // overall: at least min queue, work fetch OK
68 #define WORK_FETCH_NEED      2
69     // project: less than min queue * resource share of DL/runnable results
70     // overall: less than min queue
71 #define WORK_FETCH_NEED_IMMEDIATELY 3
72     // project: no downloading or runnable results
73     // overall: at least one idle CPU
74 
75 // encapsulates the global variables of the core client.
76 // If you add anything here, initialize it in the constructor
77 //
78 struct CLIENT_STATE {
79     vector<PLATFORM> platforms;
80     vector<PROJECT*> projects;
81         // in alphabetical order, to improve display
82     vector<APP*> apps;
83     vector<FILE_INFO*> file_infos;
84     vector<APP_VERSION*> app_versions;
85     vector<WORKUNIT*> workunits;
86     vector<RESULT*> results;
87         // list of jobs, ordered by increasing arrival time
88 
89     PERS_FILE_XFER_SET* pers_file_xfers;
90     HTTP_OP_SET* http_ops;
91     FILE_XFER_SET* file_xfers;
92 #ifndef SIM
93     GUI_RPC_CONN_SET gui_rpcs;
94 #endif
95     GUI_HTTP gui_http;
96 #ifdef ENABLE_AUTO_UPDATE
97     AUTO_UPDATE auto_update;
98 #endif
99     LOOKUP_WEBSITE_OP lookup_website_op;
100     GET_CURRENT_VERSION_OP get_current_version_op;
101     GET_PROJECT_LIST_OP get_project_list_op;
102     ACCT_MGR_OP acct_mgr_op;
103 
104     CLIENT_TIME_STATS time_stats;
105     GLOBAL_PREFS global_prefs;
106     NET_STATS net_stats;
107     ACTIVE_TASK_SET active_tasks;
108     HOST_INFO host_info;
109 
110     // the following used only on Android
111     DEVICE_STATUS device_status;
112     double device_status_time;
113 
114     char language[16];                // ISO language code reported by GUI
115     char client_brand[256];
116         // contents of client_brand.txt, e.g. "HTC Power to Give"
117         // reported to scheduler
118     VERSION_INFO core_client_version;
119     string statefile_platform_name;
120     int file_xfer_giveup_period;
121     RUN_MODE cpu_run_mode;
122     RUN_MODE gpu_run_mode;
123     RUN_MODE network_run_mode;
124     bool started_by_screensaver;
125     bool check_all_logins;
126     bool user_active;       // there has been recent mouse/kbd input
127     int cmdline_gui_rpc_port;
128     bool show_projects;
129     bool requested_exit;
130         // we should exit now.  Set when
131         // - got a "quit" GUI RPC
132         // - (Unix) got a HUP, INT, QUIT, TERM, or PWR signal
133         // - (Win) got CTRL_LOGOFF, CTRL_C, CTRL_BREAK, etc. event
134         // - (Mac) client was started from screensaver,
135         //   which has since exited
136     bool os_requested_suspend;
        // we should suspend for OS reasons (used on Win only).
138         // Set when
139         // - got BATTERY_LOW, SUSPEND, SERVICE_CONTROL_PAUSE
140     double os_requested_suspend_time;
141     bool cleanup_completed;
142     bool in_abort_sequence;
143         // Determine when it is safe to leave the quit_client() handler
144         // and to finish cleaning up.
145     char detach_project_url[256];
146         // stores URL for --detach_project option
147     char reset_project_url[256];
148         // stores URL for --reset_project option
149     char update_prefs_url[256];
150         // stores URL for --update_prefs option
151     char main_host_venue[256];
152         // venue from project or AMS that gave us general prefs
153     char attach_project_url[256];
154     char attach_project_auth[256];
155     bool exit_before_upload;
156         // exit when about to upload a file
157     bool run_test_app;
158         // API test mode
159 #ifndef _WIN32
160     gid_t boinc_project_gid;
161 #endif
162 #ifdef _WIN32
163     // vars so that the sysmon thread can write messages
164     //
165     bool have_sysmon_msg;
166     char sysmon_msg[256];
167 #endif
168 
169     // backoff-related variables
170     //
171     int master_fetch_period;
172         // fetch project's master URL (and stop doing scheduler RPCs)
173         // if get this many successive RPC failures (default 10)
174     int retry_cap;
175         // cap project->nrpc_failures at this number
176     int master_fetch_retry_cap;
177         // after this many master-fetch failures,
178         // move into a state in which we retry master fetch
179         // at the frequency below
180     int master_fetch_interval;
181         // see above
182 
183     int sched_retry_delay_min;
184     int sched_retry_delay_max;
185     int pers_retry_delay_min;
186     int pers_retry_delay_max;
187     int pers_giveup;
188 
189     bool tasks_suspended;
190         // Computing suspended for reason other than throttling
191     int suspend_reason;
192     bool tasks_throttled;
193         // Computing suspended because of throttling
194 
195     bool network_suspended;
196         // Don't use network.
197     bool file_xfers_suspended;
198         // Don't do file xfers (but allow other network activity).
199     int network_suspend_reason;
200 
201     bool executing_as_daemon;
202         // true if --daemon is on the commandline
203         // this means we are running as a daemon on unix,
204         // or as a service on Windows
205     bool redirect_io;
206         // redirect stdout, stderr to log files
207     bool disable_graphics;
208         // a condition has occurred in which we know graphics will
209         // not be displayable, so GUIs shouldn't offer graphics.
210     bool detach_console;
211     bool launched_by_manager;
212     bool run_by_updater;
213     double now;
214     bool clock_change;      // system clock was recently decreased
215     double last_wakeup_time;
216     bool initialized;
217     bool cant_write_state_file;
218         // failed to write state file.
219         // In this case we continue to run for 1 minute,
220         // handling GUI RPCs but doing nothing else,
221         // so that the Manager can tell the user what the problem is
222 
223     bool client_state_dirty;
224     int old_major_version;
225     int old_minor_version;
226     int old_release;
227     bool run_cpu_benchmarks;
228         // if set, run benchmarks when possible
229 
230     int exit_after_app_start_secs;
231         // if nonzero, exit this many seconds after starting an app
232     double app_started;
233         // when the most recent app was started
234 
235 // --------------- acct_mgr.cpp:
236     ACCT_MGR_INFO acct_mgr_info;
237 
238 // --------------- acct_setup.cpp:
239     PROJECT_INIT project_init;
240     PROJECT_ATTACH project_attach;
241     void new_version_check(bool force = false);
242     void all_projects_list_check();
243     double new_version_check_time;
244     double all_projects_list_check_time;
245         // the time we last successfully fetched the project list
246     string newer_version;
247 
248 // --------------- client_state.cpp:
249     CLIENT_STATE();
250     void show_host_info();
251     bool is_new_client();
252     int init();
253     bool poll_slow_events();
254         // Never blocks.
255         // Returns true if it actually did something,
256         // in which case it should be called again immediately.
257     void do_io_or_sleep(double dt);
258     bool time_to_exit();
259     PROJECT* lookup_project(const char*);
260     APP* lookup_app(PROJECT*, const char*);
261     FILE_INFO* lookup_file_info(PROJECT*, const char* name);
262     RESULT* lookup_result(PROJECT*, const char*);
263     WORKUNIT* lookup_workunit(PROJECT*, const char*);
264     APP_VERSION* lookup_app_version(
265         APP*, char* platform, int ver, char* plan_class
266     );
267     int detach_project(PROJECT*);
268     int report_result_error(RESULT&, const char* err_msg);
269     int reset_project(PROJECT*, bool detaching);
270     bool no_gui_rpc;
271     bool gui_rpc_unix_domain;
272         // do GUI RPC over Unix-domain sockets rather than TCP
273     void start_abort_sequence();
274     bool abort_sequence_done();
275     int quit_activities();
276 
277     int link_app(PROJECT*, APP*);
278     int link_file_info(PROJECT*, FILE_INFO*);
279     int link_file_ref(PROJECT*, FILE_REF*);
280     int link_app_version(PROJECT*, APP_VERSION*);
281     int link_workunit(PROJECT*, WORKUNIT*);
282     int link_result(PROJECT*, RESULT*);
283     void print_summary();
284     bool abort_unstarted_late_jobs();
285     bool garbage_collect();
286     bool garbage_collect_always();
287     bool update_results();
288     int nresults_for_project(PROJECT*);
289     void check_clock_reset();
290     void clear_absolute_times();
291     void set_now();
292     void log_show_projects();
293 
294 // --------------- cpu_sched.cpp:
295     double total_resource_share();
296     double potentially_runnable_resource_share();
297     double nearly_runnable_resource_share();
298     double fetchable_resource_share();
299     double rec_interval_start;
300     double total_cpu_time_this_rec_interval;
301     bool must_enforce_cpu_schedule;
302     bool must_schedule_cpus;
303     bool must_check_work_fetch;
304     void assign_results_to_projects();
305     RESULT* highest_prio_project_best_result();
306     void reset_rec_accounting();
307     bool schedule_cpus();
308     void make_run_list(vector<RESULT*>&);
309     bool enforce_run_list(vector<RESULT*>&);
310     void append_unfinished_time_slice(vector<RESULT*>&);
311 
312     double runnable_resource_share(int);
313     void adjust_rec();
314     double retry_shmem_time;
315         // if we fail to start a task due to no shared-mem segments,
316         // wait until at least this time to try running
317         // another task that needs a shared-mem seg
work_buf_minCLIENT_STATE318     inline double work_buf_min() {
319         double x = global_prefs.work_buf_min_days * 86400;
320         if (x < 180) x = 180;
321         return x;
322     }
work_buf_additionalCLIENT_STATE323     inline double work_buf_additional() {
324         return global_prefs.work_buf_additional_days *86400;
325     }
work_buf_totalCLIENT_STATE326     inline double work_buf_total() {
327         double x = work_buf_min() + work_buf_additional();
328         if (x < 1) x = 1;
329         return x;
330     }
331 
332     void request_schedule_cpus(const char*);
333         // Reschedule CPUs ASAP.
334         // Called when:
335         // - core client starts (CS::init())
336         // - an app exits (ATS::check_app_exited())
337         // - Tasks are killed (ATS::exit_tasks())
338         // - a result's input files finish downloading (CS::update_results())
339         // - an app fails to start (CS::schedule_cpus())
340         // - any project op is done via RPC (suspend/resume)
341         // - any result op is done via RPC (suspend/resume)
342     void set_ncpus();
343 
344 // --------------- cs_account.cpp:
345     int add_project(
346         const char* master_url, const char* authenticator,
347         const char* project_name, bool attached_via_acct_mgr
348     );
349 
350     int parse_account_files();
351     int parse_account_files_venue();
352     int parse_preferences_for_user_files();
353     int parse_statistics_files();
        // should be moved to a new file, but this will do for testing
355 
356 // --------------- cs_apps.cpp:
357     double get_fraction_done(RESULT* result);
358     int input_files_available(RESULT*, bool, FILE_INFO** f=0);
359     ACTIVE_TASK* lookup_active_task_by_result(RESULT*);
360     int ncpus;
361         // Act like there are this many CPUs.
362         // By default this is the # of physical CPUs,
363         // but it can be changed in two ways:
364         // - type <ncpus>N</ncpus> in the config file
        // - set the max_ncpus_pct pref
366 
367     int latest_version(APP*, char*);
368     int app_finished(ACTIVE_TASK&);
369     bool start_apps();
370     bool handle_finished_apps();
371 
372     ACTIVE_TASK* get_task(RESULT*);
373 
374 // --------------- cs_benchmark.cpp:
375     bool benchmarks_running;
376 
377     void check_if_need_benchmarks();
378     bool can_run_cpu_benchmarks();
379     void start_cpu_benchmarks();
380     bool cpu_benchmarks_poll();
381     void abort_cpu_benchmarks();
382     bool cpu_benchmarks_done();
383     void cpu_benchmarks_set_defaults();
384     void print_benchmark_results();
385 
386 // --------------- cs_cmdline.cpp:
387     void parse_cmdline(int argc, char** argv);
388     void parse_env_vars();
389     void do_cmdline_actions();
390 
391 // --------------- cs_files.cpp:
392     void check_file_existence();
393     bool start_new_file_xfer(PERS_FILE_XFER&);
394 
395     int make_project_dirs();
396     bool create_and_delete_pers_file_xfers();
397 
398 // --------------- cs_platforms.cpp:
399     const char* get_primary_platform();
400     void add_platform(const char*);
401     void detect_platforms();
402     void write_platforms(PROJECT*, MIOFILE&);
403     bool is_supported_platform(const char*);
404 
405 // --------------- cs_prefs.cpp:
406     double client_disk_usage;
407         // disk usage not counting projects
408         // computed by get_disk_usages()
409     double total_disk_usage;
410         // client plus projects
411     int get_disk_usages();
412     void get_disk_shares();
413     double allowed_disk_usage(double boinc_total);
414     int allowed_project_disk_usage(double&);
415     void show_suspend_tasks_message(int reason);
416     int resume_tasks(int reason=0);
417     void read_global_prefs(
418         const char* fname = GLOBAL_PREFS_FILE_NAME,
419         const char* override_fname = GLOBAL_PREFS_OVERRIDE_FILE
420     );
421     int save_global_prefs(char* prefs, char* url, char* sched);
422     double available_ram();
423     double max_available_ram();
424     int check_suspend_processing();
425     void check_suspend_network();
426     void install_global_prefs();
427     PROJECT* global_prefs_source_project();
428     void show_global_prefs_source(bool);
429 
430 // --------------- cs_scheduler.cpp:
431     void request_work_fetch(const char*);
432         // Called when:
433         // - core client starts (CS::init())
434         // - task is completed or fails
435         // - tasks are killed
436         // - an RPC completes
        // - project suspend/detach/attach/reset GUI RPC
438         // - result suspend/abort GUI RPC
439     int make_scheduler_request(PROJECT*);
440     int handle_scheduler_reply(PROJECT*, char* scheduler_url);
441     SCHEDULER_OP* scheduler_op;
442     PROJECT* next_project_master_pending();
443     PROJECT* next_project_sched_rpc_pending();
444     PROJECT* next_project_trickle_up_pending();
445     PROJECT* find_project_with_overdue_results(bool network_suspend_soon);
446     bool had_or_requested_work;
447     bool scheduler_rpc_poll();
448 
449 // --------------- cs_statefile.cpp:
450     void set_client_state_dirty(const char*);
451     int parse_state_file();
452     int parse_state_file_aux(const char*);
453     int write_state(MIOFILE&);
454     int write_state_file();
455     int write_state_file_if_needed();
456     void check_anonymous();
457     int parse_app_info(PROJECT*, FILE*);
458     int write_state_gui(MIOFILE&);
459     int write_file_transfers_gui(MIOFILE&);
460     int write_tasks_gui(MIOFILE&, bool);
461     void sort_results();
462     void sort_projects_by_name();
463 
464 // --------------- cs_trickle.cpp:
465     int read_trickle_files(PROJECT*, FILE*);
466     int remove_trickle_files(PROJECT*);
467     int handle_trickle_down(PROJECT*, FILE*);
468 
469 // --------------- check_state.cpp:
470 // stuff related to data-structure integrity checking
471 //
472     void check_project_pointer(PROJECT*);
473     void check_app_pointer(APP*);
474     void check_file_info_pointer(FILE_INFO*);
475     void check_app_version_pointer(APP_VERSION*);
476     void check_workunit_pointer(WORKUNIT*);
477     void check_result_pointer(RESULT*);
478     void check_pers_file_xfer_pointer(PERS_FILE_XFER*);
479     void check_file_xfer_pointer(FILE_XFER*);
480 
481     void check_app(APP&);
482     void check_file_info(FILE_INFO&);
483     void check_file_ref(FILE_REF&);
484     void check_app_version(APP_VERSION&);
485     void check_workunit(WORKUNIT&);
486     void check_result(RESULT&);
487     void check_active_task(ACTIVE_TASK&);
488     void check_pers_file_xfer(PERS_FILE_XFER&);
489     void check_file_xfer(FILE_XFER&);
490 
491     void check_all();
492     void free_mem();
493 
494 // --------------- work_fetch.cpp:
495     int proj_min_results(PROJECT*, double);
496     void check_project_timeout();
497     double overall_cpu_frac();
498     double overall_cpu_and_network_frac();
499     double overall_gpu_frac();
500     double time_until_work_done(PROJECT*, int, double);
501     bool compute_work_requests();
502     void scale_duration_correction_factors(double);
503     void generate_new_host_cpid();
504     void compute_nuploading_results();
505 
506 #ifdef SIM
507     double share_violation();
508     double monotony();
509 
510     void handle_completed_results(PROJECT*);
511     void get_workload(vector<IP_RESULT>&);
512     bool simulate_rpc(PROJECT*);
513 #endif
514 };
515 
516 extern CLIENT_STATE gstate;
517 
518 extern bool gpus_usable;
519     // set to false if GPUs not usable because of remote desktop
520     // or login situation (Windows)
521 
522 // return a random double in the range [MIN,min(e^n,MAX))
523 
524 extern double calculate_exponential_backoff(
525     int n, double MIN, double MAX
526 );
527 
528 #ifdef NEW_CPU_THROTTLE
529 extern THREAD_LOCK client_mutex;
530 extern THREAD throttle_thread;
531 #endif
532 
533 //////// TIME-RELATED CONSTANTS ////////////
534 
535 //////// CLIENT INTERNAL
536 
537 #define POLL_INTERVAL   1.0
538     // the client will handle I/O (including GUI RPCs)
539     // for up to POLL_INTERVAL seconds before calling poll_slow_events()
540     // to call the polling functions
541 
542 #define GARBAGE_COLLECT_PERIOD  10
543     // how often to garbage collect
544 
545 #define TASK_POLL_PERIOD    1.0
546 
547 #define UPDATE_RESULTS_PERIOD   1.0
548 
549 #define HANDLE_FINISHED_APPS_PERIOD 1.0
550 
551 #define BENCHMARK_POLL_PERIOD   1.0
552 
553 #define PERS_FILE_XFER_START_PERIOD  1.0
554 #define PERS_FILE_XFER_POLL_PERIOD  1.0
555 
556 #define SCHEDULER_RPC_POLL_PERIOD   5.0
557 
558 #define FILE_XFER_POLL_PERIOD   1.0
559 
560 #define GUI_HTTP_POLL_PERIOD    1.0
561 
562 #define MEMORY_USAGE_PERIOD     10
    // compute memory usage and check for exclusive apps this often
564 
565 //////// WORK FETCH
566 
567 #define WORK_FETCH_PERIOD   60
568     // see if we need to fetch work at least this often
569 #define WF_MIN_BACKOFF_INTERVAL    600
570 #define WF_MAX_BACKOFF_INTERVAL    86400
571     // if we ask a project for work for a resource and don't get it,
572     // we do exponential backoff.
573     // This constant is an upper bound for this.
574     // E.g., if we need GPU work, we'll end up asking once a day,
575     // so if the project develops a GPU app,
576     // we'll find out about it within a day.
577 
578 #define WF_UPLOAD_DEFER_INTERVAL   300
579     // if a project is uploading,
580     // and the last upload started within this interval,
581     // don't fetch work from it.
582     // This allows the work fetch to be merged with the reporting of the
583     // jobs that are currently uploading.
584 
585 #define RESULT_REPORT_IF_AT_LEAST_N 64
586     // If a project has at least this many ready-to-report tasks, report them.
587 
588 //////// CPU SCHEDULING
589 
590 #define CPU_SCHED_PERIOD    60
591     // do CPU schedule at least this often
592 
593 #define REC_ADJUST_PERIOD CPU_SCHED_PERIOD
594     // REC is adjusted at least this often,
595     // since adjust_rec() is called from enforce_schedule()
596 
597 #define DEADLINE_CUSHION    0
598     // try to finish jobs this much in advance of their deadline
599 
600 /////// JOB CONTROL
601 
602 #define ABORT_TIMEOUT   60
603     // if we send app <abort> request, wait this long before killing it.
604     // This gives it time to download symbol files (which can be several MB)
605     // and write stack trace to stderr
606 
607 #define QUIT_TIMEOUT    60
608     // Same, for <quit>.
609     // Should be large enough that apps can finalize
610     // (e.g. write checkpoint file) in that time.
611     // In Nov 2015 we increased it from 15 to 60
612     // because CERN's VBox apps take a long time to save state.
613 
614 #define MAX_STARTUP_TIME    10
615     // if app startup takes longer than this, quit loop
616 
617 //////// NETWORK
618 
619 #define CONNECT_ERROR_PERIOD    600.0
620 
621 #define ALLOW_NETWORK_IF_RECENT_RPC_PERIOD  300
622     // if there has been a GUI RPC within this period
623     // that requires network access (e.g. attach to project)
624     // allow it even if setting is "no access"
625 
626 //////// MISC
627 
628 #define EXCLUSIVE_APP_WAIT   5
629     // if "exclusive app" feature used,
630     // wait this long after app exits before restarting jobs
631 
632 #define DAILY_XFER_HISTORY_PERIOD   60
633 
634 #define ACCT_MGR_MIN_BACKOFF    600
635 #define ACCT_MGR_MAX_BACKOFF    86400
636     // min/max account manager RPC backoff
637 
638 #define ANDROID_KEEPALIVE_TIMEOUT   30
    // Android: if we don't get a report_device_status() RPC from the GUI
640     // in this interval, exit.
641     // We rely on the GUI to report battery status.
642 
643 #ifndef ANDROID
644 #define USE_NET_PREFS
645     // use preferences obtained over the network
646     // (i.e. through scheduler replies)
647     // Don't do this on Android
648 #endif
649 
650 #define NEED_NETWORK_MSG _("BOINC can't access Internet - check network connection or proxy configuration.")
651 #define NO_WORK_MSG _("Your current settings do not allow tasks from this project.")
652 
653 #endif
654