1 // This file is part of BOINC. 2 // http://boinc.berkeley.edu 3 // Copyright (C) 2008 University of California 4 // 5 // BOINC is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU Lesser General Public License 7 // as published by the Free Software Foundation, 8 // either version 3 of the License, or (at your option) any later version. 9 // 10 // BOINC is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 13 // See the GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>. 17 18 #ifndef BOINC_APP_H 19 #define BOINC_APP_H 20 21 #ifndef _WIN32 22 #include <cstdio> 23 #include <vector> 24 #endif 25 26 #include "app_ipc.h" 27 #include "common_defs.h" 28 #include "procinfo.h" 29 30 #include "client_types.h" 31 32 // values for preempt_type 33 // 34 #define REMOVE_NEVER 0 35 #define REMOVE_MAYBE_USER 1 36 #define REMOVE_MAYBE_SCHED 2 37 #define REMOVE_ALWAYS 3 38 39 struct CLIENT_STATE; 40 struct ASYNC_COPY; 41 typedef int PROCESS_ID; 42 43 #define MAX_STDERR_LEN 65536 44 // The stderr output of an application is truncated to this length 45 // before sending to server, 46 // to protect against apps that write unbounded amounts. 47 48 // Represents a job in progress. 49 50 // When an active task is created, it is assigned a "slot" 51 // which determines the directory it runs in. 52 // This doesn't change over the life of the active task; 53 // thus the task can use the slot directory for temp files 54 // that BOINC doesn't know about. 55 56 struct ACTIVE_TASK { 57 #ifdef _WIN32 58 HANDLE process_handle, shm_handle; 59 bool kill_all_children(); 60 #endif 61 SHMEM_SEG_NAME shmem_seg_name; 62 RESULT* result; 63 WORKUNIT* wup; 64 APP_VERSION* app_version; 65 PROCESS_ID pid; 66 PROCINFO procinfo; 67 68 // START OF ITEMS SAVED IN TASK STATE FILE 69 // (in addition to result name and project URL) 70 71 double checkpoint_cpu_time; 72 // CPU at the last checkpoint 73 // Note: "CPU time" refers to the sum over all episodes. 74 // (not counting the "lost" time after the last checkpoint 75 // in episodes before the current one) 76 double checkpoint_elapsed_time; 77 // elapsed time at last checkpoint 78 double peak_working_set_size; 79 double peak_swap_size; 80 double peak_disk_usage; 81 82 // START OF ITEMS ALSO SAVED IN CLIENT STATE FILE 83 84 int _task_state; 85 // PROCESS_*; see common_defs.h 86 int slot; 87 // subdirectory of slots/ where this runs 88 double checkpoint_fraction_done; 89 // fraction done at last checkpoint 90 double checkpoint_fraction_done_elapsed_time; 91 // fraction done elapsed time at last checkpoint 92 double current_cpu_time; 93 // most recent CPU time reported by app 94 bool once_ran_edf; 95 96 // END OF ITEMS SAVED IN STATE FILE 97 98 double fraction_done; 99 // App's estimate of how much of the work unit is done. 100 // Passed from the application via an API call; 101 // will be zero if the app doesn't use this call 102 double fraction_done_elapsed_time; 103 // elapsed time when fraction done was last reported 104 double first_fraction_done; 105 // first frac done reported during this run of task 106 double first_fraction_done_elapsed_time; 107 // elapsed time when the above was reported 108 int scheduler_state; 109 int next_scheduler_state; // temp 110 int signal; 111 double run_interval_start_wall_time; 112 // Wall time at the start of the current run interval 113 double checkpoint_wall_time; 114 // wall time at the last checkpoint 115 double elapsed_time; 116 // current total elapsed (running) time 117 double bytes_sent_episode; 118 // bytes sent in current episode of job, 119 // as (optionally) reported by boinc_network_usage() 120 double bytes_received_episode; 121 double bytes_sent; 122 // bytes in all episodes 123 double bytes_received; 124 char slot_dir[256]; 125 // directory where process runs (relative) 126 char slot_path[MAXPATHLEN]; 127 // same, absolute 128 // This is used only to run graphics apps 129 // (that way don't have to worry about top-level dirs 130 // being non-readable, etc). 131 double max_elapsed_time; 132 // abort if elapsed time exceeds this 133 double max_disk_usage; 134 // abort if disk usage (in+out+temp) exceeds this 135 double max_mem_usage; 136 // abort if memory usage exceeds this 137 bool have_trickle_down; 138 bool send_upload_file_status; 139 bool too_large; 140 // Working set too large to run now; waiting for RAM 141 // This is a slight misnomer. 142 // It doesn't mean that this job itself is too large; 143 // rather, it means that the last time we did CPU scheduling, 144 // the set of jobs we tried to run was too big, 145 // and this one came after we ran out of mem. 146 bool needs_shmem; // waiting for a free shared memory segment 147 int want_network; 148 // This task wants to do network comm (for F@h) 149 // this is passed via share-memory message (app_status channel) 150 double abort_time; 151 // when we sent an abort message to this app 152 // kill it 5 seconds later if it doesn't exit 153 double quit_time; 154 int premature_exit_count; 155 // when we sent a quit message; kill if still there after 10 sec 156 bool overdue_checkpoint; 157 // running past end of time slice because not checkpointed; 158 // when we do checkpoint, reschedule 159 double last_deadline_miss_time; 160 161 APP_CLIENT_SHM app_client_shm; 162 // core/app shared mem segment 163 MSG_QUEUE graphics_request_queue; 164 MSG_QUEUE process_control_queue; 165 std::vector<int> other_pids; 166 // IDs of processes that are part of this task 167 // but not descendants of the main process 168 // (e.g. VMs created by vboxwrapper) 169 // These are communicated via the app_status message channel 170 char web_graphics_url[256]; 171 char remote_desktop_addr[256]; 172 ASYNC_COPY* async_copy; 173 double finish_file_time; 174 // time when we saw finish file in slot dir. 175 // Used to kill apps that hang after writing finished file 176 177 void set_task_state(int, const char*); task_stateACTIVE_TASK178 inline int task_state() { 179 return _task_state; 180 } 181 182 #if (defined (__APPLE__) && (defined(__i386__) || defined(__x86_64__))) 183 // PowerPC apps emulated on i386 Macs crash if running graphics 184 int powerpc_emulated_on_i386; 185 int is_native_i386_app(char*); 186 #endif 187 int request_reread_prefs(); 188 int request_reread_app_info(); 189 int link_user_files(); 190 int get_shmem_seg_name(); runnableACTIVE_TASK191 bool runnable() { 192 return _task_state == PROCESS_UNINITIALIZED 193 || _task_state == PROCESS_EXECUTING 194 || _task_state == PROCESS_SUSPENDED; 195 } 196 void copy_final_info(); 197 // copy final CPU time etc. to result 198 199 ACTIVE_TASK(); 200 ~ACTIVE_TASK(); 201 int init(RESULT*); 202 void cleanup_task(); 203 204 int current_disk_usage(double&); 205 // disk used by output files and temp files of this task 206 int get_free_slot(RESULT*); 207 int start(bool test=false); // start a process 208 209 // Termination stuff. 210 // Terminology: 211 // "kill": forcibly kill the main process and all its descendants. 212 // "request exit": send a request-exit message, and enumerate descendants. 213 // If after 15 secs any processes remain, kill them 214 // called from: 215 // task preemption 216 // project detach or reset 217 // implementation: 218 // sends msg, sets quit_time, state QUIT_PENDING; 219 // get list of descendants 220 // normal exit handled in handle_premature_exit() 221 // timeout handled in ACTIVE_TASK_SET::poll() 222 // "abort_task": like request exit, 223 // but the app is supposed to write a stack trace to stderr 224 // called from: rsc exceeded; got ack of running task; 225 // intermediate upload failure 226 // client exiting w/ abort_jobs_on_exit set 227 // 228 int request_exit(); 229 int request_abort(); 230 int kill_running_task(bool will_restart); 231 // Kill process and subsidiary processes forcibly. 232 // Unix: send a SIGKILL signal, Windows: TerminateProcess() 233 int kill_subsidiary_processes(); 234 // kill subsidiary processes of a job 235 // whose main process has already exited 236 int abort_task(int exit_status, const char*); 237 // can be called whether or not process exists 238 239 // is the GPU task running or suspended (due to CPU throttling) 240 // is_gpu_task_runningACTIVE_TASK241 inline bool is_gpu_task_running() { 242 int s = task_state(); 243 return s == PROCESS_EXECUTING || s == PROCESS_SUSPENDED; 244 } 245 246 // Implementation stuff related to termination 247 // 248 std::vector<int> descendants; 249 // PIDs of descendants, computed every 10 sec or so 250 // during resource usage computation. 251 bool process_exists(); 252 bool has_task_exited(); 253 // return true if this task has exited 254 255 int suspend(); 256 // tell a process to stop executing (but stay in mem) 257 // Done by sending it a <suspend> message 258 int unsuspend(int reason=0); 259 // Undo a suspend: send a <resume> message 260 int preempt(int preempt_type, int reason=0); 261 // preempt (via suspend or quit) a running task 262 int resume_or_start(bool); 263 void send_network_available(); 264 #ifdef _WIN32 265 void handle_exited_app(unsigned long); 266 #else 267 void handle_exited_app(int stat); 268 #endif 269 void handle_premature_exit(bool&); 270 void handle_temporary_exit(bool&, double, const char*, bool); 271 272 bool check_max_disk_exceeded(); 273 274 bool get_app_status_msg(); 275 bool get_trickle_up_msg(); 276 void get_graphics_msg(); 277 double est_dur(); 278 int read_stderr_file(); 279 bool finish_file_present(); 280 bool temporary_exit_file_present(double&, char*, bool&); 281 void init_app_init_data(APP_INIT_DATA&); 282 int write_app_init_file(APP_INIT_DATA&); 283 int move_trickle_file(); 284 int handle_upload_files(); 285 void upload_notify_app(const FILE_INFO*, const FILE_REF*); 286 int copy_output_files(); 287 int setup_file(FILE_INFO*, FILE_REF&, char*, bool, bool); 288 bool must_copy_file(FILE_REF&, bool); 289 void write_task_state_file(); 290 void read_task_state_file(); 291 292 int write(MIOFILE&); 293 int write_gui(MIOFILE&); 294 int parse(XML_PARSER&); 295 }; 296 297 // Represents the set of all jobs in progress 298 299 class ACTIVE_TASK_SET { 300 public: 301 typedef std::vector<ACTIVE_TASK*> active_tasks_v; 302 active_tasks_v active_tasks; 303 ACTIVE_TASK* lookup_pid(int); 304 ACTIVE_TASK* lookup_result(RESULT*); 305 void init(); 306 bool poll(); 307 void suspend_all(int reason); 308 void unsuspend_all(int reason=0); 309 bool is_task_executing(); 310 void request_tasks_exit(PROJECT* p=0); 311 int wait_for_exit(double, PROJECT* p=0); 312 int exit_tasks(PROJECT* p=0); 313 void kill_tasks(PROJECT* p=0); 314 int abort_project(PROJECT*); 315 void get_msgs(); 316 bool check_app_exited(); 317 bool check_rsc_limits_exceeded(); 318 bool check_quit_timeout_exceeded(); 319 bool is_slot_in_use(int); 320 bool is_slot_dir_in_use(char*); 321 void send_heartbeats(); 322 void send_trickle_downs(); 323 void report_overdue(); 324 void handle_upload_files(); 325 void upload_notify_app(FILE_INFO*); 326 bool want_network(); // does any task want network? 327 void network_available(); // notify tasks that network is available 328 void free_mem(); 329 bool slot_taken(int); 330 void get_memory_usage(); 331 332 void process_control_poll(); 333 void request_reread_prefs(PROJECT*); 334 void request_reread_app_info(); 335 336 int write(MIOFILE&); 337 int parse(XML_PARSER&); 338 }; 339 340 extern double exclusive_app_running; // last time an exclusive app was running 341 extern double exclusive_gpu_app_running; 342 extern int gpu_suspend_reason; 343 extern double non_boinc_cpu_usage; 344 345 extern void run_test_app(); 346 347 #ifdef _WIN32 348 extern DWORD WINAPI throttler(void*); 349 #else 350 extern void* throttler(void*); 351 #endif 352 353 #endif 354