1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
17 
18 #ifndef BOINC_APP_H
19 #define BOINC_APP_H
20 
21 #ifndef _WIN32
22 #include <cstdio>
23 #include <vector>
24 #endif
25 
26 #include "app_ipc.h"
27 #include "common_defs.h"
28 #include "procinfo.h"
29 
30 #include "client_types.h"
31 
// Values for the preempt_type argument of ACTIVE_TASK::preempt().
// NOTE(review): the names suggest they select whether a preempted task
// is removed from memory (never / depending on user prefs /
// depending on the scheduler / always) - confirm against preempt().
//
#define REMOVE_NEVER        0
#define REMOVE_MAYBE_USER   1
#define REMOVE_MAYBE_SCHED  2
#define REMOVE_ALWAYS       3
38 
// Forward declarations: only pointers to these types are needed here.
struct CLIENT_STATE;
struct ASYNC_COPY;

// Type used to hold a process ID (see ACTIVE_TASK::pid).
typedef int PROCESS_ID;
42 
// The stderr output of an application is truncated to this length
// before being sent to the server,
// to protect against apps that write unbounded amounts.
//
#define MAX_STDERR_LEN  65536
47 
48 // Represents a job in progress.
49 
50 // When an active task is created, it is assigned a "slot"
51 // which determines the directory it runs in.
52 // This doesn't change over the life of the active task;
53 // thus the task can use the slot directory for temp files
54 // that BOINC doesn't know about.
55 
56 struct ACTIVE_TASK {
57 #ifdef _WIN32
58     HANDLE process_handle, shm_handle;
59     bool kill_all_children();
60 #endif
61     SHMEM_SEG_NAME shmem_seg_name;
62     RESULT* result;
63     WORKUNIT* wup;
64     APP_VERSION* app_version;
65     PROCESS_ID pid;
66     PROCINFO procinfo;
67 
68     // START OF ITEMS SAVED IN TASK STATE FILE
69     // (in addition to result name and project URL)
70 
71     double checkpoint_cpu_time;
72         // CPU at the last checkpoint
73         // Note: "CPU time" refers to the sum over all episodes.
74         // (not counting the "lost" time after the last checkpoint
75         // in episodes before the current one)
76     double checkpoint_elapsed_time;
77         // elapsed time at last checkpoint
78     double peak_working_set_size;
79     double peak_swap_size;
80     double peak_disk_usage;
81 
82     // START OF ITEMS ALSO SAVED IN CLIENT STATE FILE
83 
84     int _task_state;
85         // PROCESS_*; see common_defs.h
86     int slot;
87         // subdirectory of slots/ where this runs
88     double checkpoint_fraction_done;
89         // fraction done at last checkpoint
90     double checkpoint_fraction_done_elapsed_time;
91         // fraction done elapsed time at last checkpoint
92     double current_cpu_time;
93         // most recent CPU time reported by app
94     bool once_ran_edf;
95 
96     // END OF ITEMS SAVED IN STATE FILE
97 
98     double fraction_done;
99         // App's estimate of how much of the work unit is done.
100         // Passed from the application via an API call;
101         // will be zero if the app doesn't use this call
102     double fraction_done_elapsed_time;
103         // elapsed time when fraction done was last reported
104     double first_fraction_done;
105         // first frac done reported during this run of task
106     double first_fraction_done_elapsed_time;
107         // elapsed time when the above was reported
108     int scheduler_state;
109     int next_scheduler_state; // temp
110     int signal;
111     double run_interval_start_wall_time;
112         // Wall time at the start of the current run interval
113     double checkpoint_wall_time;
114         // wall time at the last checkpoint
115     double elapsed_time;
116         // current total elapsed (running) time
117     double bytes_sent_episode;
118         // bytes sent in current episode of job,
119         // as (optionally) reported by boinc_network_usage()
120     double bytes_received_episode;
121     double bytes_sent;
122         // bytes in all episodes
123     double bytes_received;
124     char slot_dir[256];
125         // directory where process runs (relative)
126     char slot_path[MAXPATHLEN];
127         // same, absolute
128         // This is used only to run graphics apps
129         // (that way don't have to worry about top-level dirs
130         // being non-readable, etc).
131     double max_elapsed_time;
132         // abort if elapsed time exceeds this
133     double max_disk_usage;
134         // abort if disk usage (in+out+temp) exceeds this
135     double max_mem_usage;
136         // abort if memory usage exceeds this
137     bool have_trickle_down;
138     bool send_upload_file_status;
139     bool too_large;
140         // Working set too large to run now; waiting for RAM
141         // This is a slight misnomer.
142         // It doesn't mean that this job itself is too large;
143         // rather, it means that the last time we did CPU scheduling,
144         // the set of jobs we tried to run was too big,
145         // and this one came after we ran out of mem.
146     bool needs_shmem;               // waiting for a free shared memory segment
147     int want_network;
148         // This task wants to do network comm (for F@h)
149         // this is passed via share-memory message (app_status channel)
150     double abort_time;
151         // when we sent an abort message to this app
152         // kill it 5 seconds later if it doesn't exit
153     double quit_time;
154     int premature_exit_count;
155         // when we sent a quit message; kill if still there after 10 sec
156     bool overdue_checkpoint;
157         // running past end of time slice because not checkpointed;
158         // when we do checkpoint, reschedule
159     double last_deadline_miss_time;
160 
161     APP_CLIENT_SHM app_client_shm;
162         // core/app shared mem segment
163     MSG_QUEUE graphics_request_queue;
164     MSG_QUEUE process_control_queue;
165     std::vector<int> other_pids;
166         // IDs of processes that are part of this task
167         // but not descendants of the main process
168         // (e.g. VMs created by vboxwrapper)
169         // These are communicated via the app_status message channel
170     char web_graphics_url[256];
171     char remote_desktop_addr[256];
172     ASYNC_COPY* async_copy;
173     double finish_file_time;
174         // time when we saw finish file in slot dir.
175         // Used to kill apps that hang after writing finished file
176 
177     void set_task_state(int, const char*);
task_stateACTIVE_TASK178     inline int task_state() {
179         return _task_state;
180     }
181 
182 #if (defined (__APPLE__) && (defined(__i386__) || defined(__x86_64__)))
183     // PowerPC apps emulated on i386 Macs crash if running graphics
184     int powerpc_emulated_on_i386;
185     int is_native_i386_app(char*);
186 #endif
187     int request_reread_prefs();
188     int request_reread_app_info();
189     int link_user_files();
190     int get_shmem_seg_name();
runnableACTIVE_TASK191     bool runnable() {
192         return _task_state == PROCESS_UNINITIALIZED
193             || _task_state == PROCESS_EXECUTING
194             || _task_state == PROCESS_SUSPENDED;
195     }
196     void copy_final_info();
197         // copy final CPU time etc. to result
198 
199     ACTIVE_TASK();
200     ~ACTIVE_TASK();
201     int init(RESULT*);
202     void cleanup_task();
203 
204     int current_disk_usage(double&);
205         // disk used by output files and temp files of this task
206     int get_free_slot(RESULT*);
207     int start(bool test=false);         // start a process
208 
209     // Termination stuff.
210     // Terminology:
211     // "kill": forcibly kill the main process and all its descendants.
212     // "request exit": send a request-exit message, and enumerate descendants.
213     //      If after 15 secs any processes remain, kill them
214     //      called from:
215     //          task preemption
216     //          project detach or reset
217     //      implementation:
218     //          sends msg, sets quit_time, state QUIT_PENDING;
219     //              get list of descendants
220     //          normal exit handled in handle_premature_exit()
221     //          timeout handled in ACTIVE_TASK_SET::poll()
222     // "abort_task": like request exit,
223     //      but the app is supposed to write a stack trace to stderr
224     //      called from: rsc exceeded; got ack of running task;
225     //          intermediate upload failure
226     //          client exiting w/ abort_jobs_on_exit set
227     //
228     int request_exit();
229     int request_abort();
230     int kill_running_task(bool will_restart);
231         // Kill process and subsidiary processes forcibly.
232         // Unix: send a SIGKILL signal, Windows: TerminateProcess()
233     int kill_subsidiary_processes();
234         // kill subsidiary processes of a job
235         // whose main process has already exited
236     int abort_task(int exit_status, const char*);
237         // can be called whether or not process exists
238 
239     // is the GPU task running or suspended (due to CPU throttling)
240     //
is_gpu_task_runningACTIVE_TASK241     inline bool is_gpu_task_running() {
242         int s = task_state();
243         return s == PROCESS_EXECUTING || s == PROCESS_SUSPENDED;
244     }
245 
246     // Implementation stuff related to termination
247     //
248     std::vector<int> descendants;
249         // PIDs of descendants, computed every 10 sec or so
250         // during resource usage computation.
251     bool process_exists();
252     bool has_task_exited();
253         // return true if this task has exited
254 
255     int suspend();
256         // tell a process to stop executing (but stay in mem)
257         // Done by sending it a <suspend> message
258     int unsuspend(int reason=0);
259         // Undo a suspend: send a <resume> message
260     int preempt(int preempt_type, int reason=0);
261         // preempt (via suspend or quit) a running task
262     int resume_or_start(bool);
263     void send_network_available();
264 #ifdef _WIN32
265     void handle_exited_app(unsigned long);
266 #else
267     void handle_exited_app(int stat);
268 #endif
269     void handle_premature_exit(bool&);
270     void handle_temporary_exit(bool&, double, const char*, bool);
271 
272     bool check_max_disk_exceeded();
273 
274     bool get_app_status_msg();
275     bool get_trickle_up_msg();
276     void get_graphics_msg();
277     double est_dur();
278     int read_stderr_file();
279     bool finish_file_present();
280     bool temporary_exit_file_present(double&, char*, bool&);
281     void init_app_init_data(APP_INIT_DATA&);
282     int write_app_init_file(APP_INIT_DATA&);
283     int move_trickle_file();
284     int handle_upload_files();
285     void upload_notify_app(const FILE_INFO*, const FILE_REF*);
286     int copy_output_files();
287     int setup_file(FILE_INFO*, FILE_REF&, char*, bool, bool);
288     bool must_copy_file(FILE_REF&, bool);
289     void write_task_state_file();
290     void read_task_state_file();
291 
292     int write(MIOFILE&);
293     int write_gui(MIOFILE&);
294     int parse(XML_PARSER&);
295 };
296 
297 // Represents the set of all jobs in progress
298 
299 class ACTIVE_TASK_SET {
300 public:
301     typedef std::vector<ACTIVE_TASK*> active_tasks_v;
302     active_tasks_v active_tasks;
303     ACTIVE_TASK* lookup_pid(int);
304     ACTIVE_TASK* lookup_result(RESULT*);
305     void init();
306     bool poll();
307     void suspend_all(int reason);
308     void unsuspend_all(int reason=0);
309     bool is_task_executing();
310     void request_tasks_exit(PROJECT* p=0);
311     int wait_for_exit(double, PROJECT* p=0);
312     int exit_tasks(PROJECT* p=0);
313     void kill_tasks(PROJECT* p=0);
314     int abort_project(PROJECT*);
315     void get_msgs();
316     bool check_app_exited();
317     bool check_rsc_limits_exceeded();
318     bool check_quit_timeout_exceeded();
319     bool is_slot_in_use(int);
320     bool is_slot_dir_in_use(char*);
321     void send_heartbeats();
322     void send_trickle_downs();
323     void report_overdue();
324     void handle_upload_files();
325     void upload_notify_app(FILE_INFO*);
326     bool want_network();    // does any task want network?
327     void network_available();   // notify tasks that network is available
328     void free_mem();
329     bool slot_taken(int);
330     void get_memory_usage();
331 
332     void process_control_poll();
333     void request_reread_prefs(PROJECT*);
334     void request_reread_app_info();
335 
336     int write(MIOFILE&);
337     int parse(XML_PARSER&);
338 };
339 
// Globals defined elsewhere in the client.
//
extern double exclusive_app_running;
    // last time an exclusive app was running
extern double exclusive_gpu_app_running;
    // NOTE(review): presumably the same, for exclusive GPU apps - confirm
extern int gpu_suspend_reason;
extern double non_boinc_cpu_usage;

extern void run_test_app();

// Thread entry point; the signature follows the platform's
// thread-function convention.
//
#ifdef _WIN32
extern DWORD WINAPI throttler(void*);
#else
extern void* throttler(void*);
#endif
352 
353 #endif
354