1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
17
18 #ifndef BOINC_SCHED_TYPES_H
19 #define BOINC_SCHED_TYPES_H
20
21 #include <cstdio>
22 #include <vector>
23
24 #include "boinc_db.h"
25 #include "common_defs.h"
26 #include "md5_file.h"
27 #include "coproc.h"
28
29 #include "edf_sim.h"
30
// An app for which the user is willing to accept work.
// Only relevant for projects that support filtering work by app.
//
struct APP_INFO {
    int appid;
    int work_available;
};
38
// A resource (disk etc.) of which the client may not have enough.
//
struct RESOURCE {
    bool insufficient;
    double needed;
        // the minimum extra amount needed

    // Mark the resource as insufficient and keep the smallest
    // shortfall reported so far.
    // A zero "needed" is treated as "nothing recorded yet",
    // so in that case x is stored unconditionally.
    //
    inline void set_insufficient(double x) {
        insufficient = true;
        if (needed == 0 || x < needed) {
            needed = x;
        }
    }
};
54
// A message for the volunteer, together with its priority.
//
struct USER_MESSAGE {
    std::string message;
    std::string priority;

    USER_MESSAGE(const char* m, const char* p);
        // only declared here; the definition lives outside this header
};
62
63 struct HOST_USAGE {
64 int proc_type;
65 double gpu_usage;
66 double gpu_ram;
67 double avg_ncpus;
68 double max_ncpus;
69 double mem_usage;
70 // mem usage if specified by the plan class
71 // (overrides wu.rsc_memory_bound)
72 double projected_flops;
73 // the scheduler's best estimate of wu.rsc_fpops_est/elapsed_time.
74 // Taken from host_app_version elapsed time statistics if available,
75 // else on estimate provided by app_plan()
76 double peak_flops;
77 // stored in result.flops_estimate, and used for credit calculations
78 char cmdline[256];
79 char custom_coproc_type[256];
80 // if we're using a custom GPU type, its name
81 // TODO: get rid of PROC_TYPE_*, and this
82
HOST_USAGEHOST_USAGE83 HOST_USAGE() {
84 proc_type = PROC_TYPE_CPU;
85 gpu_usage = 0;
86 gpu_ram = 0;
87 avg_ncpus = 1;
88 max_ncpus = 1;
89 mem_usage = 0;
90 projected_flops = 0;
91 peak_flops = 0;
92 strcpy(cmdline, "");
93 strcpy(custom_coproc_type, "");
94 }
sequential_appHOST_USAGE95 void sequential_app(double flops) {
96 proc_type = PROC_TYPE_CPU;
97 gpu_usage = 0;
98 gpu_ram = 0;
99 avg_ncpus = 1;
100 max_ncpus = 1;
101 mem_usage = 0;
102 if (flops <= 0) flops = 1e9;
103 projected_flops = flops;
104 peak_flops = flops;
105 strcpy(cmdline, "");
106 }
is_sequential_appHOST_USAGE107 inline bool is_sequential_app() {
108 if (proc_type != PROC_TYPE_CPU) return false;
109 if (avg_ncpus != 1) return false;
110 return true;
111 }
resource_typeHOST_USAGE112 inline int resource_type() {
113 switch (proc_type) {
114 case PROC_TYPE_NVIDIA_GPU: return ANON_PLATFORM_NVIDIA;
115 case PROC_TYPE_AMD_GPU: return ANON_PLATFORM_ATI;
116 case PROC_TYPE_INTEL_GPU: return ANON_PLATFORM_INTEL;
117 default: return ANON_PLATFORM_CPU;
118 }
119 }
uses_gpuHOST_USAGE120 inline bool uses_gpu() {
121 return (proc_type != PROC_TYPE_CPU);
122 }
123 };
124
125 // a description of a sticky file on host, or a job input file
126 //
127 struct FILE_INFO {
128 char name[256];
129 double nbytes;
130 int status;
131 bool sticky;
132
133 int parse(XML_PARSER&);
134 };
135
136 struct MSG_FROM_HOST_DESC {
137 char variety[256];
138 std::string msg_text;
139 int parse(XML_PARSER&);
140 };
141
142 // an app version from an anonymous-platform client
143 // (starting with 6.11, ALL clients send these)
144 //
145 struct CLIENT_APP_VERSION {
146 char app_name[256];
147 char platform[256];
148 int version_num;
149 char plan_class[256];
150 HOST_USAGE host_usage;
151 double rsc_fpops_scale;
152 // multiply wu.rsc_fpops_est and rsc_fpops_limit
153 // by this amount when send to client,
154 // to reflect the discrepancy between how fast the client
155 // thinks the app is versus how fast we think it is
156 APP* app;
157 // if NULL, this record is a place-holder,
158 // used to preserve array indices
159
160 int parse(XML_PARSER&);
161 };
162
163 // keep track of the best app_version for each app for this host
164 //
165 struct BEST_APP_VERSION {
166 DB_ID_TYPE appid;
167 bool for_64b_jobs;
168 // maintain this separately for jobs that need > 2GB RAM,
169 // in which case we can't use 32-bit apps
170
171 bool present;
172 // false means there's no usable version for this app
173
174 CLIENT_APP_VERSION* cavp;
175 // populated if anonymous platform
176
177 APP_VERSION* avp;
178 // populated otherwise
179
180 HOST_USAGE host_usage;
181 // populated in either case
182
183 bool reliable;
184 bool trusted;
185
186 DB_HOST_APP_VERSION* host_app_version();
187 // get the HOST_APP_VERSION, if any
188
BEST_APP_VERSIONBEST_APP_VERSION189 BEST_APP_VERSION() {
190 appid = 0;
191 for_64b_jobs = false;
192 present = false;
193 cavp = NULL;
194 avp = NULL;
195 reliable = false;
196 trusted = false;
197 }
198 };
199
200 struct SCHED_DB_RESULT : DB_RESULT {
201 // the following used by the scheduler, but not stored in the DB
202 //
203 char wu_name[256];
204 int units; // used for granting credit by # of units processed
205 int parse_from_client(XML_PARSER&);
206 char platform_name[256];
207 BEST_APP_VERSION bav;
208
209 int write_to_client(FILE*);
210 };
211
// The part of the global prefs that the scheduler uses.
//
struct GLOBAL_PREFS {
    double mod_time;
    double disk_max_used_gb;
    double disk_max_used_pct;
    double disk_min_free_gb;
    double work_buf_min_days;
    double ram_max_used_busy_frac;
    double ram_max_used_idle_frac;
    double max_ncpus_pct;

    void parse(const char* buf, const char* venue);
    void defaults();

    // work_buf_min_days converted from days to seconds.
    //
    inline double work_buf_min() {
        const int seconds_per_day = 86400;
        return work_buf_min_days * seconds_per_day;
    }
};
228
229 struct GUI_URLS {
230 char* text;
231 void init();
232 void get_gui_urls(USER& user, HOST& host, TEAM& team, char*, int len);
233 };
234
// Holds project-files text in a raw character buffer.
//
struct PROJECT_FILES {
    char* text;

    void init();
};
239
240 // Represents a result from this project that the client has.
241 // The request message has a list of these.
242 // The reply message may include a list of those to be aborted
243 // or aborted if not started
244 //
245 struct OTHER_RESULT {
246 char name[256];
247 int app_version; // index into CLIENT_APP_VERSION array
248 char plan_class[64];
249 bool have_plan_class;
250 bool abort;
251 bool abort_if_not_started;
252 int reason; // see codes below
253
254 int parse(XML_PARSER&);
255 };
256
// Codes stored in OTHER_RESULT::reason
//
#define ABORT_REASON_NOT_FOUND      1
#define ABORT_REASON_WU_CANCELLED   2
#define ABORT_REASON_ASSIMILATED    3
#define ABORT_REASON_TIMED_OUT      4
261
262 struct CLIENT_PLATFORM {
263 char name[256];
264 int parse(XML_PARSER&);
265 };
266
267 struct PLATFORM_LIST {
268 std::vector<PLATFORM*> list;
269 };
270
271 struct SCHEDULER_REQUEST {
272 char authenticator[256];
273 CLIENT_PLATFORM platform;
274 std::vector<CLIENT_PLATFORM> alt_platforms;
275 PLATFORM_LIST platforms;
276 char cross_project_id[256];
277 DB_ID_TYPE hostid; // zero if first RPC
278 int core_client_major_version;
279 int core_client_minor_version;
280 int core_client_release;
281 int core_client_version; // 10000*major + 100*minor + release
282 int rpc_seqno;
283 double work_req_seconds;
284 // in "normalized CPU seconds" (see work_req.php)
285 double cpu_req_secs;
286 double cpu_req_instances;
287 double resource_share_fraction;
288 // this project's fraction of total resource share
289 double rrs_fraction;
290 // ... of runnable resource share
291 double prrs_fraction;
292 // ... of potentially runnable resource share
293 double cpu_estimated_delay;
294 // currently queued jobs saturate the CPU for this long;
295 // used for crude deadline check
296 double duration_correction_factor;
297 double uptime;
298 double previous_uptime;
299 char global_prefs_xml[BLOB_SIZE];
300 char working_global_prefs_xml[BLOB_SIZE];
301 char code_sign_key[4096];
302 bool dont_send_work;
303 char client_brand[256];
304 // as specified in client_brand.txt config file on client
305
306 std::vector<CLIENT_APP_VERSION> client_app_versions;
307
308 GLOBAL_PREFS global_prefs;
309 char global_prefs_source_email_hash[MD5_LEN];
310
311 HOST host; // request message is parsed into here.
312 // does NOT contain the full host record.
313 COPROCS coprocs;
314 std::vector<SCHED_DB_RESULT> results;
315 // completed results being reported
316 bool results_truncated;
317 // set if (to limit memory usage) we capped this size of "results"
318 // In this case, don't resend lost results
319 // since we don't know what was lost.
320 std::vector<RESULT> file_xfer_results;
321 std::vector<MSG_FROM_HOST_DESC> msgs_from_host;
322 std::vector<FILE_INFO> file_infos;
323 // sticky files reported by host
324
325 // temps used by locality scheduling:
326 std::vector<FILE_INFO> file_delete_candidates;
327 // deletion candidates
328 std::vector<FILE_INFO> files_not_needed;
329 // files no longer needed
330
331 std::vector<OTHER_RESULT> other_results;
332 // in-progress results from this project
333 std::vector<IP_RESULT> ip_results;
334 // in-progress results from all projects
335 bool have_other_results_list;
336 bool have_ip_results_list;
337 bool have_time_stats_log;
338 bool client_cap_plan_class;
339 int sandbox;
340 // whether client uses account-based sandbox. -1 = don't know
341 int allow_multiple_clients;
342 // whether client allows multiple clients per host, -1 don't know
343 bool using_weak_auth;
344 // Request uses weak authenticator.
345 // Don't modify user prefs or CPID
346 int last_rpc_dayofyear;
347 int current_rpc_dayofyear;
348 std::string client_opaque;
349
350 SCHEDULER_REQUEST();
~SCHEDULER_REQUESTSCHEDULER_REQUEST351 ~SCHEDULER_REQUEST(){};
352 void clear();
353 const char* parse(XML_PARSER&);
354 int write(FILE*); // write request info to file: not complete
355 };
356
// Used to keep track of the bottleneck disk preference.
//
struct DISK_LIMITS {
    double max_used;
    double max_frac;
    double min_free;
};
364
365 // parsed version of project prefs that relate to scheduling
366 //
367 struct PROJECT_PREFS {
368 std::vector<APP_INFO> selected_apps;
369 bool dont_use_proc_type[NPROC_TYPES];
370 bool allow_non_selected_apps;
371 bool allow_beta_work;
372 int max_jobs_in_progress;
373 int max_cpus;
374
375 void parse();
376
PROJECT_PREFSPROJECT_PREFS377 PROJECT_PREFS() {
378 memset(&dont_use_proc_type, 0, sizeof(dont_use_proc_type));
379 allow_non_selected_apps = false;
380 allow_beta_work = false;
381 max_jobs_in_progress = 0;
382 max_cpus = 0;
383 }
384 };
385
386 // summary of a client's request for work, and our response to it
387 // Note: this is zeroed out in SCHEDULER_REPLY constructor,
388 // so don't put any vectors here
389 //
390 struct WORK_REQ_BASE {
391 bool anonymous_platform;
392
393 // the following defined if anonymous platform
394 //
395 bool client_has_apps_for_proc_type[NPROC_TYPES];
396
397 // Flags used by old-style scheduling,
398 // while making multiple passes through the work array
399 //
400 bool infeasible_only;
401 bool reliable_only;
402 bool user_apps_only;
403 bool beta_only;
404 bool locality_sched_lite;
405 // for LSL apps, send only jobs where client has > 0 files
406
407 bool resend_lost_results;
408 // this is set if the request is reporting a result
409 // that was previously reported.
410 // This is evidence that the earlier reply was not received
411 // by the client. It may have contained results,
412 // so check and resend just in case.
413
414 bool has_reliable_version;
415 // whether the host has a reliable app version
416
417 int effective_ncpus;
418 // # of usable CPUs on host, taking prefs into account
419 int effective_ngpus;
420
421 // 6.7+ clients send separate requests for different resource types:
422 //
423 double req_secs[NPROC_TYPES];
424 // instance-seconds requested
425 double req_instances[NPROC_TYPES];
426 // number of idle instances, use if possible
clear_reqWORK_REQ_BASE427 inline void clear_req(int proc_type) {
428 req_secs[proc_type] = 0;
429 req_instances[proc_type] = 0;
430 }
431
432 // older clients send send a single number, the requested duration of jobs
433 //
434 double seconds_to_fill;
435
436 // true if new-type request, which has resource-specific requests
437 //
438 bool rsc_spec_request;
439
need_proc_typeWORK_REQ_BASE440 inline bool need_proc_type(int t) {
441 if (rsc_spec_request) {
442 return (req_secs[t]>0) || (req_instances[t]>0);
443 }
444 return seconds_to_fill > 0;
445 }
446
447 double disk_available;
448 double ram, usable_ram;
449 double cpu_available_frac;
450 double gpu_available_frac;
451 int njobs_sent;
452
453 // The following keep track of the "easiest" job that was rejected
454 // by EDF simulation.
455 // Any jobs harder than this can be rejected without doing the simulation.
456 //
457 double edf_reject_min_cpu;
458 int edf_reject_max_delay_bound;
459 bool have_edf_reject;
edf_rejectWORK_REQ_BASE460 void edf_reject(double cpu, int delay_bound) {
461 if (have_edf_reject) {
462 if (cpu < edf_reject_min_cpu) edf_reject_min_cpu = cpu;
463 if (delay_bound> edf_reject_max_delay_bound) edf_reject_max_delay_bound = delay_bound;
464 } else {
465 edf_reject_min_cpu = cpu;
466 edf_reject_max_delay_bound = delay_bound;
467 have_edf_reject = true;
468 }
469 }
edf_reject_testWORK_REQ_BASE470 bool edf_reject_test(double cpu, int delay_bound) {
471 if (!have_edf_reject) return false;
472 if (cpu < edf_reject_min_cpu) return false;
473 if (delay_bound > edf_reject_max_delay_bound) return false;
474 return true;
475 }
476
477 RESOURCE disk;
478 RESOURCE mem;
479 RESOURCE speed;
480 RESOURCE bandwidth;
481
482 // various reasons for not sending jobs (used to explain why)
483 //
484 bool no_allowed_apps_available;
485 bool hr_reject_temp;
486 bool hr_reject_perm;
487 bool outdated_client;
488 bool max_jobs_on_host_exceeded;
489 bool max_jobs_on_host_proc_type_exceeded[NPROC_TYPES];
490 bool no_jobs_available; // project has no work right now
491 int max_jobs_per_rpc;
492
max_jobs_exceededWORK_REQ_BASE493 bool max_jobs_exceeded() {
494 if (max_jobs_on_host_exceeded) return true;
495 for (int i=0; i<NPROC_TYPES; i++) {
496 if (max_jobs_on_host_proc_type_exceeded[i]) return true;
497 }
498 return false;
499 }
clearWORK_REQ_BASE500 void clear() {
501 memset(this, 0, sizeof(WORK_REQ_BASE));
502 }
503
504 };
505
506 struct WORK_REQ : public WORK_REQ_BASE {
507 PROJECT_PREFS project_prefs;
508 std::vector<USER_MESSAGE> no_work_messages;
509 std::vector<BEST_APP_VERSION*> best_app_versions;
510 std::vector<DB_HOST_APP_VERSION> host_app_versions;
511 std::vector<DB_HOST_APP_VERSION> host_app_versions_orig;
512
513 void get_job_limits();
514 void add_no_work_message(const char*);
515
~WORK_REQWORK_REQ516 ~WORK_REQ() {}
517 };
518
519 // NOTE: if any field requires initialization,
520 // you must do it in the constructor. Nothing is zeroed by default.
521 //
522 struct SCHEDULER_REPLY {
523 WORK_REQ wreq;
524 DISK_LIMITS disk_limits;
525 double request_delay; // don't request again until this time elapses
526 std::vector<USER_MESSAGE> messages;
527 DB_ID_TYPE hostid;
528 // nonzero only if a new host record was created.
529 // this tells client to reset rpc_seqno
530 int lockfile_fd; // file descriptor of lockfile, or -1 if no lock.
531 bool send_global_prefs;
532 bool nucleus_only; // send only message
533 USER user;
534 char email_hash[MD5_LEN];
535 HOST host; // after validation, contains full host rec
536 TEAM team;
537 std::vector<APP> apps;
538 std::vector<APP_VERSION> app_versions;
539 std::vector<WORKUNIT>wus;
540 std::vector<SCHED_DB_RESULT>results;
541 std::vector<std::string>result_acks;
542 std::vector<std::string>result_aborts;
543 std::vector<std::string>result_abort_if_not_starteds;
544 std::vector<MSG_TO_HOST>msgs_to_host;
545 std::vector<FILE_INFO>file_deletes;
546 std::vector<std::string> file_transfer_requests;
547 char code_sign_key[4096];
548 char code_sign_key_signature[4096];
549 bool send_msg_ack;
550 bool project_is_down;
551 std::vector<APP_VERSION>old_app_versions;
552 // superceded app versions that we consider using because of
553 // homogeneous app version.
554
555 SCHEDULER_REPLY();
~SCHEDULER_REPLYSCHEDULER_REPLY556 ~SCHEDULER_REPLY(){};
557 int write(FILE*, SCHEDULER_REQUEST&);
558 void insert_app_unique(APP&);
559 void insert_app_version_unique(APP_VERSION&);
560 void insert_workunit_unique(WORKUNIT&);
561 void insert_result(SCHED_DB_RESULT&);
562 void insert_message(const char* msg, const char* prio);
563 void insert_message(USER_MESSAGE&);
564 void set_delay(double);
565 };
566
567 extern SCHEDULER_REQUEST* g_request;
568 extern SCHEDULER_REPLY* g_reply;
569 extern WORK_REQ* g_wreq;
570 extern double capped_host_fpops();
571
add_no_work_message(const char * m)572 static inline void add_no_work_message(const char* m) {
573 g_wreq->add_no_work_message(m);
574 }
575
576 extern void get_weak_auth(USER&, char*);
577 extern void get_rss_auth(USER&, char*);
578 extern void read_host_app_versions();
579 extern DB_HOST_APP_VERSION* get_host_app_version(DB_ID_TYPE gavid);
580 extern void write_host_app_versions();
581
582 extern DB_HOST_APP_VERSION* gavid_to_havp(DB_ID_TYPE gavid);
583 extern DB_HOST_APP_VERSION* quota_exceeded_version();
584
// Returns true if the platform name contains the substring "64".
// A NULL name is reported as not 64-bit rather than being handed to
// strstr(), which would be undefined behavior.
//
inline bool is_64b_platform(const char* name) {
    if (!name) return false;
    return (strstr(name, "64") != NULL);
}
588
589 extern double available_frac(BEST_APP_VERSION&);
590
591 #endif
592