1 /*
2 * Amanda, The Advanced Maryland Automatic Network Disk Archiver
3 * Copyright (c) 1991-1998 University of Maryland at College Park
4 * Copyright (c) 2007-2013 Zmanda, Inc. All Rights Reserved.
5 * All Rights Reserved.
6 *
7 * Permission to use, copy, modify, distribute, and sell this software and its
8 * documentation for any purpose is hereby granted without fee, provided that
9 * the above copyright notice appear in all copies and that both that
10 * copyright notice and this permission notice appear in supporting
11 * documentation, and that the name of U.M. not be used in advertising or
12 * publicity pertaining to distribution of the software without specific,
13 * written prior permission. U.M. makes no representations about the
14 * suitability of this software for any purpose. It is provided "as is"
15 * without express or implied warranty.
16 *
17 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
19 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
21 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
22 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 *
24 * Authors: the Amanda Development Team. Its members are listed in a
25 * file named AUTHORS, in the root directory of this distribution.
26 */
27 /*
28 * $Id: driver.c 6512 2007-05-24 17:00:24Z ian $
29 *
30 * controlling process for the Amanda backup system
31 */
32
33 /*
34 * XXX possibly modify tape queue to be cognizant of how much room is left on
35 * tape. Probably not effective though, should do this in planner.
36 */
37
38 #include "amanda.h"
39 #include "find.h"
40 #include "clock.h"
41 #include "conffile.h"
42 #include "diskfile.h"
43 #include "event.h"
44 #include "holding.h"
45 #include "infofile.h"
46 #include "logfile.h"
47 #include "fsusage.h"
48 #include "driverio.h"
49 #include "server_util.h"
50 #include "timestamp.h"
51
/*
 * Emit a debug message through dbprintf() when the requested level `i`
 * is at or below the current `debug_driver` setting.  The remaining
 * arguments are passed to dbprintf() unchanged.
 */
#define driver_debug(i, ...) do {	\
	if ((i) <= debug_driver) {	\
	    dbprintf(__VA_ARGS__);	\
	}				\
} while (0)

/* Same as driver_debug(), but gated on the `debug_holding` setting. */
#define hold_debug(i, ...) do {		\
	if ((i) <= debug_holding) {	\
	    dbprintf(__VA_ARGS__);	\
	}				\
} while (0)
63
/* File-scope driver state.  Unless noted, values are set up in main(). */
static disklist_t waitq;	// dle waiting estimate result
static disklist_t runq;		// dle waiting to be dumped to holding disk
static disklist_t directq;	// dle waiting to be dumped directly to tape
static disklist_t tapeq;	// dle on holding disk waiting to be written
				//   to tape
static disklist_t roomq;	// dle waiting for more space on holding disk
static int pending_aborts;
static int degraded_mode;
static off_t reserved_space;	// holding space reserved for degraded-mode dumps
static off_t total_disksize;	// sum of the usable sizes of all holding disks
static char *dumper_program;	// path of the dumper executable (under amlibexecdir)
static char *chunker_program;	// path of the chunker executable (under amlibexecdir)
static int  inparallel;		// CNF_INPARALLEL, capped at MAX_DUMPERS
static int nodump = 0;		// set when the "nodump" argument is given
static off_t tape_length = (off_t)0;	// length of the configured tapetype
static int current_tape = 0;
static int conf_max_dle_by_volume;
static int conf_taperalgo;
static int conf_taper_parallel_write;	// never larger than conf_runtapes
static int conf_runtapes;
static time_t sleep_time;
static int idle_reason;
static char *driver_timestamp;	// timestamp taken from the DATE line on stdin
static char *hd_driver_timestamp;	// timestamp used to name the holding directories
static am_host_t *flushhost = NULL;
static int need_degraded=0;
static holdalloc_t *holdalloc;	// head of the list built from CNF_HOLDINGDISK
static int num_holdalloc;	// number of entries in the holdalloc list
static event_handle_t *dumpers_ev_time = NULL;
static event_handle_t *flush_ev_read = NULL;
static event_handle_t *schedule_ev_read = NULL;
static int   conf_flush_threshold_dumped;	// percentage from the configuration
static int   conf_flush_threshold_scheduled;	// percentage from the configuration
static int   conf_taperflush;			// percentage from the configuration
static off_t flush_threshold_dumped;	// conf_flush_threshold_dumped percent of tape_length
static off_t flush_threshold_scheduled;	// conf_flush_threshold_scheduled percent of tape_length
static off_t taperflush;		// conf_taperflush percent of tape_length
static int   schedule_done;			// 1 if we don't wait for a
						//   schedule from the planner
static int   force_flush;			// All dumps are terminated, we
						// must now respect taper_flush
static int taper_nb_scan_volume = 0;
static int nb_sent_new_tape = 0;
static int taper_started = 0;
static taper_t *last_started_taper = NULL;
static taper_t *taper_sent_first_write = NULL; // *-WRITE sent on new taper but no (NO-)NEW-TAPE received
110
/* Forward declarations of the file-local helpers. */
static int wait_children(int count);
static void wait_for_children(void);
static void allocate_bandwidth(netif_t *ip, unsigned long kps);
static int assign_holdingdisk(assignedhd_t **holdp, disk_t *diskp);
static void adjust_diskspace(disk_t *diskp, cmd_t cmd);
static void delete_diskspace(disk_t *diskp);
static assignedhd_t **build_diskspace(char *destname);
static int client_constrained(disk_t *dp);
static void deallocate_bandwidth(netif_t *ip, unsigned long kps);
static void dump_schedule(disklist_t *qp, char *str);
static assignedhd_t **find_diskspace(off_t size, int *cur_idle,
					assignedhd_t *preferred);
static unsigned long free_kps(netif_t *ip);
static off_t free_space(void);
static void dumper_chunker_result(disk_t *dp);
static void dumper_taper_result(disk_t *dp);
static void file_taper_result(disk_t *dp);
/* Callbacks; the visible call sites register them through event_register(). */
static void handle_dumper_result(void *);
static void handle_chunker_result(void *);
static void handle_dumpers_time(void *);
static void handle_taper_result(void *);

static void holdingdisk_state(char *time_str);
static taper_t *idle_taper(void);
static taper_t *taper_from_name(char *name);
static void interface_state(char *time_str);
static int queue_length(disklist_t q);
static void read_flush(void *cookie);
static void read_schedule(void *cookie);
static void short_dump_state(void);
static void startaflush(void);
static void start_degraded_mode(disklist_t *queuep);
static void start_some_dumps(disklist_t *rq);
static void continue_port_dumps(void);
static void update_failed_dump(disk_t *);
static int no_taper_flushing(void);
static int active_dumper(void);
148
/*
 * Result of tape_action().  Every non-zero value is a distinct bit, and
 * startaflush_tape() tests the result with `&` rather than `==`.
 */
typedef enum {
    TAPE_ACTION_NO_ACTION         = 0,
    TAPE_ACTION_SCAN              = (1 << 0),	/* send START_SCAN to the taper */
    TAPE_ACTION_NEW_TAPE          = (1 << 1),	/* send NEW_TAPE to the taper */
    TAPE_ACTION_NO_NEW_TAPE       = (1 << 2),	/* send NO_NEW_TAPE to the taper */
    TAPE_ACTION_START_A_FLUSH     = (1 << 3),	/* start flushing a dump from tapeq */
    TAPE_ACTION_START_A_FLUSH_FIT = (1 << 4),	/* as above, using the *FIT variant of taperalgo */
    TAPE_ACTION_MOVE              = (1 << 5)	/* take over the scribe of an idle taper */
} TapeAction;

/*
 * Decide what to do for `taper`.  When a new tape is refused, the reason is
 * passed back through `why_no_new_tape` (it is forwarded with NO_NEW_TAPE).
 */
static TapeAction tape_action(taper_t *taper, char **why_no_new_tape);
160
/*
 * Printable names of the idle reasons.  Each #define is the array index of
 * the string that immediately follows it, so the two must stay in step.
 */
static const char *idle_strings[] = {
#define NOT_IDLE		0
    T_("not-idle"),
#define IDLE_NO_DUMPERS		1
    T_("no-dumpers"),
#define IDLE_START_WAIT		2
    T_("start-wait"),
#define IDLE_NO_HOLD		3
    T_("no-hold"),
#define IDLE_CLIENT_CONSTRAINED	4
    T_("client-constrained"),
#define IDLE_NO_BANDWIDTH	5
    T_("no-bandwidth"),
#define IDLE_NO_DISKSPACE	6
    T_("no-diskspace")
};
177
178 int
main(int argc,char ** argv)179 main(
180 int argc,
181 char ** argv)
182 {
183 disklist_t origq;
184 disk_t *diskp;
185 int dsk;
186 dumper_t *dumper;
187 char *newdir = NULL;
188 struct fs_usage fsusage;
189 holdingdisk_t *hdp;
190 identlist_t il;
191 unsigned long reserve = 100;
192 char *conf_diskfile;
193 char *taper_program;
194 char *conf_tapetype;
195 tapetype_t *tape;
196 char *line;
197 char hostname[1025];
198 intmax_t kb_avail;
199 config_overrides_t *cfg_ovr = NULL;
200 char *cfg_opt = NULL;
201 holdalloc_t *ha, *ha_last;
202 find_result_t *holding_files;
203 disklist_t holding_disklist = { NULL, NULL };
204 int no_taper = FALSE;
205 int from_client = FALSE;
206
207 if (argc > 1 && argv && argv[1] && g_str_equal(argv[1], "--version")) {
208 printf("driver-%s\n", VERSION);
209 return (0);
210 }
211
212 /*
213 * Configure program for internationalization:
214 * 1) Only set the message locale for now.
215 * 2) Set textdomain for all amanda related programs to "amanda"
216 * We don't want to be forced to support dozens of message catalogs.
217 */
218 setlocale(LC_MESSAGES, "C");
219 textdomain("amanda");
220
221 safe_fd(-1, 0);
222
223 setvbuf(stdout, (char *)NULL, (int)_IOLBF, 0);
224 setvbuf(stderr, (char *)NULL, (int)_IOLBF, 0);
225
226 set_pname("driver");
227
228 dbopen(DBG_SUBDIR_SERVER);
229
230 atexit(wait_for_children);
231
232 /* Don't die when child closes pipe */
233 signal(SIGPIPE, SIG_IGN);
234
235 add_amanda_log_handler(amanda_log_stderr);
236 add_amanda_log_handler(amanda_log_trace_log);
237
238 startclock();
239
240 cfg_ovr = extract_commandline_config_overrides(&argc, &argv);
241
242 if (argc > 1)
243 cfg_opt = argv[1];
244 set_config_overrides(cfg_ovr);
245 config_init(CONFIG_INIT_EXPLICIT_NAME | CONFIG_INIT_USE_CWD, cfg_opt);
246
247 conf_diskfile = config_dir_relative(getconf_str(CNF_DISKFILE));
248 read_diskfile(conf_diskfile, &origq);
249 disable_skip_disk(&origq);
250 amfree(conf_diskfile);
251
252 if (config_errors(NULL) >= CFGERR_WARNINGS) {
253 config_print_errors();
254 if (config_errors(NULL) >= CFGERR_ERRORS) {
255 g_critical(_("errors processing config file"));
256 }
257 }
258
259 log_add(L_INFO, "%s pid %ld", get_pname(), (long)getpid());
260 g_printf(_("%s: pid %ld executable %s version %s\n"),
261 get_pname(), (long) getpid(), argv[0], VERSION);
262
263 if(argc > 2) {
264 if(strcmp(argv[2], "nodump") == 0) {
265 nodump = 1;
266 argv++;
267 argc--;
268 }
269 }
270
271 if (argc > 2) {
272 if (strcmp(argv[2], "--no-taper") == 0) {
273 no_taper = TRUE;
274 argv++;
275 argc--;
276 }
277 }
278
279 if (argc > 2) {
280 if (strcmp(argv[2], "--from-client") == 0) {
281 from_client = TRUE;
282 from_client = from_client;
283 argv++;
284 argc--;
285 }
286 }
287
288 safe_cd(); /* do this *after* config_init */
289
290 check_running_as(RUNNING_AS_DUMPUSER);
291
292 dbrename(get_config_name(), DBG_SUBDIR_SERVER);
293
294 /* load DLEs from the holding disk, in case there's anything to flush there */
295 search_holding_disk(&holding_files, &holding_disklist);
296 /* note that the dumps are added to the global disklist, so we need not consult
297 * holding_files or holding_disklist after this */
298
299 amfree(driver_timestamp);
300 /* read timestamp from stdin */
301 while ((line = agets(stdin)) != NULL) {
302 if (line[0] != '\0')
303 break;
304 amfree(line);
305 }
306 if ( line == NULL ) {
307 error(_("Did not get DATE line from planner"));
308 /*NOTREACHED*/
309 }
310 driver_timestamp = alloc(15);
311 strncpy(driver_timestamp, &line[5], 14);
312 driver_timestamp[14] = '\0';
313 amfree(line);
314 log_add(L_START,_("date %s"), driver_timestamp);
315
316 gethostname(hostname, SIZEOF(hostname));
317 log_add(L_STATS,_("hostname %s"), hostname);
318
319 /* check that we don't do many dump in a day and usetimestamps is off */
320 if(strlen(driver_timestamp) == 8) {
321 if (!nodump) {
322 char *conf_logdir = config_dir_relative(getconf_str(CNF_LOGDIR));
323 char *logfile = vstralloc(conf_logdir, "/log.",
324 driver_timestamp, ".0", NULL);
325 char *oldlogfile = vstralloc(conf_logdir, "/oldlog/log.",
326 driver_timestamp, ".0", NULL);
327 if(access(logfile, F_OK) == 0 || access(oldlogfile, F_OK) == 0) {
328 log_add(L_WARNING, _("WARNING: This is not the first amdump run today. Enable the usetimestamps option in the configuration file if you want to run amdump more than once per calendar day."));
329 }
330 amfree(oldlogfile);
331 amfree(logfile);
332 }
333 hd_driver_timestamp = get_timestamp_from_time(0);
334 }
335 else {
336 hd_driver_timestamp = stralloc(driver_timestamp);
337 }
338
339 taper_program = vstralloc(amlibexecdir, "/", "taper", NULL);
340 dumper_program = vstralloc(amlibexecdir, "/", "dumper", NULL);
341 chunker_program = vstralloc(amlibexecdir, "/", "chunker", NULL);
342
343 conf_taperalgo = getconf_taperalgo(CNF_TAPERALGO);
344 conf_taper_parallel_write = getconf_int(CNF_TAPER_PARALLEL_WRITE);
345 conf_tapetype = getconf_str(CNF_TAPETYPE);
346 conf_runtapes = getconf_int(CNF_RUNTAPES);
347 conf_max_dle_by_volume = getconf_int(CNF_MAX_DLE_BY_VOLUME);
348 if (conf_taper_parallel_write > conf_runtapes) {
349 conf_taper_parallel_write = conf_runtapes;
350 }
351 tape = lookup_tapetype(conf_tapetype);
352 tape_length = tapetype_get_length(tape);
353 g_printf("driver: tape size %lld\n", (long long)tape_length);
354 conf_flush_threshold_dumped = getconf_int(CNF_FLUSH_THRESHOLD_DUMPED);
355 conf_flush_threshold_scheduled = getconf_int(CNF_FLUSH_THRESHOLD_SCHEDULED);
356 conf_taperflush = getconf_int(CNF_TAPERFLUSH);
357 flush_threshold_dumped = (conf_flush_threshold_dumped * tape_length) / 100;
358 flush_threshold_scheduled = (conf_flush_threshold_scheduled * tape_length) / 100;
359 taperflush = (conf_taperflush *tape_length) / 100;
360
361 driver_debug(1, _("flush-threshold-dumped: %lld\n"), (long long)flush_threshold_dumped);
362 driver_debug(1, _("flush-threshold-scheduled: %lld\n"), (long long)flush_threshold_scheduled);
363 driver_debug(1, _("taperflush: %lld\n"), (long long)taperflush);
364
365 /* set up any configuration-dependent variables */
366
367 inparallel = getconf_int(CNF_INPARALLEL);
368
369 reserve = (unsigned long)getconf_int(CNF_RESERVE);
370
371 total_disksize = (off_t)0;
372 ha_last = NULL;
373 num_holdalloc = 0;
374 for (il = getconf_identlist(CNF_HOLDINGDISK), dsk = 0;
375 il != NULL;
376 il = il->next, dsk++) {
377 hdp = lookup_holdingdisk(il->data);
378 ha = alloc(SIZEOF(holdalloc_t));
379 num_holdalloc++;
380
381 /* link the list in the same order as getconf_holdingdisks's results */
382 ha->next = NULL;
383 if (ha_last == NULL)
384 holdalloc = ha;
385 else
386 ha_last->next = ha;
387 ha_last = ha;
388
389 ha->hdisk = hdp;
390 ha->allocated_dumpers = 0;
391 ha->allocated_space = (off_t)0;
392 ha->disksize = holdingdisk_get_disksize(hdp);
393
394 /* get disk size */
395 if(get_fs_usage(holdingdisk_get_diskdir(hdp), NULL, &fsusage) == -1
396 || access(holdingdisk_get_diskdir(hdp), W_OK) == -1) {
397 log_add(L_WARNING, _("WARNING: ignoring holding disk %s: %s\n"),
398 holdingdisk_get_diskdir(hdp), strerror(errno));
399 ha->disksize = 0L;
400 continue;
401 }
402
403 /* do the division first to avoid potential integer overflow */
404 if (fsusage.fsu_bavail_top_bit_set)
405 kb_avail = 0;
406 else
407 kb_avail = fsusage.fsu_bavail / 1024 * fsusage.fsu_blocksize;
408
409 if(ha->disksize > (off_t)0) {
410 if(ha->disksize > kb_avail) {
411 log_add(L_WARNING,
412 _("WARNING: %s: %lld KB requested, "
413 "but only %lld KB available."),
414 holdingdisk_get_diskdir(hdp),
415 (long long)ha->disksize,
416 (long long)kb_avail);
417 ha->disksize = kb_avail;
418 }
419 }
420 /* ha->disksize is negative; use all but that amount */
421 else if(kb_avail < -ha->disksize) {
422 log_add(L_WARNING,
423 _("WARNING: %s: not %lld KB free."),
424 holdingdisk_get_diskdir(hdp),
425 (long long)-ha->disksize);
426 ha->disksize = (off_t)0;
427 continue;
428 }
429 else
430 ha->disksize += kb_avail;
431
432 g_printf(_("driver: adding holding disk %d dir %s size %lld chunksize %lld\n"),
433 dsk, holdingdisk_get_diskdir(hdp),
434 (long long)ha->disksize,
435 (long long)(holdingdisk_get_chunksize(hdp)));
436
437 newdir = newvstralloc(newdir,
438 holdingdisk_get_diskdir(hdp), "/", hd_driver_timestamp,
439 NULL);
440 if(!mkholdingdir(newdir)) {
441 ha->disksize = (off_t)0;
442 }
443 total_disksize += ha->disksize;
444 }
445
446 reserved_space = total_disksize * (off_t)(reserve / 100);
447
448 g_printf(_("reserving %lld out of %lld for degraded-mode dumps\n"),
449 (long long)reserved_space, (long long)free_space());
450
451 amfree(newdir);
452
453 if(inparallel > MAX_DUMPERS) inparallel = MAX_DUMPERS;
454
455 /* taper takes a while to get going, so start it up right away */
456
457 init_driverio();
458 startup_tape_process(taper_program, conf_taper_parallel_write, no_taper);
459
460 /* fire up the dumpers now while we are waiting */
461 if(!nodump) startup_dump_processes(dumper_program, inparallel, driver_timestamp);
462
463 /*
464 * Read schedule from stdin. Usually, this is a pipe from planner,
465 * so the effect is that we wait here for the planner to
466 * finish, but meanwhile the taper is rewinding the tape, reading
467 * the label, checking it, writing a new label and all that jazz
468 * in parallel with the planner.
469 */
470
471 runq.head = NULL;
472 runq.tail = NULL;
473 directq.head = NULL;
474 directq.tail = NULL;
475 waitq = origq;
476 tapeq.head = NULL;
477 tapeq.tail = NULL;
478 roomq.head = NULL;
479 roomq.tail = NULL;
480 taper_nb_wait_reply = 0;
481
482 need_degraded = 0;
483 if (no_taper || conf_runtapes <= 0) {
484 taper_started = 1; /* we'll pretend the taper started and failed immediately */
485 need_degraded = 1;
486 } else {
487 tapetable[0].state = TAPER_STATE_INIT;
488 taper_nb_wait_reply++;
489 taper_nb_scan_volume++;
490 taper_ev_read = event_register(taper_fd, EV_READFD,
491 handle_taper_result, NULL);
492 taper_cmd(START_TAPER, NULL, tapetable[0].name, 0, driver_timestamp);
493 }
494
495 flush_ev_read = event_register((event_id_t)0, EV_READFD, read_flush, NULL);
496
497 log_add(L_STATS, _("startup time %s"), walltime_str(curclock()));
498
499 g_printf(_("driver: start time %s inparallel %d bandwidth %lu diskspace %lld "), walltime_str(curclock()), inparallel,
500 free_kps(NULL), (long long)free_space());
501 g_printf(_(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n"),
502 "OBSOLETE", driver_timestamp, taperalgo2str(conf_taperalgo),
503 getconf_str(CNF_DUMPORDER));
504 fflush(stdout);
505
506 schedule_done = nodump;
507 force_flush = 0;
508
509 short_dump_state();
510 event_loop(0);
511
512 force_flush = 1;
513
514 /* mv runq to directq */
515 while (!empty(runq)) {
516 diskp = dequeue_disk(&runq);
517 headqueue_disk(&directq, diskp);
518 }
519
520 run_server_global_scripts(EXECUTE_ON_POST_BACKUP, get_config_name());
521
522 /* log error for any remaining dumps */
523 while(!empty(directq)) {
524 diskp = dequeue_disk(&directq);
525
526 if (diskp->orig_holdingdisk == HOLD_REQUIRED) {
527 char *qname = quote_string(diskp->name);
528 log_add(L_FAIL, "%s %s %s %d [%s]",
529 diskp->host->hostname, qname, sched(diskp)->datestamp,
530 sched(diskp)->level,
531 _("can't dump required holdingdisk"));
532 amfree(qname);
533 }
534 else if (current_tape >= conf_runtapes) {
535 char *qname = quote_string(diskp->name);
536 log_add(L_FAIL, "%s %s %s %d [%d tapes filled; runtapes=%d does not allow additional tapes]",
537 diskp->host->hostname, qname, sched(diskp)->datestamp,
538 sched(diskp)->level,
539 current_tape, conf_runtapes);
540 amfree(qname);
541 }
542 else if (!degraded_mode) {
543 char *qname = quote_string(diskp->name);
544 log_add(L_FAIL, "%s %s %s %d [%s]",
545 diskp->host->hostname, qname, sched(diskp)->datestamp,
546 sched(diskp)->level,
547 _("can't dump in non degraded mode"));
548 amfree(qname);
549 }
550 else {
551 char *qname = quote_string(diskp->name);
552 log_add(L_FAIL, "%s %s %s %d [%s]",
553 diskp->host->hostname, qname, sched(diskp)->datestamp,
554 sched(diskp)->level,
555 num_holdalloc == 0 ?
556 _("can't do degraded dump without holding disk") :
557 diskp->orig_holdingdisk != HOLD_NEVER ?
558 _("out of holding space in degraded mode") :
559 _("can't dump 'holdingdisk never' dle in degraded mode"));
560 amfree(qname);
561 }
562 }
563
564 short_dump_state(); /* for amstatus */
565
566 g_printf(_("driver: QUITTING time %s telling children to quit\n"),
567 walltime_str(curclock()));
568 fflush(stdout);
569
570 if(!nodump) {
571 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
572 if(dumper->fd >= 0)
573 dumper_cmd(dumper, QUIT, NULL, NULL);
574 }
575 }
576
577 if(taper_fd >= 0) {
578 taper_cmd(QUIT, NULL, NULL, 0, NULL);
579 }
580
581 /* wait for all to die */
582 wait_children(600);
583
584 /* cleanup */
585 holding_cleanup(NULL, NULL);
586
587 amfree(newdir);
588
589 check_unfree_serial();
590 g_printf(_("driver: FINISHED time %s\n"), walltime_str(curclock()));
591 fflush(stdout);
592 log_add(L_FINISH,_("date %s time %s"), driver_timestamp, walltime_str(curclock()));
593 log_add(L_INFO, "pid-done %ld", (long)getpid());
594 amfree(driver_timestamp);
595
596 amfree(dumper_program);
597 amfree(taper_program);
598
599 dbclose();
600
601 return 0;
602 }
603
604 /* sleep up to count seconds, and wait for terminating child process */
605 /* if sleep is negative, this function will not timeout */
606 /* exit once all child process are finished or the timout expired */
607 /* return 0 if no more children to wait */
608 /* return 1 if some children are still alive */
609 static int
wait_children(int count)610 wait_children(int count)
611 {
612 pid_t pid;
613 amwait_t retstat;
614 char *who;
615 char *what;
616 int code=0;
617 dumper_t *dumper;
618 int wait_errno;
619
620 do {
621 do {
622 pid = waitpid((pid_t)-1, &retstat, WNOHANG);
623 wait_errno = errno;
624 if (pid > 0) {
625 what = NULL;
626 if (! WIFEXITED(retstat)) {
627 what = _("signal");
628 code = WTERMSIG(retstat);
629 } else if (WEXITSTATUS(retstat) != 0) {
630 what = _("code");
631 code = WEXITSTATUS(retstat);
632 }
633 who = NULL;
634 for (dumper = dmptable; dumper < dmptable + inparallel;
635 dumper++) {
636 if (pid == dumper->pid) {
637 who = stralloc(dumper->name);
638 dumper->pid = -1;
639 break;
640 }
641 if (dumper->chunker && pid == dumper->chunker->pid) {
642 who = stralloc(dumper->chunker->name);
643 dumper->chunker->pid = -1;
644 break;
645 }
646 }
647 if (who == NULL && pid == taper_pid) {
648 who = stralloc("taper");
649 taper_pid = -1;
650 }
651 if(what != NULL && who == NULL) {
652 who = stralloc("unknown");
653 }
654 if(who && what) {
655 log_add(L_WARNING, _("%s pid %u exited with %s %d\n"), who,
656 (unsigned)pid, what, code);
657 g_printf(_("driver: %s pid %u exited with %s %d\n"), who,
658 (unsigned)pid, what, code);
659 }
660 amfree(who);
661 }
662 } while (pid > 0 || wait_errno == EINTR);
663 if (errno != ECHILD)
664 sleep(1);
665 if (count > 0)
666 count--;
667 } while ((errno != ECHILD) && (count != 0));
668 return (errno != ECHILD);
669 }
670
671 static void
kill_children(int signal)672 kill_children(int signal)
673 {
674 dumper_t *dumper;
675
676 if(!nodump) {
677 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
678 if (!dumper->down && dumper->pid > 1) {
679 g_printf(_("driver: sending signal %d to %s pid %u\n"), signal,
680 dumper->name, (unsigned)dumper->pid);
681 if (kill(dumper->pid, signal) == -1 && errno == ESRCH) {
682 if (dumper->chunker)
683 dumper->chunker->pid = 0;
684 }
685 if (dumper->chunker && dumper->chunker->pid > 1) {
686 g_printf(_("driver: sending signal %d to %s pid %u\n"), signal,
687 dumper->chunker->name,
688 (unsigned)dumper->chunker->pid);
689 if (kill(dumper->chunker->pid, signal) == -1 &&
690 errno == ESRCH)
691 dumper->chunker->pid = 0;
692 }
693 }
694 }
695 }
696
697 if(taper_pid > 1) {
698 g_printf(_("driver: sending signal %d to %s pid %u\n"), signal,
699 "taper", (unsigned)taper_pid);
700 if (kill(taper_pid, signal) == -1 && errno == ESRCH)
701 taper_pid = 0;
702 }
703 }
704
705 static void
wait_for_children(void)706 wait_for_children(void)
707 {
708 dumper_t *dumper;
709
710 if(!nodump) {
711 for(dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
712 if (dumper->pid > 1 && dumper->fd >= 0) {
713 dumper_cmd(dumper, QUIT, NULL, NULL);
714 if (dumper->chunker && dumper->chunker->pid > 1 &&
715 dumper->chunker->fd >= 0)
716 chunker_cmd(dumper->chunker, QUIT, NULL, NULL);
717 }
718 }
719 }
720
721 if(taper_pid > 1 && taper_fd > 0) {
722 taper_cmd(QUIT, NULL, NULL, 0, NULL);
723 }
724
725 if(wait_children(60) == 0)
726 return;
727
728 kill_children(SIGHUP);
729 if(wait_children(60) == 0)
730 return;
731
732 kill_children(SIGKILL);
733 if(wait_children(-1) == 0)
734 return;
735
736 }
737
738 static void startaflush_tape(taper_t *taper, gboolean *state_changed);
739
740 static void
startaflush(void)741 startaflush(void)
742 {
743 taper_t *taper;
744 gboolean state_changed = FALSE;
745
746 for(taper = tapetable; taper <= tapetable+conf_taper_parallel_write;
747 taper++) {
748 if (!(taper->state & TAPER_STATE_DONE) &&
749 taper->state & TAPER_STATE_WAIT_FOR_TAPE) {
750 startaflush_tape(taper, &state_changed);
751 }
752 }
753 for(taper = tapetable; taper <= tapetable+conf_taper_parallel_write;
754 taper++) {
755 if (!(taper->state & TAPER_STATE_DONE) &&
756 taper->state & TAPER_STATE_TAPE_REQUESTED) {
757 startaflush_tape(taper, &state_changed);
758 }
759 }
760 for(taper = tapetable; taper <= tapetable+conf_taper_parallel_write;
761 taper++) {
762 if (!(taper->state & TAPER_STATE_DONE) &&
763 taper->state & TAPER_STATE_INIT) {
764 startaflush_tape(taper, &state_changed);
765 }
766 }
767 for(taper = tapetable; taper <= tapetable+conf_taper_parallel_write;
768 taper++) {
769 if (!(taper->state & TAPER_STATE_DONE) &&
770 taper->state & TAPER_STATE_IDLE) {
771 startaflush_tape(taper, &state_changed);
772 }
773 }
774 if (state_changed) {
775 short_dump_state();
776 }
777 }
778
779 static void
startaflush_tape(taper_t * taper,gboolean * state_changed)780 startaflush_tape(
781 taper_t *taper,
782 gboolean *state_changed)
783 {
784 disk_t *dp = NULL;
785 disk_t *fit = NULL;
786 char *datestamp;
787 off_t extra_tapes_size = 0;
788 off_t taper_left;
789 char *qname;
790 TapeAction result_tape_action;
791 char *why_no_new_tape = NULL;
792 taper_t *taper1;
793
794 result_tape_action = tape_action(taper, &why_no_new_tape);
795 if (result_tape_action & TAPE_ACTION_SCAN) {
796 taper->state &= ~TAPER_STATE_TAPE_REQUESTED;
797 taper->state |= TAPER_STATE_WAIT_FOR_TAPE;
798 taper_nb_scan_volume++;
799 taper_cmd(START_SCAN, taper->disk, NULL, 0, NULL);
800 } else if (result_tape_action & TAPE_ACTION_NEW_TAPE) {
801 taper->state &= ~TAPER_STATE_WAIT_FOR_TAPE;
802 taper->state |= TAPER_STATE_WAIT_NEW_TAPE;
803 nb_sent_new_tape++;
804 taper_cmd(NEW_TAPE, taper->disk, NULL, 0, NULL);
805 } else if (result_tape_action & TAPE_ACTION_NO_NEW_TAPE) {
806 taper->state &= ~TAPER_STATE_WAIT_FOR_TAPE;
807 taper_cmd(NO_NEW_TAPE, taper->disk, why_no_new_tape, 0, NULL);
808 taper->state |= TAPER_STATE_DONE;
809 start_degraded_mode(&runq);
810 *state_changed = TRUE;
811 } else if (result_tape_action & TAPE_ACTION_MOVE) {
812 taper_t *taper1 = idle_taper();
813 if (taper1) {
814 taper->state &= ~TAPER_STATE_TAPE_REQUESTED;
815 taper->state &= ~TAPER_STATE_WAIT_FOR_TAPE;
816 taper_cmd(TAKE_SCRIBE_FROM, taper->disk, taper1->name, 0 , NULL);
817 taper->state |= (taper1->state & TAPER_STATE_TAPE_STARTED);
818 taper->left = taper1->left;
819 taper->nb_dle++;
820 taper1->state = TAPER_STATE_DEFAULT;
821 if (last_started_taper == taper1) {
822 last_started_taper = taper;
823 }
824 *state_changed = TRUE;
825 }
826 }
827
828 if (!degraded_mode &&
829 taper->state & TAPER_STATE_IDLE &&
830 !empty(tapeq) &&
831 (result_tape_action & TAPE_ACTION_START_A_FLUSH ||
832 result_tape_action & TAPE_ACTION_START_A_FLUSH_FIT)) {
833
834 int taperalgo = conf_taperalgo;
835 if (result_tape_action & TAPE_ACTION_START_A_FLUSH_FIT) {
836 if (taperalgo == ALGO_FIRST)
837 taperalgo = ALGO_FIRSTFIT;
838 else if (taperalgo == ALGO_LARGEST)
839 taperalgo = ALGO_LARGESTFIT;
840 else if (taperalgo == ALGO_SMALLEST)
841 taperalgo = ALGO_SMALLESTFIT;
842 else if (taperalgo == ALGO_LAST)
843 taperalgo = ALGO_LASTFIT;
844 }
845
846 extra_tapes_size = tape_length * (off_t)(conf_runtapes - current_tape);
847 for (taper1 = tapetable; taper1 < tapetable + conf_taper_parallel_write;
848 taper1++) {
849 if (taper1->state & TAPER_STATE_TAPE_STARTED) {
850 extra_tapes_size += taper1->left;
851 }
852 dp = taper1->disk;
853 if (dp) {
854 extra_tapes_size -= (sched(dp)->act_size - taper1->written);
855 }
856 }
857
858 if (taper->state & TAPER_STATE_TAPE_STARTED) {
859 taper_left = taper->left;
860 } else {
861 taper_left = tape_length;
862 }
863 dp = NULL;
864 datestamp = sched(tapeq.head)->datestamp;
865 switch(taperalgo) {
866 case ALGO_FIRST:
867 dp = dequeue_disk(&tapeq);
868 break;
869 case ALGO_FIRSTFIT:
870 fit = tapeq.head;
871 while (fit != NULL) {
872 if (sched(fit)->act_size <=
873 (fit->splitsize ? extra_tapes_size : taper_left) &&
874 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
875 dp = fit;
876 fit = NULL;
877 }
878 else {
879 fit = fit->next;
880 }
881 }
882 if(dp) remove_disk(&tapeq, dp);
883 break;
884 case ALGO_LARGEST:
885 fit = dp = tapeq.head;
886 while (fit != NULL) {
887 if(sched(fit)->act_size > sched(dp)->act_size &&
888 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
889 dp = fit;
890 }
891 fit = fit->next;
892 }
893 if(dp) remove_disk(&tapeq, dp);
894 break;
895 case ALGO_LARGESTFIT:
896 fit = tapeq.head;
897 while (fit != NULL) {
898 if(sched(fit)->act_size <=
899 (fit->splitsize ? extra_tapes_size : taper_left) &&
900 (!dp || sched(fit)->act_size > sched(dp)->act_size) &&
901 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
902 dp = fit;
903 }
904 fit = fit->next;
905 }
906 if(dp) remove_disk(&tapeq, dp);
907 break;
908 case ALGO_SMALLEST:
909 fit = dp = tapeq.head;
910 while (fit != NULL) {
911 if (sched(fit)->act_size < sched(dp)->act_size &&
912 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
913 dp = fit;
914 }
915 fit = fit->next;
916 }
917 if(dp) remove_disk(&tapeq, dp);
918 break;
919 case ALGO_SMALLESTFIT:
920 fit = dp = tapeq.head;
921 while (fit != NULL) {
922 if (sched(fit)->act_size <=
923 (fit->splitsize ? extra_tapes_size : taper_left) &&
924 (!dp || sched(fit)->act_size < sched(dp)->act_size) &&
925 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
926 dp = fit;
927 }
928 fit = fit->next;
929 }
930 if(dp) remove_disk(&tapeq, dp);
931 break;
932 case ALGO_LAST:
933 dp = tapeq.tail;
934 remove_disk(&tapeq, dp);
935 break;
936 case ALGO_LASTFIT:
937 fit = tapeq.tail;
938 while (fit != NULL) {
939 if (sched(fit)->act_size <=
940 (fit->splitsize ? extra_tapes_size : taper_left) &&
941 (!dp || sched(fit)->act_size < sched(dp)->act_size) &&
942 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
943 dp = fit;
944 }
945 fit = fit->prev;
946 }
947 if(dp) remove_disk(&tapeq, dp);
948 break;
949 }
950 if (!dp) {
951 if (!(result_tape_action & TAPE_ACTION_START_A_FLUSH_FIT)) {
952 if(conf_taperalgo != ALGO_SMALLEST) {
953 g_fprintf(stderr,
954 _("driver: startaflush: Using SMALLEST because nothing fit\n"));
955 }
956
957 fit = dp = tapeq.head;
958 while (fit != NULL) {
959 if (sched(fit)->act_size < sched(dp)->act_size &&
960 strcmp(sched(fit)->datestamp, datestamp) <= 0) {
961 dp = fit;
962 }
963 fit = fit->next;
964 }
965 if(dp) remove_disk(&tapeq, dp);
966 }
967 }
968 if (dp) {
969 taper->disk = dp;
970 taper->dumper = NULL;
971 amfree(taper->input_error);
972 amfree(taper->tape_error);
973 taper->result = LAST_TOK;
974 taper->sendresult = 0;
975 amfree(taper->first_label);
976 taper->written = 0;
977 taper->state &= ~TAPER_STATE_IDLE;
978 taper->state |= TAPER_STATE_FILE_TO_TAPE;
979 taper->dumper = NULL;
980 qname = quote_string(dp->name);
981 if (taper_nb_wait_reply == 0) {
982 taper_ev_read = event_register(taper_fd, EV_READFD,
983 handle_taper_result, NULL);
984 }
985 taper_nb_wait_reply++;
986 taper->nb_dle++;
987 if (!(taper->state & TAPER_STATE_TAPE_STARTED)) {
988 assert(taper_sent_first_write == NULL);
989 taper_sent_first_write = taper;
990 taper->nb_dle = 1;
991 taper->left = tape_length;
992 }
993 sched(dp)->taper = taper;
994 taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
995 sched(dp)->datestamp);
996 g_fprintf(stderr,_("driver: startaflush: %s %s %s %lld %lld\n"),
997 taperalgo2str(taperalgo), dp->host->hostname, qname,
998 (long long)sched(taper->disk)->act_size,
999 (long long)taper->left);
1000 amfree(qname);
1001 *state_changed = TRUE;
1002 }
1003 }
1004 }
1005
1006 static int
client_constrained(disk_t * dp)1007 client_constrained(
1008 disk_t * dp)
1009 {
1010 disk_t *dp2;
1011
1012 /* first, check if host is too busy */
1013
1014 if(dp->host->inprogress >= dp->host->maxdumps) {
1015 return 1;
1016 }
1017
1018 /* next, check conflict with other dumps on same spindle */
1019
1020 if(dp->spindle == -1) { /* but spindle -1 never conflicts by def. */
1021 return 0;
1022 }
1023
1024 for(dp2 = dp->host->disks; dp2 != NULL; dp2 = dp2->hostnext)
1025 if(dp2->inprogress && dp2->spindle == dp->spindle) {
1026 return 1;
1027 }
1028
1029 return 0;
1030 }
1031
1032 static void
allow_dump_dle(disk_t * diskp,taper_t * taper,char dumptype,disklist_t * rq,const time_t now,int dumper_to_holding,int * cur_idle,disk_t ** delayed_diskp,disk_t ** diskp_accept,assignedhd_t *** holdp_accept,off_t extra_tapes_size)1033 allow_dump_dle(
1034 disk_t *diskp,
1035 taper_t *taper,
1036 char dumptype,
1037 disklist_t *rq,
1038 const time_t now,
1039 int dumper_to_holding,
1040 int *cur_idle,
1041 disk_t **delayed_diskp,
1042 disk_t **diskp_accept,
1043 assignedhd_t ***holdp_accept,
1044 off_t extra_tapes_size)
1045 {
1046 assignedhd_t **holdp=NULL;
1047
1048 if (diskp->host->start_t > now) {
1049 *cur_idle = max(*cur_idle, IDLE_START_WAIT);
1050 if (*delayed_diskp == NULL || sleep_time > diskp->host->start_t) {
1051 *delayed_diskp = diskp;
1052 sleep_time = diskp->host->start_t;
1053 }
1054 } else if(diskp->start_t > now) {
1055 *cur_idle = max(*cur_idle, IDLE_START_WAIT);
1056 if (*delayed_diskp == NULL || sleep_time > diskp->start_t) {
1057 *delayed_diskp = diskp;
1058 sleep_time = diskp->start_t;
1059 }
1060 } else if (diskp->host->netif->curusage > 0 &&
1061 sched(diskp)->est_kps > free_kps(diskp->host->netif)) {
1062 *cur_idle = max(*cur_idle, IDLE_NO_BANDWIDTH);
1063 } else if (!taper && sched(diskp)->no_space) {
1064 *cur_idle = max(*cur_idle, IDLE_NO_DISKSPACE);
1065 } else if (!taper && diskp->to_holdingdisk == HOLD_NEVER) {
1066 *cur_idle = max(*cur_idle, IDLE_NO_HOLD);
1067 } else if (extra_tapes_size && sched(diskp)->est_size > extra_tapes_size) {
1068 *cur_idle = max(*cur_idle, IDLE_NO_DISKSPACE);
1069 /* no tape space */
1070 } else if (!taper && (holdp =
1071 find_diskspace(sched(diskp)->est_size, cur_idle, NULL)) == NULL) {
1072 *cur_idle = max(*cur_idle, IDLE_NO_DISKSPACE);
1073 if (empty(tapeq) && dumper_to_holding == 0 && rq != &directq && no_taper_flushing()) {
1074 remove_disk(rq, diskp);
1075 if (diskp->to_holdingdisk == HOLD_REQUIRED) {
1076 char *qname = quote_string(diskp->name);
1077 log_add(L_FAIL, "%s %s %s %d [%s]",
1078 diskp->host->hostname, qname, sched(diskp)->datestamp,
1079 sched(diskp)->level,
1080 _("can't dump required holdingdisk when no holdingdisk space available "));
1081 amfree(qname);
1082 } else {
1083 enqueue_disk(&directq, diskp);
1084 diskp->to_holdingdisk = HOLD_NEVER;
1085 }
1086 if (empty(*rq) && active_dumper() == 0) { force_flush = 1;}
1087 }
1088 } else if (client_constrained(diskp)) {
1089 free_assignedhd(holdp);
1090 *cur_idle = max(*cur_idle, IDLE_CLIENT_CONSTRAINED);
1091 } else {
1092
1093 /* disk fits, dump it */
1094 int accept = !*diskp_accept;
1095 if(!accept) {
1096 switch(dumptype) {
1097 case 's': accept = (sched(diskp)->est_size < sched(*diskp_accept)->est_size);
1098 break;
1099 case 'S': accept = (sched(diskp)->est_size > sched(*diskp_accept)->est_size);
1100 break;
1101 case 't': accept = (sched(diskp)->est_time < sched(*diskp_accept)->est_time);
1102 break;
1103 case 'T': accept = (sched(diskp)->est_time > sched(*diskp_accept)->est_time);
1104 break;
1105 case 'b': accept = (sched(diskp)->est_kps < sched(*diskp_accept)->est_kps);
1106 break;
1107 case 'B': accept = (sched(diskp)->est_kps > sched(*diskp_accept)->est_kps);
1108 break;
1109 default: log_add(L_WARNING, _("Unknown dumporder character \'%c\', using 's'.\n"),
1110 dumptype);
1111 accept = (sched(diskp)->est_size < sched(*diskp_accept)->est_size);
1112 break;
1113 }
1114 }
1115 if(accept) {
1116 if( !*diskp_accept || !degraded_mode || diskp->priority >= (*diskp_accept)->priority) {
1117 if(*holdp_accept) free_assignedhd(*holdp_accept);
1118 *diskp_accept = diskp;
1119 *holdp_accept = holdp;
1120 }
1121 else {
1122 free_assignedhd(holdp);
1123 }
1124 }
1125 else {
1126 free_assignedhd(holdp);
1127 }
1128 }
1129 }
1130
1131 static void
start_some_dumps(disklist_t * rq)1132 start_some_dumps(
1133 disklist_t *rq)
1134 {
1135 const time_t now = time(NULL);
1136 int cur_idle;
1137 disk_t *diskp, *delayed_diskp, *diskp_accept, *diskp_next;
1138 disk_t *dp;
1139 assignedhd_t **holdp=NULL, **holdp_accept;
1140 cmd_t cmd;
1141 int result_argc;
1142 char **result_argv;
1143 chunker_t *chunker;
1144 dumper_t *dumper;
1145 taper_t *taper;
1146 char dumptype;
1147 char *dumporder;
1148 int dumper_to_holding = 0;
1149 gboolean state_changed = FALSE;
1150
1151 /* don't start any actual dumps until the taper is started */
1152 if (!taper_started) return;
1153
1154 idle_reason = IDLE_NO_DUMPERS;
1155 sleep_time = 0;
1156
1157 if(dumpers_ev_time != NULL) {
1158 event_release(dumpers_ev_time);
1159 dumpers_ev_time = NULL;
1160 }
1161
1162 for(dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
1163 if (dumper->busy && dumper->dp->to_holdingdisk != HOLD_NEVER) {
1164 dumper_to_holding++;
1165 }
1166 }
1167 for (dumper = dmptable; dumper < dmptable+inparallel; dumper++) {
1168 gboolean directq_is_empty;
1169
1170 if( dumper->busy || dumper->down) {
1171 continue;
1172 }
1173
1174 if (dumper->ev_read != NULL) {
1175 event_release(dumper->ev_read);
1176 dumper->ev_read = NULL;
1177 }
1178
1179 /*
1180 * A potential problem with starting from the bottom of the dump time
1181 * distribution is that a slave host will have both one of the shortest
1182 * and one of the longest disks, so starting its shortest disk first will
1183 * tie up the host and eliminate its longest disk from consideration the
1184 * first pass through. This could cause a big delay in starting that long
1185 * disk, which could drag out the whole night's dumps.
1186 *
1187 * While starting from the top of the dump time distribution solves the
1188 * above problem, this turns out to be a bad idea, because the big dumps
1189 * will almost certainly pack the holding disk completely, leaving no
1190 * room for even one small dump to start. This ends up shutting out the
1191 * small-end dumpers completely (they stay idle).
1192 *
1193 * The introduction of multiple simultaneous dumps to one host alleviates
1194 * the biggest&smallest dumps problem: both can be started at the
1195 * beginning.
1196 */
1197
1198 diskp_accept = NULL;
1199 holdp_accept = NULL;
1200 delayed_diskp = NULL;
1201
1202 cur_idle = NOT_IDLE;
1203
1204 dumporder = getconf_str(CNF_DUMPORDER);
1205 if(strlen(dumporder) > (size_t)(dumper-dmptable)) {
1206 dumptype = dumporder[dumper-dmptable];
1207 }
1208 else {
1209 if(dumper-dmptable < 3)
1210 dumptype = 't';
1211 else
1212 dumptype = 'T';
1213 }
1214
1215 diskp = NULL;
1216 taper = NULL;
1217 directq_is_empty = empty(directq);
1218 if (!empty(directq)) {
1219 taper = idle_taper();
1220 if (taper) {
1221 TapeAction result_tape_action;
1222 char *why_no_new_tape = NULL;
1223 result_tape_action = tape_action(taper, &why_no_new_tape);
1224 if (result_tape_action & TAPE_ACTION_START_A_FLUSH ||
1225 result_tape_action & TAPE_ACTION_START_A_FLUSH_FIT) {
1226 off_t extra_tapes_size = 0;
1227 taper_t *taper1;
1228
1229 if (result_tape_action & TAPE_ACTION_START_A_FLUSH_FIT) {
1230 extra_tapes_size = tape_length *
1231 (off_t)(conf_runtapes - current_tape);
1232 for (taper1 = tapetable;
1233 taper1 < tapetable + conf_taper_parallel_write;
1234 taper1++) {
1235 if (taper1->state & TAPER_STATE_TAPE_STARTED) {
1236 extra_tapes_size += taper1->left;
1237 }
1238 dp = taper1->disk;
1239 if (dp) {
1240 extra_tapes_size -= (sched(dp)->est_size -
1241 taper1->written);
1242 }
1243 }
1244 }
1245
1246 for (diskp = directq.head; diskp != NULL;
1247 diskp = diskp_next) {
1248 diskp_next = diskp->next;
1249 allow_dump_dle(diskp, taper, dumptype, &directq, now,
1250 dumper_to_holding, &cur_idle,
1251 &delayed_diskp, &diskp_accept,
1252 &holdp_accept, extra_tapes_size);
1253 }
1254 if (diskp_accept) {
1255 diskp = diskp_accept;
1256 holdp = holdp_accept;
1257 } else {
1258 taper = NULL;
1259 }
1260 } else {
1261 taper = NULL;
1262 }
1263 }
1264 }
1265
1266 if (diskp == NULL) {
1267 for(diskp = rq->head; diskp != NULL; diskp = diskp_next) {
1268 diskp_next = diskp->next;
1269 assert(diskp->host != NULL && sched(diskp) != NULL);
1270
1271 allow_dump_dle(diskp, NULL, dumptype, rq, now,
1272 dumper_to_holding, &cur_idle, &delayed_diskp,
1273 &diskp_accept, &holdp_accept, 0);
1274 }
1275 diskp = diskp_accept;
1276 holdp = holdp_accept;
1277 }
1278
1279 /* Redo with same dumper if a diskp was moved to directq */
1280 if (diskp == NULL && directq_is_empty && !empty(directq)) {
1281 dumper--;
1282 continue;
1283 }
1284
1285 idle_reason = max(idle_reason, cur_idle);
1286 if (diskp == NULL && idle_reason == IDLE_NO_DISKSPACE) {
1287 /* continue flush waiting for new tape */
1288 startaflush();
1289 }
1290
1291 /*
1292 * If we have no disk at this point, and there are disks that
1293 * are delayed, then schedule a time event to call this dumper
1294 * with the disk with the shortest delay.
1295 */
1296 if (diskp == NULL && delayed_diskp != NULL) {
1297 assert(sleep_time > now);
1298 sleep_time -= now;
1299 dumpers_ev_time = event_register((event_id_t)sleep_time, EV_TIME,
1300 handle_dumpers_time, &runq);
1301 return;
1302 } else if (diskp != NULL && taper == NULL) {
1303 sched(diskp)->act_size = (off_t)0;
1304 allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
1305 sched(diskp)->activehd = assign_holdingdisk(holdp, diskp);
1306 amfree(holdp);
1307 sched(diskp)->destname = newstralloc(sched(diskp)->destname,
1308 sched(diskp)->holdp[0]->destname);
1309 diskp->host->inprogress++; /* host is now busy */
1310 diskp->inprogress = 1;
1311 sched(diskp)->dumper = dumper;
1312 sched(diskp)->timestamp = now;
1313 amfree(diskp->dataport_list);
1314
1315 dumper->busy = 1; /* dumper is now busy */
1316 dumper->dp = diskp; /* link disk to dumper */
1317 remove_disk(rq, diskp); /* take it off the run queue */
1318
1319 sched(diskp)->origsize = (off_t)-1;
1320 sched(diskp)->dumpsize = (off_t)-1;
1321 sched(diskp)->dumptime = (time_t)0;
1322 sched(diskp)->tapetime = (time_t)0;
1323 chunker = dumper->chunker = &chktable[dumper - dmptable];
1324 chunker->result = LAST_TOK;
1325 dumper->result = LAST_TOK;
1326 startup_chunk_process(chunker,chunker_program);
1327 chunker_cmd(chunker, START, NULL, driver_timestamp);
1328 chunker->dumper = dumper;
1329 chunker_cmd(chunker, PORT_WRITE, diskp, NULL);
1330 cmd = getresult(chunker->fd, 1, &result_argc, &result_argv);
1331 if(cmd != PORT) {
1332 assignedhd_t **h=NULL;
1333 int activehd;
1334 char *qname = quote_string(diskp->name);
1335
1336 g_printf(_("driver: did not get PORT from %s for %s:%s\n"),
1337 chunker->name, diskp->host->hostname, qname);
1338 amfree(qname);
1339 fflush(stdout);
1340
1341 deallocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
1342 h = sched(diskp)->holdp;
1343 activehd = sched(diskp)->activehd;
1344 h[activehd]->used = 0;
1345 h[activehd]->disk->allocated_dumpers--;
1346 adjust_diskspace(diskp, DONE);
1347 delete_diskspace(diskp);
1348 diskp->host->inprogress--;
1349 diskp->inprogress = 0;
1350 sched(diskp)->dumper = NULL;
1351 dumper->busy = 0;
1352 dumper->dp = NULL;
1353 sched(diskp)->dump_attempted++;
1354 free_serial_dp(diskp);
1355 if(sched(diskp)->dump_attempted < 2)
1356 enqueue_disk(rq, diskp);
1357 }
1358 else {
1359 dumper->ev_read = event_register((event_id_t)dumper->fd, EV_READFD,
1360 handle_dumper_result, dumper);
1361 chunker->ev_read = event_register((event_id_t)chunker->fd, EV_READFD,
1362 handle_chunker_result, chunker);
1363 dumper->output_port = atoi(result_argv[1]);
1364 amfree(diskp->dataport_list);
1365 diskp->dataport_list = stralloc(result_argv[2]);
1366
1367 if (diskp->host->pre_script == 0) {
1368 run_server_host_scripts(EXECUTE_ON_PRE_HOST_BACKUP,
1369 get_config_name(), diskp->host);
1370 diskp->host->pre_script = 1;
1371 }
1372 run_server_dle_scripts(EXECUTE_ON_PRE_DLE_BACKUP,
1373 get_config_name(), diskp,
1374 sched(diskp)->level);
1375 dumper_cmd(dumper, PORT_DUMP, diskp, NULL);
1376 }
1377 diskp->host->start_t = now + 5;
1378 if (empty(*rq) && active_dumper() == 0) { force_flush = 1;}
1379
1380 if (result_argv)
1381 g_strfreev(result_argv);
1382 short_dump_state();
1383 } else if (diskp != NULL && taper != NULL) { /* dump to tape */
1384 sched(diskp)->act_size = (off_t)0;
1385 allocate_bandwidth(diskp->host->netif, sched(diskp)->est_kps);
1386 diskp->host->inprogress++; /* host is now busy */
1387 diskp->inprogress = 1;
1388 sched(diskp)->dumper = dumper;
1389 sched(diskp)->taper = taper;
1390 sched(diskp)->timestamp = now;
1391 dumper->chunker = NULL;
1392 amfree(diskp->dataport_list);
1393
1394 dumper->busy = 1; /* dumper is now busy */
1395 dumper->dp = diskp; /* link disk to dumper */
1396 remove_disk(&directq, diskp); /* take it off the direct queue */
1397
1398 sched(diskp)->origsize = (off_t)-1;
1399 sched(diskp)->dumpsize = (off_t)-1;
1400 sched(diskp)->dumptime = (time_t)0;
1401 sched(diskp)->tapetime = (time_t)0;
1402 dumper->result = LAST_TOK;
1403 taper->result = LAST_TOK;
1404 taper->input_error = NULL;
1405 taper->tape_error = NULL;
1406 taper->disk = diskp;
1407 taper->first_label = NULL;
1408 taper->written = 0;
1409 taper->dumper = dumper;
1410 taper->state |= TAPER_STATE_DUMP_TO_TAPE;
1411 taper->state &= ~TAPER_STATE_IDLE;
1412 taper->nb_dle++;
1413 if (!(taper->state & TAPER_STATE_TAPE_STARTED)) {
1414 assert(taper_sent_first_write == NULL);
1415 taper_sent_first_write = taper;
1416 taper->nb_dle = 1;
1417 taper->left = tape_length;
1418 }
1419 if (taper_nb_wait_reply == 0) {
1420 taper_ev_read = event_register(taper_fd, EV_READFD,
1421 handle_taper_result, NULL);
1422 }
1423
1424 taper_nb_wait_reply++;
1425 taper_cmd(PORT_WRITE, diskp, NULL, sched(diskp)->level,
1426 sched(diskp)->datestamp);
1427 taper->ready = FALSE;
1428 diskp->host->start_t = now + 5;
1429
1430 state_changed = TRUE;
1431 }
1432 }
1433 if (state_changed) {
1434 short_dump_state();
1435 }
1436 }
1437
1438 /*
1439 * This gets called when a dumper is delayed for some reason. It may
1440 * be because a disk has a delayed start, or amanda is constrained
1441 * by network or disk limits.
1442 */
1443
1444 static void
handle_dumpers_time(void * cookie)1445 handle_dumpers_time(
1446 void * cookie)
1447 {
1448 disklist_t *runq = cookie;
1449 event_release(dumpers_ev_time);
1450 dumpers_ev_time = NULL;
1451 start_some_dumps(runq);
1452 }
1453
1454 static void
dump_schedule(disklist_t * qp,char * str)1455 dump_schedule(
1456 disklist_t *qp,
1457 char * str)
1458 {
1459 disk_t *dp;
1460 char *qname;
1461
1462 g_printf(_("dump of driver schedule %s:\n--------\n"), str);
1463
1464 for(dp = qp->head; dp != NULL; dp = dp->next) {
1465 qname = quote_string(dp->name);
1466 g_printf(" %-20s %-25s lv %d t %5lu s %lld p %d\n",
1467 dp->host->hostname, qname, sched(dp)->level,
1468 sched(dp)->est_time,
1469 (long long)sched(dp)->est_size, sched(dp)->priority);
1470 amfree(qname);
1471 }
1472 g_printf("--------\n");
1473 }
1474
1475 static void
start_degraded_mode(disklist_t * queuep)1476 start_degraded_mode(
1477 /*@keep@*/ disklist_t *queuep)
1478 {
1479 disk_t *dp;
1480 disklist_t newq;
1481 off_t est_full_size;
1482 char *qname;
1483 taper_t *taper;
1484
1485 if (need_degraded == 0) {
1486 for(taper = tapetable;
1487 taper < tapetable+conf_taper_parallel_write;
1488 taper++) {
1489 if (!(taper->state & TAPER_STATE_DONE))
1490 return;
1491 }
1492 need_degraded = 1;
1493 }
1494
1495 if (!schedule_done || degraded_mode) {
1496 return;
1497 }
1498
1499 if (need_degraded == 0) {
1500 for(taper = tapetable;
1501 taper < tapetable+conf_taper_parallel_write;
1502 taper++) {
1503 if (!(taper->state & TAPER_STATE_DONE))
1504 return;
1505 }
1506 need_degraded = 1;
1507 }
1508
1509 newq.head = newq.tail = 0;
1510
1511 dump_schedule(queuep, _("before start degraded mode"));
1512
1513 est_full_size = (off_t)0;
1514 while(!empty(*queuep)) {
1515 dp = dequeue_disk(queuep);
1516
1517 qname = quote_string(dp->name);
1518 if(sched(dp)->level != 0)
1519 /* go ahead and do the disk as-is */
1520 enqueue_disk(&newq, dp);
1521 else {
1522 if (reserved_space + est_full_size + sched(dp)->est_size
1523 <= total_disksize) {
1524 enqueue_disk(&newq, dp);
1525 est_full_size += sched(dp)->est_size;
1526 }
1527 else if(sched(dp)->degr_level != -1) {
1528 sched(dp)->level = sched(dp)->degr_level;
1529 sched(dp)->dumpdate = sched(dp)->degr_dumpdate;
1530 sched(dp)->est_nsize = sched(dp)->degr_nsize;
1531 sched(dp)->est_csize = sched(dp)->degr_csize;
1532 sched(dp)->est_time = sched(dp)->degr_time;
1533 sched(dp)->est_kps = sched(dp)->degr_kps;
1534 enqueue_disk(&newq, dp);
1535 }
1536 else {
1537 log_add(L_FAIL, "%s %s %s %d [%s]",
1538 dp->host->hostname, qname, sched(dp)->datestamp,
1539 sched(dp)->level, sched(dp)->degr_mesg);
1540 }
1541 }
1542 amfree(qname);
1543 }
1544
1545 /*@i@*/ *queuep = newq;
1546 degraded_mode = 1;
1547
1548 dump_schedule(queuep, _("after start degraded mode"));
1549 }
1550
1551
1552 static void
continue_port_dumps(void)1553 continue_port_dumps(void)
1554 {
1555 disk_t *dp, *ndp;
1556 assignedhd_t **h;
1557 int active_dumpers=0, busy_dumpers=0, i;
1558 dumper_t *dumper;
1559
1560 /* First we try to grant diskspace to some dumps waiting for it. */
1561 for( dp = roomq.head; dp; dp = ndp ) {
1562 ndp = dp->next;
1563 /* find last holdingdisk used by this dump */
1564 for( i = 0, h = sched(dp)->holdp; h[i+1]; i++ ) {
1565 (void)h; /* Quiet lint */
1566 }
1567 /* find more space */
1568 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
1569 &active_dumpers, h[i] );
1570 if( h ) {
1571 for(dumper = dmptable; dumper < dmptable + inparallel &&
1572 dumper->dp != dp; dumper++) {
1573 (void)dp; /* Quiet lint */
1574 }
1575 assert( dumper < dmptable + inparallel );
1576 sched(dp)->activehd = assign_holdingdisk( h, dp );
1577 chunker_cmd( dumper->chunker, CONTINUE, dp, NULL );
1578 amfree(h);
1579 remove_disk( &roomq, dp );
1580 }
1581 }
1582
1583 /* So for some disks there is less holding diskspace available than
1584 * was asked for. Possible reasons are
1585 * a) diskspace has been allocated for other dumps which are
1586 * still running or already being written to tape
1587 * b) all other dumps have been suspended due to lack of diskspace
1588 * Case a) is not a problem. We just wait for the diskspace to
1589 * be freed by moving the current disk to a queue.
1590 * If case b) occurs, we have a deadlock situation. We select
1591 * a dump from the queue to be aborted and abort it. It will
1592 * be retried directly to tape.
1593 */
1594 for(dp=NULL, dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
1595 if( dumper->busy ) {
1596 busy_dumpers++;
1597 if( !find_disk(&roomq, dumper->dp) ) {
1598 if (dumper->chunker) {
1599 active_dumpers++;
1600 }
1601 } else if( !dp ||
1602 sched(dp)->est_size > sched(dumper->dp)->est_size ) {
1603 dp = dumper->dp;
1604 }
1605 }
1606 }
1607 if((dp != NULL) && (active_dumpers == 0) && (busy_dumpers > 0) &&
1608 ((no_taper_flushing() && empty(tapeq)) || degraded_mode) &&
1609 pending_aborts == 0 ) { /* case b */
1610 sched(dp)->no_space = 1;
1611 /* At this time, dp points to the dump with the smallest est_size.
1612 * We abort that dump, hopefully not wasting too much time retrying it.
1613 */
1614 remove_disk( &roomq, dp );
1615 chunker_cmd(sched(dp)->dumper->chunker, ABORT, NULL, _("Not enough holding disk space"));
1616 dumper_cmd( sched(dp)->dumper, ABORT, NULL, _("Not enough holding disk space"));
1617 pending_aborts++;
1618 }
1619 }
1620
1621
1622 static void
handle_taper_result(void * cookie G_GNUC_UNUSED)1623 handle_taper_result(
1624 void *cookie G_GNUC_UNUSED)
1625 {
1626 disk_t *dp = NULL;
1627 dumper_t *dumper;
1628 cmd_t cmd;
1629 int result_argc;
1630 char **result_argv;
1631 char *qname, *q;
1632 char *s;
1633 taper_t *taper = NULL;
1634 taper_t *taper1;
1635 int i;
1636 off_t partsize;
1637
1638 assert(cookie == NULL);
1639
1640 do {
1641
1642 short_dump_state();
1643 taper = NULL;
1644
1645 cmd = getresult(taper_fd, 1, &result_argc, &result_argv);
1646
1647 switch(cmd) {
1648
1649 case TAPER_OK:
1650 if(result_argc != 3) {
1651 error(_("error: [taper FAILED result_argc != 3: %d"), result_argc);
1652 /*NOTREACHED*/
1653 }
1654
1655 taper = NULL;
1656 taper_started = 1;
1657 for (i=0; i < conf_taper_parallel_write; i++) {
1658 if (strcmp(tapetable[i].name, result_argv[1]) == 0) {
1659 taper= &tapetable[i];
1660 }
1661 }
1662 assert(taper != NULL);
1663 taper->left = 0;
1664 taper->nb_dle = 0;
1665 taper->state &= ~TAPER_STATE_INIT;
1666 taper->state |= TAPER_STATE_RESERVATION;
1667 taper->state |= TAPER_STATE_IDLE;
1668 if (g_str_equal(result_argv[2], "ALLOW-TAKE-SCRIBE-FROM")) {
1669 taper->allow_take_scribe_from = TRUE;
1670 } else {
1671 taper->allow_take_scribe_from = FALSE;
1672 }
1673 amfree(taper->first_label);
1674 taper_nb_wait_reply--;
1675 taper_nb_scan_volume--;
1676 last_started_taper = taper;
1677 if (taper_nb_wait_reply == 0) {
1678 event_release(taper_ev_read);
1679 taper_ev_read = NULL;
1680 }
1681 start_some_dumps(&runq);
1682 startaflush();
1683 break;
1684
1685 case FAILED: /* FAILED <handle> INPUT-* TAPE-* <input err mesg> <tape err mesg> */
1686 if(result_argc != 6) {
1687 error(_("error: [taper FAILED result_argc != 6: %d"), result_argc);
1688 /*NOTREACHED*/
1689 }
1690
1691 dp = serial2disk(result_argv[1]);
1692 taper = sched(dp)->taper;
1693 assert(dp == taper->disk);
1694 if (!taper->dumper)
1695 free_serial(result_argv[1]);
1696
1697 qname = quote_string(dp->name);
1698 g_printf(_("driver: finished-cmd time %s taper wrote %s:%s\n"),
1699 walltime_str(curclock()), dp->host->hostname, qname);
1700 fflush(stdout);
1701
1702 taper->result = cmd;
1703 if (taper->dumper && !dp->dataport_list) {
1704 taper->dumper->result = FAILED;
1705 }
1706
1707
1708 if (strcmp(result_argv[2], "INPUT-ERROR") == 0) {
1709 taper->input_error = newstralloc(taper->input_error, result_argv[4]);
1710 amfree(qname);
1711 break;
1712 } else if (strcmp(result_argv[2], "INPUT-GOOD") != 0) {
1713 taper->tape_error = newstralloc(taper->tape_error,
1714 _("Taper protocol error"));
1715 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1716 dp->host->hostname, qname, sched(dp)->datestamp,
1717 sched(dp)->level, taper->tape_error);
1718 amfree(qname);
1719 break;
1720 }
1721 if (strcmp(result_argv[3], "TAPE-ERROR") == 0 ||
1722 strcmp(result_argv[3], "TAPE-CONFIG") == 0) {
1723 taper->state &= ~TAPER_STATE_TAPE_STARTED;
1724 taper->tape_error = newstralloc(taper->tape_error, result_argv[5]);
1725 amfree(qname);
1726 break;
1727 } else if (strcmp(result_argv[3], "TAPE-GOOD") != 0) {
1728 taper->state &= ~TAPER_STATE_TAPE_STARTED;
1729 taper->tape_error = newstralloc(taper->tape_error,
1730 _("Taper protocol error"));
1731 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1732 dp->host->hostname, qname, sched(dp)->datestamp,
1733 sched(dp)->level, taper->tape_error);
1734 amfree(qname);
1735 break;
1736 }
1737
1738 amfree(qname);
1739
1740 break;
1741
1742 case READY: /* READY <handle> */
1743 dp = serial2disk(result_argv[1]);
1744 taper = sched(dp)->taper;
1745 taper->ready = TRUE;
1746
1747 assert(dp == taper->disk);
1748
1749 if (taper->dumper &&
1750 taper->dumper->result != LAST_TOK) {
1751 if( taper->dumper->result == DONE) {
1752 taper_cmd(DONE, dp, NULL, 0, NULL);
1753 } else {
1754 taper_cmd(FAILED, dp, NULL, 0, NULL);
1755 }
1756 }
1757 break;
1758
1759 case PARTIAL: /* PARTIAL <handle> INPUT-* TAPE-* <stat mess> <input err mesg> <tape err mesg>*/
1760 case DONE: /* DONE <handle> INPUT-GOOD TAPE-GOOD <stat mess> <input err mesg> <tape err mesg> */
1761 if(result_argc != 7) {
1762 error(_("error: [taper PARTIAL result_argc != 7: %d"), result_argc);
1763 /*NOTREACHED*/
1764 }
1765
1766 dp = serial2disk(result_argv[1]);
1767 taper = sched(dp)->taper;
1768 assert(dp == taper->disk);
1769 if (!taper->dumper)
1770 free_serial(result_argv[1]);
1771
1772 qname = quote_string(dp->name);
1773 g_printf(_("driver: finished-cmd time %s taper wrote %s:%s\n"),
1774 walltime_str(curclock()), dp->host->hostname, qname);
1775 fflush(stdout);
1776
1777 if (strcmp(result_argv[2], "INPUT-ERROR") == 0) {
1778 taper->input_error = newstralloc(taper->input_error, result_argv[5]);
1779 taper->result = FAILED;
1780 amfree(qname);
1781 break;
1782 } else if (strcmp(result_argv[2], "INPUT-GOOD") != 0) {
1783 taper->tape_error = newstralloc(taper->tape_error,
1784 _("Taper protocol error"));
1785 taper->result = FAILED;
1786 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1787 dp->host->hostname, qname, sched(dp)->datestamp,
1788 sched(dp)->level, taper->tape_error);
1789 amfree(qname);
1790 break;
1791 }
1792 if (strcmp(result_argv[3], "TAPE-ERROR") == 0 ||
1793 strcmp(result_argv[3], "TAPE-CONFIG") == 0) {
1794 taper->state &= ~TAPER_STATE_TAPE_STARTED;
1795 taper->tape_error = newstralloc(taper->tape_error, result_argv[6]);
1796 taper->result = FAILED;
1797 amfree(qname);
1798 break;
1799 } else if (strcmp(result_argv[3], "TAPE-GOOD") != 0) {
1800 taper->state &= ~TAPER_STATE_TAPE_STARTED;
1801 taper->tape_error = newstralloc(taper->tape_error,
1802 _("Taper protocol error"));
1803 taper->result = FAILED;
1804 log_add(L_FAIL, _("%s %s %s %d [%s]"),
1805 dp->host->hostname, qname, sched(dp)->datestamp,
1806 sched(dp)->level, taper->tape_error);
1807 amfree(qname);
1808 break;
1809 }
1810
1811 s = strstr(result_argv[4], " kb ");
1812 if (s) {
1813 s += 4;
1814 sched(dp)->dumpsize = OFF_T_ATOI(s);
1815 } else {
1816 s = strstr(result_argv[4], " bytes ");
1817 if (s) {
1818 s += 7;
1819 sched(dp)->dumpsize = OFF_T_ATOI(s)/1024;
1820 }
1821 }
1822
1823 taper->result = cmd;
1824 amfree(qname);
1825
1826 break;
1827
1828 case PARTDONE: /* PARTDONE <handle> <label> <fileno> <kbytes> <stat> */
1829 dp = serial2disk(result_argv[1]);
1830 taper = sched(dp)->taper;
1831 assert(dp == taper->disk);
1832 if (result_argc != 6) {
1833 error(_("error [taper PARTDONE result_argc != 6: %d]"),
1834 result_argc);
1835 /*NOTREACHED*/
1836 }
1837 if (!taper->first_label) {
1838 amfree(taper->first_label);
1839 taper->first_label = stralloc(result_argv[2]);
1840 taper->first_fileno = OFF_T_ATOI(result_argv[3]);
1841 }
1842 taper->written += OFF_T_ATOI(result_argv[4]);
1843 if (taper->written > sched(taper->disk)->act_size)
1844 sched(taper->disk)->act_size = taper->written;
1845
1846 partsize = 0;
1847 s = strstr(result_argv[5], " kb ");
1848 if (s) {
1849 s += 4;
1850 partsize = OFF_T_ATOI(s);
1851 } else {
1852 s = strstr(result_argv[5], " bytes ");
1853 if (s) {
1854 s += 7;
1855 partsize = OFF_T_ATOI(s)/1024;
1856 }
1857 }
1858 taper->left -= partsize;
1859
1860 break;
1861
1862 case REQUEST_NEW_TAPE: /* REQUEST-NEW-TAPE <handle> */
1863 if (result_argc != 2) {
1864 error(_("error [taper REQUEST_NEW_TAPE result_argc != 2: %d]"),
1865 result_argc);
1866 /*NOTREACHED*/
1867 }
1868
1869 dp = serial2disk(result_argv[1]);
1870 taper = sched(dp)->taper;
1871 taper->left = 0;
1872 if (taper->state & TAPER_STATE_DONE) {
1873 taper_cmd(NO_NEW_TAPE, taper->disk, "taper found no tape", 0, NULL);
1874 } else {
1875 taper->state &= ~TAPER_STATE_TAPE_STARTED;
1876 taper->state |= TAPER_STATE_TAPE_REQUESTED;
1877
1878 start_some_dumps(&runq);
1879 startaflush();
1880 }
1881 break;
1882
1883 case NEW_TAPE: /* NEW-TAPE <handle> <label> */
1884 if (result_argc != 3) {
1885 error(_("error [taper NEW_TAPE result_argc != 3: %d]"),
1886 result_argc);
1887 /*NOTREACHED*/
1888 }
1889
1890 nb_sent_new_tape--;
1891 taper_nb_scan_volume--;
1892 dp = serial2disk(result_argv[1]);
1893 taper = sched(dp)->taper;
1894 /* Update our tape counter and reset taper->left */
1895 current_tape++;
1896 taper->nb_dle = 1;
1897 taper->left = tape_length;
1898 taper->state &= ~TAPER_STATE_WAIT_NEW_TAPE;
1899 taper->state |= TAPER_STATE_TAPE_STARTED;
1900 last_started_taper = NULL;
1901 if (taper_sent_first_write == taper) {
1902 taper_sent_first_write = NULL;
1903 }
1904
1905 /* start a new worker */
1906 for (i = 0; i < conf_taper_parallel_write ; i++) {
1907 taper1 = &tapetable[i];
1908 if (need_degraded == 0 &&
1909 taper1->state == TAPER_STATE_DEFAULT) {
1910 taper1->state = TAPER_STATE_INIT;
1911 if (taper_nb_wait_reply == 0) {
1912 taper_ev_read = event_register(taper_fd, EV_READFD,
1913 handle_taper_result, NULL);
1914 }
1915 taper_nb_wait_reply++;
1916 taper_nb_scan_volume++;
1917 taper_cmd(START_TAPER, NULL, taper1->name, 0,
1918 driver_timestamp);
1919 break;
1920 }
1921 }
1922 break;
1923
1924 case NO_NEW_TAPE: /* NO-NEW-TAPE <handle> */
1925 if (result_argc != 2) {
1926 error(_("error [taper NO_NEW_TAPE result_argc != 2: %d]"),
1927 result_argc);
1928 /*NOTREACHED*/
1929 }
1930 nb_sent_new_tape--;
1931 taper_nb_scan_volume--;
1932 dp = serial2disk(result_argv[1]);
1933 taper = sched(dp)->taper;
1934 taper->state |= TAPER_STATE_DONE;
1935 last_started_taper = NULL;
1936 if (taper_sent_first_write == taper) {
1937 taper_sent_first_write = NULL;
1938 }
1939 start_degraded_mode(&runq);
1940 break;
1941
1942 case DUMPER_STATUS: /* DUMPER-STATUS <handle> */
1943 if (result_argc != 2) {
1944 error(_("error [taper DUMPER_STATUS result_argc != 2: %d]"),
1945 result_argc);
1946 /*NOTREACHED*/
1947 }
1948 dp = serial2disk(result_argv[1]);
1949 taper = sched(dp)->taper;
1950 if (taper->dumper->result == LAST_TOK) {
1951 taper->sendresult = 1;
1952 } else {
1953 if( taper->dumper->result == DONE) {
1954 taper_cmd(DONE, dp, NULL, 0, NULL);
1955 } else {
1956 taper_cmd(FAILED, dp, NULL, 0, NULL);
1957 }
1958 }
1959 break;
1960
1961 case TAPE_ERROR: /* TAPE-ERROR <name> <err mess> */
1962 taper_started = 1;
1963 if (strcmp(result_argv[1], "SETUP") == 0) {
1964 taper_nb_wait_reply = 0;
1965 taper_nb_scan_volume = 0;
1966 need_degraded = 1;
1967 } else {
1968 taper = taper_from_name(result_argv[1]);
1969 taper->state = TAPER_STATE_DONE;
1970 fflush(stdout);
1971 q = quote_string(result_argv[2]);
1972 log_add(L_WARNING, _("Taper error: %s"), q);
1973 amfree(q);
1974 if (taper) {
1975 taper->tape_error = newstralloc(taper->tape_error,
1976 result_argv[2]);
1977 }
1978
1979 taper_nb_wait_reply--;
1980 taper_nb_scan_volume--;
1981 }
1982 if (taper_nb_wait_reply == 0) {
1983 need_degraded = 1;
1984 event_release(taper_ev_read);
1985 taper_ev_read = NULL;
1986 }
1987 start_degraded_mode(&runq);
1988 start_some_dumps(&runq);
1989 break;
1990
1991 case PORT: /* PORT <name> <handle> <port> <dataport_list> */
1992 dp = serial2disk(result_argv[2]);
1993 taper = sched(dp)->taper;
1994 dumper = sched(dp)->dumper;
1995 dumper->output_port = atoi(result_argv[3]);
1996 amfree(dp->dataport_list);
1997 dp->dataport_list = stralloc(result_argv[4]);
1998
1999 amfree(taper->input_error);
2000 amfree(taper->tape_error);
2001 amfree(taper->first_label);
2002 taper->written = 0;
2003 taper->state |= TAPER_STATE_DUMP_TO_TAPE;
2004
2005 if (dp->host->pre_script == 0) {
2006 run_server_host_scripts(EXECUTE_ON_PRE_HOST_BACKUP,
2007 get_config_name(), dp->host);
2008 dp->host->pre_script = 1;
2009 }
2010 run_server_dle_scripts(EXECUTE_ON_PRE_DLE_BACKUP,
2011 get_config_name(), dp,
2012 sched(dp)->level);
2013 /* tell the dumper to dump to a port */
2014 dumper_cmd(dumper, PORT_DUMP, dp, NULL);
2015 dp->host->start_t = time(NULL) + 5;
2016 amfree(dp->dataport_list);
2017
2018 taper->state |= TAPER_STATE_DUMP_TO_TAPE;
2019
2020 dumper->ev_read = event_register(dumper->fd, EV_READFD,
2021 handle_dumper_result, dumper);
2022 break;
2023
2024 case BOGUS:
2025 log_add(L_WARNING, _("Taper protocol error"));
2026 /*
2027 * Since we received a taper error, we can't send anything more
2028 * to the taper. Go into degraded mode to try to get everthing
2029 * onto disk. Later, these dumps can be flushed to a new tape.
2030 * The tape queue is zapped so that it appears empty in future
2031 * checks. If there are dumps waiting for diskspace to be freed,
2032 * cancel one.
2033 */
2034 taper_started = 1;
2035 if(!nodump) {
2036 log_add(L_WARNING,
2037 _("going into degraded mode because of taper component error."));
2038 }
2039
2040 for (taper = tapetable;
2041 taper < tapetable + conf_taper_parallel_write;
2042 taper++) {
2043 if (taper && taper->disk) {
2044 taper->tape_error = newstralloc(taper->tape_error,"BOGUS");
2045 taper->result = cmd;
2046 if (taper->dumper) {
2047 if (taper->dumper->result != LAST_TOK) {
2048 // Dumper already returned it's result
2049 dumper_taper_result(taper->disk);
2050 }
2051 } else {
2052 file_taper_result(taper->disk);
2053 }
2054 }
2055 }
2056 taper = NULL;
2057
2058 if(taper_ev_read != NULL) {
2059 event_release(taper_ev_read);
2060 taper_ev_read = NULL;
2061 taper_nb_wait_reply = 0;
2062 }
2063 need_degraded = 1;
2064 start_degraded_mode(&runq);
2065 tapeq.head = tapeq.tail = NULL;
2066 aclose(taper_fd);
2067
2068 break;
2069
2070 default:
2071 error(_("driver received unexpected token (%s) from taper"),
2072 cmdstr[cmd]);
2073 /*NOTREACHED*/
2074 }
2075
2076 g_strfreev(result_argv);
2077
2078 if (taper && taper->disk && taper->result != LAST_TOK) {
2079 if (taper->nb_dle >= conf_max_dle_by_volume) {
2080 taper_cmd(CLOSE_VOLUME, dp, NULL, 0, NULL);
2081 taper->state &= ~TAPER_STATE_TAPE_STARTED;
2082 }
2083 if(taper->dumper) {
2084 if (taper->dumper->result != LAST_TOK) {
2085 // Dumper already returned it's result
2086 dumper_taper_result(taper->disk);
2087 }
2088 } else {
2089 file_taper_result(taper->disk);
2090 }
2091 }
2092
2093 } while(areads_dataready(taper_fd));
2094 start_some_dumps(&runq);
2095 startaflush();
2096 }
2097
2098
2099 static void
file_taper_result(disk_t * dp)2100 file_taper_result(
2101 disk_t *dp)
2102 {
2103 taper_t *taper;
2104 char *qname = quote_string(dp->name);
2105
2106 taper = sched(dp)->taper;
2107 if (taper->result == DONE) {
2108 update_info_taper(dp, taper->first_label, taper->first_fileno,
2109 sched(dp)->level);
2110 }
2111
2112 sched(dp)->taper_attempted += 1;
2113
2114 if (taper->input_error) {
2115 g_printf("driver: taper failed %s %s: %s\n",
2116 dp->host->hostname, qname, taper->input_error);
2117 if (strcmp(sched(dp)->datestamp, driver_timestamp) == 0) {
2118 if(sched(dp)->taper_attempted >= 2) {
2119 log_add(L_FAIL, _("%s %s %s %d [too many taper retries after holding disk error: %s]"),
2120 dp->host->hostname, qname, sched(dp)->datestamp,
2121 sched(dp)->level, taper->input_error);
2122 g_printf("driver: taper failed %s %s, too many taper retry after holding disk error\n",
2123 dp->host->hostname, qname);
2124 amfree(sched(dp)->destname);
2125 amfree(sched(dp)->dumpdate);
2126 amfree(sched(dp)->degr_dumpdate);
2127 amfree(sched(dp)->degr_mesg);
2128 amfree(sched(dp)->datestamp);
2129 amfree(dp->up);
2130 } else {
2131 log_add(L_INFO, _("%s %s %s %d [Will retry dump because of holding disk error: %s]"),
2132 dp->host->hostname, qname, sched(dp)->datestamp,
2133 sched(dp)->level, taper->input_error);
2134 g_printf("driver: taper will retry %s %s because of holding disk error\n",
2135 dp->host->hostname, qname);
2136 if (dp->to_holdingdisk != HOLD_REQUIRED) {
2137 dp->to_holdingdisk = HOLD_NEVER;
2138 sched(dp)->dump_attempted -= 1;
2139 headqueue_disk(&directq, dp);
2140 } else {
2141 amfree(sched(dp)->destname);
2142 amfree(sched(dp)->dumpdate);
2143 amfree(sched(dp)->degr_dumpdate);
2144 amfree(sched(dp)->degr_mesg);
2145 amfree(sched(dp)->datestamp);
2146 amfree(dp->up);
2147 }
2148 }
2149 } else {
2150 amfree(sched(dp)->destname);
2151 amfree(sched(dp)->dumpdate);
2152 amfree(sched(dp)->degr_dumpdate);
2153 amfree(sched(dp)->degr_mesg);
2154 amfree(sched(dp)->datestamp);
2155 amfree(dp->up);
2156 }
2157 } else if (taper->tape_error) {
2158 g_printf("driver: taper failed %s %s with tape error: %s\n",
2159 dp->host->hostname, qname, taper->tape_error);
2160 if(sched(dp)->taper_attempted >= 2) {
2161 log_add(L_FAIL, _("%s %s %s %d [too many taper retries]"),
2162 dp->host->hostname, qname, sched(dp)->datestamp,
2163 sched(dp)->level);
2164 g_printf("driver: taper failed %s %s, too many taper retry\n",
2165 dp->host->hostname, qname);
2166 amfree(sched(dp)->destname);
2167 amfree(sched(dp)->dumpdate);
2168 amfree(sched(dp)->degr_dumpdate);
2169 amfree(sched(dp)->degr_mesg);
2170 amfree(sched(dp)->datestamp);
2171 amfree(dp->up);
2172 } else {
2173 g_printf("driver: taper will retry %s %s\n",
2174 dp->host->hostname, qname);
2175 /* Re-insert into taper queue. */
2176 headqueue_disk(&tapeq, dp);
2177 }
2178 } else if (taper->result != DONE) {
2179 g_printf("driver: taper failed %s %s without error\n",
2180 dp->host->hostname, qname);
2181 } else {
2182 delete_diskspace(dp);
2183 amfree(sched(dp)->destname);
2184 amfree(sched(dp)->dumpdate);
2185 amfree(sched(dp)->degr_dumpdate);
2186 amfree(sched(dp)->degr_mesg);
2187 amfree(sched(dp)->datestamp);
2188 amfree(dp->up);
2189 }
2190
2191 amfree(qname);
2192
2193 taper->state &= ~TAPER_STATE_FILE_TO_TAPE;
2194 taper->state |= TAPER_STATE_IDLE;
2195 amfree(taper->input_error);
2196 amfree(taper->tape_error);
2197 taper->disk = NULL;
2198 taper_nb_wait_reply--;
2199 if (taper_nb_wait_reply == 0) {
2200 event_release(taper_ev_read);
2201 taper_ev_read = NULL;
2202 }
2203
2204 /* continue with those dumps waiting for diskspace */
2205 continue_port_dumps();
2206 start_some_dumps(&runq);
2207 startaflush();
2208 }
2209
2210 static void
dumper_taper_result(disk_t * dp)2211 dumper_taper_result(
2212 disk_t *dp)
2213 {
2214 dumper_t *dumper;
2215 taper_t *taper;
2216 char *qname;
2217
2218 dumper = sched(dp)->dumper;
2219 taper = sched(dp)->taper;
2220
2221 free_serial_dp(dp);
2222 if(dumper->result == DONE && taper->result == DONE) {
2223 update_info_dumper(dp, sched(dp)->origsize,
2224 sched(dp)->dumpsize, sched(dp)->dumptime);
2225 update_info_taper(dp, taper->first_label, taper->first_fileno,
2226 sched(dp)->level);
2227 qname = quote_string(dp->name); /*quote to take care of spaces*/
2228
2229 log_add(L_STATS, _("estimate %s %s %s %d [sec %ld nkb %lld ckb %lld kps %lu]"),
2230 dp->host->hostname, qname, sched(dp)->datestamp,
2231 sched(dp)->level,
2232 sched(dp)->est_time, (long long)sched(dp)->est_nsize,
2233 (long long)sched(dp)->est_csize,
2234 sched(dp)->est_kps);
2235 amfree(qname);
2236 } else {
2237 update_failed_dump(dp);
2238 }
2239
2240 sched(dp)->dump_attempted += 1;
2241 sched(dp)->taper_attempted += 1;
2242
2243 if((dumper->result != DONE || taper->result != DONE) &&
2244 sched(dp)->dump_attempted <= 1 &&
2245 sched(dp)->taper_attempted <= 1) {
2246 enqueue_disk(&directq, dp);
2247 }
2248
2249 if(dumper->ev_read != NULL) {
2250 event_release(dumper->ev_read);
2251 dumper->ev_read = NULL;
2252 }
2253 taper_nb_wait_reply--;
2254 if (taper_nb_wait_reply == 0 && taper_ev_read != NULL) {
2255 event_release(taper_ev_read);
2256 taper_ev_read = NULL;
2257 }
2258 taper->state &= ~TAPER_STATE_DUMP_TO_TAPE;
2259 taper->state |= TAPER_STATE_IDLE;
2260 amfree(taper->input_error);
2261 amfree(taper->tape_error);
2262 dumper->busy = 0;
2263 dp->host->inprogress -= 1;
2264 dp->inprogress = 0;
2265 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2266 taper->dumper = NULL;
2267 taper->disk = NULL;
2268 sched(dp)->dumper = NULL;
2269 sched(dp)->taper = NULL;
2270 start_some_dumps(&runq);
2271 }
2272
2273
2274 static taper_t *
idle_taper(void)2275 idle_taper(void)
2276 {
2277 taper_t *taper;
2278
2279 /* Use an already started taper first */
2280 for (taper = tapetable; taper < tapetable + conf_taper_parallel_write;
2281 taper++) {
2282 if ((taper->state & TAPER_STATE_IDLE) &&
2283 (taper->state & TAPER_STATE_TAPE_STARTED) &&
2284 !(taper->state & TAPER_STATE_DONE) &&
2285 !(taper->state & TAPER_STATE_FILE_TO_TAPE) &&
2286 !(taper->state & TAPER_STATE_DUMP_TO_TAPE))
2287 return taper;
2288 }
2289 for (taper = tapetable; taper < tapetable + conf_taper_parallel_write;
2290 taper++) {
2291 if ((taper->state & TAPER_STATE_IDLE) &&
2292 (taper->state & TAPER_STATE_RESERVATION) &&
2293 !(taper->state & TAPER_STATE_DONE) &&
2294 !(taper->state & TAPER_STATE_FILE_TO_TAPE) &&
2295 !(taper->state & TAPER_STATE_DUMP_TO_TAPE))
2296 return taper;
2297 }
2298 return NULL;
2299 }
2300
2301 static taper_t *
taper_from_name(char * name)2302 taper_from_name(
2303 char *name)
2304 {
2305 taper_t *taper;
2306
2307 for (taper = tapetable; taper < tapetable+conf_taper_parallel_write;
2308 taper++)
2309 if (strcmp(taper->name, name) == 0) return taper;
2310
2311 return NULL;
2312 }
2313
2314 static void
dumper_chunker_result(disk_t * dp)2315 dumper_chunker_result(
2316 disk_t * dp)
2317 {
2318 dumper_t *dumper;
2319 chunker_t *chunker;
2320 assignedhd_t **h=NULL;
2321 int activehd, i;
2322 off_t dummy;
2323 off_t size;
2324 int is_partial;
2325 char *qname;
2326
2327 dumper = sched(dp)->dumper;
2328 chunker = dumper->chunker;
2329
2330 free_serial_dp(dp);
2331
2332 h = sched(dp)->holdp;
2333 activehd = sched(dp)->activehd;
2334
2335 if(dumper->result == DONE && chunker->result == DONE) {
2336 update_info_dumper(dp, sched(dp)->origsize,
2337 sched(dp)->dumpsize, sched(dp)->dumptime);
2338 qname = quote_string(dp->name);/*quote to take care of spaces*/
2339
2340 log_add(L_STATS, _("estimate %s %s %s %d [sec %ld nkb %lld ckb %lld kps %lu]"),
2341 dp->host->hostname, qname, sched(dp)->datestamp,
2342 sched(dp)->level,
2343 sched(dp)->est_time, (long long)sched(dp)->est_nsize,
2344 (long long)sched(dp)->est_csize,
2345 sched(dp)->est_kps);
2346 amfree(qname);
2347 } else {
2348 update_failed_dump(dp);
2349 }
2350
2351 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
2352
2353 is_partial = dumper->result != DONE || chunker->result != DONE;
2354 rename_tmp_holding(sched(dp)->destname, !is_partial);
2355 holding_set_origsize(sched(dp)->destname, sched(dp)->origsize);
2356
2357 dummy = (off_t)0;
2358 for( i = 0, h = sched(dp)->holdp; i < activehd; i++ ) {
2359 dummy += h[i]->used;
2360 }
2361
2362 size = holding_file_size(sched(dp)->destname, 0);
2363 h[activehd]->used = size - dummy;
2364 h[activehd]->disk->allocated_dumpers--;
2365 adjust_diskspace(dp, DONE);
2366
2367 sched(dp)->dump_attempted += 1;
2368
2369 if((dumper->result != DONE || chunker->result != DONE) &&
2370 sched(dp)->dump_attempted <= 1) {
2371 delete_diskspace(dp);
2372 if (sched(dp)->no_space) {
2373 enqueue_disk(&directq, dp);
2374 } else {
2375 enqueue_disk(&runq, dp);
2376 }
2377 }
2378 else if(size > (off_t)DISK_BLOCK_KB) {
2379 enqueue_disk(&tapeq, dp);
2380 }
2381 else {
2382 delete_diskspace(dp);
2383 }
2384
2385 dumper->busy = 0;
2386 dp->host->inprogress -= 1;
2387 dp->inprogress = 0;
2388
2389 waitpid(chunker->pid, NULL, 0 );
2390 aclose(chunker->fd);
2391 chunker->fd = -1;
2392 chunker->down = 1;
2393
2394 dp = NULL;
2395 if (chunker->result == ABORT_FINISHED)
2396 pending_aborts--;
2397 continue_port_dumps();
2398 /*
2399 * Wakeup any dumpers that are sleeping because of network
2400 * or disk constraints.
2401 */
2402 start_some_dumps(&runq);
2403 startaflush();
2404 }
2405
2406
2407 static void
handle_dumper_result(void * cookie)2408 handle_dumper_result(
2409 void * cookie)
2410 {
2411 /* uses global pending_aborts */
2412 dumper_t *dumper = cookie;
2413 taper_t *taper;
2414 disk_t *dp, *sdp, *dp1;
2415 cmd_t cmd;
2416 int result_argc;
2417 char *qname;
2418 char **result_argv;
2419
2420 assert(dumper != NULL);
2421 dp = dumper->dp;
2422 assert(dp != NULL);
2423 assert(sched(dp) != NULL);
2424 do {
2425
2426 short_dump_state();
2427
2428 cmd = getresult(dumper->fd, 1, &result_argc, &result_argv);
2429
2430 if(cmd != BOGUS) {
2431 /* result_argv[1] always contains the serial number */
2432 sdp = serial2disk(result_argv[1]);
2433 if (sdp != dp) {
2434 error(_("Invalid serial number %s"), result_argv[1]);
2435 g_assert_not_reached();
2436 }
2437 }
2438
2439 qname = quote_string(dp->name);
2440 switch(cmd) {
2441
2442 case DONE: /* DONE <handle> <origsize> <dumpsize> <dumptime> <errstr> */
2443 if(result_argc != 6) {
2444 error(_("error [dumper DONE result_argc != 6: %d]"), result_argc);
2445 /*NOTREACHED*/
2446 }
2447
2448 sched(dp)->origsize = OFF_T_ATOI(result_argv[2]);
2449 sched(dp)->dumptime = TIME_T_ATOI(result_argv[4]);
2450
2451 g_printf(_("driver: finished-cmd time %s %s dumped %s:%s\n"),
2452 walltime_str(curclock()), dumper->name,
2453 dp->host->hostname, qname);
2454 fflush(stdout);
2455
2456 dumper->result = cmd;
2457
2458 break;
2459
2460 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
2461 /*
2462 * Requeue this disk, and fall through to the FAILED
2463 * case for cleanup.
2464 */
2465 if(sched(dp)->dump_attempted) {
2466 char *qname = quote_string(dp->name);
2467 char *qerr = quote_string(result_argv[2]);
2468 log_add(L_FAIL, _("%s %s %s %d [too many dumper retry: %s]"),
2469 dp->host->hostname, qname, sched(dp)->datestamp,
2470 sched(dp)->level, qerr);
2471 g_printf(_("driver: dump failed %s %s %s, too many dumper retry: %s\n"),
2472 result_argv[1], dp->host->hostname, qname, qerr);
2473 amfree(qname);
2474 amfree(qerr);
2475 }
2476 /* FALLTHROUGH */
2477 case FAILED: /* FAILED <handle> <errstr> */
2478 /*free_serial(result_argv[1]);*/
2479 dumper->result = cmd;
2480 break;
2481
2482 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
2483 /*
2484 * We sent an ABORT from the NO-ROOM case because this dump
2485 * wasn't going to fit onto the holding disk. We now need to
2486 * clean up the remains of this image, and try to finish
2487 * other dumps that are waiting on disk space.
2488 */
2489 assert(pending_aborts);
2490 /*free_serial(result_argv[1]);*/
2491 dumper->result = cmd;
2492 break;
2493
2494 case BOGUS:
2495 /* either EOF or garbage from dumper. Turn it off */
2496 log_add(L_WARNING, _("%s pid %ld is messed up, ignoring it.\n"),
2497 dumper->name, (long)dumper->pid);
2498 if (dumper->ev_read) {
2499 event_release(dumper->ev_read);
2500 dumper->ev_read = NULL;
2501 }
2502 aclose(dumper->fd);
2503 dumper->busy = 0;
2504 dumper->down = 1; /* mark it down so it isn't used again */
2505
2506 /* if it was dumping something, zap it and try again */
2507 if(sched(dp)->dump_attempted) {
2508 log_add(L_FAIL, _("%s %s %s %d [%s died]"),
2509 dp->host->hostname, qname, sched(dp)->datestamp,
2510 sched(dp)->level, dumper->name);
2511 } else {
2512 log_add(L_WARNING, _("%s died while dumping %s:%s lev %d."),
2513 dumper->name, dp->host->hostname, qname,
2514 sched(dp)->level);
2515 }
2516 dumper->result = cmd;
2517 break;
2518
2519 default:
2520 assert(0);
2521 }
2522 amfree(qname);
2523 g_strfreev(result_argv);
2524
2525 if (cmd != BOGUS) {
2526 int last_dump = 1;
2527 dumper_t *dumper;
2528
2529 run_server_dle_scripts(EXECUTE_ON_POST_DLE_BACKUP,
2530 get_config_name(), dp, sched(dp)->level);
2531 /* check dump not yet started */
2532 for (dp1=runq.head; dp1 != NULL; dp1 = dp1->next) {
2533 if (dp1->host == dp->host)
2534 last_dump = 0;
2535 }
2536 /* check direct to tape dump */
2537 for (dp1=directq.head; dp1 != NULL; dp1 = dp1->next) {
2538 if (dp1->host == dp->host)
2539 last_dump = 0;
2540 }
2541 /* check dumping dle */
2542 for (dumper = dmptable; dumper < dmptable + inparallel; dumper++) {
2543 if (dumper->busy && dumper->dp != dp &&
2544 dumper->dp->host == dp->host)
2545 last_dump = 0;
2546 }
2547 if (last_dump && dp->host->post_script == 0) {
2548 if (dp->host->post_script == 0) {
2549 run_server_host_scripts(EXECUTE_ON_POST_HOST_BACKUP,
2550 get_config_name(), dp->host);
2551 dp->host->post_script = 1;
2552 }
2553 }
2554 }
2555
2556 taper = sched(dp)->taper;
2557 /* send the dumper result to the chunker */
2558 if (dumper->chunker) {
2559 if (dumper->chunker->down == 0 && dumper->chunker->fd != -1 &&
2560 dumper->chunker->result == LAST_TOK) {
2561 if (cmd == DONE) {
2562 chunker_cmd(dumper->chunker, DONE, dp, NULL);
2563 }
2564 else {
2565 chunker_cmd(dumper->chunker, FAILED, dp, NULL);
2566 }
2567 }
2568 if( dumper->result != LAST_TOK &&
2569 dumper->chunker->result != LAST_TOK)
2570 dumper_chunker_result(dp);
2571 } else {
2572 if (taper->ready) { /* send the dumper result to the taper */
2573 if (cmd == DONE) {
2574 taper_cmd(DONE, dp, NULL, 0, NULL);
2575 } else {
2576 taper_cmd(FAILED, dp, NULL, 0, NULL);
2577 }
2578 taper->sendresult = 0;
2579 }
2580 if (taper->dumper && taper->result != LAST_TOK) {
2581 dumper_taper_result(dp);
2582 }
2583 }
2584 } while(areads_dataready(dumper->fd));
2585 }
2586
2587
2588 static void
handle_chunker_result(void * cookie)2589 handle_chunker_result(
2590 void * cookie)
2591 {
2592 chunker_t *chunker = cookie;
2593 assignedhd_t **h=NULL;
2594 dumper_t *dumper;
2595 disk_t *dp, *sdp;
2596 cmd_t cmd;
2597 int result_argc;
2598 char **result_argv;
2599 int dummy;
2600 int activehd = -1;
2601 char *qname;
2602
2603 assert(chunker != NULL);
2604 dumper = chunker->dumper;
2605 assert(dumper != NULL);
2606 dp = dumper->dp;
2607 assert(dp != NULL);
2608 assert(sched(dp) != NULL);
2609 assert(sched(dp)->destname != NULL);
2610 assert(dp != NULL && sched(dp) != NULL && sched(dp)->destname);
2611
2612 if(sched(dp)->holdp) {
2613 h = sched(dp)->holdp;
2614 activehd = sched(dp)->activehd;
2615 }
2616
2617 do {
2618 short_dump_state();
2619
2620 cmd = getresult(chunker->fd, 1, &result_argc, &result_argv);
2621
2622 if(cmd != BOGUS) {
2623 /* result_argv[1] always contains the serial number */
2624 sdp = serial2disk(result_argv[1]);
2625 if (sdp != dp) {
2626 error(_("Invalid serial number %s"), result_argv[1]);
2627 g_assert_not_reached();
2628 }
2629 }
2630
2631 switch(cmd) {
2632
2633 case PARTIAL: /* PARTIAL <handle> <dumpsize> <errstr> */
2634 case DONE: /* DONE <handle> <dumpsize> <errstr> */
2635 if(result_argc != 4) {
2636 error(_("error [chunker %s result_argc != 4: %d]"), cmdstr[cmd],
2637 result_argc);
2638 /*NOTREACHED*/
2639 }
2640 /*free_serial(result_argv[1]);*/
2641
2642 sched(dp)->dumpsize = (off_t)atof(result_argv[2]);
2643
2644 qname = quote_string(dp->name);
2645 g_printf(_("driver: finished-cmd time %s %s chunked %s:%s\n"),
2646 walltime_str(curclock()), chunker->name,
2647 dp->host->hostname, qname);
2648 fflush(stdout);
2649 amfree(qname);
2650
2651 event_release(chunker->ev_read);
2652
2653 chunker->result = cmd;
2654
2655 break;
2656
2657 case TRYAGAIN: /* TRY-AGAIN <handle> <errstr> */
2658 event_release(chunker->ev_read);
2659
2660 chunker->result = cmd;
2661
2662 break;
2663 case FAILED: /* FAILED <handle> <errstr> */
2664 /*free_serial(result_argv[1]);*/
2665
2666 event_release(chunker->ev_read);
2667
2668 chunker->result = cmd;
2669
2670 break;
2671
2672 case NO_ROOM: /* NO-ROOM <handle> <missing_size> */
2673 if (!h || activehd < 0) { /* should never happen */
2674 error(_("!h || activehd < 0"));
2675 /*NOTREACHED*/
2676 }
2677 h[activehd]->used -= OFF_T_ATOI(result_argv[2]);
2678 h[activehd]->reserved -= OFF_T_ATOI(result_argv[2]);
2679 h[activehd]->disk->allocated_space -= OFF_T_ATOI(result_argv[2]);
2680 h[activehd]->disk->disksize -= OFF_T_ATOI(result_argv[2]);
2681 break;
2682
2683 case RQ_MORE_DISK: /* RQ-MORE-DISK <handle> */
2684 if (!h || activehd < 0) { /* should never happen */
2685 error(_("!h || activehd < 0"));
2686 /*NOTREACHED*/
2687 }
2688 h[activehd]->disk->allocated_dumpers--;
2689 h[activehd]->used = h[activehd]->reserved;
2690 if( h[++activehd] ) { /* There's still some allocated space left.
2691 * Tell the dumper about it. */
2692 sched(dp)->activehd++;
2693 chunker_cmd( chunker, CONTINUE, dp, NULL );
2694 } else { /* !h[++activehd] - must allocate more space */
2695 sched(dp)->act_size = sched(dp)->est_size; /* not quite true */
2696 sched(dp)->est_size = (sched(dp)->act_size/(off_t)20) * (off_t)21; /* +5% */
2697 sched(dp)->est_size = am_round(sched(dp)->est_size, (off_t)DISK_BLOCK_KB);
2698 if (sched(dp)->est_size < sched(dp)->act_size + 2*DISK_BLOCK_KB)
2699 sched(dp)->est_size += 2 * DISK_BLOCK_KB;
2700 h = find_diskspace( sched(dp)->est_size - sched(dp)->act_size,
2701 &dummy,
2702 h[activehd-1] );
2703 if( !h ) {
2704 /* No diskspace available. The reason for this will be
2705 * determined in continue_port_dumps(). */
2706 enqueue_disk( &roomq, dp );
2707 continue_port_dumps();
2708 /* continue flush waiting for new tape */
2709 startaflush();
2710 } else {
2711 /* OK, allocate space for disk and have chunker continue */
2712 sched(dp)->activehd = assign_holdingdisk( h, dp );
2713 chunker_cmd( chunker, CONTINUE, dp, NULL );
2714 amfree(h);
2715 }
2716 }
2717 break;
2718
2719 case ABORT_FINISHED: /* ABORT-FINISHED <handle> */
2720 /*
2721 * We sent an ABORT from the NO-ROOM case because this dump
2722 * wasn't going to fit onto the holding disk. We now need to
2723 * clean up the remains of this image, and try to finish
2724 * other dumps that are waiting on disk space.
2725 */
2726 /*assert(pending_aborts);*/
2727
2728 /*free_serial(result_argv[1]);*/
2729
2730 event_release(chunker->ev_read);
2731
2732 chunker->result = cmd;
2733
2734 break;
2735
2736 case BOGUS:
2737 /* either EOF or garbage from chunker. Turn it off */
2738 log_add(L_WARNING, _("%s pid %ld is messed up, ignoring it.\n"),
2739 chunker->name, (long)chunker->pid);
2740
2741 /* if it was dumping something, zap it and try again */
2742 g_assert(h && activehd >= 0);
2743 qname = quote_string(dp->name);
2744 if(sched(dp)->dump_attempted) {
2745 log_add(L_FAIL, _("%s %s %s %d [%s died]"),
2746 dp->host->hostname, qname, sched(dp)->datestamp,
2747 sched(dp)->level, chunker->name);
2748 } else {
2749 log_add(L_WARNING, _("%s died while dumping %s:%s lev %d."),
2750 chunker->name, dp->host->hostname, qname,
2751 sched(dp)->level);
2752 }
2753 amfree(qname);
2754 dp = NULL;
2755
2756 event_release(chunker->ev_read);
2757
2758 chunker->result = cmd;
2759
2760 break;
2761
2762 default:
2763 assert(0);
2764 }
2765 g_strfreev(result_argv);
2766
2767 if(chunker->result != LAST_TOK && chunker->dumper->result != LAST_TOK)
2768 dumper_chunker_result(dp);
2769
2770 } while(areads_dataready(chunker->fd));
2771 }
2772
2773
2774 static void
read_flush(void * cookie)2775 read_flush(
2776 void * cookie)
2777 {
2778 sched_t *sp;
2779 disk_t *dp;
2780 int line;
2781 char *hostname, *diskname, *datestamp;
2782 int level;
2783 char *destname;
2784 disk_t *dp1;
2785 char *inpline = NULL;
2786 char *command;
2787 char *s;
2788 int ch;
2789 char *qname = NULL;
2790 char *qdestname = NULL;
2791 char *conf_infofile;
2792
2793 (void)cookie; /* Quiet unused parameter warning */
2794
2795 event_release(flush_ev_read);
2796 flush_ev_read = NULL;
2797
2798 /* read schedule from stdin */
2799 conf_infofile = config_dir_relative(getconf_str(CNF_INFOFILE));
2800 if (open_infofile(conf_infofile)) {
2801 error(_("could not open info db \"%s\""), conf_infofile);
2802 /*NOTREACHED*/
2803 }
2804 amfree(conf_infofile);
2805
2806 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
2807 dumpfile_t file;
2808
2809 line++;
2810 if (inpline[0] == '\0')
2811 continue;
2812
2813 s = inpline;
2814 ch = *s++;
2815
2816 skip_whitespace(s, ch); /* find the command */
2817 if(ch == '\0') {
2818 error(_("flush line %d: syntax error (no command)"), line);
2819 /*NOTREACHED*/
2820 }
2821 command = s - 1;
2822 skip_non_whitespace(s, ch);
2823 s[-1] = '\0';
2824
2825 if(strcmp(command,"ENDFLUSH") == 0) {
2826 break;
2827 }
2828
2829 if(strcmp(command,"FLUSH") != 0) {
2830 error(_("flush line %d: syntax error (%s != FLUSH)"), line, command);
2831 /*NOTREACHED*/
2832 }
2833
2834 skip_whitespace(s, ch); /* find the hostname */
2835 if(ch == '\0') {
2836 error(_("flush line %d: syntax error (no hostname)"), line);
2837 /*NOTREACHED*/
2838 }
2839 hostname = s - 1;
2840 skip_non_whitespace(s, ch);
2841 s[-1] = '\0';
2842
2843 skip_whitespace(s, ch); /* find the diskname */
2844 if(ch == '\0') {
2845 error(_("flush line %d: syntax error (no diskname)"), line);
2846 /*NOTREACHED*/
2847 }
2848 qname = s - 1;
2849 skip_quoted_string(s, ch);
2850 s[-1] = '\0'; /* terminate the disk name */
2851 diskname = unquote_string(qname);
2852
2853 skip_whitespace(s, ch); /* find the datestamp */
2854 if(ch == '\0') {
2855 error(_("flush line %d: syntax error (no datestamp)"), line);
2856 /*NOTREACHED*/
2857 }
2858 datestamp = s - 1;
2859 skip_non_whitespace(s, ch);
2860 s[-1] = '\0';
2861
2862 skip_whitespace(s, ch); /* find the level number */
2863 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
2864 error(_("flush line %d: syntax error (bad level)"), line);
2865 /*NOTREACHED*/
2866 }
2867 skip_integer(s, ch);
2868
2869 skip_whitespace(s, ch); /* find the filename */
2870 if(ch == '\0') {
2871 error(_("flush line %d: syntax error (no filename)"), line);
2872 /*NOTREACHED*/
2873 }
2874 qdestname = s - 1;
2875 skip_quoted_string(s, ch);
2876 s[-1] = '\0';
2877 destname = unquote_string(qdestname);
2878
2879 if (!holding_file_get_dumpfile(destname, &file)) {
2880 continue;
2881 }
2882
2883 if( file.type != F_DUMPFILE) {
2884 if( file.type != F_CONT_DUMPFILE )
2885 log_add(L_INFO, _("%s: ignoring cruft file."), destname);
2886 amfree(diskname);
2887 amfree(destname);
2888 dumpfile_free_data(&file);
2889 continue;
2890 }
2891
2892 if(strcmp(hostname, file.name) != 0 ||
2893 strcmp(diskname, file.disk) != 0 ||
2894 strcmp(datestamp, file.datestamp) != 0) {
2895 log_add(L_INFO, _("disk %s:%s not consistent with file %s"),
2896 hostname, diskname, destname);
2897 amfree(diskname);
2898 amfree(destname);
2899 dumpfile_free_data(&file);
2900 continue;
2901 }
2902 amfree(diskname);
2903
2904 dp = lookup_disk(file.name, file.disk);
2905
2906 if (dp == NULL) {
2907 log_add(L_INFO, _("%s: disk %s:%s not in database, skipping it."),
2908 destname, file.name, file.disk);
2909 amfree(destname);
2910 dumpfile_free_data(&file);
2911 continue;
2912 }
2913
2914 if (file.dumplevel < 0 || file.dumplevel > 399) {
2915 log_add(L_INFO, _("%s: ignoring file with bogus dump level %d."),
2916 destname, file.dumplevel);
2917 amfree(destname);
2918 dumpfile_free_data(&file);
2919 continue;
2920 }
2921
2922 if (holding_file_size(destname,1) <= 0) {
2923 log_add(L_INFO, "%s: removing file with no data.", destname);
2924 holding_file_unlink(destname);
2925 amfree(destname);
2926 dumpfile_free_data(&file);
2927 continue;
2928 }
2929
2930 dp1 = (disk_t *)alloc(SIZEOF(disk_t));
2931 *dp1 = *dp;
2932 dp1->next = dp1->prev = NULL;
2933
2934 /* add it to the flushhost list */
2935 if(!flushhost) {
2936 flushhost = alloc(SIZEOF(am_host_t));
2937 flushhost->next = NULL;
2938 flushhost->hostname = stralloc("FLUSHHOST");
2939 flushhost->up = NULL;
2940 flushhost->features = NULL;
2941 }
2942 dp1->hostnext = flushhost->disks;
2943 flushhost->disks = dp1;
2944
2945 sp = (sched_t *) alloc(SIZEOF(sched_t));
2946 sp->destname = destname;
2947 sp->level = file.dumplevel;
2948 sp->dumpdate = NULL;
2949 sp->degr_dumpdate = NULL;
2950 sp->degr_mesg = NULL;
2951 sp->datestamp = stralloc(file.datestamp);
2952 sp->est_nsize = (off_t)0;
2953 sp->est_csize = (off_t)0;
2954 sp->est_time = 0;
2955 sp->est_kps = 10;
2956 sp->origsize = file.orig_size;
2957 sp->priority = 0;
2958 sp->degr_level = -1;
2959 sp->dump_attempted = 0;
2960 sp->taper_attempted = 0;
2961 sp->act_size = holding_file_size(destname, 0);
2962 sp->holdp = build_diskspace(destname);
2963 if(sp->holdp == NULL) continue;
2964 sp->dumper = NULL;
2965 sp->taper = NULL;
2966 sp->timestamp = (time_t)0;
2967
2968 dp1->up = (char *)sp;
2969
2970 enqueue_disk(&tapeq, dp1);
2971 dumpfile_free_data(&file);
2972 }
2973 amfree(inpline);
2974 close_infofile();
2975
2976 startaflush();
2977 if (!nodump) {
2978 schedule_ev_read = event_register((event_id_t)0, EV_READFD,
2979 read_schedule, NULL);
2980 } else {
2981 force_flush = 1;
2982 }
2983 }
2984
2985 static void
read_schedule(void * cookie)2986 read_schedule(
2987 void * cookie)
2988 {
2989 sched_t *sp;
2990 disk_t *dp;
2991 int level, line, priority;
2992 char *dumpdate, *degr_dumpdate, *degr_mesg;
2993 int degr_level;
2994 time_t time, degr_time;
2995 time_t *time_p = &time;
2996 time_t *degr_time_p = °r_time;
2997 off_t nsize, csize, degr_nsize, degr_csize;
2998 unsigned long kps, degr_kps;
2999 char *hostname, *features, *diskname, *datestamp, *inpline = NULL;
3000 char *command;
3001 char *s;
3002 int ch;
3003 off_t flush_size = (off_t)0;
3004 char *qname = NULL;
3005 long long time_;
3006 long long nsize_;
3007 long long csize_;
3008 long long degr_nsize_;
3009 long long degr_csize_;
3010 GPtrArray *errarray;
3011
3012 (void)cookie; /* Quiet unused parameter warning */
3013
3014 event_release(schedule_ev_read);
3015 schedule_ev_read = NULL;
3016
3017 /* read schedule from stdin */
3018
3019 for(line = 0; (inpline = agets(stdin)) != NULL; free(inpline)) {
3020 if (inpline[0] == '\0')
3021 continue;
3022 line++;
3023
3024 s = inpline;
3025 ch = *s++;
3026
3027 skip_whitespace(s, ch); /* find the command */
3028 if(ch == '\0') {
3029 error(_("schedule line %d: syntax error (no command)"), line);
3030 /*NOTREACHED*/
3031 }
3032 command = s - 1;
3033 skip_non_whitespace(s, ch);
3034 s[-1] = '\0';
3035
3036 if(strcmp(command,"DUMP") != 0) {
3037 error(_("schedule line %d: syntax error (%s != DUMP)"), line, command);
3038 /*NOTREACHED*/
3039 }
3040
3041 skip_whitespace(s, ch); /* find the host name */
3042 if(ch == '\0') {
3043 error(_("schedule line %d: syntax error (no host name)"), line);
3044 /*NOTREACHED*/
3045 }
3046 hostname = s - 1;
3047 skip_non_whitespace(s, ch);
3048 s[-1] = '\0';
3049
3050 skip_whitespace(s, ch); /* find the feature list */
3051 if(ch == '\0') {
3052 error(_("schedule line %d: syntax error (no feature list)"), line);
3053 /*NOTREACHED*/
3054 }
3055 features = s - 1;
3056 skip_non_whitespace(s, ch);
3057 s[-1] = '\0';
3058
3059 skip_whitespace(s, ch); /* find the disk name */
3060 if(ch == '\0') {
3061 error(_("schedule line %d: syntax error (no disk name)"), line);
3062 /*NOTREACHED*/
3063 }
3064 qname = s - 1;
3065 skip_quoted_string(s, ch);
3066 s[-1] = '\0'; /* terminate the disk name */
3067 diskname = unquote_string(qname);
3068
3069 skip_whitespace(s, ch); /* find the datestamp */
3070 if(ch == '\0') {
3071 error(_("schedule line %d: syntax error (no datestamp)"), line);
3072 /*NOTREACHED*/
3073 }
3074 datestamp = s - 1;
3075 skip_non_whitespace(s, ch);
3076 s[-1] = '\0';
3077
3078 skip_whitespace(s, ch); /* find the priority number */
3079 if(ch == '\0' || sscanf(s - 1, "%d", &priority) != 1) {
3080 error(_("schedule line %d: syntax error (bad priority)"), line);
3081 /*NOTREACHED*/
3082 }
3083 skip_integer(s, ch);
3084
3085 skip_whitespace(s, ch); /* find the level number */
3086 if(ch == '\0' || sscanf(s - 1, "%d", &level) != 1) {
3087 error(_("schedule line %d: syntax error (bad level)"), line);
3088 /*NOTREACHED*/
3089 }
3090 skip_integer(s, ch);
3091
3092 skip_whitespace(s, ch); /* find the dump date */
3093 if(ch == '\0') {
3094 error(_("schedule line %d: syntax error (bad dump date)"), line);
3095 /*NOTREACHED*/
3096 }
3097 dumpdate = s - 1;
3098 skip_non_whitespace(s, ch);
3099 s[-1] = '\0';
3100
3101 skip_whitespace(s, ch); /* find the native size */
3102 nsize_ = (off_t)0;
3103 if(ch == '\0' || sscanf(s - 1, "%lld", &nsize_) != 1) {
3104 error(_("schedule line %d: syntax error (bad nsize)"), line);
3105 /*NOTREACHED*/
3106 }
3107 nsize = (off_t)nsize_;
3108 skip_integer(s, ch);
3109
3110 skip_whitespace(s, ch); /* find the compressed size */
3111 csize_ = (off_t)0;
3112 if(ch == '\0' || sscanf(s - 1, "%lld", &csize_) != 1) {
3113 error(_("schedule line %d: syntax error (bad csize)"), line);
3114 /*NOTREACHED*/
3115 }
3116 csize = (off_t)csize_;
3117 skip_integer(s, ch);
3118
3119 skip_whitespace(s, ch); /* find the time number */
3120 if(ch == '\0' || sscanf(s - 1, "%lld", &time_) != 1) {
3121 error(_("schedule line %d: syntax error (bad estimated time)"), line);
3122 /*NOTREACHED*/
3123 }
3124 *time_p = (time_t)time_;
3125 skip_integer(s, ch);
3126
3127 skip_whitespace(s, ch); /* find the kps number */
3128 if(ch == '\0' || sscanf(s - 1, "%lu", &kps) != 1) {
3129 error(_("schedule line %d: syntax error (bad kps)"), line);
3130 continue;
3131 }
3132 skip_integer(s, ch);
3133
3134 degr_dumpdate = NULL; /* flag if degr fields found */
3135 skip_whitespace(s, ch); /* find the degr level number */
3136 degr_mesg = NULL;
3137 if (ch == '"') {
3138 qname = s - 1;
3139 skip_quoted_string(s, ch);
3140 s[-1] = '\0'; /* terminate degr mesg */
3141 degr_mesg = unquote_string(qname);
3142 degr_level = -1;
3143 degr_nsize = (off_t)0;
3144 degr_csize = (off_t)0;
3145 degr_time = (time_t)0;
3146 degr_kps = 0;
3147 } else if (ch != '\0') {
3148 if(sscanf(s - 1, "%d", °r_level) != 1) {
3149 error(_("schedule line %d: syntax error (bad degr level)"), line);
3150 /*NOTREACHED*/
3151 }
3152 skip_integer(s, ch);
3153
3154 skip_whitespace(s, ch); /* find the degr dump date */
3155 if(ch == '\0') {
3156 error(_("schedule line %d: syntax error (bad degr dump date)"), line);
3157 /*NOTREACHED*/
3158 }
3159 degr_dumpdate = s - 1;
3160 skip_non_whitespace(s, ch);
3161 s[-1] = '\0';
3162
3163 skip_whitespace(s, ch); /* find the degr native size */
3164 degr_nsize_ = (off_t)0;
3165 if(ch == '\0' || sscanf(s - 1, "%lld", °r_nsize_) != 1) {
3166 error(_("schedule line %d: syntax error (bad degr nsize)"), line);
3167 /*NOTREACHED*/
3168 }
3169 degr_nsize = (off_t)degr_nsize_;
3170 skip_integer(s, ch);
3171
3172 skip_whitespace(s, ch); /* find the degr compressed size */
3173 degr_csize_ = (off_t)0;
3174 if(ch == '\0' || sscanf(s - 1, "%lld", °r_csize_) != 1) {
3175 error(_("schedule line %d: syntax error (bad degr csize)"), line);
3176 /*NOTREACHED*/
3177 }
3178 degr_csize = (off_t)degr_csize_;
3179 skip_integer(s, ch);
3180
3181 skip_whitespace(s, ch); /* find the degr time number */
3182 if(ch == '\0' || sscanf(s - 1, "%lld", &time_) != 1) {
3183 error(_("schedule line %d: syntax error (bad degr estimated time)"), line);
3184 /*NOTREACHED*/
3185 }
3186 *degr_time_p = (time_t)time_;
3187 skip_integer(s, ch);
3188
3189 skip_whitespace(s, ch); /* find the degr kps number */
3190 if(ch == '\0' || sscanf(s - 1, "%lu", °r_kps) != 1) {
3191 error(_("schedule line %d: syntax error (bad degr kps)"), line);
3192 /*NOTREACHED*/
3193 }
3194 skip_integer(s, ch);
3195 } else {
3196 error(_("schedule line %d: no degraded estimate or message"), line);
3197 }
3198
3199 dp = lookup_disk(hostname, diskname);
3200 if(dp == NULL) {
3201 log_add(L_WARNING,
3202 _("schedule line %d: %s:'%s' not in disklist, ignored"),
3203 line, hostname, qname);
3204 amfree(diskname);
3205 continue;
3206 }
3207
3208 sp = (sched_t *) alloc(SIZEOF(sched_t));
3209 /*@ignore@*/
3210 sp->level = level;
3211 sp->dumpdate = stralloc(dumpdate);
3212 sp->est_nsize = DISK_BLOCK_KB + nsize; /* include header */
3213 sp->est_csize = DISK_BLOCK_KB + csize; /* include header */
3214 /* round estimate to next multiple of DISK_BLOCK_KB */
3215 sp->est_csize = am_round(sp->est_csize, DISK_BLOCK_KB);
3216 sp->est_size = sp->est_csize;
3217 sp->est_time = time;
3218 sp->est_kps = kps;
3219 sp->priority = priority;
3220 sp->datestamp = stralloc(datestamp);
3221
3222 if(degr_dumpdate) {
3223 sp->degr_level = degr_level;
3224 sp->degr_dumpdate = stralloc(degr_dumpdate);
3225 sp->degr_nsize = DISK_BLOCK_KB + degr_nsize;
3226 sp->degr_csize = DISK_BLOCK_KB + degr_csize;
3227 /* round estimate to next multiple of DISK_BLOCK_KB */
3228 sp->degr_csize = am_round(sp->degr_csize, DISK_BLOCK_KB);
3229 sp->degr_time = degr_time;
3230 sp->degr_kps = degr_kps;
3231 sp->degr_mesg = NULL;
3232 } else {
3233 sp->degr_level = -1;
3234 sp->degr_dumpdate = NULL;
3235 sp->degr_mesg = degr_mesg;
3236 }
3237 /*@end@*/
3238
3239 sp->dump_attempted = 0;
3240 sp->taper_attempted = 0;
3241 sp->act_size = 0;
3242 sp->holdp = NULL;
3243 sp->activehd = -1;
3244 sp->dumper = NULL;
3245 sp->taper = NULL;
3246 sp->timestamp = (time_t)0;
3247 sp->destname = NULL;
3248 sp->no_space = 0;
3249
3250 dp->up = (char *) sp;
3251 if(dp->host->features == NULL) {
3252 dp->host->features = am_string_to_feature(features);
3253 if (!dp->host->features) {
3254 log_add(L_WARNING,
3255 _("Invalid feature string from client '%s'"),
3256 features);
3257 dp->host->features = am_set_default_feature_set();
3258 }
3259 }
3260 remove_disk(&waitq, dp);
3261
3262 errarray = validate_optionstr(dp);
3263 if (errarray->len > 0) {
3264 guint i;
3265 for (i=0; i < errarray->len; i++) {
3266 log_add(L_FAIL, _("%s %s %s 0 [%s]"),
3267 dp->host->hostname, qname,
3268 sp->datestamp,
3269 (char *)g_ptr_array_index(errarray, i));
3270 }
3271 amfree(qname);
3272 } else {
3273
3274 if (dp->data_path == DATA_PATH_DIRECTTCP &&
3275 dp->to_holdingdisk == HOLD_AUTO) {
3276 dp->to_holdingdisk = HOLD_NEVER;
3277 }
3278
3279 if (dp->to_holdingdisk == HOLD_NEVER) {
3280 enqueue_disk(&directq, dp);
3281 } else {
3282 enqueue_disk(&runq, dp);
3283 }
3284 flush_size += sp->act_size;
3285 }
3286 amfree(diskname);
3287 }
3288 g_printf(_("driver: flush size %lld\n"), (long long)flush_size);
3289 amfree(inpline);
3290 if(line == 0)
3291 log_add(L_WARNING, _("WARNING: got empty schedule from planner"));
3292 schedule_done = 1;
3293 start_degraded_mode(&runq);
3294 run_server_global_scripts(EXECUTE_ON_PRE_BACKUP, get_config_name());
3295 if (empty(runq)) force_flush = 1;
3296 start_some_dumps(&runq);
3297 startaflush();
3298 }
3299
3300 static unsigned long
free_kps(netif_t * ip)3301 free_kps(
3302 netif_t *ip)
3303 {
3304 unsigned long res;
3305
3306 if (ip == NULL) {
3307 netif_t *p;
3308 unsigned long maxusage=0;
3309 unsigned long curusage=0;
3310 for(p = disklist_netifs(); p != NULL; p = p->next) {
3311 maxusage += interface_get_maxusage(p->config);
3312 curusage += p->curusage;
3313 }
3314 if (maxusage >= curusage)
3315 res = maxusage - curusage;
3316 else
3317 res = 0;
3318 #ifndef __lint
3319 } else {
3320 if ((unsigned long)interface_get_maxusage(ip->config) >= ip->curusage)
3321 res = interface_get_maxusage(ip->config) - ip->curusage;
3322 else
3323 res = 0;
3324 #endif
3325 }
3326
3327 return res;
3328 }
3329
3330 static void
interface_state(char * time_str)3331 interface_state(
3332 char *time_str)
3333 {
3334 netif_t *ip;
3335
3336 g_printf(_("driver: interface-state time %s"), time_str);
3337
3338 for(ip = disklist_netifs(); ip != NULL; ip = ip->next) {
3339 g_printf(_(" if %s: free %lu"), interface_name(ip->config), free_kps(ip));
3340 }
3341 g_printf("\n");
3342 }
3343
3344 static void
allocate_bandwidth(netif_t * ip,unsigned long kps)3345 allocate_bandwidth(
3346 netif_t * ip,
3347 unsigned long kps)
3348 {
3349 ip->curusage += kps;
3350 }
3351
3352 static void
deallocate_bandwidth(netif_t * ip,unsigned long kps)3353 deallocate_bandwidth(
3354 netif_t * ip,
3355 unsigned long kps)
3356 {
3357 assert(kps <= ip->curusage);
3358 ip->curusage -= kps;
3359 }
3360
3361 /* ------------ */
3362 static off_t
free_space(void)3363 free_space(void)
3364 {
3365 holdalloc_t *ha;
3366 off_t total_free;
3367 off_t diff;
3368
3369 total_free = (off_t)0;
3370 for(ha = holdalloc; ha != NULL; ha = ha->next) {
3371 diff = ha->disksize - ha->allocated_space;
3372 if(diff > (off_t)0)
3373 total_free += diff;
3374 }
3375 return total_free;
3376 }
3377
3378 /*
3379 * We return an array of pointers to assignedhd_t. The array contains at
3380 * most one entry per holding disk. The list of pointers is terminated by
3381 * a NULL pointer. Each entry contains a pointer to a holdingdisk and
3382 * how much diskspace to use on that disk. Later on, assign_holdingdisk
3383 * will allocate the given amount of space.
3384 * If there is not enough room on the holdingdisks, NULL is returned.
3385 */
3386
3387 static assignedhd_t **
find_diskspace(off_t size,int * cur_idle,assignedhd_t * pref)3388 find_diskspace(
3389 off_t size,
3390 int * cur_idle,
3391 assignedhd_t * pref)
3392 {
3393 assignedhd_t **result = NULL;
3394 holdalloc_t *ha, *minp;
3395 int i=0;
3396 int j, minj;
3397 char *used;
3398 off_t halloc, dalloc, hfree, dfree;
3399
3400 (void)cur_idle; /* Quiet unused parameter warning */
3401
3402 if (holdalloc == NULL) {
3403 /* no holding disk in use */
3404 return NULL;
3405 }
3406
3407 if (size < 2*DISK_BLOCK_KB)
3408 size = 2*DISK_BLOCK_KB;
3409 size = am_round(size, (off_t)DISK_BLOCK_KB);
3410
3411 hold_debug(1, _("find_diskspace: want %lld K\n"),
3412 (long long)size);
3413
3414 used = alloc(SIZEOF(*used) * num_holdalloc);/*disks used during this run*/
3415 memset( used, 0, (size_t)num_holdalloc );
3416 result = alloc(SIZEOF(assignedhd_t *) * (num_holdalloc + 1));
3417 result[0] = NULL;
3418
3419 while( i < num_holdalloc && size > (off_t)0 ) {
3420 /* find the holdingdisk with the fewest active dumpers and among
3421 * those the one with the biggest free space
3422 */
3423 minp = NULL; minj = -1;
3424 for(j = 0, ha = holdalloc; ha != NULL; ha = ha->next, j++ ) {
3425 if( pref && pref->disk == ha && !used[j] &&
3426 ha->allocated_space <= ha->disksize - (off_t)DISK_BLOCK_KB) {
3427 minp = ha;
3428 minj = j;
3429 break;
3430 }
3431 else if( ha->allocated_space <= ha->disksize - (off_t)(2*DISK_BLOCK_KB) &&
3432 !used[j] &&
3433 (!minp ||
3434 ha->allocated_dumpers < minp->allocated_dumpers ||
3435 (ha->allocated_dumpers == minp->allocated_dumpers &&
3436 ha->disksize-ha->allocated_space > minp->disksize-minp->allocated_space)) ) {
3437 minp = ha;
3438 minj = j;
3439 }
3440 }
3441
3442 pref = NULL;
3443 if( !minp ) { break; } /* all holding disks are full */
3444 used[minj] = 1;
3445
3446 /* hfree = free space on the disk */
3447 hfree = minp->disksize - minp->allocated_space;
3448
3449 /* dfree = free space for data, remove 1 header for each chunksize */
3450 dfree = hfree - (((hfree-(off_t)1)/holdingdisk_get_chunksize(minp->hdisk))+(off_t)1) * (off_t)DISK_BLOCK_KB;
3451
3452 /* dalloc = space I can allocate for data */
3453 dalloc = ( dfree < size ) ? dfree : size;
3454
3455 /* halloc = space to allocate, including 1 header for each chunksize */
3456 halloc = dalloc + (((dalloc-(off_t)1)/holdingdisk_get_chunksize(minp->hdisk))+(off_t)1) * (off_t)DISK_BLOCK_KB;
3457
3458 hold_debug(1, _("find_diskspace: find diskspace: size %lld hf %lld df %lld da %lld ha %lld\n"),
3459 (long long)size,
3460 (long long)hfree,
3461 (long long)dfree,
3462 (long long)dalloc,
3463 (long long)halloc);
3464 size -= dalloc;
3465 result[i] = alloc(SIZEOF(assignedhd_t));
3466 result[i]->disk = minp;
3467 result[i]->reserved = halloc;
3468 result[i]->used = (off_t)0;
3469 result[i]->destname = NULL;
3470 result[i+1] = NULL;
3471 i++;
3472 }
3473 amfree(used);
3474
3475 if(size != (off_t)0) { /* not enough space available */
3476 g_printf(_("find diskspace: not enough diskspace. Left with %lld K\n"), (long long)size);
3477 fflush(stdout);
3478 free_assignedhd(result);
3479 result = NULL;
3480 }
3481
3482 if (debug_holding > 1) {
3483 for( i = 0; result && result[i]; i++ ) {
3484 hold_debug(1, _("find_diskspace: find diskspace: selected %s free %lld reserved %lld dumpers %d\n"),
3485 holdingdisk_get_diskdir(result[i]->disk->hdisk),
3486 (long long)(result[i]->disk->disksize -
3487 result[i]->disk->allocated_space),
3488 (long long)result[i]->reserved,
3489 result[i]->disk->allocated_dumpers);
3490 }
3491 }
3492
3493 return result;
3494 }
3495
3496 static int
assign_holdingdisk(assignedhd_t ** holdp,disk_t * diskp)3497 assign_holdingdisk(
3498 assignedhd_t ** holdp,
3499 disk_t * diskp)
3500 {
3501 int i, j, c, l=0;
3502 off_t size;
3503 char *sfn = sanitise_filename(diskp->name);
3504 char lvl[64];
3505 assignedhd_t **new_holdp;
3506 char *qname;
3507
3508 g_snprintf( lvl, SIZEOF(lvl), "%d", sched(diskp)->level );
3509
3510 size = am_round(sched(diskp)->est_size - sched(diskp)->act_size,
3511 (off_t)DISK_BLOCK_KB);
3512
3513 for( c = 0; holdp[c]; c++ )
3514 (void)c; /* count number of disks */
3515
3516 /* allocate memory for sched(diskp)->holdp */
3517 for(j = 0; sched(diskp)->holdp && sched(diskp)->holdp[j]; j++)
3518 (void)j; /* Quiet lint */
3519 new_holdp = (assignedhd_t **)alloc(SIZEOF(assignedhd_t*)*(j+c+1));
3520 if (sched(diskp)->holdp) {
3521 memcpy(new_holdp, sched(diskp)->holdp, j * SIZEOF(*new_holdp));
3522 amfree(sched(diskp)->holdp);
3523 }
3524 sched(diskp)->holdp = new_holdp;
3525 new_holdp = NULL;
3526
3527 i = 0;
3528 if( j > 0 ) { /* This is a request for additional diskspace. See if we can
3529 * merge assignedhd_t's */
3530 l=j;
3531 if( sched(diskp)->holdp[j-1]->disk == holdp[0]->disk ) { /* Yes! */
3532 sched(diskp)->holdp[j-1]->reserved += holdp[0]->reserved;
3533 holdp[0]->disk->allocated_space += holdp[0]->reserved;
3534 size = (holdp[0]->reserved>size) ? (off_t)0 : size-holdp[0]->reserved;
3535 qname = quote_string(diskp->name);
3536 hold_debug(1, _("assign_holdingdisk: merging holding disk %s to disk %s:%s, add %lld for reserved %lld, left %lld\n"),
3537 holdingdisk_get_diskdir(
3538 sched(diskp)->holdp[j-1]->disk->hdisk),
3539 diskp->host->hostname, qname,
3540 (long long)holdp[0]->reserved,
3541 (long long)sched(diskp)->holdp[j-1]->reserved,
3542 (long long)size);
3543 i++;
3544 amfree(qname);
3545 amfree(holdp[0]);
3546 l=j-1;
3547 }
3548 }
3549
3550 /* copy assignedhd_s to sched(diskp), adjust allocated_space */
3551 for( ; holdp[i]; i++ ) {
3552 holdp[i]->destname = newvstralloc( holdp[i]->destname,
3553 holdingdisk_get_diskdir(holdp[i]->disk->hdisk), "/",
3554 hd_driver_timestamp, "/",
3555 diskp->host->hostname, ".",
3556 sfn, ".",
3557 lvl, NULL );
3558 sched(diskp)->holdp[j++] = holdp[i];
3559 holdp[i]->disk->allocated_space += holdp[i]->reserved;
3560 size = (holdp[i]->reserved > size) ? (off_t)0 :
3561 (size - holdp[i]->reserved);
3562 qname = quote_string(diskp->name);
3563 hold_debug(1,
3564 _("assign_holdingdisk: %d assigning holding disk %s to disk %s:%s, reserved %lld, left %lld\n"),
3565 i, holdingdisk_get_diskdir(holdp[i]->disk->hdisk),
3566 diskp->host->hostname, qname,
3567 (long long)holdp[i]->reserved,
3568 (long long)size);
3569 amfree(qname);
3570 holdp[i] = NULL; /* so it doesn't get free()d... */
3571 }
3572 sched(diskp)->holdp[j] = NULL;
3573 amfree(sfn);
3574
3575 return l;
3576 }
3577
3578 static void
adjust_diskspace(disk_t * diskp,cmd_t cmd)3579 adjust_diskspace(
3580 disk_t * diskp,
3581 cmd_t cmd)
3582 {
3583 assignedhd_t **holdp;
3584 off_t total = (off_t)0;
3585 off_t diff;
3586 int i;
3587 char *qname, *hqname, *qdest;
3588
3589 (void)cmd; /* Quiet unused parameter warning */
3590
3591 qname = quote_string(diskp->name);
3592 qdest = quote_string(sched(diskp)->destname);
3593 hold_debug(1, _("adjust_diskspace: %s:%s %s\n"),
3594 diskp->host->hostname, qname, qdest);
3595
3596 holdp = sched(diskp)->holdp;
3597
3598 assert(holdp != NULL);
3599
3600 for( i = 0; holdp[i]; i++ ) { /* for each allocated disk */
3601 diff = holdp[i]->used - holdp[i]->reserved;
3602 total += holdp[i]->used;
3603 holdp[i]->disk->allocated_space += diff;
3604 hqname = quote_string(holdingdisk_name(holdp[i]->disk->hdisk));
3605 hold_debug(1, _("adjust_diskspace: hdisk %s done, reserved %lld used %lld diff %lld alloc %lld dumpers %d\n"),
3606 holdingdisk_name(holdp[i]->disk->hdisk),
3607 (long long)holdp[i]->reserved,
3608 (long long)holdp[i]->used,
3609 (long long)diff,
3610 (long long)holdp[i]->disk->allocated_space,
3611 holdp[i]->disk->allocated_dumpers );
3612 holdp[i]->reserved += diff;
3613 amfree(hqname);
3614 }
3615
3616 sched(diskp)->act_size = total;
3617
3618 hold_debug(1, _("adjust_diskspace: after: disk %s:%s used %lld\n"),
3619 diskp->host->hostname, qname,
3620 (long long)sched(diskp)->act_size);
3621 amfree(qdest);
3622 amfree(qname);
3623 }
3624
3625 static void
delete_diskspace(disk_t * diskp)3626 delete_diskspace(
3627 disk_t *diskp)
3628 {
3629 assignedhd_t **holdp;
3630 int i;
3631
3632 holdp = sched(diskp)->holdp;
3633
3634 assert(holdp != NULL);
3635
3636 for( i = 0; holdp[i]; i++ ) { /* for each disk */
3637 /* find all files of this dump on that disk, and subtract their
3638 * reserved sizes from the disk's allocated space
3639 */
3640 holdp[i]->disk->allocated_space -= holdp[i]->used;
3641 }
3642
3643 holding_file_unlink(holdp[0]->destname); /* no need for the entire list,
3644 * because holding_file_unlink
3645 * will walk through all files
3646 * using cont_filename */
3647 free_assignedhd(sched(diskp)->holdp);
3648 sched(diskp)->holdp = NULL;
3649 sched(diskp)->act_size = (off_t)0;
3650 }
3651
3652 static assignedhd_t **
build_diskspace(char * destname)3653 build_diskspace(
3654 char * destname)
3655 {
3656 int i, j;
3657 int fd;
3658 size_t buflen;
3659 char buffer[DISK_BLOCK_BYTES];
3660 dumpfile_t file;
3661 assignedhd_t **result;
3662 holdalloc_t *ha;
3663 off_t *used;
3664 char dirname[1000], *ch;
3665 struct stat finfo;
3666 char *filename = destname;
3667
3668 memset(buffer, 0, sizeof(buffer));
3669 used = alloc(SIZEOF(off_t) * num_holdalloc);
3670 for(i=0;i<num_holdalloc;i++)
3671 used[i] = (off_t)0;
3672 result = alloc(SIZEOF(assignedhd_t *) * (num_holdalloc + 1));
3673 result[0] = NULL;
3674 while(filename != NULL && filename[0] != '\0') {
3675 strncpy(dirname, filename, 999);
3676 dirname[999]='\0';
3677 ch = strrchr(dirname,'/');
3678 if (ch) {
3679 *ch = '\0';
3680 ch = strrchr(dirname,'/');
3681 if (ch) {
3682 *ch = '\0';
3683 }
3684 }
3685
3686 if (!ch) {
3687 g_fprintf(stderr,_("build_diskspace: bogus filename '%s'\n"), filename);
3688 amfree(used);
3689 amfree(result);
3690 return NULL;
3691 }
3692
3693 for(j = 0, ha = holdalloc; ha != NULL; ha = ha->next, j++ ) {
3694 if(strcmp(dirname, holdingdisk_get_diskdir(ha->hdisk))==0) {
3695 break;
3696 }
3697 }
3698 if (!ha || j >= num_holdalloc) {
3699 fprintf(stderr,_("build_diskspace: holding disk file '%s' is not in a holding disk directory.\n"), filename);
3700 amfree(used);
3701 amfree(result);
3702 return NULL;
3703 }
3704 if(stat(filename, &finfo) == -1) {
3705 g_fprintf(stderr, _("build_diskspace: can't stat %s: %s\n"),
3706 filename, strerror(errno));
3707 amfree(used);
3708 amfree(result);
3709 return NULL;
3710 }
3711 used[j] += ((off_t)finfo.st_size+(off_t)1023)/(off_t)1024;
3712 if((fd = open(filename,O_RDONLY)) == -1) {
3713 g_fprintf(stderr,_("build_diskspace: open of %s failed: %s\n"),
3714 filename, strerror(errno));
3715 amfree(used);
3716 amfree(result);
3717 return NULL;
3718 }
3719 if ((buflen = full_read(fd, buffer, SIZEOF(buffer))) > 0) {;
3720 parse_file_header(buffer, &file, buflen);
3721 }
3722 close(fd);
3723 filename = file.cont_filename;
3724 }
3725
3726 for(j = 0, i=0, ha = holdalloc; ha != NULL; ha = ha->next, j++ ) {
3727 if(used[j] != (off_t)0) {
3728 result[i] = alloc(SIZEOF(assignedhd_t));
3729 result[i]->disk = ha;
3730 result[i]->reserved = used[j];
3731 result[i]->used = used[j];
3732 result[i]->destname = stralloc(destname);
3733 result[i+1] = NULL;
3734 i++;
3735 }
3736 }
3737
3738 amfree(used);
3739 return result;
3740 }
3741
3742 static void
holdingdisk_state(char * time_str)3743 holdingdisk_state(
3744 char * time_str)
3745 {
3746 holdalloc_t *ha;
3747 int dsk;
3748 off_t diff;
3749
3750 g_printf(_("driver: hdisk-state time %s"), time_str);
3751
3752 for(ha = holdalloc, dsk = 0; ha != NULL; ha = ha->next, dsk++) {
3753 diff = ha->disksize - ha->allocated_space;
3754 g_printf(_(" hdisk %d: free %lld dumpers %d"), dsk,
3755 (long long)diff, ha->allocated_dumpers);
3756 }
3757 g_printf("\n");
3758 }
3759
3760 static void
update_failed_dump(disk_t * dp)3761 update_failed_dump(
3762 disk_t * dp)
3763 {
3764 time_t save_timestamp = sched(dp)->timestamp;
3765 /* setting timestamp to 0 removes the current level from the
3766 * database, so that we ensure that it will not be bumped to the
3767 * next level on the next run. If we didn't do this, dumpdates or
3768 * gnutar-lists might have been updated already, and a bumped
3769 * incremental might be created. */
3770 sched(dp)->timestamp = 0;
3771 update_info_dumper(dp, (off_t)-1, (off_t)-1, (time_t)-1);
3772 sched(dp)->timestamp = save_timestamp;
3773 }
3774
3775 /* ------------------- */
3776
3777 static int
queue_length(disklist_t q)3778 queue_length(
3779 disklist_t q)
3780 {
3781 disk_t *p;
3782 int len;
3783
3784 for(len = 0, p = q.head; p != NULL; len++, p = p->next)
3785 (void)len; /* Quiet lint */
3786 return len;
3787 }
3788
3789 static void
short_dump_state(void)3790 short_dump_state(void)
3791 {
3792 int i, nidle;
3793 char *wall_time;
3794
3795 wall_time = walltime_str(curclock());
3796
3797 g_printf(_("driver: state time %s "), wall_time);
3798 g_printf(_("free kps: %lu space: %lld taper: "),
3799 free_kps(NULL),
3800 (long long)free_space());
3801 if(degraded_mode) g_printf(_("DOWN"));
3802 else {
3803 taper_t *taper;
3804 int writing = 0;
3805 for(taper = tapetable; taper < tapetable+conf_taper_parallel_write;
3806 taper++) {
3807 if (taper->state & TAPER_STATE_DUMP_TO_TAPE ||
3808 taper->state & TAPER_STATE_FILE_TO_TAPE)
3809 writing = 1;
3810 }
3811 if(writing)
3812 g_printf(_("writing"));
3813 else
3814 g_printf(_("idle"));
3815 }
3816 nidle = 0;
3817 for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
3818 g_printf(_(" idle-dumpers: %d"), nidle);
3819 g_printf(_(" qlen tapeq: %d"), queue_length(tapeq));
3820 g_printf(_(" runq: %d"), queue_length(runq));
3821 g_printf(_(" directq: %d"), queue_length(directq));
3822 g_printf(_(" roomq: %d"), queue_length(roomq));
3823 g_printf(_(" wakeup: %d"), (int)sleep_time);
3824 g_printf(_(" driver-idle: %s\n"), _(idle_strings[idle_reason]));
3825 interface_state(wall_time);
3826 holdingdisk_state(wall_time);
3827 fflush(stdout);
3828 }
3829
/*
 * Decide what the given taper should do next.
 *
 * The function first gathers a snapshot of the driver state:
 *   - the estimated size of dumps being written by busy dumpers that have
 *     no taper attached, and of everything in runq and directq;
 *   - the actual size of everything in tapeq (plus directq);
 *   - how many tapers are writing, are idle on a started tape, or are
 *     idle holding a reservation with no DLE written yet;
 *   - how many more DLEs and how much more data fit on the tapes already
 *     started, versus how much would need a new tape.
 * From these it derives taperflush_criteria and flush_criteria and then
 * ORs TAPE_ACTION_* flags into the result according to taper->state.
 *
 * taper:            the taper being considered.
 * why_no_new_tape:  set to a message when TAPE_ACTION_NO_NEW_TAPE is
 *                   chosen.  NOTE(review): two of the assignments store a
 *                   g_strdup_printf() result and one stores the string
 *                   returned by _(); confirm how the caller treats
 *                   ownership.
 *
 * Returns the accumulated TAPE_ACTION_* flags, TAPE_ACTION_NO_ACTION if
 * none applies.
 */
static TapeAction
tape_action(
    taper_t  *taper,
    char    **why_no_new_tape)
{
    TapeAction result = TAPE_ACTION_NO_ACTION;
    dumper_t *dumper;
    taper_t  *taper1;
    disk_t   *dp;
    off_t dumpers_size;         /* dumper size to holding disk */
    off_t runq_size;
    off_t directq_size;
    off_t tapeq_size;
    off_t sched_size;
    off_t dump_to_disk_size;
    int   dump_to_disk_terminated;
    int   nb_taper_active = nb_sent_new_tape;
    int   nb_taper_flushing = 0;
    int   nb_taper_waiting = 0;
    int   nb_init = 0;
    int   dle_free = 0;         /* number of dle that fit on started tape */
    int   new_dle = 0;          /* number of dle that doesn't fit on started tape */
    off_t new_data = 0;         /* size of dle that doesn't fit on started tape */
    off_t data_free = 0;        /* space available on started tape */
    gboolean taperflush_criteria;
    gboolean flush_criteria;

    driver_debug(2, "tape_action: ENTER %p %s\n", taper, taper->name);

    /* estimated size of dumps in progress that are not going to a taper */
    dumpers_size = 0;
    for(dumper = dmptable; dumper < (dmptable+inparallel); dumper++) {
        if (dumper->busy && !sched(dumper->dp)->taper)
            dumpers_size += sched(dumper->dp)->est_size;
    }
    driver_debug(2, _("dumpers_size: %lld\n"), (long long)dumpers_size);

    /* estimated size of everything waiting in runq */
    runq_size = 0;
    for(dp = runq.head; dp != NULL; dp = dp->next) {
        runq_size += sched(dp)->est_size;
    }
    driver_debug(2, _("runq_size: %lld\n"), (long long)runq_size);

    /* estimated size of everything waiting in directq */
    directq_size = 0;
    for(dp = directq.head; dp != NULL; dp = dp->next) {
        directq_size += sched(dp)->est_size;
    }
    driver_debug(2, _("directq_size: %lld\n"), (long long)directq_size);

    /* tapeq_size = directq estimates + actual sizes of what is in tapeq */
    tapeq_size = directq_size;
    for(dp = tapeq.head; dp != NULL; dp = dp->next) {
        tapeq_size += sched(dp)->act_size;
    }

    /* classify the tapers by state */
    for (taper1 = tapetable; taper1 < tapetable+conf_taper_parallel_write;
         taper1++) {
        if (taper1->state & TAPER_STATE_FILE_TO_TAPE ||
            taper1->state & TAPER_STATE_DUMP_TO_TAPE) {
            nb_taper_flushing++;
        }
        if (taper1->state & TAPER_STATE_TAPE_STARTED &&
            taper1->state & TAPER_STATE_IDLE) {
            nb_taper_waiting++;
        }
        if (taper1->state & TAPER_STATE_RESERVATION &&
            taper1->state & TAPER_STATE_IDLE &&
            taper1->nb_dle == 0) {
            nb_init++;
        }
    }

    /* Add what is currently written to tape and in the go. */
    new_data = 0;
    data_free = 0;
    for (taper1 = tapetable; taper1 < tapetable+conf_taper_parallel_write;
         taper1++) {
        if (taper1->state & TAPER_STATE_TAPE_STARTED) {
            dle_free += (conf_max_dle_by_volume - taper1->nb_dle);
        }
        if (taper1->disk) {
            off_t data_to_go;
            off_t t_size;
            /* use the estimate while a dumper is attached, else the
             * actual size */
            if (taper1->dumper) {
                t_size = sched(taper1->disk)->est_size;
            } else {
                t_size = sched(taper1->disk)->act_size;
            }
            data_to_go = t_size - taper1->written;
            if (data_to_go > taper1->left) {
                /* what remains does not fit in taper1->left */
                if (taper1->state & TAPER_STATE_TAPE_STARTED) {
                    dle_free -= (conf_max_dle_by_volume - taper1->nb_dle) + 1;
                    new_data += data_to_go - taper1->left;
                } else {
                    dle_free -= 2;
                    new_data += data_to_go;
                }
            } else {
                if (!(taper1->state & TAPER_STATE_TAPE_STARTED)) {
                    dle_free--;
                    new_data += data_to_go;
                } else {
                    data_free += taper1->left - data_to_go;
                }
            }
        } else {
            data_free += taper1->left;
        }
    }

    if (dle_free < 0) dle_free = 0;
    new_dle = queue_length(tapeq) + queue_length(directq) - dle_free;
    driver_debug(2, _("dle_free: %d\n"), dle_free);
    driver_debug(2, _("new_dle: %d\n"), new_dle);
    if (new_dle > 0) {
        if (taperflush == 0 &&
            flush_threshold_dumped == 0 &&
            flush_threshold_scheduled == 0) {
            /* shortcut, will trigger taperflush_criteria and/or flush_criteria */
            new_data += 1;
        } else {
            /* sum the size of the first new-dle in tapeq */
            /* they should be the reverse taperalgo */
            /* NOTE(review): this loop counts new_dle down, so the
             * "new_dle > 0" tests further below see what is left after
             * it, not the value logged above. */
            for (dp = tapeq.head;
                 dp != NULL && new_dle > 0;
                 dp = dp->next, new_dle--) {
                new_data += sched(dp)->act_size;
            }
        }
    }
    driver_debug(2, _("new_data: %lld\n"), (long long)new_data);
    driver_debug(2, _("data_free: %lld\n"), (long long)data_free);
;
    driver_debug(2, _("tapeq_size: %lld\n"), (long long)tapeq_size);

    sched_size = runq_size + tapeq_size + dumpers_size;
    driver_debug(2, _("sched_size: %lld\n"), (long long)sched_size);

    dump_to_disk_size = dumpers_size + runq_size + directq_size;
    driver_debug(2, _("dump_to_disk_size: %lld\n"), (long long)dump_to_disk_size);

    /* true once the schedule has been read and nothing is left to dump */
    dump_to_disk_terminated = schedule_done && dump_to_disk_size == 0;

    /* nb_taper_active starts at nb_sent_new_tape; add tapers with a
     * started tape */
    for (taper1 = tapetable; taper1 < tapetable + conf_taper_parallel_write;
         taper1++) {
        if (taper1->state & TAPER_STATE_TAPE_STARTED) {
            nb_taper_active++;
        }
    }

    taperflush_criteria = (taperflush < tapeq_size &&
                           (new_dle > 0 || new_data > 0 || force_flush == 1 || dump_to_disk_terminated));
    flush_criteria = (flush_threshold_dumped < tapeq_size &&
                      flush_threshold_scheduled < sched_size &&
                      (new_dle > 0 || new_data > 0)) ||
                     taperflush_criteria;

    driver_debug(2, "taperflush %lld\n", (long long)taperflush);
    driver_debug(2, "flush_threshold_dumped %lld\n", (long long)flush_threshold_dumped);
    driver_debug(2, "flush_threshold_scheduled %lld\n", (long long)flush_threshold_scheduled);
    driver_debug(2, "force_flush %d\n", force_flush);
    driver_debug(2, "dump_to_disk_terminated %d\n", dump_to_disk_terminated);
    driver_debug(2, "queue_length(runq) %d\n", queue_length(runq));
    driver_debug(2, "queue_length(directq) %d\n", queue_length(directq));
    driver_debug(2, "queue_length(tapeq) %d\n", queue_length(tapeq));
    driver_debug(2, "taperflush_criteria %d\n", taperflush_criteria);
    driver_debug(2, "flush_criteria %d\n", flush_criteria);
    driver_debug(2, "current_tape %d\n", current_tape);
    driver_debug(2, "conf_runtapes %d\n", conf_runtapes);
    driver_debug(2, "taper_nb_scan_volume %d\n", taper_nb_scan_volume);
    driver_debug(2, "taper_sent_first_write %p\n", taper_sent_first_write);
    driver_debug(2, "nb_taper_active %d\n", nb_taper_active);

    // Changing conditionals can produce a driver hang, take care.
    //
    // when to start writing to a new tape
    if (taper->state & TAPER_STATE_TAPE_REQUESTED) {
        driver_debug(2, "tape_action: TAPER_STATE_TAPE_REQUESTED\n");
        if (current_tape >= conf_runtapes && taper_nb_scan_volume == 0 &&
            nb_taper_active == 0 && taper_sent_first_write == NULL) {
            *why_no_new_tape = g_strdup_printf(_("%d tapes filled; runtapes=%d "
                  "does not allow additional tapes"), current_tape, conf_runtapes);
            driver_debug(2, "tape_action: TAPER_STATE_TAPE_REQUESTED return TAPE_ACTION_NO_NEW_TAPE\n");
            result |= TAPE_ACTION_NO_NEW_TAPE;
        } else if (nb_init > 0) {
            driver_debug(2, "tape_action: TAPER_STATE_TAPE_REQUESTED return TAPE_ACTION_MOVE (nb_init > 0)\n");
            result |= TAPE_ACTION_MOVE;
        } else if (current_tape < conf_runtapes &&
                   taper_nb_scan_volume == 0 &&
                   (taper_sent_first_write == taper ||
                    flush_criteria ||
                    !taper->allow_take_scribe_from ||
                    nb_taper_active == 0)) {
            driver_debug(2, "tape_action: TAPER_STATE_TAPE_REQUESTED return TAPE_ACTION_SCAN\n");
            result |= TAPE_ACTION_SCAN;
        } else if (nb_taper_waiting && taper->allow_take_scribe_from) {
            driver_debug(2, "tape_action: TAPER_STATE_TAPE_REQUESTED return TAPE_ACTION_MOVE\n");
            result |= TAPE_ACTION_MOVE;
        }
    } else if (taper->state & TAPER_STATE_WAIT_FOR_TAPE) {
        driver_debug(2, "tape_action: TAPER_STATE_WAIT_FOR_TAPE\n");
        if (((taper->state & TAPER_STATE_DUMP_TO_TAPE) ||   // for dump to tape
             !empty(directq) ||                             // if a dle is waiting for a dump to tape
             !empty(roomq) ||                               // holding disk constraint
             idle_reason == IDLE_NO_DISKSPACE ||            // holding disk constraint
             taper_sent_first_write == taper ||
             flush_criteria                                 // flush criteria
            )) {
            driver_debug(2, "tape_action: TAPER_STATE_WAIT_FOR_TAPE return TAPE_ACTION_NEW_TAPE\n");
            result |= TAPE_ACTION_NEW_TAPE;
        // when to stop using new tape
        } else if ((taperflush >= tapeq_size &&             // taperflush criteria
                    (force_flush == 1 ||                    // if force_flush
                     dump_to_disk_terminated))              // or all dump to disk
                  ) {
            if (nb_taper_active <= 0) {
                if (current_tape >= conf_runtapes) {
                    *why_no_new_tape = g_strdup_printf(_("%d tapes filled; runtapes=%d "
                          "does not allow additional tapes"), current_tape, conf_runtapes);
                    driver_debug(2, "tape_action: TAPER_STATE_WAIT_FOR_TAPE return TAPE_ACTION_NO_NEW_TAPE\n");
                    result |= TAPE_ACTION_NO_NEW_TAPE;
                } else if (dumpers_size <= 0) {
                    *why_no_new_tape = _("taperflush criteria not met");
                    driver_debug(2, "tape_action: TAPER_STATE_WAIT_FOR_TAPE return TAPE_ACTION_NO_NEW_TAPE\n");
                    result |= TAPE_ACTION_NO_NEW_TAPE;
                }
            }
        }
    }

    // when to start a flush
    if (taper->state & TAPER_STATE_IDLE) {
        driver_debug(2, "tape_action: TAPER_STATE_IDLE\n");
        if (!degraded_mode &&
            ((taper->state & TAPER_STATE_TAPE_STARTED) ||
             ((!empty(directq) ||
               (!empty(tapeq) &&
                (!empty(roomq) ||                           // holding disk constraint
                 idle_reason == IDLE_NO_DISKSPACE ||        // holding disk constraint
                 flush_criteria))) &&                       // flush
              (current_tape < conf_runtapes &&
               taper_nb_scan_volume == 0 &&
               taper_sent_first_write == NULL &&
               ((new_dle > 0 || new_data > 0) &&
                (flush_criteria ||
                 nb_taper_active == 0)))))) {

            /* START_A_FLUSH when no taper is writing, START_A_FLUSH_FIT
             * when at least one already is */
            if (nb_taper_flushing == 0) {
                driver_debug(2, "tape_action: TAPER_STATE_IDLE return TAPE_ACTION_START_A_FLUSH\n");
                result |= TAPE_ACTION_START_A_FLUSH;
            } else {
                driver_debug(2, "tape_action: TAPER_STATE_IDLE return TAPE_ACTION_START_A_FLUSH_FIT\n");
                result |= TAPE_ACTION_START_A_FLUSH_FIT;
            }
        } else {
            driver_debug(2, "tape_action: TAPER_STATE_IDLE return TAPE_ACTION_NO_ACTION\n");
        }
    }
    return result;
}
4087
4088 static int
no_taper_flushing(void)4089 no_taper_flushing(void)
4090 {
4091 taper_t *taper;
4092
4093 for (taper = tapetable; taper < tapetable + conf_taper_parallel_write;
4094 taper++) {
4095 if (taper->state & TAPER_STATE_FILE_TO_TAPE)
4096 return 0;
4097 }
4098 return 1;
4099 }
4100
4101 static int
active_dumper(void)4102 active_dumper(void)
4103 {
4104 int i, nidle=0;
4105
4106 for(i = 0; i < inparallel; i++) if(!dmptable[i].busy) nidle++;
4107 return inparallel - nidle;
4108 }
#if 0
/*
 * Disabled debugging helper (compiled out by the surrounding #if 0):
 * prints a block describing the driver state -- free bandwidth and
 * holding space, taper status, one line per dumper, and the first
 * entries of the TAPE, ROOM and RUN queues -- to stdout.
 *
 * str: caller-supplied text printed on the header line.
 *
 * NOTE(review): this code does not look ready to be re-enabled as is.
 * It uses `taper`, which is not declared in this function, and the
 * `else` in the dumper loop has no braces, so only the quote_string()
 * call is conditional: the "dumping" g_printf and the amfree(qname) that
 * follow also run for idle dumpers, where qname has not been set.
 */
static void
dump_state(
    const char *str)
{
    int i;
    disk_t *dp;
    char *qname;

    g_printf("================\n");
    g_printf(_("driver state at time %s: %s\n"), walltime_str(curclock()), str);
    g_printf(_("free kps: %lu, space: %lld\n"),
             free_kps(NULL),
             (long long)free_space());
    if(degraded_mode) g_printf(_("taper: DOWN\n"));
    else if(taper->status == TAPER_IDLE) g_printf(_("taper: idle\n"));
    else g_printf(_("taper: writing %s:%s.%d est size %lld\n"),
                  taper->disk->host->hostname, taper->disk->name,
                  sched(taper->disk)->level,
                  (long long)sched(taper->disk)->est_size);
    for(i = 0; i < inparallel; i++) {
        dp = dmptable[i].dp;
        if(!dmptable[i].busy)
            g_printf(_("%s: idle\n"), dmptable[i].name);
        else
            qname = quote_string(dp->name);
            g_printf(_("%s: dumping %s:%s.%d est kps %d size %lld time %lu\n"),
                     dmptable[i].name, dp->host->hostname, qname, sched(dp)->level,
                     sched(dp)->est_kps, (long long)sched(dp)->est_size, sched(dp)->est_time);
            amfree(qname);
    }
    dump_queue("TAPE", tapeq, 5, stdout);
    dump_queue("ROOM", roomq, 5, stdout);
    dump_queue("RUN ", runq, 5, stdout);
    g_printf("================\n");
    fflush(stdout);
}
#endif
4147