1# Copyright (c) 2010, 2011, 2012 Nicira, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at:
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import errno
16import os
17import signal
18import sys
19import time
20
21import ovs.dirs
22import ovs.fatal_signal
23import ovs.process
24import ovs.socket_util
25import ovs.timeval
26import ovs.util
27import ovs.vlog
28
29if sys.platform != 'win32':
30    import fcntl
31    import resource
32else:
33    import ovs.winutils as winutils
34    import ovs.fcntl_win as fcntl
35    import pywintypes
36    import subprocess
37    import win32process
38
# Module-wide logger, created with the name "daemon".
vlog = ovs.vlog.Vlog("daemon")

# --detach: Should we run in the background?  Set by set_detach().
_detach = False

# Windows only: True when this process is the already-detached child, i.e.
# it was started with --pipe-handle.  Set by set_detached().
_detached = False

# --pidfile: Name of pidfile (None if none).  Set by set_pidfile().
_pidfile = None

# Our pidfile's device and inode, if we have created one.  Recorded by
# _make_pidfile() so that __read_pidfile() can recognize our own pidfile.
_pidfile_dev = None
_pidfile_ino = None

# --overwrite-pidfile: Create pidfile even if one already exists and is locked?
_overwrite_pidfile = False

# --no-chdir clears this: Should we chdir to "/" when detaching?
_chdir = True

# --monitor: Should a supervisory process monitor the daemon and restart it if
# it dies due to an error signal?
_monitor = False

# File descriptor (pipe handle on Windows) used by daemonize_start() and
# daemonize_complete() to notify the waiting parent of successful startup.
_daemonize_fd = None

# Exit status that _should_restart() treats as a request to be restarted.
RESTART_EXIT_CODE = 5
68
69
def make_pidfile_name(name):
    """Computes the pidfile path that set_pidfile() would use for 'name'.

    A None or empty 'name' selects the default: ovs.util.PROGRAM_NAME followed
    by ".pid", inside ovs.dirs.RUNDIR.  Any other 'name' is handed to
    ovs.util.abs_file_name() with ovs.dirs.RUNDIR as the base directory."""
    if name is not None and name != "":
        return ovs.util.abs_file_name(ovs.dirs.RUNDIR, name)
    return "%s/%s.pid" % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME)
77
78
def set_pidfile(name):
    """Arranges for a following call to daemonize() to create a pidfile.

    The path is computed by make_pidfile_name(): a None or empty 'name'
    selects ovs.util.PROGRAM_NAME followed by ".pid" in ovs.dirs.RUNDIR, and
    any other 'name' is resolved by ovs.util.abs_file_name() against
    ovs.dirs.RUNDIR."""
    global _pidfile
    pidfile_path = make_pidfile_name(name)
    _pidfile = pidfile_path
89
90
def set_no_chdir():
    """Disables the chdir("/") that daemonize_complete() otherwise performs
    when detaching."""
    global _chdir
    _chdir = False
95
96
def ignore_existing_pidfile():
    """Requests that an existing pidfile be replaced.

    Without this call, _make_pidfile() hard-links the new pidfile into place
    and runs _check_already_running() if the name is already taken; after
    this call it renames the new pidfile over the old one instead."""
    global _overwrite_pidfile
    _overwrite_pidfile = True
103
104
def set_detach():
    """Requests that a following call to daemonize() detach from the
    foreground session and run this process in the background."""
    global _detach
    _detach = True
110
111
def set_detached(wp):
    """Marks this process as the already-detached child and remembers 'wp',
    converted to an int, as the handle that is later written to in order to
    report successful startup.

    Used on Windows only, where handle_args() calls it with the value of the
    --pipe-handle option."""
    global _detached, _daemonize_fd
    _detached = True
    pipe_handle = int(wp)
    _daemonize_fd = pipe_handle
120
121
def get_detach():
    """Returns True if set_detach() has been called, that is, if daemonize()
    will detach from the foreground session."""
    return _detach
125
126
def set_monitor():
    """Requests that a following call to daemonize() fork a supervisory
    process that monitors the daemon and restarts it if it dies due to an
    error signal."""
    global _monitor
    _monitor = True
132
133
def _fatal(msg):
    """Reports 'msg' as a fatal error: logs it at error level, writes it to
    stderr followed by a newline, and exits the process with status 1."""
    vlog.err(msg)
    line = "%s\n" % msg
    sys.stderr.write(line)
    raise SystemExit(1)
138
139
def _make_pidfile():
    """Creates the pidfile named by the module-level '_pidfile', stores the
    running process's pid in it, and locks it.  Ensures that the pidfile will
    be deleted when the process exits.

    On platforms other than Windows the pid is first written to a temporary
    file named "<pidfile>.tmp<pid>", which is locked and then either renamed
    over '_pidfile' (when '_overwrite_pidfile' is set) or hard-linked to it.
    On Windows the pidfile is written and locked in place.

    Every failure is reported through _fatal().  On success the device and
    inode numbers of the file are saved in '_pidfile_dev' and '_pidfile_ino'.

    The caller is expected to have checked that '_pidfile' is set; this
    function does not check it."""
    pid = os.getpid()

    # Create a temporary pidfile.
    if sys.platform != 'win32':
        tmpfile = "%s.tmp%d" % (_pidfile, pid)
        # Registered before the file exists so that it gets cleaned up even if
        # we exit part-way through.
        ovs.fatal_signal.add_file_to_unlink(tmpfile)
    else:
        # No temporary file on Windows: the pidfile is written directly.
        tmpfile = "%s" % _pidfile

    try:
        # This is global to keep Python from garbage-collecting and
        # therefore closing our file after this function exits.  That would
        # unlock the lock for us, and we don't want that.
        global file_handle

        file_handle = open(tmpfile, "w")
    except IOError as e:
        _fatal("%s: create failed (%s)" % (tmpfile, e.strerror))

    # Remember which file this is (device + inode); stored in the module
    # globals at the end of the function.
    # NOTE(review): os.fstat() reports failure with OSError, which is the same
    # class as IOError only on Python 3 -- confirm Python 2 is not a target.
    try:
        s = os.fstat(file_handle.fileno())
    except IOError as e:
        _fatal("%s: fstat failed (%s)" % (tmpfile, e.strerror))

    # NOTE(review): file object writes raise IOError on Python 2, which
    # 'except OSError' would not catch there -- confirm.
    try:
        file_handle.write("%s\n" % pid)
        file_handle.flush()
    except OSError as e:
        _fatal("%s: write failed: %s" % (tmpfile, e.strerror))

    # Take a non-blocking lock on the file: exclusive everywhere except on
    # Windows, where a shared lock is requested.
    try:
        if sys.platform != 'win32':
            fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        else:
            fcntl.lockf(file_handle, fcntl.LOCK_SH | fcntl.LOCK_NB)
    except IOError as e:
        _fatal("%s: fcntl failed: %s" % (tmpfile, e.strerror))

    if sys.platform == 'win32':
        # Ensure that the pidfile gets closed and deleted on exit.
        ovs.fatal_signal.add_file_to_close_and_unlink(_pidfile, file_handle)
    else:
        # Rename or link it to the correct name.
        if _overwrite_pidfile:
            try:
                os.rename(tmpfile, _pidfile)
            except OSError as e:
                _fatal("failed to rename \"%s\" to \"%s\" (%s)"
                       % (tmpfile, _pidfile, e.strerror))
        else:
            # Keep trying to link until it succeeds or fails for a reason
            # other than EEXIST or EINTR.
            while True:
                try:
                    os.link(tmpfile, _pidfile)
                    error = 0
                except OSError as e:
                    error = e.errno
                if error == errno.EEXIST:
                    # The name is taken.  _check_already_running() reports a
                    # fatal error if the existing pidfile cannot be
                    # disposed of; if it returns, retry the link.
                    _check_already_running()
                elif error != errno.EINTR:
                    break
            if error:
                _fatal("failed to link \"%s\" as \"%s\" (%s)"
                       % (tmpfile, _pidfile, os.strerror(error)))

        # Ensure that the pidfile will get deleted on exit.
        ovs.fatal_signal.add_file_to_unlink(_pidfile)

        # Delete the temporary pidfile if it still exists.  (After a rename
        # the temporary name is already gone, so this is only done for the
        # link case.)
        if not _overwrite_pidfile:
            error = ovs.fatal_signal.unlink_file_now(tmpfile)
            if error:
                _fatal("%s: unlink failed (%s)" % (
                    tmpfile, os.strerror(error)))

    # Record the identity of our pidfile so that __read_pidfile() can
    # recognize it without opening it.
    global _pidfile_dev
    global _pidfile_ino
    _pidfile_dev = s.st_dev
    _pidfile_ino = s.st_ino
222
223
def daemonize():
    """Runs both daemonization phases back to back: daemonize_start()
    followed by daemonize_complete().

    Depending on what was configured with set_pidfile(), set_detach() and
    set_monitor(), this creates the pidfile and detaches from the foreground
    session."""
    for phase in (daemonize_start, daemonize_complete):
        phase()
229
230
def _waitpid(pid, options):
    """Calls os.waitpid(pid, options), retrying if interrupted by a signal.

    Returns the (pid, status) tuple from os.waitpid() on success.  If
    os.waitpid() fails with any error other than EINTR, returns
    (-errno, 0) instead of raising, so callers can test for a negative first
    element."""
    while True:
        try:
            return os.waitpid(pid, options)
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted before any child changed state: try again
                # rather than reporting EINTR to the caller as a failure.
                continue
            return -e.errno, 0
239
240
def _fork_and_wait_for_startup():
    """Forks, then in the parent waits for the child to report that it has
    started up.

    The child reports startup by writing one byte to a pipe, which
    _fork_notify_startup() does with the descriptor saved in
    '_daemonize_fd'.

    Returns the child's pid in the parent, which happens only if the child
    signaled startup, and 0 in the child.  On Windows this delegates to
    _fork_and_wait_for_startup_windows().

    The parent exits instead of returning if the child goes away without
    signaling startup: with the child's own exit status if that was nonzero,
    and with status 1 otherwise (child exited with status 0, was killed by a
    signal, or could not be waited for).  Failure to create the pipe or to
    fork also exits with status 1."""
    if sys.platform == 'win32':
        return _fork_and_wait_for_startup_windows()

    try:
        rfd, wfd = os.pipe()
    except OSError as e:
        sys.stderr.write("pipe failed: %s\n" % os.strerror(e.errno))
        sys.exit(1)

    try:
        pid = os.fork()
    except OSError as e:
        sys.stderr.write("could not fork: %s\n" % os.strerror(e.errno))
        sys.exit(1)

    if pid > 0:
        # Running in parent process.
        os.close(wfd)
        ovs.fatal_signal.fork()

        # Wait for the startup byte, retrying reads interrupted by a signal.
        # End-of-file (a zero-length read) means that the child closed its
        # end of the pipe, or died, without signaling.
        while True:
            try:
                s = os.read(rfd, 1)
                error = 0
            except OSError as e:
                s = ""
                error = e.errno
            if error != errno.EINTR:
                break
        if len(s) != 1:
            retval, status = _waitpid(pid, 0)
            if retval == pid:
                if os.WIFEXITED(status) and os.WEXITSTATUS(status):
                    # Child exited with an error.  Convey the same error to
                    # our parent process as a courtesy.
                    sys.exit(os.WEXITSTATUS(status))
                else:
                    sys.stderr.write("fork child failed to signal "
                                     "startup (%s)\n"
                                     % ovs.process.status_msg(status))
                    # The child is gone, so do not fall through and return
                    # its pid as though startup had succeeded.
                    sys.exit(1)
            else:
                assert retval < 0
                sys.stderr.write("waitpid failed (%s)\n"
                                 % os.strerror(-retval))
                sys.exit(1)

        os.close(rfd)
    else:
        # Running in child process.
        os.close(rfd)
        ovs.timeval.postfork()

        # Keep the write end so that startup can be signaled later.
        global _daemonize_fd
        _daemonize_fd = wfd
    return pid
296
297
def _fork_and_wait_for_startup_windows():
    """Windows counterpart of _fork_and_wait_for_startup().

    Windows has no fork(), so the "child" is a new detached process running
    the same command line plus a --pipe-handle=<handle> option that carries
    the write end of a pipe.  The child is recognized by '_detached', which
    set_detached() sets when that option is present.

    Returns 0 in the child.  In the parent, returns the child's pid once the
    child has written one byte to the pipe.  If the child goes away without
    doing so, the parent exits with the child's exit status, or with status 1
    if that status was 0.  Failure to create the pipe or the process also
    exits with status 1."""
    if _detached:
        # Running in child process
        ovs.timeval.postfork()
        return 0

    # Close the log file: on Windows it cannot be moved while the parent has
    # a reference on it.
    vlog.close_log_file()

    try:
        (rfd, wfd) = winutils.windows_create_pipe()
    except pywintypes.error as e:
        sys.stderr.write("pipe failed to create: %s\n" % e.strerror)
        sys.exit(1)

    try:
        creationFlags = win32process.DETACHED_PROCESS
        # Pass the command line as a list so that subprocess quotes each
        # argument; joining with spaces would break an interpreter path or
        # an argument that contains whitespace.
        args = ([sys.executable] + list(sys.argv)
                + ["--pipe-handle=%d" % int(wfd)])
        proc = subprocess.Popen(
            args=args,
            close_fds=False,
            shell=False,
            creationflags=creationFlags,
            stdout=sys.stdout,
            stderr=sys.stderr)
        pid = proc.pid
    except OSError as e:
        sys.stderr.write("CreateProcess failed (%s)\n" % os.strerror(e.errno))
        sys.exit(1)

    # Running in parent process.
    winutils.win32file.CloseHandle(wfd)
    ovs.fatal_signal.fork()

    # Wait for the child's one-byte startup notification.
    error, s = winutils.windows_read_pipe(rfd, 1)
    if error:
        s = ""

    if len(s) != 1:
        retval = proc.wait()
        if retval == 0:
            sys.stderr.write("fork child failed to signal startup\n")
            # The child is gone, so do not fall through and return its pid
            # as though startup had succeeded.
            sys.exit(1)
        else:
            # Child exited with an error. Convey the same error to
            # our parent process as a courtesy.
            sys.exit(retval)
    winutils.win32file.CloseHandle(rfd)

    return pid
350
351
def _fork_notify_startup(fd):
    """Reports successful startup to the process waiting on the other end of
    'fd' by writing a single "0" to it, then closes 'fd'.

    Does nothing if 'fd' is None.  Exits with status 1 if the write fails.
    On Windows the work is handed to _fork_notify_startup_windows()."""
    if sys.platform == 'win32':
        _fork_notify_startup_windows(fd)
        return

    if fd is None:
        return

    error, _ = ovs.socket_util.write_fully(fd, "0")
    if error:
        sys.stderr.write("could not write to pipe\n")
        sys.exit(1)
    os.close(fd)
363
364
def _fork_notify_startup_windows(fd):
    """Reports successful startup on Windows by writing a single byte to the
    pipe handle 'fd'.

    Does nothing if 'fd' is None.  Exits with status 1 if the write fails."""
    if fd is not None:
        try:
            # Python 2 requires a string as second parameter, while
            # Python 3 requires a bytes-like object. b"0" fits for both
            # python versions.
            winutils.win32file.WriteFile(fd, b"0", None)
        except winutils.pywintypes.error as e:
            # Report the message carried by the exception: 'winerror' is a
            # Win32 error code, not a C errno, so os.strerror() would
            # describe the wrong error.
            sys.stderr.write("could not write to pipe: %s\n" % e.strerror)
            sys.exit(1)
376
377
def _should_restart(status):
    """Returns True if a daemon that died with wait status 'status' should be
    restarted by the monitor, False otherwise.

    A restart is called for when the daemon exited with RESTART_EXIT_CODE.
    On platforms other than Windows it is also called for when the daemon was
    killed by one of the error signals listed below."""
    if sys.platform == 'win32':
        # The exit status is encoded in the high byte of the
        # 16-bit number 'status'.
        return (status >> 8) == RESTART_EXIT_CODE

    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == RESTART_EXIT_CODE:
        return True

    if not os.WIFSIGNALED(status):
        return False

    # Signals are looked up by name because not every platform defines all
    # of them; an undefined one yields None, which never matches.
    error_signal_names = ("SIGABRT", "SIGALRM", "SIGBUS", "SIGFPE", "SIGILL",
                          "SIGPIPE", "SIGSEGV", "SIGXCPU", "SIGXFSZ")
    return any(os.WTERMSIG(status) == getattr(signal, signame, None)
               for signame in error_signal_names)
399
400
def _monitor_daemon(daemon_pid):
    """Supervises the daemon process 'daemon_pid', restarting it whenever
    _should_restart() approves of the way it died.

    Restarts are throttled to at most one every 10 seconds.  When the daemon
    dies in a way that does not call for a restart, the monitor exits with
    status 0; if waiting for the daemon fails, it exits with status 1.

    Returns only in a freshly restarted daemon process, that is, when
    _fork_and_wait_for_startup() returns 0."""
    # XXX should log daemon's stderr output at startup time
    # XXX should use setproctitle module if available
    last_restart = None
    while True:
        retval, status = _waitpid(daemon_pid, 0)
        if retval < 0:
            sys.stderr.write("waitpid failed\n")
            sys.exit(1)
        if retval != daemon_pid:
            continue

        status_msg = ("pid %d died, %s"
                      % (daemon_pid, ovs.process.status_msg(status)))

        if not _should_restart(status):
            vlog.info("%s, exiting" % status_msg)
            sys.exit(0)

        if sys.platform != 'win32' and os.WCOREDUMP(status):
            # Disable further core dumps to save disk space.
            try:
                resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
            except resource.error:
                vlog.warn("failed to disable core dumps")

        # Throttle restarts to no more than once every 10 seconds.
        if (last_restart is not None and
                ovs.timeval.msec() < last_restart + 10000):
            vlog.warn("%s, waiting until 10 seconds since last "
                      "restart" % status_msg)
            wakeup = last_restart + 10000
            while True:
                now = ovs.timeval.msec()
                if now > wakeup:
                    break
                delay = (wakeup - now) / 1000.0
                sys.stdout.write("sleep %f\n" % delay)
                time.sleep(delay)
        last_restart = ovs.timeval.msec()

        vlog.err("%s, restarting" % status_msg)
        daemon_pid = _fork_and_wait_for_startup()
        if not daemon_pid:
            break

    # Running in new daemon process.
446
447
def _close_standard_fds():
    """Points stdin, stdout and stderr at the null device.

    If we were started from e.g. an SSH session, this keeps us from holding
    that session open artificially.  Nothing is done if
    ovs.socket_util.get_null_fd() returns a negative value."""
    null_fd = ovs.socket_util.get_null_fd()
    if null_fd < 0:
        return
    for std_fd in (0, 1, 2):
        os.dup2(null_fd, std_fd)
456
457
def daemonize_start():
    """If daemonization is configured, starts it by forking and returning in
    the child process.

    With set_detach(), the original process waits until the child either
    reports successful startup (by calling daemonize_complete()) or exits,
    and then exits itself.  With set_monitor(), a further fork leaves a
    monitor process that supervises the daemon and never returns from here.
    With set_pidfile(), the process that does return creates the pidfile."""

    if _detach:
        child_pid = _fork_and_wait_for_startup()
        if child_pid > 0:
            # Running in parent process.
            sys.exit(0)

        if sys.platform != 'win32':
            # Running in daemon or monitor process.
            os.setsid()

    if _monitor:
        # _fork_and_wait_for_startup() replaces _daemonize_fd in the child,
        # so keep the descriptor that leads back to our own parent.
        saved_daemonize_fd = _daemonize_fd
        daemon_pid = _fork_and_wait_for_startup()
        if daemon_pid > 0:
            # Running in monitor process.
            _fork_notify_startup(saved_daemonize_fd)
            if sys.platform != 'win32':
                _close_standard_fds()
            _monitor_daemon(daemon_pid)
        # Running in daemon process

    if _pidfile:
        _make_pidfile()
487
488
def daemonize_complete():
    """Tells the waiting parent process, if there is one, that this process
    has completed startup successfully.

    When detaching was requested, this also changes the working directory to
    "/" (unless set_no_chdir() was called) and redirects the standard file
    descriptors to the null device."""
    _fork_notify_startup(_daemonize_fd)

    if not _detach:
        return

    if _chdir:
        os.chdir("/")
    _close_standard_fds()
498
499
def usage():
    """Writes a summary of the daemon command-line options to stdout, showing
    the default pidfile location."""
    template = """
Daemon options:
   --detach                run in background as daemon
   --no-chdir              do not chdir to '/'
   --pidfile[=FILE]        create pidfile (default: %s/%s.pid)
   --overwrite-pidfile     with --pidfile, start even if already running
"""
    sys.stdout.write(template % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME))
508
509
def __read_pidfile(pidfile, delete_if_stale):
    """Reads the pid stored in 'pidfile', using the file's lock to tell a
    live pidfile from a stale one.

    A pidfile that some other process holds locked is considered live, and
    the integer on its first line is returned.  If 'pidfile' is this
    process's own pidfile, os.getpid() is returned without opening the file.

    A pidfile that can be locked here is stale.  If 'delete_if_stale' is
    false, that is reported as -errno.ESRCH.  If 'delete_if_stale' is true,
    the stale file is deleted and 0 is returned; 0 is also returned if the
    file does not exist.

    Every other failure is reported as a negative errno value."""
    if _pidfile_dev is not None:
        try:
            s = os.stat(pidfile)
            if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev:
                # It's our own pidfile.  We can't afford to open it,
                # because closing *any* fd for a file that a process
                # has locked also releases all the locks on that file.
                #
                # Fortunately, we know the associated pid anyhow.
                return os.getpid()
        except OSError:
            # Could not stat it; fall through and let open() below report
            # the problem, if there still is one.
            pass

    try:
        file_handle = open(pidfile, "r+")
    except IOError as e:
        if e.errno == errno.ENOENT and delete_if_stale:
            # Nothing there to delete.
            return 0
        vlog.warn("%s: open: %s" % (pidfile, e.strerror))
        return -e.errno

    # Python fcntl doesn't directly support F_GETLK so we have to just try
    # to lock it.
    try:
        fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)

        # pidfile exists but wasn't locked by anyone.  Now we have the lock.
        if not delete_if_stale:
            file_handle.close()
            vlog.warn("%s: pid file is stale" % pidfile)
            return -errno.ESRCH

        # Is the file we have locked still named 'pidfile'?
        # NOTE(review): os.stat()/os.fstat() report failure with OSError,
        # which is the same class as IOError only on Python 3 -- confirm
        # Python 2 is not a target.
        try:
            raced = False
            s = os.stat(pidfile)
            s2 = os.fstat(file_handle.fileno())
            if s.st_ino != s2.st_ino or s.st_dev != s2.st_dev:
                raced = True
        except IOError:
            raced = True
        if raced:
            # NOTE(review): 'file_handle' is not explicitly closed on this
            # path, nor on the unlink-failure and fcntl-failure paths below.
            vlog.warn("%s: lost race to delete pidfile" % pidfile)
            return -errno.EALREADY

        # We won the right to delete the stale pidfile.
        try:
            os.unlink(pidfile)
        except IOError as e:
            vlog.warn("%s: failed to delete stale pidfile (%s)"
                            % (pidfile, e.strerror))
            return -e.errno
        else:
            vlog.dbg("%s: deleted stale pidfile" % pidfile)
            file_handle.close()
            return 0
    except IOError as e:
        # EACCES and EAGAIN mean that the lock is held by someone else; that
        # case is handled below.  Anything else is a real error.
        if e.errno not in [errno.EACCES, errno.EAGAIN]:
            vlog.warn("%s: fcntl: %s" % (pidfile, e.strerror))
            return -e.errno

    # Someone else has the pidfile locked.
    try:
        try:
            error = int(file_handle.readline())
        except IOError as e:
            vlog.warn("%s: read: %s" % (pidfile, e.strerror))
            error = -e.errno
        except ValueError:
            vlog.warn("%s does not contain a pid" % pidfile)
            error = -errno.EINVAL

        # Despite its name, 'error' holds the pid on success.
        return error
    finally:
        try:
            file_handle.close()
        except IOError:
            pass
589
590
def read_pidfile(pidfile):
    """Opens 'pidfile' and reads a PID from it, without deleting the file
    even if it turns out to be stale.

    Returns the positive PID if successful, otherwise a negative errno
    value."""
    delete_if_stale = False
    return __read_pidfile(pidfile, delete_if_stale)
595
596
def _check_already_running():
    """Checks whether the configured pidfile belongs to a running process.

    Reads '_pidfile' with __read_pidfile(), allowing it to delete a stale
    file.  Calls _fatal() if the pidfile names a live process or if the check
    itself failed; returns normally if the pidfile was absent or was stale
    and has been deleted."""
    pid = __read_pidfile(_pidfile, True)
    if pid > 0:
        _fatal("%s: already running as pid %d, aborting" % (_pidfile, pid))
    elif pid < 0:
        # A negative value is a negated errno; negate it again so that
        # os.strerror() receives the real error number.
        _fatal("%s: pidfile check failed (%s), aborting"
               % (_pidfile, os.strerror(-pid)))
604
605
def add_args(parser):
    """Populates 'parser', an ArgumentParser allocated using the argparse
    module, with the command line arguments required by the daemon module.

    The options are added to a "Daemon Options" argument group.  On Windows
    this also registers --pipe-handle, the option that
    _fork_and_wait_for_startup_windows() appends to the command line of the
    detached child."""

    pidfile = make_pidfile_name(None)

    group = parser.add_argument_group(title="Daemon Options")
    group.add_argument("--detach", action="store_true",
                       help="Run in background as a daemon.")
    group.add_argument("--no-chdir", action="store_true",
                       help="Do not chdir to '/'.")
    group.add_argument("--monitor", action="store_true",
                       help="Monitor %s process." % ovs.util.PROGRAM_NAME)
    # A bare --pidfile (no value) selects the default pidfile name.
    group.add_argument("--pidfile", nargs="?", const=pidfile,
                       help="Create pidfile (default %s)." % pidfile)
    group.add_argument("--overwrite-pidfile", action="store_true",
                       help="With --pidfile, start even if already running.")
    if sys.platform == 'win32':
        group.add_argument("--pipe-handle",
                           help=("Handle of the pipe used to notify the "
                                 "parent process of successful startup "
                                 "(for internal use)."))
627
628
def handle_args(args):
    """Applies the daemon module settings found in 'args'.

    'args' is the object returned by the parse_args() method of an
    ArgumentParser that was prepared with add_args().  Each option that was
    given is forwarded to the matching setter in this module."""

    # --pipe-handle exists only on Windows, so it is not looked at elsewhere.
    if sys.platform == 'win32' and args.pipe_handle:
        set_detached(args.pipe_handle)

    if args.detach:
        set_detach()

    if args.no_chdir:
        set_no_chdir()

    pidfile_name = args.pidfile
    if pidfile_name:
        set_pidfile(pidfile_name)

    if args.overwrite_pidfile:
        ignore_existing_pidfile()

    if args.monitor:
        set_monitor()
653