1#+##############################################################################
2#                                                                              #
3# File: No/Worries/PidFile.pm                                                  #
4#                                                                              #
5# Description: pid file handling without worries                               #
6#                                                                              #
7#-##############################################################################
8
9#
10# module definition
11#
12
13package No::Worries::PidFile;
14use strict;
15use warnings;
16our $VERSION  = "1.6";
17our $REVISION = sprintf("%d.%02d", q$Revision: 1.20 $ =~ /(\d+)\.(\d+)/);
18
19#
20# used modules
21#
22
23use Fcntl qw(:DEFAULT :flock :seek);
24use No::Worries qw($_IntegerRegexp $_NumberRegexp);
25use No::Worries::Die qw(dief);
26use No::Worries::Export qw(export_control);
27use No::Worries::Proc qw(proc_terminate);
28use No::Worries::Stat qw(ST_MTIME);
29use Params::Validate qw(validate :types);
30use POSIX qw(:errno_h);
31use Time::HiRes qw();
32
33#
34# safely read something from an open file
35#
36
37sub _read ($$;$) {
38    my($path, $fh, $noclose) = @_;
39    my($data, $done);
40
41    flock($fh, LOCK_EX)
42        or dief("cannot flock(%s, LOCK_EX): %s", $path, $!);
43    sysseek($fh, 0, SEEK_SET)
44        or dief("cannot sysseek(%s, 0, SEEK_SET): %s", $path, $!);
45    $data = "";
46    $done = -1;
47    while ($done) {
48        $done = sysread($fh, $data, 16, length($data));
49        dief("cannot sysread(%s, %d): %s", $path, 16, $!)
50            unless defined($done);
51    }
52    if ($noclose) {
53        flock($fh, LOCK_UN)
54            or dief("cannot flock(%s, LOCK_UN): %s", $path, $!);
55    } else {
56        close($fh)
57            or dief("cannot close(%s): %s", $path, $!);
58    }
59    return($data);
60}
61
62#
63# safely write something to an open file
64#
65
66sub _write ($$$) {
67    my($path, $fh, $data) = @_;
68    my($length, $offset, $done);
69
70    flock($fh, LOCK_EX)
71        or dief("cannot flock(%s, LOCK_EX): %s", $path, $!);
72    sysseek($fh, 0, SEEK_SET)
73        or dief("cannot sysseek(%s, 0, SEEK_SET): %s", $path, $!);
74    truncate($fh, 0)
75        or dief("cannot truncate(%s, 0): %s", $path, $!);
76    $length = length($data);
77    $offset = 0;
78    while ($length) {
79        $done = syswrite($fh, $data, $length, $offset);
80        dief("cannot syswrite(%s, %d): %s", $path, $length, $!)
81            unless defined($done);
82        $length -= $done;
83        $offset += $done;
84    }
85    close($fh)
86        or dief("cannot close(%s): %s", $path, $!);
87}
88
89#
90# check if a process is alive by killing it ;-)
91#
92
93sub _alive ($) {
94    my($pid) = @_;
95
96    return(1) if kill(0, $pid);
97    return(0) if $! == ESRCH;
98    dief("cannot kill(0, %d): %s", $pid, $!);
99}
100
101#
102# kill a process
103#
104
105sub _kill ($$$%) {
106    my($path, $fh, $pid, %option) = @_;
107    my($maxtime);
108
109    # gently
110    $option{callback}->("(pid $pid) is being told to quit...");
111    _write($path, $fh, "$pid\nquit\n");
112    $maxtime = Time::HiRes::time() + $option{linger};
113    while (1) {
114        last unless _alive($pid);
115        last if Time::HiRes::time() > $maxtime;
116        Time::HiRes::sleep(0.1);
117    }
118    if (_alive($pid)) {
119        # forcedly
120        $option{callback}->("(pid $pid) is still running, killing it now...");
121        if ($option{kill}) {
122            proc_terminate($pid, kill => $option{kill});
123        } else {
124            proc_terminate($pid);
125        }
126        $option{callback}->("(pid $pid) has been successfully killed");
127    } else {
128        $option{callback}->("does not seem to be running anymore");
129    }
130}
131
132#
133# check a process
134#
135
136sub _status ($%) {
137    my($path, %option) = @_;
138    my($fh, @stat, $data, $pid, $status, $message, $lsb);
139
140    $status = 0;
141    unless (sysopen($fh, $path, O_RDWR)) {
142        if ($! == ENOENT) {
143            ($message, $lsb) =
144                ("does not seem to be running (no pid file)", 3);
145            goto done;
146        }
147        dief("cannot sysopen(%s, O_RDWR): %s", $path, $!);
148    }
149    @stat = stat($fh)
150        or dief("cannot stat(%s): %s", $path, $!);
151    $data = _read($path, $fh);
152    if ($data eq "") {
153        # this can happen in pf_set(), between open() and lock()
154        ($message, $lsb) =
155            ("does not seem to be running yet (empty pid file)", 4);
156        goto done;
157    }
158    if ($data =~ /^(\d+)(\s+([a-z]+))?\s*$/s) {
159        $pid = $1;
160    } else {
161        dief("unexpected pid file contents in %s: %s", $path, $data);
162    }
163    unless (_alive($pid)) {
164        ($message, $lsb) =
165            ("(pid $pid) does not seem to be running anymore", 1);
166        goto done;
167    }
168    $data = localtime($stat[ST_MTIME]);
169    if ($option{freshness} and
170        $stat[ST_MTIME] < Time::HiRes::time() - $option{freshness}) {
171        ($message, $lsb) =
172            ("(pid $pid) does not seem to be running anymore since $data", 4);
173        goto done;
174    }
175    # so far so good ;-)
176    ($status, $message, $lsb) = (1, "(pid $pid) was active on $data", 0);
177  done:
178    return($status, $message, $lsb);
179}
180
181#
182# set the pid file
183#
184
185my %pf_set_options = (
186    pid => { optional => 1, type => SCALAR, regex => $_IntegerRegexp },
187);
188
189sub pf_set ($@) {
190    my($path, %option, $fh);
191
192    $path = shift(@_);
193    %option = validate(@_, \%pf_set_options) if @_;
194    $option{pid} ||= $$;
195    sysopen($fh, $path, O_WRONLY|O_CREAT|O_EXCL)
196        or dief("cannot sysopen(%s, O_WRONLY|O_CREAT|O_EXCL): %s", $path, $!);
197    _write($path, $fh, "$option{pid}\n");
198}
199
200#
201# check the pid file
202#
203
204my %pf_check_options = (
205    pid => { optional => 1, type => SCALAR, regex => $_IntegerRegexp },
206);
207
208sub pf_check ($@) {
209    my($path, %option, $fh, $data, $pid, $action);
210
211    $path = shift(@_);
212    %option = validate(@_, \%pf_check_options) if @_;
213    $option{pid} ||= $$;
214    sysopen($fh, $path, O_RDWR)
215        or dief("cannot sysopen(%s, O_RDWR): %s", $path, $!);
216    $data = _read($path, $fh);
217    if ($data =~ /^(\d+)\s*$/s) {
218        ($pid, $action) = ($1, "");
219    } elsif ($data =~ /^(\d+)\s+([a-z]+)\s*$/s) {
220        ($pid, $action) = ($1, $2);
221    } else {
222        dief("unexpected pid file contents in %s: %s", $path, $data)
223    }
224    dief("pid file %s has been taken over by pid %d!", $path, $pid)
225        unless $pid == $option{pid};
226    return($action);
227}
228
229#
230# touch the pid file
231#
232
233sub pf_touch ($) {
234    my($path) = @_;
235    my($now);
236
237    $now = time();
238    utime($now, $now, $path)
239        or dief("cannot utime(%d, %d, %s): %s", $now, $now, $path, $!);
240}
241
242#
243# unset the pid file
244#
245
246sub pf_unset ($) {
247    my($path) = @_;
248
249    unless (unlink($path)) {
250        return if $! == ENOENT;
251        dief("cannot unlink(%s): %s", $path, $!);
252    }
253}
254
255#
256# use the pid file to find out the program status
257#
258
259my %pf_status_options = (
260    freshness => { optional => 1, type => SCALAR, regex => $_NumberRegexp },
261    timeout   => { optional => 1, type => SCALAR, regex => $_NumberRegexp },
262);
263
264sub pf_status ($@) {
265    my($path, %option, $maxtime, $status, $message, $lsb);
266
267    $path = shift(@_);
268    %option = validate(@_, \%pf_status_options) if @_;
269    if ($option{timeout}) {
270        # check multiple times until success or timeout
271        $maxtime = Time::HiRes::time() + $option{timeout};
272        while (1) {
273            ($status, $message, $lsb) = _status($path, %option);
274            last if $status or Time::HiRes::time() > $maxtime;
275            Time::HiRes::sleep(0.1);
276        }
277    } else {
278        # check only once
279        ($status, $message, $lsb) = _status($path, %option);
280    }
281    return($status, $message, $lsb) if wantarray();
282    return($status);
283}
284
285#
286# use the pid file to make the program quit
287#
288
289my %pf_quit_options = (
290    callback  => { optional => 1, type => CODEREF },
291    linger    => { optional => 1, type => SCALAR, regex => $_NumberRegexp },
292    kill      => { optional => 1, type => SCALAR },
293);
294
295sub pf_quit ($@) {
296    my($path, %option, $fh, $data, $pid);
297
298    $path = shift(@_);
299    %option = validate(@_, \%pf_quit_options) if @_;
300    $option{callback} ||= sub { printf("%s\n", $_[0]) };
301    $option{linger} ||= 5;
302    unless (sysopen($fh, $path, O_RDWR)) {
303        if ($! == ENOENT) {
304            $option{callback}->("does not seem to be running (no pid file)");
305            return;
306        }
307        dief("cannot sysopen(%s, O_RDWR): %s", $path, $!);
308    }
309    $data = _read($path, $fh, 1);
310    if ($data eq "") {
311        # this can happen while setting the pid file, between open and lock in pf_set()
312        # but what can we do? we wait a bit, try again and complain if itis still empty
313        sleep(1);
314        $data = _read($path, $fh, 1);
315    }
316    if ($data =~ /^(\d+)(\s+([a-z]+))?\s*$/s) {
317        $pid = $1;
318    } else {
319        dief("unexpected pid file contents in %s: %s", $path, $data);
320    }
321    _kill($path, $fh, $pid, %option);
322    # in any case, we make sure that _this_ pid file does not exist anymore
323    # we have to be extra careful to make sure it is the same pid file
324    unless (sysopen($fh, $path, O_RDWR)) {
325        return if $! == ENOENT;
326        dief("cannot sysopen(%s, O_RDWR): %s", $path, $!);
327    }
328    $data = _read($path, $fh);
329    return if $data eq "";
330    if ($data =~ /^(\d+)(\s+([a-z]+))?\s*$/s) {
331        return unless $1 == $pid;
332    } else {
333        dief("unexpected pid file contents in %s: %s", $path, $data);
334    }
335    # same pid so assume same pid file... remove it
336    $option{callback}->("removing stale pid file: $path");
337    unless (unlink($path)) {
338        # take into account a potential race condition...
339        dief("cannot unlink(%s): %s", $path, $!) unless $! == ENOENT;
340    }
341}
342
343#
344# sleep for some time, taking into account an optional pid file
345#
346
347my %pf_sleep_options = (
348    time => { optional => 1, type => SCALAR, regex => $_NumberRegexp },
349);
350
351sub pf_sleep ($@) {
352    my($path, %option, $end, $sleep);
353
354    $path = shift(@_);
355    %option = validate(@_, \%pf_sleep_options) if @_;
356    $option{time} = 1 unless defined($option{time});
357    if ($path) {
358        $end = Time::HiRes::time() + $option{time} if $option{time};
359        while (1) {
360            return(0) if pf_check($path) eq "quit";
361            pf_touch($path);
362            last unless $option{time};
363            $sleep = $end - Time::HiRes::time();
364            last if $sleep <= 0;
365            $sleep = 1 if $sleep > 1;
366            Time::HiRes::sleep($sleep);
367        }
368    } else {
369        Time::HiRes::sleep($option{time}) if $option{time};
370    }
371    return(1);
372}
373
374#
375# export control
376#
377
378sub import : method {
379    my($pkg, %exported);
380
381    $pkg = shift(@_);
382    grep($exported{$_}++, map("pf_$_",
383                              qw(set check touch unset status quit sleep)));
384    export_control(scalar(caller()), $pkg, \%exported, @_);
385}
386
3871;
388
389__DATA__
390
391=head1 NAME
392
393No::Worries::PidFile - pid file handling without worries
394
395=head1 SYNOPSIS
396
397  use No::Worries::PidFile qw(*);
398
399  # idiomatic daemon code
400  pf_set($pidfile);
401  while (1) {
402      ...
403      $action = pf_check($pidfile);
404      last if $action eq "quit";
405      pf_touch($pidfile);
406      ...
407  }
408  pf_unset($pidfile);
409
410  # idiomatic daemon code with sleeping
411  pf_set($pidfile);
412  while (1) {
413      ...
414      pf_sleep($pidfile, time => 5) or last;
415      ...
416  }
417  pf_unset($pidfile);
418
419  # here is how to handle a --status option
420  if ($Option{status}) {
421      ($status, $message, $code) = pf_status($pidfile, freshness => 10);
422      printf("myprog %s\n", $message);
423      exit($code);
424  }
425
426  # here is how to handle a --quit option
427  if ($Option{quit}) {
428      pf_quit($pidfile,
429          linger   => 10,
430          callback => sub { printf("myprog %s\n", $_[0]) },
431      );
432  }
433
434=head1 DESCRIPTION
435
436This module eases pid file handling by providing high level functions to set,
437check, touch and unset pid files. All the functions die() on error.
438
439The pid file usually contains the process id on a single line, followed by a
440newline. However, it can also be followed by an optional I<action>, also
441followed by a newline. This allows some kind of inter-process communication: a
442process using pf_quit() will append the C<quit> I<action> to the pid file and
443the owning process will detect this via pf_check().
444
445All the functions properly handle concurrency. For instance, when two
446processes start at the exact same time and call pf_set(), only one will
447succeed and the other one will get an error.
448
449Since an existing pid file will make pf_set() fail, it is very important to
450remove the pid file in all situations, including errors. The recommended way
451to do so is to use an END block:
452
453  # we need to know about transient processes
454  use No::Worries::Proc qw();
455  # we need to record what needs to be cleaned up
456  our(%NeedsCleanup);
457  # we set the pid file here and remember to clean it up
458  pf_set($pidfile);
459  $NeedsCleanup{pidfile} = 1;
460  # ... anything can happen here ...
461  # cleanup code in an END block
462  END {
463      # transient processes do not need cleanup
464      return if $No::Worries::Proc::Transient;
465      # cleanup the pid file if needed
466      pf_unset($pidfile) if $NeedsCleanup{pidfile};
467  }
468
469=head1 FUNCTIONS
470
471This module provides the following functions (none of them being exported by
472default):
473
474=over
475
476=item pf_set(PATH[, OPTIONS])
477
478set the pid file by writing the given pid at the given path; supported
479options:
480
481=over
482
483=item * C<pid>: the pid to use (default: C<$$>)
484
485=back
486
487=item pf_check(PATH[, OPTIONS])
488
489check the pid file and make sure the given pid is present, also return the
490I<action> in the pid file or the empty string; supported options:
491
492=over
493
494=item * C<pid>: the pid to use (default: C<$$>)
495
496=back
497
498=item pf_unset(PATH)
499
500unset the pid file by removing the given path
501
502=item pf_touch(PATH)
503
504touch the pid file (i.e. update the file modification time) to show that the
505owning process is alive
506
507=item pf_sleep(PATH[, OPTIONS])
508
509check and touch the pid file and eventually sleep for the givent amount of
510time, returning true if the program should continue or false if it has been
511told to stop via pf_quit(); supported options:
512
513=over
514
515=item * C<time>: the time to sleep (default: 1, can be fractional)
516
517=back
518
519=item pf_status(PATH[, OPTIONS])
520
521use information from the pid file (including its last modification time) to
522guess the status of the corresponding process, return the status (true means
523that the process seems to be running); in list context, also return an
524informative message and an LSB compatible exit code; supported options:
525
526=over
527
528=item * C<freshness>: maximum age allowed for an active pid file
529
530=item * C<timeout>: check multiple times until success or timeout
531
532=back
533
534=item pf_quit(PATH[, OPTIONS])
535
536tell the process corresponding to the pid file to quit (setting its I<action>
537to C<quit>), wait a bit to check that it indeed stopped and kill it using
538L<No::Worries::Proc>'s proc_terminate() is everything else fails; supported
539options:
540
541=over
542
543=item * C<callback>: code that will be called with information to report
544
545=item * C<linger>: maximum time to wait after having told the process to quit
546(default: 5)
547
548=item * C<kill>: kill specification to use when killing the process
549
550=back
551
552=back
553
554=head1 SEE ALSO
555
556L<http://refspecs.linuxbase.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html>,
557L<No::Worries>,
558L<No::Worries::Proc>.
559
560=head1 AUTHOR
561
562Lionel Cons L<http://cern.ch/lionel.cons>
563
564Copyright (C) CERN 2012-2019
565