1 /* Temporary files with automatic cleanup.
2    Copyright (C) 2006-2021 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2006.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #include <config.h>
19 
20 /* Specification.  */
21 #include "clean-temp-simple.h"
22 #include "clean-temp-private.h"
23 
24 #include <errno.h>
25 #include <limits.h>
26 #include <signal.h>
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "error.h"
33 #include "fatal-signal.h"
34 #include "asyncsafe-spin.h"
35 #include "glthread/lock.h"
36 #include "thread-optim.h"
37 #include "gl_list.h"
38 #include "gl_linkedhash_list.h"
39 #include "gettext.h"
40 
41 #define _(str) gettext (str)
42 
43 
/* Lock that protects the file_cleanup_list from concurrent modification in
   different threads.  register_temporary_file and unregister_temporary_file
   take it, and only when gl_multithreaded () returns true.  */
gl_lock_define_initialized (static, file_cleanup_list_lock)

/* List of all temporary files without temporary directories.
   It is NULL until register_temporary_file creates it on first use.  The
   elements are strdup'ed copies of the registered file names; they are freed
   again by unregister_temporary_file.  The signal handler cleanup_action
   reads this variable and iterates over the list.  */
static gl_list_t /* <char *> */ volatile file_cleanup_list;


/* List of all temporary directories.
   In the code visible here it is only read: cleanup_action walks the entries
   tempdir_list[0 .. tempdir_count-1] and skips those that are NULL.  It has
   external linkage, so presumably other compilation units fill it in.  */
struct all_tempdirs dir_cleanup_list /* = { NULL, 0, 0 } */;


/* List of all open file descriptors to temporary files.
   In the code visible here it is only read: cleanup_action closes every
   element through clean_temp_asyncsafe_close, after checking that the list
   is not NULL.  It has external linkage, so presumably other compilation
   units create and fill it.  */
gl_list_t /* <closeable_fd *> */ volatile descriptors;
58 
59 
60 /* For the subdirs and for the files, we use a gl_list_t of type LINKEDHASH.
61    Why?  We need a data structure that
62 
63      1) Can contain an arbitrary number of 'char *' values.  The strings
64         are compared via strcmp, not pointer comparison.
65      2) Has insertion and deletion operations that are fast: ideally O(1),
66         or possibly O(log n).  This is important for GNU sort, which may
67         create a large number of temporary files.
68      3) Allows iteration through all elements from within a signal handler.
69      4) May or may not allow duplicates.  It doesn't matter here, since
70         any file or subdir can only be removed once.
71 
72    Criterion 1) would allow any gl_list_t or gl_oset_t implementation.
73 
74    Criterion 2) leaves only GL_LINKEDHASH_LIST, GL_TREEHASH_LIST, or
75    GL_TREE_OSET.
76 
77    Criterion 3) puts at disadvantage GL_TREEHASH_LIST and GL_TREE_OSET.
78    Namely, iteration through the elements of a binary tree requires access
   to many ->left, ->right, ->parent pointers.  However, the rebalancing
   code for insertion and deletion in an AVL or red-black tree is so
   complicated that we cannot assume that ->left, ->right, ->parent pointers
82    are in a consistent state throughout these operations.  Therefore, to
83    avoid a crash in the signal handler, all destructive operations to the
84    lists would have to be protected by a
85        block_fatal_signals ();
86        ...
87        unblock_fatal_signals ();
88    pair.  Which causes extra system calls.
89 
90    Criterion 3) would also discourage GL_ARRAY_LIST and GL_CARRAY_LIST,
91    if they were not already excluded.  Namely, these implementations use
   xrealloc(), leaving a time window in which the list->elements pointer
   points to already deallocated memory.  To avoid a crash in the signal
   handler at such a moment, all destructive operations would have to be
   protected by block/unblock_fatal_signals (), in this case too.
96 
97    A list of type GL_LINKEDHASH_LIST without duplicates fulfills all
98    requirements:
99      2) Insertion and deletion are O(1) on average.
100      3) The gl_list_iterator, gl_list_iterator_next implementations do
101         not trigger memory allocations, nor other system calls, and are
102         therefore safe to be called from a signal handler.
103         Furthermore, since SIGNAL_SAFE_LIST is defined, the implementation
104         of the destructive functions ensures that the list structure is
105         safe to be traversed at any moment, even when interrupted by an
106         asynchronous signal.
107  */
108 
109 /* String equality and hash code functions used by the lists.  */
110 
/* Equality predicate for list elements.
   X1 and X2 are interpreted as NUL-terminated strings and compared by
   content (via strcmp), not by address.
   Return true if both strings are equal, false otherwise.  */
bool
clean_temp_string_equals (const void *x1, const void *x2)
{
  const char *lhs = x1;
  const char *rhs = x2;

  return !strcmp (lhs, rhs);
}
118 
/* Number of bits in a size_t.  */
#define SIZE_BITS (sizeof (size_t) * CHAR_BIT)

/* Hash code function for NUL-terminated char* strings, using
   the method described by Bruno Haible.
   See https://www.haible.de/bruno/hashfunc.html.
   Starting from 0, each character in turn rotates the accumulated value
   left by 9 bits and then adds the character to it.  The empty string
   therefore hashes to 0.  */
size_t
clean_temp_string_hash (const void *x)
{
  size_t hash = 0;
  const char *p = x;

  while (*p != '\0')
    {
      /* Rotate left by 9 bits, so that no bits of the state are lost.  */
      size_t rotated = (hash << 9) | (hash >> (SIZE_BITS - 9));

      hash = *p + rotated;
      p++;
    }

  return hash;
}
135 
136 
137 /* The set of fatal signal handlers.
138    Cached here because we are not allowed to call get_fatal_signal_set ()
139    from a signal handler.  */
140 static const sigset_t *fatal_signal_set /* = NULL */;
141 
142 static void
init_fatal_signal_set(void)143 init_fatal_signal_set (void)
144 {
145   if (fatal_signal_set == NULL)
146     fatal_signal_set = get_fatal_signal_set ();
147 }
148 
149 
150 /* Close a file descriptor.
151    Avoids race conditions with normal thread code or signal-handler code that
152    might want to close the same file descriptor.  */
153 _GL_ASYNC_SAFE int
clean_temp_asyncsafe_close(struct closeable_fd * element)154 clean_temp_asyncsafe_close (struct closeable_fd *element)
155 {
156   sigset_t saved_mask;
157   int ret;
158   int saved_errno;
159 
160   asyncsafe_spin_lock (&element->lock, fatal_signal_set, &saved_mask);
161   if (!element->closed)
162     {
163       ret = close (element->fd);
164       saved_errno = errno;
165       element->closed = true;
166     }
167   else
168     {
169       ret = 0;
170       saved_errno = 0;
171     }
172   asyncsafe_spin_unlock (&element->lock, &saved_mask);
173   element->done = true;
174 
175   errno = saved_errno;
176   return ret;
177 }
/* Initializations for use of clean_temp_asyncsafe_close.
   Makes sure that the cached fatal_signal_set, which
   clean_temp_asyncsafe_close passes to asyncsafe_spin_lock, is filled in.
   Calling this function repeatedly is harmless: init_fatal_signal_set only
   acts while the cache is still NULL.  */
void
clean_temp_init_asyncsafe_close (void)
{
  init_fatal_signal_set ();
}
184 
185 /* The signal handler.  It gets called asynchronously.  */
186 static _GL_ASYNC_SAFE void
cleanup_action(int sig _GL_UNUSED)187 cleanup_action (int sig _GL_UNUSED)
188 {
189   size_t i;
190 
191   /* First close all file descriptors to temporary files.  */
192   {
193     gl_list_t fds = descriptors;
194 
195     if (fds != NULL)
196       {
197         gl_list_iterator_t iter;
198         const void *element;
199 
200         iter = gl_list_iterator (fds);
201         while (gl_list_iterator_next (&iter, &element, NULL))
202           {
203             clean_temp_asyncsafe_close ((struct closeable_fd *) element);
204           }
205         gl_list_iterator_free (&iter);
206       }
207   }
208 
209   {
210     gl_list_t files = file_cleanup_list;
211 
212     if (files != NULL)
213       {
214         gl_list_iterator_t iter;
215         const void *element;
216 
217         iter = gl_list_iterator (files);
218         while (gl_list_iterator_next (&iter, &element, NULL))
219           {
220             const char *file = (const char *) element;
221             unlink (file);
222           }
223         gl_list_iterator_free (&iter);
224       }
225   }
226 
227   for (i = 0; i < dir_cleanup_list.tempdir_count; i++)
228     {
229       struct tempdir *dir = dir_cleanup_list.tempdir_list[i];
230 
231       if (dir != NULL)
232         {
233           gl_list_iterator_t iter;
234           const void *element;
235 
236           /* First cleanup the files in the subdirectories.  */
237           iter = gl_list_iterator (dir->files);
238           while (gl_list_iterator_next (&iter, &element, NULL))
239             {
240               const char *file = (const char *) element;
241               unlink (file);
242             }
243           gl_list_iterator_free (&iter);
244 
245           /* Then cleanup the subdirectories.  */
246           iter = gl_list_iterator (dir->subdirs);
247           while (gl_list_iterator_next (&iter, &element, NULL))
248             {
249               const char *subdir = (const char *) element;
250               rmdir (subdir);
251             }
252           gl_list_iterator_free (&iter);
253 
254           /* Then cleanup the temporary directory itself.  */
255           rmdir (dir->dirname);
256         }
257     }
258 }
259 
260 
261 /* Set to -1 if initialization of this facility failed.  */
262 static int volatile init_failed /* = 0 */;
263 
264 /* Initializes this facility.  */
265 static void
do_clean_temp_init(void)266 do_clean_temp_init (void)
267 {
268   /* Initialize the data used by the cleanup handler.  */
269   init_fatal_signal_set ();
270   /* Register the cleanup handler.  */
271   if (at_fatal_signal (&cleanup_action) < 0)
272     init_failed = -1;
273 }
274 
/* Ensure that do_clean_temp_init is called once only.  */
gl_once_define(static, clean_temp_once)

/* Initializes this facility upon first use.
   May be called any number of times; the actual initialization,
   do_clean_temp_init, is run through the once-control clean_temp_once.
   Return 0 upon success, or -1 if there was a memory allocation problem.  */
int
clean_temp_init (void)
{
  gl_once (clean_temp_once, do_clean_temp_init);
  /* init_failed is -1 if do_clean_temp_init could not register the cleanup
     handler, and 0 otherwise.  */
  return init_failed;
}
286 
287 
/* Remove the file ABSOLUTE_FILE_NAME, with optional error message.
   If the removal fails, CLEANUP_VERBOSE is true, and the failure is
   something other than ENOENT (no such file), print an error message and
   return -1.
   Return 0 in all other cases.  Note that this includes a failed removal
   when CLEANUP_VERBOSE is false.  */
int
clean_temp_unlink (const char *absolute_file_name, bool cleanup_verbose)
{
  /* The removal worked: nothing to report.  */
  if (unlink (absolute_file_name) >= 0)
    return 0;

  /* The caller does not want failures to be reported.  */
  if (!cleanup_verbose)
    return 0;

  /* A file that does not exist needs no removal.  */
  if (errno == ENOENT)
    return 0;

  error (0, errno,
         _("cannot remove temporary file %s"), absolute_file_name);
  return -1;
}
302 
303 
304 /* ============= Temporary files without temporary directories ============= */
305 
306 /* Register the given ABSOLUTE_FILE_NAME as being a file that needs to be
307    removed.
308    Should be called before the file ABSOLUTE_FILE_NAME is created.
309    Return 0 upon success, or -1 if there was a memory allocation problem.  */
310 int
register_temporary_file(const char * absolute_file_name)311 register_temporary_file (const char *absolute_file_name)
312 {
313   bool mt = gl_multithreaded ();
314 
315   if (mt) gl_lock_lock (file_cleanup_list_lock);
316 
317   int ret = 0;
318 
319   /* Make sure that this facility and the file_cleanup_list are initialized.  */
320   if (file_cleanup_list == NULL)
321     {
322       if (clean_temp_init () < 0)
323         {
324           ret = -1;
325           goto done;
326         }
327       file_cleanup_list =
328         gl_list_nx_create_empty (GL_LINKEDHASH_LIST,
329                                  clean_temp_string_equals,
330                                  clean_temp_string_hash,
331                                  NULL, false);
332       if (file_cleanup_list == NULL)
333         {
334           ret = -1;
335           goto done;
336         }
337     }
338 
339   /* Add absolute_file_name to file_cleanup_list, without duplicates.  */
340   if (gl_list_search (file_cleanup_list, absolute_file_name) == NULL)
341     {
342       char *absolute_file_name_copy = strdup (absolute_file_name);
343       if (absolute_file_name_copy == NULL)
344         {
345           ret = -1;
346           goto done;
347         }
348       if (gl_list_nx_add_first (file_cleanup_list, absolute_file_name_copy)
349           == NULL)
350         {
351           free (absolute_file_name_copy);
352           ret = -1;
353           goto done;
354         }
355     }
356 
357  done:
358   if (mt) gl_lock_unlock (file_cleanup_list_lock);
359 
360   return ret;
361 }
362 
363 /* Unregister the given ABSOLUTE_FILE_NAME as being a file that needs to be
364    removed.
365    Should be called when the file ABSOLUTE_FILE_NAME could not be created.  */
366 void
unregister_temporary_file(const char * absolute_file_name)367 unregister_temporary_file (const char *absolute_file_name)
368 {
369   bool mt = gl_multithreaded ();
370 
371   if (mt) gl_lock_lock (file_cleanup_list_lock);
372 
373   gl_list_t list = file_cleanup_list;
374   if (list != NULL)
375     {
376       gl_list_node_t node = gl_list_search (list, absolute_file_name);
377       if (node != NULL)
378         {
379           char *old_string = (char *) gl_list_node_value (list, node);
380 
381           gl_list_remove_node (list, node);
382           free (old_string);
383         }
384     }
385 
386   if (mt) gl_lock_unlock (file_cleanup_list_lock);
387 }
388 
/* Remove the given ABSOLUTE_FILE_NAME and unregister it.
   CLEANUP_VERBOSE determines whether errors are reported to standard error.
   The file is unregistered regardless of whether its removal succeeded.
   Return the result of clean_temp_unlink: 0 upon success, or -1 if there
   was some problem.  */
int
cleanup_temporary_file (const char *absolute_file_name, bool cleanup_verbose)
{
  const int status = clean_temp_unlink (absolute_file_name, cleanup_verbose);

  unregister_temporary_file (absolute_file_name);

  return status;
}
402