1 /* utimecmp.c -- compare file timestamps
2 
3    Copyright (C) 2004-2007, 2009-2020 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 /* Written by Paul Eggert.  */
19 
20 #include <config.h>
21 
22 #include "utimecmp.h"
23 
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdbool.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <sys/stat.h>
30 #include <time.h>
31 #include <unistd.h>
32 
33 #include "dirname.h"
34 #include "hash.h"
35 #include "intprops.h"
36 #include "stat-time.h"
37 #include "verify.h"
38 
/* Fallback definition of MAX, used only when no earlier header has
   already defined it.  Each argument may be evaluated more than once,
   so do not pass expressions with side effects.  */
#ifndef MAX
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* One billion.  Used throughout this file as the number of
   nanoseconds in one second.  */
#define BILLION (1000 * 1000 * 1000)
44 
/* Best possible resolution, in nanoseconds, that utimens can set and
   stat can return, due to system-call limitations.  It must be a power
   of 10 that is no greater than 1 billion.  Exactly one of the
   branches below defines it.  */
#if HAVE_UTIMENSAT
/* utimensat is available: 1 ns.  */
enum { SYSCALL_RESOLUTION = 1 };
#elif defined _WIN32 && ! defined __CYGWIN__
/* On native Windows, file times have 100 ns resolution. See
   <https://docs.microsoft.com/en-us/windows/desktop/api/minwinbase/ns-minwinbase-filetime>  */
enum { SYSCALL_RESOLUTION = 100 };
#elif ((HAVE_FUTIMESAT || HAVE_WORKING_UTIMES)                  \
       && (defined HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC             \
           || defined HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC     \
           || defined HAVE_STRUCT_STAT_ST_ATIMENSEC             \
           || defined HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC  \
           || defined HAVE_STRUCT_STAT_ST_SPARE1))
/* Only futimesat or utimes can set timestamps, and struct stat has
   some subsecond member: 1000 ns, i.e., one microsecond.  */
enum { SYSCALL_RESOLUTION = 1000 };
#else
/* None of the above applies: whole seconds only.  */
enum { SYSCALL_RESOLUTION = BILLION };
#endif
64 
/* Describe a file system and its timestamp resolution in nanoseconds.
   Entries of this type are cached in a hash table keyed by DEV, so
   that the resolution of each file system is worked out at most once
   when memory permits.  */
struct fs_res
{
  /* Device number of file system.  This is the hash key.  */
  dev_t dev;

  /* An upper bound on the timestamp resolution of this file system,
     ignoring any resolution that cannot be set via utimens.  It is
     represented by an integer count of nanoseconds.  It must be
     either 2 billion, or a power of 10 that is no greater than a
     billion and is no less than SYSCALL_RESOLUTION.  A value of
     2 billion stands for a resolution of two seconds.  */
  int resolution;

  /* True if RESOLUTION is known to be exact, and is not merely an
     upper bound on the true resolution.  */
  bool exact;
};
82 
83 /* Hash some device info.  */
84 static size_t
dev_info_hash(void const * x,size_t table_size)85 dev_info_hash (void const *x, size_t table_size)
86 {
87   struct fs_res const *p = x;
88 
89   /* Beware signed arithmetic gotchas.  */
90   if (TYPE_SIGNED (dev_t) && SIZE_MAX < MAX (INT_MAX, TYPE_MAXIMUM (dev_t)))
91     {
92       uintmax_t dev = p->dev;
93       return dev % table_size;
94     }
95 
96   return p->dev % table_size;
97 }
98 
99 /* Compare two dev_info structs.  */
100 static bool
dev_info_compare(void const * x,void const * y)101 dev_info_compare (void const *x, void const *y)
102 {
103   struct fs_res const *a = x;
104   struct fs_res const *b = y;
105   return a->dev == b->dev;
106 }
107 
108 /* Return -1, 0, 1 based on whether the destination file (relative
109    to openat-like directory file descriptor DFD with name
110    DST_NAME and status DST_STAT) is older than SRC_STAT, the same age
111    as SRC_STAT, or newer than SRC_STAT, respectively.
112 
113    DST_NAME may be NULL if OPTIONS is 0.
114 
115    If OPTIONS & UTIMECMP_TRUNCATE_SOURCE, do the comparison after SRC is
116    converted to the destination's timestamp resolution as filtered through
117    utimens.  In this case, return -2 if the exact answer cannot be
118    determined; this can happen only if the timestamps are very close and
119    there is some trouble accessing the file system (e.g., the user does not
120    have permission to futz with the destination's timestamps).  */
121 
122 int
utimecmp(char const * dst_name,struct stat const * dst_stat,struct stat const * src_stat,int options)123 utimecmp (char const *dst_name,
124           struct stat const *dst_stat,
125           struct stat const *src_stat,
126           int options)
127 {
128   return utimecmpat (AT_FDCWD, dst_name, dst_stat, src_stat, options);
129 }
130 
131 int
utimecmpat(int dfd,char const * dst_name,struct stat const * dst_stat,struct stat const * src_stat,int options)132 utimecmpat (int dfd, char const *dst_name,
133             struct stat const *dst_stat,
134             struct stat const *src_stat,
135             int options)
136 {
137   /* Things to watch out for:
138 
139      The code uses a static hash table internally and is not safe in the
140      presence of signals, multiple threads, etc.  However, memory pressure
141      that prevents use of the hash table is not fatal - we just fall back
142      to redoing the computations on every call in that case.
143 
144      int and long int might be 32 bits.  Many of the calculations store
145      numbers up to 2 billion, and multiply by 10; they have to avoid
146      multiplying 2 billion by 10, as this exceeds 32-bit capabilities.
147 
148      time_t might be unsigned.  */
149 
150   verify (TYPE_IS_INTEGER (time_t));
151 
152   /* Destination and source timestamps.  */
153   time_t dst_s = dst_stat->st_mtime;
154   time_t src_s = src_stat->st_mtime;
155   int dst_ns = get_stat_mtime_ns (dst_stat);
156   int src_ns = get_stat_mtime_ns (src_stat);
157 
158   if (options & UTIMECMP_TRUNCATE_SOURCE)
159     {
160       /* Look up the timestamp resolution for the destination device.  */
161 
162       /* Hash table for caching information learned about devices.  */
163       static Hash_table *ht;
164 
165       /* Information about the destination file system.  */
166       static struct fs_res *new_dst_res;
167       struct fs_res *dst_res = NULL;
168       struct fs_res tmp_dst_res;
169 
170       /* timestamp resolution in nanoseconds.  */
171       int res;
172 
173       /* Quick exit, if possible.  Since the worst resolution is 2
174          seconds, anything that differs by more than that does not
175          needs source truncation.  */
176       if (dst_s == src_s && dst_ns == src_ns)
177         return 0;
178       if (dst_s <= src_s - 2)
179         return -1;
180       if (src_s <= dst_s - 2)
181         return 1;
182 
183       /* Try to do a hash lookup, but fall back to stack variables and
184          recomputation on low memory situations.  */
185       if (! ht)
186         ht = hash_initialize (16, NULL, dev_info_hash, dev_info_compare, free);
187       if (ht)
188         {
189           if (! new_dst_res)
190             {
191               new_dst_res = malloc (sizeof *new_dst_res);
192               if (!new_dst_res)
193                 goto low_memory;
194               new_dst_res->resolution = 2 * BILLION;
195               new_dst_res->exact = false;
196             }
197           new_dst_res->dev = dst_stat->st_dev;
198           dst_res = hash_insert (ht, new_dst_res);
199           if (! dst_res)
200             goto low_memory;
201 
202           if (dst_res == new_dst_res)
203             {
204               /* NEW_DST_RES is now in use in the hash table, so allocate a
205                  new entry next time.  */
206               new_dst_res = NULL;
207             }
208         }
209       else
210         {
211         low_memory:
212           if (ht)
213             {
214               tmp_dst_res.dev = dst_stat->st_dev;
215               dst_res = hash_lookup (ht, &tmp_dst_res);
216             }
217           if (!dst_res)
218             {
219               dst_res = &tmp_dst_res;
220               dst_res->resolution = 2 * BILLION;
221               dst_res->exact = false;
222             }
223         }
224 
225       res = dst_res->resolution;
226 
227 #ifdef _PC_TIMESTAMP_RESOLUTION
228       /* If the system will tell us the resolution, we're set!  */
229       if (! dst_res->exact)
230         {
231           res = -1;
232           if (dfd == AT_FDCWD)
233             res = pathconf (dst_name, _PC_TIMESTAMP_RESOLUTION);
234           else
235             {
236               char *dstdir = mdir_name (dst_name);
237               if (dstdir)
238                 {
239                   int destdirfd = openat (dfd, dstdir,
240                                           O_SEARCH | O_CLOEXEC | O_DIRECTORY);
241                   if (0 <= destdirfd)
242                     {
243                       res = fpathconf (destdirfd, _PC_TIMESTAMP_RESOLUTION);
244                       close (destdirfd);
245                     }
246                   free (dstdir);
247                 }
248             }
249           if (0 < res)
250             {
251               dst_res->resolution = res;
252               dst_res->exact = true;
253             }
254         }
255 #endif
256 
257       if (! dst_res->exact)
258         {
259           /* This file system's resolution is not known exactly.
260              Deduce it, and store the result in the hash table.  */
261 
262           time_t dst_a_s = dst_stat->st_atime;
263           time_t dst_c_s = dst_stat->st_ctime;
264           time_t dst_m_s = dst_s;
265           int dst_a_ns = get_stat_atime_ns (dst_stat);
266           int dst_c_ns = get_stat_ctime_ns (dst_stat);
267           int dst_m_ns = dst_ns;
268 
269           /* Set RES to an upper bound on the file system resolution
270              (after truncation due to SYSCALL_RESOLUTION) by inspecting
271              the atime, ctime and mtime of the existing destination.
272              We don't know of any file system that stores atime or
273              ctime with a higher precision than mtime, so it's valid to
274              look at them too.  */
275           {
276             bool odd_second = (dst_a_s | dst_c_s | dst_m_s) & 1;
277 
278             if (SYSCALL_RESOLUTION == BILLION)
279               {
280                 if (odd_second | dst_a_ns | dst_c_ns | dst_m_ns)
281                   res = BILLION;
282               }
283             else
284               {
285                 int a = dst_a_ns;
286                 int c = dst_c_ns;
287                 int m = dst_m_ns;
288 
289                 /* Write it this way to avoid mistaken GCC warning
290                    about integer overflow in constant expression.  */
291                 int SR10 = SYSCALL_RESOLUTION;  SR10 *= 10;
292 
293                 if ((a % SR10 | c % SR10 | m % SR10) != 0)
294                   res = SYSCALL_RESOLUTION;
295                 else
296                   for (res = SR10, a /= SR10, c /= SR10, m /= SR10;
297                        (res < dst_res->resolution
298                         && (a % 10 | c % 10 | m % 10) == 0);
299                        res *= 10, a /= 10, c /= 10, m /= 10)
300                     if (res == BILLION)
301                       {
302                         if (! odd_second)
303                           res *= 2;
304                         break;
305                       }
306               }
307 
308             dst_res->resolution = res;
309           }
310 
311           if (SYSCALL_RESOLUTION < res)
312             {
313               struct timespec timespec[2];
314               struct stat dst_status;
315 
316               /* Ignore source timestamp information that must necessarily
317                  be lost when filtered through utimens.  */
318               src_ns -= src_ns % SYSCALL_RESOLUTION;
319 
320               /* If the timestamps disagree widely enough, there's no need
321                  to interrogate the file system to deduce the exact
322                  timestamp resolution; return the answer directly.  */
323               {
324                 time_t s = src_s & ~ (res == 2 * BILLION ? 1 : 0);
325                 if (src_s < dst_s || (src_s == dst_s && src_ns <= dst_ns))
326                   return 1;
327                 if (dst_s < s
328                     || (dst_s == s && dst_ns < src_ns - src_ns % res))
329                   return -1;
330               }
331 
332               /* Determine the actual timestamp resolution for the
333                  destination file system (after truncation due to
334                  SYSCALL_RESOLUTION) by setting the access timestamp of the
335                  destination to the existing access time, except with
336                  trailing nonzero digits.  */
337 
338               timespec[0].tv_sec = dst_a_s;
339               timespec[0].tv_nsec = dst_a_ns;
340               timespec[1].tv_sec = dst_m_s | (res == 2 * BILLION);
341               timespec[1].tv_nsec = dst_m_ns + res / 9;
342 
343               if (utimensat (dfd, dst_name, timespec, AT_SYMLINK_NOFOLLOW))
344                 return -2;
345 
346               /* Read the modification time that was set.  */
347               {
348                 int stat_result
349                   = fstatat (dfd, dst_name, &dst_status, AT_SYMLINK_NOFOLLOW);
350 
351                 if (stat_result
352                     | (dst_status.st_mtime ^ dst_m_s)
353                     | (get_stat_mtime_ns (&dst_status) ^ dst_m_ns))
354                   {
355                     /* The modification time changed, or we can't tell whether
356                        it changed.  Change it back as best we can.  */
357                     timespec[1].tv_sec = dst_m_s;
358                     timespec[1].tv_nsec = dst_m_ns;
359                     utimensat (dfd, dst_name, timespec, AT_SYMLINK_NOFOLLOW);
360                   }
361 
362                 if (stat_result != 0)
363                   return -2;
364               }
365 
366               /* Determine the exact resolution from the modification time
367                  that was read back.  */
368               {
369                 int old_res = res;
370                 int a = (BILLION * (dst_status.st_mtime & 1)
371                          + get_stat_mtime_ns (&dst_status));
372 
373                 res = SYSCALL_RESOLUTION;
374 
375                 for (a /= res; a % 10 == 0; a /= 10)
376                   {
377                     if (res == BILLION)
378                       {
379                         res *= 2;
380                         break;
381                       }
382                     res *= 10;
383                     if (res == old_res)
384                       break;
385                   }
386               }
387             }
388 
389           dst_res->resolution = res;
390           dst_res->exact = true;
391         }
392 
393       /* Truncate the source's timestamp according to the resolution.  */
394       src_s &= ~ (res == 2 * BILLION ? 1 : 0);
395       src_ns -= src_ns % res;
396     }
397 
398   /* Compare the timestamps and return -1, 0, 1 accordingly.  */
399   return (dst_s < src_s ? -1
400           : dst_s > src_s ? 1
401           : dst_ns < src_ns ? -1
402           : dst_ns > src_ns);
403 }
404