1 /* extent-scan.c -- core functions for scanning extents
2    Copyright (C) 2010-2018 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.
16 
17    Written by Jie Liu (jeff.liu@oracle.com).  */
18 
19 #include <config.h>
20 #include <sys/types.h>
21 #include <sys/ioctl.h>
22 #include <sys/utsname.h>
23 #include <assert.h>
24 
25 #include "system.h"
26 #include "extent-scan.h"
27 #include "fiemap.h"
28 #include "xstrtol.h"
29 
30 
31 /* Work around Linux kernel issues on BTRFS and EXT4.  */
32 static bool
extent_need_sync(void)33 extent_need_sync (void)
34 {
35   /* For now always return true, to be on the safe side.
36      If/when FIEMAP semantics are well defined (before SEEK_HOLE support
37      is usable) and kernels implementing them are in use, we may relax
38      this once again.  */
39   return true;
40 
41 #if FIEMAP_BEHAVIOR_IS_DEFINED_AND_USABLE
42   static int need_sync = -1;
43 
44   if (need_sync == -1)
45     {
46       struct utsname name;
47       need_sync = 0; /* No workaround by default.  */
48 
49 # ifdef __linux__
50       if (uname (&name) != -1 && STRNCMP_LIT (name.release, "2.6.") == 0)
51         {
52            unsigned long val;
53            if (xstrtoul (name.release + 4, NULL, 10, &val, NULL) == LONGINT_OK)
54              {
55                if (val < 39)
56                  need_sync = 1;
57              }
58         }
59 # endif
60     }
61 
62   return need_sync;
63 #endif
64 }
65 
66 /* Allocate space for struct extent_scan, initialize the entries if
67    necessary and return it as the input argument of extent_scan_read().  */
68 extern void
extent_scan_init(int src_fd,struct extent_scan * scan)69 extent_scan_init (int src_fd, struct extent_scan *scan)
70 {
71   scan->fd = src_fd;
72   scan->ei_count = 0;
73   scan->ext_info = NULL;
74   scan->scan_start = 0;
75   scan->initial_scan_failed = false;
76   scan->hit_final_extent = false;
77   scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
78 }
79 
80 #ifdef __linux__
81 # ifndef FS_IOC_FIEMAP
82 #  define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap)
83 # endif
84 /* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to
85    obtain a map of file extents excluding holes.  */
86 extern bool
extent_scan_read(struct extent_scan * scan)87 extent_scan_read (struct extent_scan *scan)
88 {
89   unsigned int si = 0;
90   struct extent_info *last_ei = scan->ext_info;
91 
92   while (true)
93     {
94       union { struct fiemap f; char c[4096]; } fiemap_buf;
95       struct fiemap *fiemap = &fiemap_buf.f;
96       struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
97       enum { count = (sizeof fiemap_buf - sizeof *fiemap)/sizeof *fm_extents };
98       verify (count > 1);
99 
100       /* This is required at least to initialize fiemap->fm_start,
101          but also serves (in mid 2010) to appease valgrind, which
102          appears not to know the semantics of the FIEMAP ioctl. */
103       memset (&fiemap_buf, 0, sizeof fiemap_buf);
104 
105       fiemap->fm_start = scan->scan_start;
106       fiemap->fm_flags = scan->fm_flags;
107       fiemap->fm_extent_count = count;
108       fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
109 
110       /* Fall back to the standard copy if call ioctl(2) failed for
111          the first time.  */
112       if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0)
113         {
114           if (scan->scan_start == 0)
115             scan->initial_scan_failed = true;
116           return false;
117         }
118 
119       /* If 0 extents are returned, then no more scans are needed.  */
120       if (fiemap->fm_mapped_extents == 0)
121         {
122           scan->hit_final_extent = true;
123           return scan->scan_start != 0;
124         }
125 
126       assert (scan->ei_count <= SIZE_MAX - fiemap->fm_mapped_extents);
127       scan->ei_count += fiemap->fm_mapped_extents;
128       {
129         /* last_ei points into a buffer that may be freed via xnrealloc.
130            Record its offset and adjust after allocation.  */
131         size_t prev_idx = last_ei - scan->ext_info;
132         scan->ext_info = xnrealloc (scan->ext_info, scan->ei_count,
133                                     sizeof (struct extent_info));
134         last_ei = scan->ext_info + prev_idx;
135       }
136 
137       unsigned int i = 0;
138       for (i = 0; i < fiemap->fm_mapped_extents; i++)
139         {
140           assert (fm_extents[i].fe_logical
141                   <= OFF_T_MAX - fm_extents[i].fe_length);
142 
143           verify (sizeof last_ei->ext_flags >= sizeof fm_extents->fe_flags);
144 
145           if (si && last_ei->ext_flags
146               == (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST)
147               && (last_ei->ext_logical + last_ei->ext_length
148                   == fm_extents[i].fe_logical))
149             {
150               /* Merge previous with last.  */
151               last_ei->ext_length += fm_extents[i].fe_length;
152               /* Copy flags in case different.  */
153               last_ei->ext_flags = fm_extents[i].fe_flags;
154             }
155           else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical)
156                    || (si && (last_ei->ext_logical + last_ei->ext_length
157                               > fm_extents[i].fe_logical)))
158             {
159               /* BTRFS before 2.6.38 could return overlapping extents
160                  for sparse files.  We adjust the returned extents
161                  rather than failing, as otherwise it would be inefficient
162                  to detect this on the initial scan.  */
163               uint64_t new_logical;
164               uint64_t length_adjust;
165               if (si == 0)
166                 new_logical = scan->scan_start;
167               else
168                 {
169                   /* We could return here if scan->scan_start == 0
170                      but don't so as to minimize special cases.  */
171                   new_logical = last_ei->ext_logical + last_ei->ext_length;
172                 }
173               length_adjust = new_logical - fm_extents[i].fe_logical;
174               /* If an extent is contained within the previous one, fail.  */
175               if (length_adjust < fm_extents[i].fe_length)
176                 {
177                   if (scan->scan_start == 0)
178                     scan->initial_scan_failed = true;
179                   return false;
180                 }
181               fm_extents[i].fe_logical = new_logical;
182               fm_extents[i].fe_length -= length_adjust;
183               /* Process the adjusted extent again.  */
184               i--;
185               continue;
186             }
187           else
188             {
189               last_ei = scan->ext_info + si;
190               last_ei->ext_logical = fm_extents[i].fe_logical;
191               last_ei->ext_length = fm_extents[i].fe_length;
192               last_ei->ext_flags = fm_extents[i].fe_flags;
193               si++;
194             }
195         }
196 
197       if (last_ei->ext_flags & FIEMAP_EXTENT_LAST)
198         scan->hit_final_extent = true;
199 
200       /* If we have enough extents, discard the last as it might
201          be merged with one from the next scan.  */
202       if (si > count && !scan->hit_final_extent)
203         last_ei = scan->ext_info + --si - 1;
204 
205       /* We don't bother reallocating any trailing slots.  */
206       scan->ei_count = si;
207 
208       if (scan->hit_final_extent)
209         break;
210       else
211         scan->scan_start = last_ei->ext_logical + last_ei->ext_length;
212 
213       if (si >= count)
214         break;
215     }
216 
217   return true;
218 }
219 #else
220 extern bool
extent_scan_read(struct extent_scan * scan _GL_UNUSED)221 extent_scan_read (struct extent_scan *scan _GL_UNUSED)
222 {
223   scan->initial_scan_failed = true;
224   errno = ENOTSUP;
225   return false;
226 }
227 #endif
228