1 /*
2 * file.c
3 *
4 * file system operations
5 *
6 * Copyright (c) 2010-2020, PostgreSQL Global Development Group
7 * src/bin/pg_upgrade/file.c
8 */
9
10 #include "postgres_fe.h"
11
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #ifdef HAVE_COPYFILE_H
15 #include <copyfile.h>
16 #endif
17 #ifdef __linux__
18 #include <sys/ioctl.h>
19 #include <linux/fs.h>
20 #endif
21
22 #include "access/visibilitymapdefs.h"
23 #include "common/file_perm.h"
24 #include "pg_upgrade.h"
25 #include "storage/bufpage.h"
26 #include "storage/checksum.h"
27 #include "storage/checksum_impl.h"
28
29
/*
 * cloneFile()
 *
 * Clones/reflinks a relation file from src to dst.
 *
 * Two implementations are selected at compile time:
 *	- copyfile() with COPYFILE_CLONE_FORCE, when HAVE_COPYFILE and that flag
 *	  are both defined;
 *	- the FICLONE ioctl, on Linux when FICLONE is defined.
 * When neither is available the function body is empty.
 *
 * In the FICLONE path dst is created with O_CREAT | O_EXCL, so it must not
 * exist beforehand; if the clone itself fails, the newly created dst is
 * removed again before the error is reported.
 *
 * schemaName/relName are relation's SQL name (used for error messages only).
 *
 * All failures are reported through pg_fatal().
 */
void
cloneFile(const char *src, const char *dst,
		  const char *schemaName, const char *relName)
{
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
	if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(errno));
#elif defined(__linux__) && defined(FICLONE)
	int			src_fd;
	int			dest_fd;

	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
				 schemaName, relName, src, strerror(errno));

	if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
						pg_file_create_mode)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
				 schemaName, relName, dst, strerror(errno));

	if (ioctl(dest_fd, FICLONE, src_fd) < 0)
	{
		/*
		 * Remember why the ioctl failed: unlink() may itself fail and
		 * overwrite errno, which would make the message below misleading.
		 */
		int			save_errno = errno;

		unlink(dst);
		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(save_errno));
	}

	close(src_fd);
	close(dest_fd);
#endif
}
69
70
71 /*
72 * copyFile()
73 *
74 * Copies a relation file from src to dst.
75 * schemaName/relName are relation's SQL name (used for error messages only).
76 */
77 void
copyFile(const char * src,const char * dst,const char * schemaName,const char * relName)78 copyFile(const char *src, const char *dst,
79 const char *schemaName, const char *relName)
80 {
81 #ifndef WIN32
82 int src_fd;
83 int dest_fd;
84 char *buffer;
85
86 if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
87 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
88 schemaName, relName, src, strerror(errno));
89
90 if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
91 pg_file_create_mode)) < 0)
92 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
93 schemaName, relName, dst, strerror(errno));
94
95 /* copy in fairly large chunks for best efficiency */
96 #define COPY_BUF_SIZE (50 * BLCKSZ)
97
98 buffer = (char *) pg_malloc(COPY_BUF_SIZE);
99
100 /* perform data copying i.e read src source, write to destination */
101 while (true)
102 {
103 ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
104
105 if (nbytes < 0)
106 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
107 schemaName, relName, src, strerror(errno));
108
109 if (nbytes == 0)
110 break;
111
112 errno = 0;
113 if (write(dest_fd, buffer, nbytes) != nbytes)
114 {
115 /* if write didn't set errno, assume problem is no disk space */
116 if (errno == 0)
117 errno = ENOSPC;
118 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
119 schemaName, relName, dst, strerror(errno));
120 }
121 }
122
123 pg_free(buffer);
124 close(src_fd);
125 close(dest_fd);
126
127 #else /* WIN32 */
128
129 if (CopyFile(src, dst, true) == 0)
130 {
131 _dosmaperr(GetLastError());
132 pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
133 schemaName, relName, src, dst, strerror(errno));
134 }
135
136 #endif /* WIN32 */
137 }
138
139
/*
 * linkFile()
 *
 * Creates dst as a hard link to the relation file src.
 *
 * schemaName/relName are relation's SQL name (used for error messages only).
 *
 * A failing link() is reported through pg_fatal().
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	/* Nothing more to do once the link exists */
	if (link(src, dst) >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
			 schemaName, relName, src, dst, strerror(errno));
}
154
155
156 /*
157 * rewriteVisibilityMap()
158 *
159 * Transform a visibility map file, copying from src to dst.
160 * schemaName/relName are relation's SQL name (used for error messages only).
161 *
162 * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
163 * visibility map included one bit per heap page; it now includes two.
164 * When upgrading a cluster from before that time to a current PostgreSQL
165 * version, we could refuse to copy visibility maps from the old cluster
166 * to the new cluster; the next VACUUM would recreate them, but at the
167 * price of scanning the entire table. So, instead, we rewrite the old
168 * visibility maps in the new format. That way, the all-visible bits
169 * remain set for the pages for which they were set previously. The
170 * all-frozen bits are never set by this conversion; we leave that to VACUUM.
171 */
172 void
rewriteVisibilityMap(const char * fromfile,const char * tofile,const char * schemaName,const char * relName)173 rewriteVisibilityMap(const char *fromfile, const char *tofile,
174 const char *schemaName, const char *relName)
175 {
176 int src_fd;
177 int dst_fd;
178 PGAlignedBlock buffer;
179 PGAlignedBlock new_vmbuf;
180 ssize_t totalBytesRead = 0;
181 ssize_t src_filesize;
182 int rewriteVmBytesPerPage;
183 BlockNumber new_blkno = 0;
184 struct stat statbuf;
185
186 /* Compute number of old-format bytes per new page */
187 rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
188
189 if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
190 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
191 schemaName, relName, fromfile, strerror(errno));
192
193 if (fstat(src_fd, &statbuf) != 0)
194 pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
195 schemaName, relName, fromfile, strerror(errno));
196
197 if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
198 pg_file_create_mode)) < 0)
199 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
200 schemaName, relName, tofile, strerror(errno));
201
202 /* Save old file size */
203 src_filesize = statbuf.st_size;
204
205 /*
206 * Turn each visibility map page into 2 pages one by one. Each new page
207 * has the same page header as the old one. If the last section of the
208 * last page is empty, we skip it, mostly to avoid turning one-page
209 * visibility maps for small relations into two pages needlessly.
210 */
211 while (totalBytesRead < src_filesize)
212 {
213 ssize_t bytesRead;
214 char *old_cur;
215 char *old_break;
216 char *old_blkend;
217 PageHeaderData pageheader;
218 bool old_lastblk;
219
220 if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
221 {
222 if (bytesRead < 0)
223 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
224 schemaName, relName, fromfile, strerror(errno));
225 else
226 pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
227 schemaName, relName, fromfile);
228 }
229
230 totalBytesRead += BLCKSZ;
231 old_lastblk = (totalBytesRead == src_filesize);
232
233 /* Save the page header data */
234 memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
235
236 /*
237 * These old_* variables point to old visibility map page. old_cur
238 * points to current position on old page. old_blkend points to end of
239 * old block. old_break is the end+1 position on the old page for the
240 * data that will be transferred to the current new page.
241 */
242 old_cur = buffer.data + SizeOfPageHeaderData;
243 old_blkend = buffer.data + bytesRead;
244 old_break = old_cur + rewriteVmBytesPerPage;
245
246 while (old_break <= old_blkend)
247 {
248 char *new_cur;
249 bool empty = true;
250 bool old_lastpart;
251
252 /* First, copy old page header to new page */
253 memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
254
255 /* Rewriting the last part of the last old page? */
256 old_lastpart = old_lastblk && (old_break == old_blkend);
257
258 new_cur = new_vmbuf.data + SizeOfPageHeaderData;
259
260 /* Process old page bytes one by one, and turn it into new page. */
261 while (old_cur < old_break)
262 {
263 uint8 byte = *(uint8 *) old_cur;
264 uint16 new_vmbits = 0;
265 int i;
266
267 /* Generate new format bits while keeping old information */
268 for (i = 0; i < BITS_PER_BYTE; i++)
269 {
270 if (byte & (1 << i))
271 {
272 empty = false;
273 new_vmbits |=
274 VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i);
275 }
276 }
277
278 /* Copy new visibility map bytes to new-format page */
279 new_cur[0] = (char) (new_vmbits & 0xFF);
280 new_cur[1] = (char) (new_vmbits >> 8);
281
282 old_cur++;
283 new_cur += BITS_PER_HEAPBLOCK;
284 }
285
286 /* If the last part of the last page is empty, skip writing it */
287 if (old_lastpart && empty)
288 break;
289
290 /* Set new checksum for visibility map page, if enabled */
291 if (new_cluster.controldata.data_checksum_version != 0)
292 ((PageHeader) new_vmbuf.data)->pd_checksum =
293 pg_checksum_page(new_vmbuf.data, new_blkno);
294
295 errno = 0;
296 if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
297 {
298 /* if write didn't set errno, assume problem is no disk space */
299 if (errno == 0)
300 errno = ENOSPC;
301 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
302 schemaName, relName, tofile, strerror(errno));
303 }
304
305 /* Advance for next new page */
306 old_break += rewriteVmBytesPerPage;
307 new_blkno++;
308 }
309 }
310
311 /* Clean up */
312 close(dst_fd);
313 close(src_fd);
314 }
315
316 void
check_file_clone(void)317 check_file_clone(void)
318 {
319 char existing_file[MAXPGPATH];
320 char new_link_file[MAXPGPATH];
321
322 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
323 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
324 unlink(new_link_file); /* might fail */
325
326 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
327 if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
328 pg_fatal("could not clone file between old and new data directories: %s\n",
329 strerror(errno));
330 #elif defined(__linux__) && defined(FICLONE)
331 {
332 int src_fd;
333 int dest_fd;
334
335 if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
336 pg_fatal("could not open file \"%s\": %s\n",
337 existing_file, strerror(errno));
338
339 if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
340 pg_file_create_mode)) < 0)
341 pg_fatal("could not create file \"%s\": %s\n",
342 new_link_file, strerror(errno));
343
344 if (ioctl(dest_fd, FICLONE, src_fd) < 0)
345 pg_fatal("could not clone file between old and new data directories: %s\n",
346 strerror(errno));
347
348 close(src_fd);
349 close(dest_fd);
350 }
351 #else
352 pg_fatal("file cloning not supported on this platform\n");
353 #endif
354
355 unlink(new_link_file);
356 }
357
358 void
check_hard_link(void)359 check_hard_link(void)
360 {
361 char existing_file[MAXPGPATH];
362 char new_link_file[MAXPGPATH];
363
364 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
365 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
366 unlink(new_link_file); /* might fail */
367
368 if (link(existing_file, new_link_file) < 0)
369 pg_fatal("could not create hard link between old and new data directories: %s\n"
370 "In link mode the old and new data directories must be on the same file system.\n",
371 strerror(errno));
372
373 unlink(new_link_file);
374 }
375