1 /*
2 * file.c
3 *
4 * file system operations
5 *
6 * Copyright (c) 2010-2019, PostgreSQL Global Development Group
7 * src/bin/pg_upgrade/file.c
8 */
9
10 #include "postgres_fe.h"
11
12 #include "access/visibilitymap.h"
13 #include "common/file_perm.h"
14 #include "pg_upgrade.h"
15 #include "storage/bufpage.h"
16 #include "storage/checksum.h"
17 #include "storage/checksum_impl.h"
18
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #ifdef HAVE_COPYFILE_H
22 #include <copyfile.h>
23 #endif
24 #ifdef __linux__
25 #include <sys/ioctl.h>
26 #include <linux/fs.h>
27 #endif
28
29
30 #ifdef WIN32
31 static int win32_pghardlink(const char *src, const char *dst);
32 #endif
33
34
/*
 * cloneFile()
 *
 * Clones/reflinks a relation file from src to dst.
 *
 * schemaName/relName are relation's SQL name (used for error messages only).
 *
 * Uses copyfile() with COPYFILE_CLONE_FORCE when that is available at compile
 * time, otherwise the FICLONE ioctl on Linux.  Every failure is reported
 * through pg_fatal().  If neither mechanism is compiled in, the function body
 * is empty.
 */
void
cloneFile(const char *src, const char *dst,
		  const char *schemaName, const char *relName)
{
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
	if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(errno));
#elif defined(__linux__) && defined(FICLONE)
	int			src_fd;
	int			dest_fd;

	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
				 schemaName, relName, src, strerror(errno));

	/* O_EXCL: the destination file must not exist yet */
	if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
						pg_file_create_mode)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
				 schemaName, relName, dst, strerror(errno));

	if (ioctl(dest_fd, FICLONE, src_fd) < 0)
	{
		/*
		 * Remember why the clone failed before removing the file we just
		 * created: unlink() can fail as well and would then overwrite errno,
		 * making the message below report the wrong cause.
		 */
		int			save_errno = errno;

		unlink(dst);

		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(save_errno));
	}

	close(src_fd);
	close(dest_fd);
#endif
}
74
75
76 /*
77 * copyFile()
78 *
79 * Copies a relation file from src to dst.
80 * schemaName/relName are relation's SQL name (used for error messages only).
81 */
82 void
copyFile(const char * src,const char * dst,const char * schemaName,const char * relName)83 copyFile(const char *src, const char *dst,
84 const char *schemaName, const char *relName)
85 {
86 #ifndef WIN32
87 int src_fd;
88 int dest_fd;
89 char *buffer;
90
91 if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
92 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
93 schemaName, relName, src, strerror(errno));
94
95 if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
96 pg_file_create_mode)) < 0)
97 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
98 schemaName, relName, dst, strerror(errno));
99
100 /* copy in fairly large chunks for best efficiency */
101 #define COPY_BUF_SIZE (50 * BLCKSZ)
102
103 buffer = (char *) pg_malloc(COPY_BUF_SIZE);
104
105 /* perform data copying i.e read src source, write to destination */
106 while (true)
107 {
108 ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
109
110 if (nbytes < 0)
111 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
112 schemaName, relName, src, strerror(errno));
113
114 if (nbytes == 0)
115 break;
116
117 errno = 0;
118 if (write(dest_fd, buffer, nbytes) != nbytes)
119 {
120 /* if write didn't set errno, assume problem is no disk space */
121 if (errno == 0)
122 errno = ENOSPC;
123 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
124 schemaName, relName, dst, strerror(errno));
125 }
126 }
127
128 pg_free(buffer);
129 close(src_fd);
130 close(dest_fd);
131
132 #else /* WIN32 */
133
134 if (CopyFile(src, dst, true) == 0)
135 {
136 _dosmaperr(GetLastError());
137 pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
138 schemaName, relName, src, dst, strerror(errno));
139 }
140
141 #endif /* WIN32 */
142 }
143
144
/*
 * linkFile()
 *
 * Hard-links a relation file from src to dst via pg_link_file().
 * schemaName/relName are relation's SQL name (used for error messages only).
 *
 * A negative result from pg_link_file() is reported through pg_fatal().
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	int			rc = pg_link_file(src, dst);

	if (rc >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
			 schemaName, relName, src, dst, strerror(errno));
}
159
160
161 /*
162 * rewriteVisibilityMap()
163 *
164 * Transform a visibility map file, copying from src to dst.
165 * schemaName/relName are relation's SQL name (used for error messages only).
166 *
167 * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
168 * visibility map included one bit per heap page; it now includes two.
169 * When upgrading a cluster from before that time to a current PostgreSQL
170 * version, we could refuse to copy visibility maps from the old cluster
171 * to the new cluster; the next VACUUM would recreate them, but at the
172 * price of scanning the entire table. So, instead, we rewrite the old
173 * visibility maps in the new format. That way, the all-visible bits
174 * remain set for the pages for which they were set previously. The
175 * all-frozen bits are never set by this conversion; we leave that to VACUUM.
176 */
177 void
rewriteVisibilityMap(const char * fromfile,const char * tofile,const char * schemaName,const char * relName)178 rewriteVisibilityMap(const char *fromfile, const char *tofile,
179 const char *schemaName, const char *relName)
180 {
181 int src_fd;
182 int dst_fd;
183 PGAlignedBlock buffer;
184 PGAlignedBlock new_vmbuf;
185 ssize_t totalBytesRead = 0;
186 ssize_t src_filesize;
187 int rewriteVmBytesPerPage;
188 BlockNumber new_blkno = 0;
189 struct stat statbuf;
190
191 /* Compute number of old-format bytes per new page */
192 rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
193
194 if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
195 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
196 schemaName, relName, fromfile, strerror(errno));
197
198 if (fstat(src_fd, &statbuf) != 0)
199 pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
200 schemaName, relName, fromfile, strerror(errno));
201
202 if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
203 pg_file_create_mode)) < 0)
204 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
205 schemaName, relName, tofile, strerror(errno));
206
207 /* Save old file size */
208 src_filesize = statbuf.st_size;
209
210 /*
211 * Turn each visibility map page into 2 pages one by one. Each new page
212 * has the same page header as the old one. If the last section of the
213 * last page is empty, we skip it, mostly to avoid turning one-page
214 * visibility maps for small relations into two pages needlessly.
215 */
216 while (totalBytesRead < src_filesize)
217 {
218 ssize_t bytesRead;
219 char *old_cur;
220 char *old_break;
221 char *old_blkend;
222 PageHeaderData pageheader;
223 bool old_lastblk;
224
225 if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
226 {
227 if (bytesRead < 0)
228 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
229 schemaName, relName, fromfile, strerror(errno));
230 else
231 pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
232 schemaName, relName, fromfile);
233 }
234
235 totalBytesRead += BLCKSZ;
236 old_lastblk = (totalBytesRead == src_filesize);
237
238 /* Save the page header data */
239 memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
240
241 /*
242 * These old_* variables point to old visibility map page. old_cur
243 * points to current position on old page. old_blkend points to end of
244 * old block. old_break is the end+1 position on the old page for the
245 * data that will be transferred to the current new page.
246 */
247 old_cur = buffer.data + SizeOfPageHeaderData;
248 old_blkend = buffer.data + bytesRead;
249 old_break = old_cur + rewriteVmBytesPerPage;
250
251 while (old_break <= old_blkend)
252 {
253 char *new_cur;
254 bool empty = true;
255 bool old_lastpart;
256
257 /* First, copy old page header to new page */
258 memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
259
260 /* Rewriting the last part of the last old page? */
261 old_lastpart = old_lastblk && (old_break == old_blkend);
262
263 new_cur = new_vmbuf.data + SizeOfPageHeaderData;
264
265 /* Process old page bytes one by one, and turn it into new page. */
266 while (old_cur < old_break)
267 {
268 uint8 byte = *(uint8 *) old_cur;
269 uint16 new_vmbits = 0;
270 int i;
271
272 /* Generate new format bits while keeping old information */
273 for (i = 0; i < BITS_PER_BYTE; i++)
274 {
275 if (byte & (1 << i))
276 {
277 empty = false;
278 new_vmbits |=
279 VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i);
280 }
281 }
282
283 /* Copy new visibility map bytes to new-format page */
284 new_cur[0] = (char) (new_vmbits & 0xFF);
285 new_cur[1] = (char) (new_vmbits >> 8);
286
287 old_cur++;
288 new_cur += BITS_PER_HEAPBLOCK;
289 }
290
291 /* If the last part of the last page is empty, skip writing it */
292 if (old_lastpart && empty)
293 break;
294
295 /* Set new checksum for visibility map page, if enabled */
296 if (new_cluster.controldata.data_checksum_version != 0)
297 ((PageHeader) new_vmbuf.data)->pd_checksum =
298 pg_checksum_page(new_vmbuf.data, new_blkno);
299
300 errno = 0;
301 if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
302 {
303 /* if write didn't set errno, assume problem is no disk space */
304 if (errno == 0)
305 errno = ENOSPC;
306 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
307 schemaName, relName, tofile, strerror(errno));
308 }
309
310 /* Advance for next new page */
311 old_break += rewriteVmBytesPerPage;
312 new_blkno++;
313 }
314 }
315
316 /* Clean up */
317 close(dst_fd);
318 close(src_fd);
319 }
320
321 void
check_file_clone(void)322 check_file_clone(void)
323 {
324 char existing_file[MAXPGPATH];
325 char new_link_file[MAXPGPATH];
326
327 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
328 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
329 unlink(new_link_file); /* might fail */
330
331 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
332 if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
333 pg_fatal("could not clone file between old and new data directories: %s\n",
334 strerror(errno));
335 #elif defined(__linux__) && defined(FICLONE)
336 {
337 int src_fd;
338 int dest_fd;
339
340 if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
341 pg_fatal("could not open file \"%s\": %s\n",
342 existing_file, strerror(errno));
343
344 if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
345 pg_file_create_mode)) < 0)
346 pg_fatal("could not create file \"%s\": %s\n",
347 new_link_file, strerror(errno));
348
349 if (ioctl(dest_fd, FICLONE, src_fd) < 0)
350 pg_fatal("could not clone file between old and new data directories: %s\n",
351 strerror(errno));
352
353 close(src_fd);
354 close(dest_fd);
355 }
356 #else
357 pg_fatal("file cloning not supported on this platform\n");
358 #endif
359
360 unlink(new_link_file);
361 }
362
363 void
check_hard_link(void)364 check_hard_link(void)
365 {
366 char existing_file[MAXPGPATH];
367 char new_link_file[MAXPGPATH];
368
369 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
370 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
371 unlink(new_link_file); /* might fail */
372
373 if (pg_link_file(existing_file, new_link_file) < 0)
374 pg_fatal("could not create hard link between old and new data directories: %s\n"
375 "In link mode the old and new data directories must be on the same file system.\n",
376 strerror(errno));
377
378 unlink(new_link_file);
379 }
380
#ifdef WIN32
/*
 * win32_pghardlink()
 *
 * Implementation of pg_link_file() on Windows, built on CreateHardLinkA().
 *
 * Returns 0 on success.  On failure, passes the Windows error code to
 * _dosmaperr() (presumably so that errno describes the failure; confirm
 * against its definition) and returns -1.
 */
static int
win32_pghardlink(const char *src, const char *dst)
{
	/*
	 * CreateHardLinkA returns zero for failure
	 * http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
	 */
	if (CreateHardLinkA(dst, src, NULL) != 0)
		return 0;

	_dosmaperr(GetLastError());
	return -1;
}
#endif
399