1 /*
2 * file.c
3 *
4 * file system operations
5 *
6 * Copyright (c) 2010-2018, PostgreSQL Global Development Group
7 * src/bin/pg_upgrade/file.c
8 */
9
10 #include "postgres_fe.h"
11
12 #include "access/visibilitymap.h"
13 #include "common/file_perm.h"
14 #include "pg_upgrade.h"
15 #include "storage/bufpage.h"
16 #include "storage/checksum.h"
17 #include "storage/checksum_impl.h"
18
19 #include <sys/stat.h>
20 #include <fcntl.h>
21
22
23 #ifdef WIN32
24 static int win32_pghardlink(const char *src, const char *dst);
25 #endif
26
27
28 /*
29 * copyFile()
30 *
31 * Copies a relation file from src to dst.
32 * schemaName/relName are relation's SQL name (used for error messages only).
33 */
34 void
copyFile(const char * src,const char * dst,const char * schemaName,const char * relName)35 copyFile(const char *src, const char *dst,
36 const char *schemaName, const char *relName)
37 {
38 #ifndef WIN32
39 int src_fd;
40 int dest_fd;
41 char *buffer;
42
43 if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
44 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
45 schemaName, relName, src, strerror(errno));
46
47 if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
48 pg_file_create_mode)) < 0)
49 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
50 schemaName, relName, dst, strerror(errno));
51
52 /* copy in fairly large chunks for best efficiency */
53 #define COPY_BUF_SIZE (50 * BLCKSZ)
54
55 buffer = (char *) pg_malloc(COPY_BUF_SIZE);
56
57 /* perform data copying i.e read src source, write to destination */
58 while (true)
59 {
60 ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
61
62 if (nbytes < 0)
63 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
64 schemaName, relName, src, strerror(errno));
65
66 if (nbytes == 0)
67 break;
68
69 errno = 0;
70 if (write(dest_fd, buffer, nbytes) != nbytes)
71 {
72 /* if write didn't set errno, assume problem is no disk space */
73 if (errno == 0)
74 errno = ENOSPC;
75 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
76 schemaName, relName, dst, strerror(errno));
77 }
78 }
79
80 pg_free(buffer);
81 close(src_fd);
82 close(dest_fd);
83
84 #else /* WIN32 */
85
86 if (CopyFile(src, dst, true) == 0)
87 {
88 _dosmaperr(GetLastError());
89 pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
90 schemaName, relName, src, dst, strerror(errno));
91 }
92
93 #endif /* WIN32 */
94 }
95
96
/*
 * linkFile()
 *
 * Creates dst as a hard link to the relation file src.
 * schemaName/relName are the relation's SQL name; they are used only to
 * build the error message passed to pg_fatal() when linking fails.
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	int			rc = pg_link_file(src, dst);

	/* non-negative result: the link was created, nothing more to do */
	if (rc >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
			 schemaName, relName, src, dst, strerror(errno));
}
111
112
113 /*
114 * rewriteVisibilityMap()
115 *
116 * Transform a visibility map file, copying from src to dst.
117 * schemaName/relName are relation's SQL name (used for error messages only).
118 *
119 * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
120 * visibility map included one bit per heap page; it now includes two.
121 * When upgrading a cluster from before that time to a current PostgreSQL
122 * version, we could refuse to copy visibility maps from the old cluster
123 * to the new cluster; the next VACUUM would recreate them, but at the
124 * price of scanning the entire table. So, instead, we rewrite the old
125 * visibility maps in the new format. That way, the all-visible bits
126 * remain set for the pages for which they were set previously. The
127 * all-frozen bits are never set by this conversion; we leave that to VACUUM.
128 */
129 void
rewriteVisibilityMap(const char * fromfile,const char * tofile,const char * schemaName,const char * relName)130 rewriteVisibilityMap(const char *fromfile, const char *tofile,
131 const char *schemaName, const char *relName)
132 {
133 int src_fd;
134 int dst_fd;
135 PGAlignedBlock buffer;
136 PGAlignedBlock new_vmbuf;
137 ssize_t totalBytesRead = 0;
138 ssize_t src_filesize;
139 int rewriteVmBytesPerPage;
140 BlockNumber new_blkno = 0;
141 struct stat statbuf;
142
143 /* Compute number of old-format bytes per new page */
144 rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
145
146 if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
147 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
148 schemaName, relName, fromfile, strerror(errno));
149
150 if (fstat(src_fd, &statbuf) != 0)
151 pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
152 schemaName, relName, fromfile, strerror(errno));
153
154 if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
155 pg_file_create_mode)) < 0)
156 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
157 schemaName, relName, tofile, strerror(errno));
158
159 /* Save old file size */
160 src_filesize = statbuf.st_size;
161
162 /*
163 * Turn each visibility map page into 2 pages one by one. Each new page
164 * has the same page header as the old one. If the last section of the
165 * last page is empty, we skip it, mostly to avoid turning one-page
166 * visibility maps for small relations into two pages needlessly.
167 */
168 while (totalBytesRead < src_filesize)
169 {
170 ssize_t bytesRead;
171 char *old_cur;
172 char *old_break;
173 char *old_blkend;
174 PageHeaderData pageheader;
175 bool old_lastblk;
176
177 if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
178 {
179 if (bytesRead < 0)
180 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
181 schemaName, relName, fromfile, strerror(errno));
182 else
183 pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
184 schemaName, relName, fromfile);
185 }
186
187 totalBytesRead += BLCKSZ;
188 old_lastblk = (totalBytesRead == src_filesize);
189
190 /* Save the page header data */
191 memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
192
193 /*
194 * These old_* variables point to old visibility map page. old_cur
195 * points to current position on old page. old_blkend points to end of
196 * old block. old_break is the end+1 position on the old page for the
197 * data that will be transferred to the current new page.
198 */
199 old_cur = buffer.data + SizeOfPageHeaderData;
200 old_blkend = buffer.data + bytesRead;
201 old_break = old_cur + rewriteVmBytesPerPage;
202
203 while (old_break <= old_blkend)
204 {
205 char *new_cur;
206 bool empty = true;
207 bool old_lastpart;
208
209 /* First, copy old page header to new page */
210 memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
211
212 /* Rewriting the last part of the last old page? */
213 old_lastpart = old_lastblk && (old_break == old_blkend);
214
215 new_cur = new_vmbuf.data + SizeOfPageHeaderData;
216
217 /* Process old page bytes one by one, and turn it into new page. */
218 while (old_cur < old_break)
219 {
220 uint8 byte = *(uint8 *) old_cur;
221 uint16 new_vmbits = 0;
222 int i;
223
224 /* Generate new format bits while keeping old information */
225 for (i = 0; i < BITS_PER_BYTE; i++)
226 {
227 if (byte & (1 << i))
228 {
229 empty = false;
230 new_vmbits |=
231 VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i);
232 }
233 }
234
235 /* Copy new visibility map bytes to new-format page */
236 new_cur[0] = (char) (new_vmbits & 0xFF);
237 new_cur[1] = (char) (new_vmbits >> 8);
238
239 old_cur++;
240 new_cur += BITS_PER_HEAPBLOCK;
241 }
242
243 /* If the last part of the last page is empty, skip writing it */
244 if (old_lastpart && empty)
245 break;
246
247 /* Set new checksum for visibility map page, if enabled */
248 if (new_cluster.controldata.data_checksum_version != 0)
249 ((PageHeader) new_vmbuf.data)->pd_checksum =
250 pg_checksum_page(new_vmbuf.data, new_blkno);
251
252 errno = 0;
253 if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
254 {
255 /* if write didn't set errno, assume problem is no disk space */
256 if (errno == 0)
257 errno = ENOSPC;
258 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
259 schemaName, relName, tofile, strerror(errno));
260 }
261
262 /* Advance for next new page */
263 old_break += rewriteVmBytesPerPage;
264 new_blkno++;
265 }
266 }
267
268 /* Clean up */
269 close(dst_fd);
270 close(src_fd);
271 }
272
273 void
check_hard_link(void)274 check_hard_link(void)
275 {
276 char existing_file[MAXPGPATH];
277 char new_link_file[MAXPGPATH];
278
279 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
280 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
281 unlink(new_link_file); /* might fail */
282
283 if (pg_link_file(existing_file, new_link_file) < 0)
284 pg_fatal("could not create hard link between old and new data directories: %s\n"
285 "In link mode the old and new data directories must be on the same file system.\n",
286 strerror(errno));
287
288 unlink(new_link_file);
289 }
290
#ifdef WIN32
/*
 * win32_pghardlink()
 *
 * Implementation of pg_link_file() on Windows.  Creates dst as a hard link
 * to the existing file src.  Returns 0 on success; on failure returns -1
 * after passing the Windows error code to _dosmaperr().
 */
static int
win32_pghardlink(const char *src, const char *dst)
{
	/*
	 * CreateHardLinkA returns zero for failure, nonzero for success; note
	 * that it takes the new link name first and the existing file second.
	 * http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
	 */
	if (CreateHardLinkA(dst, src, NULL) != 0)
		return 0;

	_dosmaperr(GetLastError());
	return -1;
}
#endif
309