1 /*
2 * file.c
3 *
4 * file system operations
5 *
6 * Copyright (c) 2010-2017, PostgreSQL Global Development Group
7 * src/bin/pg_upgrade/file.c
8 */
9
10 #include "postgres_fe.h"
11
12 #include "access/visibilitymap.h"
13 #include "pg_upgrade.h"
14 #include "storage/bufpage.h"
15 #include "storage/checksum.h"
16 #include "storage/checksum_impl.h"
17
18 #include <sys/stat.h>
19 #include <fcntl.h>
20
21
22 #ifdef WIN32
23 static int win32_pghardlink(const char *src, const char *dst);
24 #endif
25
26
27 /*
28 * copyFile()
29 *
30 * Copies a relation file from src to dst.
31 * schemaName/relName are relation's SQL name (used for error messages only).
32 */
33 void
copyFile(const char * src,const char * dst,const char * schemaName,const char * relName)34 copyFile(const char *src, const char *dst,
35 const char *schemaName, const char *relName)
36 {
37 #ifndef WIN32
38 int src_fd;
39 int dest_fd;
40 char *buffer;
41
42 if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
43 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
44 schemaName, relName, src, strerror(errno));
45
46 if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
47 S_IRUSR | S_IWUSR)) < 0)
48 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
49 schemaName, relName, dst, strerror(errno));
50
51 /* copy in fairly large chunks for best efficiency */
52 #define COPY_BUF_SIZE (50 * BLCKSZ)
53
54 buffer = (char *) pg_malloc(COPY_BUF_SIZE);
55
56 /* perform data copying i.e read src source, write to destination */
57 while (true)
58 {
59 ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
60
61 if (nbytes < 0)
62 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
63 schemaName, relName, src, strerror(errno));
64
65 if (nbytes == 0)
66 break;
67
68 errno = 0;
69 if (write(dest_fd, buffer, nbytes) != nbytes)
70 {
71 /* if write didn't set errno, assume problem is no disk space */
72 if (errno == 0)
73 errno = ENOSPC;
74 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
75 schemaName, relName, dst, strerror(errno));
76 }
77 }
78
79 pg_free(buffer);
80 close(src_fd);
81 close(dest_fd);
82
83 #else /* WIN32 */
84
85 if (CopyFile(src, dst, true) == 0)
86 {
87 _dosmaperr(GetLastError());
88 pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
89 schemaName, relName, src, dst, strerror(errno));
90 }
91
92 #endif /* WIN32 */
93 }
94
95
/*
 * linkFile()
 *
 * Creates dst as a hard link to the relation file src.
 * schemaName/relName are the relation's SQL name; they appear only in the
 * error message.
 *
 * A failure of pg_link_file() is reported through pg_fatal(), using errno
 * for the reason text.
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	int			rc = pg_link_file(src, dst);

	/* a non-negative result means the link was created */
	if (rc >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
			 schemaName, relName, src, dst, strerror(errno));
}
110
111
112 /*
113 * rewriteVisibilityMap()
114 *
115 * Transform a visibility map file, copying from src to dst.
116 * schemaName/relName are relation's SQL name (used for error messages only).
117 *
118 * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
119 * visibility map included one bit per heap page; it now includes two.
120 * When upgrading a cluster from before that time to a current PostgreSQL
121 * version, we could refuse to copy visibility maps from the old cluster
122 * to the new cluster; the next VACUUM would recreate them, but at the
123 * price of scanning the entire table. So, instead, we rewrite the old
124 * visibility maps in the new format. That way, the all-visible bits
125 * remain set for the pages for which they were set previously. The
126 * all-frozen bits are never set by this conversion; we leave that to VACUUM.
127 */
128 void
rewriteVisibilityMap(const char * fromfile,const char * tofile,const char * schemaName,const char * relName)129 rewriteVisibilityMap(const char *fromfile, const char *tofile,
130 const char *schemaName, const char *relName)
131 {
132 int src_fd;
133 int dst_fd;
134 PGAlignedBlock buffer;
135 PGAlignedBlock new_vmbuf;
136 ssize_t totalBytesRead = 0;
137 ssize_t src_filesize;
138 int rewriteVmBytesPerPage;
139 BlockNumber new_blkno = 0;
140 struct stat statbuf;
141
142 /* Compute number of old-format bytes per new page */
143 rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
144
145 if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
146 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
147 schemaName, relName, fromfile, strerror(errno));
148
149 if (fstat(src_fd, &statbuf) != 0)
150 pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
151 schemaName, relName, fromfile, strerror(errno));
152
153 if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
154 S_IRUSR | S_IWUSR)) < 0)
155 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
156 schemaName, relName, tofile, strerror(errno));
157
158 /* Save old file size */
159 src_filesize = statbuf.st_size;
160
161 /*
162 * Turn each visibility map page into 2 pages one by one. Each new page
163 * has the same page header as the old one. If the last section of the
164 * last page is empty, we skip it, mostly to avoid turning one-page
165 * visibility maps for small relations into two pages needlessly.
166 */
167 while (totalBytesRead < src_filesize)
168 {
169 ssize_t bytesRead;
170 char *old_cur;
171 char *old_break;
172 char *old_blkend;
173 PageHeaderData pageheader;
174 bool old_lastblk;
175
176 if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
177 {
178 if (bytesRead < 0)
179 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
180 schemaName, relName, fromfile, strerror(errno));
181 else
182 pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
183 schemaName, relName, fromfile);
184 }
185
186 totalBytesRead += BLCKSZ;
187 old_lastblk = (totalBytesRead == src_filesize);
188
189 /* Save the page header data */
190 memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
191
192 /*
193 * These old_* variables point to old visibility map page. old_cur
194 * points to current position on old page. old_blkend points to end of
195 * old block. old_break is the end+1 position on the old page for the
196 * data that will be transferred to the current new page.
197 */
198 old_cur = buffer.data + SizeOfPageHeaderData;
199 old_blkend = buffer.data + bytesRead;
200 old_break = old_cur + rewriteVmBytesPerPage;
201
202 while (old_break <= old_blkend)
203 {
204 char *new_cur;
205 bool empty = true;
206 bool old_lastpart;
207
208 /* First, copy old page header to new page */
209 memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
210
211 /* Rewriting the last part of the last old page? */
212 old_lastpart = old_lastblk && (old_break == old_blkend);
213
214 new_cur = new_vmbuf.data + SizeOfPageHeaderData;
215
216 /* Process old page bytes one by one, and turn it into new page. */
217 while (old_cur < old_break)
218 {
219 uint8 byte = *(uint8 *) old_cur;
220 uint16 new_vmbits = 0;
221 int i;
222
223 /* Generate new format bits while keeping old information */
224 for (i = 0; i < BITS_PER_BYTE; i++)
225 {
226 if (byte & (1 << i))
227 {
228 empty = false;
229 new_vmbits |=
230 VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i);
231 }
232 }
233
234 /* Copy new visibility map bytes to new-format page */
235 new_cur[0] = (char) (new_vmbits & 0xFF);
236 new_cur[1] = (char) (new_vmbits >> 8);
237
238 old_cur++;
239 new_cur += BITS_PER_HEAPBLOCK;
240 }
241
242 /* If the last part of the last page is empty, skip writing it */
243 if (old_lastpart && empty)
244 break;
245
246 /* Set new checksum for visibility map page, if enabled */
247 if (new_cluster.controldata.data_checksum_version != 0)
248 ((PageHeader) new_vmbuf.data)->pd_checksum =
249 pg_checksum_page(new_vmbuf.data, new_blkno);
250
251 errno = 0;
252 if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
253 {
254 /* if write didn't set errno, assume problem is no disk space */
255 if (errno == 0)
256 errno = ENOSPC;
257 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
258 schemaName, relName, tofile, strerror(errno));
259 }
260
261 /* Advance for next new page */
262 old_break += rewriteVmBytesPerPage;
263 new_blkno++;
264 }
265 }
266
267 /* Clean up */
268 close(dst_fd);
269 close(src_fd);
270 }
271
272 void
check_hard_link(void)273 check_hard_link(void)
274 {
275 char existing_file[MAXPGPATH];
276 char new_link_file[MAXPGPATH];
277
278 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
279 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
280 unlink(new_link_file); /* might fail */
281
282 if (pg_link_file(existing_file, new_link_file) < 0)
283 pg_fatal("could not create hard link between old and new data directories: %s\n"
284 "In link mode the old and new data directories must be on the same file system.\n",
285 strerror(errno));
286
287 unlink(new_link_file);
288 }
289
#ifdef WIN32
/*
 * win32_pghardlink()
 *
 * Implementation of pg_link_file() on Windows: creates dst as a hard link
 * to the existing file src.  Returns 0 on success and -1 on failure; on
 * failure the Windows error code is passed to _dosmaperr(), which is
 * expected to set errno for the caller's strerror(errno) report.
 */
static int
win32_pghardlink(const char *src, const char *dst)
{
	/*
	 * CreateHardLinkA takes the new link name first and returns zero for
	 * failure
	 * http://msdn.microsoft.com/en-us/library/aa363860(VS.85).aspx
	 */
	if (CreateHardLinkA(dst, src, NULL) != 0)
		return 0;

	_dosmaperr(GetLastError());
	return -1;
}
#endif
308