1 /* 2 * file.c 3 * 4 * file system operations 5 * 6 * Copyright (c) 2010-2020, PostgreSQL Global Development Group 7 * src/bin/pg_upgrade/file.c 8 */ 9 10 #include "postgres_fe.h" 11 12 #include <sys/stat.h> 13 #include <fcntl.h> 14 #ifdef HAVE_COPYFILE_H 15 #include <copyfile.h> 16 #endif 17 #ifdef __linux__ 18 #include <sys/ioctl.h> 19 #include <linux/fs.h> 20 #endif 21 22 #include "access/visibilitymapdefs.h" 23 #include "common/file_perm.h" 24 #include "pg_upgrade.h" 25 #include "storage/bufpage.h" 26 #include "storage/checksum.h" 27 #include "storage/checksum_impl.h" 28 29 30 /* 31 * cloneFile() 32 * 33 * Clones/reflinks a relation file from src to dst. 34 * 35 * schemaName/relName are relation's SQL name (used for error messages only). 36 */ 37 void 38 cloneFile(const char *src, const char *dst, 39 const char *schemaName, const char *relName) 40 { 41 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) 42 if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0) 43 pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", 44 schemaName, relName, src, dst, strerror(errno)); 45 #elif defined(__linux__) && defined(FICLONE) 46 int src_fd; 47 int dest_fd; 48 49 if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0) 50 pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n", 51 schemaName, relName, src, strerror(errno)); 52 53 if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 54 pg_file_create_mode)) < 0) 55 pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n", 56 schemaName, relName, dst, strerror(errno)); 57 58 if (ioctl(dest_fd, FICLONE, src_fd) < 0) 59 { 60 unlink(dst); 61 pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", 62 schemaName, relName, src, dst, strerror(errno)); 63 } 64 65 close(src_fd); 66 close(dest_fd); 67 #endif 68 } 69 70 71 /* 72 * copyFile() 73 * 74 * Copies a relation file from src to dst. 75 * schemaName/relName are relation's SQL name (used for error messages only). 76 */ 77 void 78 copyFile(const char *src, const char *dst, 79 const char *schemaName, const char *relName) 80 { 81 #ifndef WIN32 82 int src_fd; 83 int dest_fd; 84 char *buffer; 85 86 if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0) 87 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n", 88 schemaName, relName, src, strerror(errno)); 89 90 if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 91 pg_file_create_mode)) < 0) 92 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n", 93 schemaName, relName, dst, strerror(errno)); 94 95 /* copy in fairly large chunks for best efficiency */ 96 #define COPY_BUF_SIZE (50 * BLCKSZ) 97 98 buffer = (char *) pg_malloc(COPY_BUF_SIZE); 99 100 /* perform data copying i.e read src source, write to destination */ 101 while (true) 102 { 103 ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE); 104 105 if (nbytes < 0) 106 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n", 107 schemaName, relName, src, strerror(errno)); 108 109 if (nbytes == 0) 110 break; 111 112 errno = 0; 113 if (write(dest_fd, buffer, nbytes) != nbytes) 114 { 115 /* if write didn't set errno, assume problem is no disk space */ 116 if (errno == 0) 117 errno = ENOSPC; 118 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n", 119 schemaName, relName, dst, strerror(errno)); 120 } 121 } 122 123 pg_free(buffer); 124 close(src_fd); 125 close(dest_fd); 126 127 #else /* WIN32 */ 128 129 if (CopyFile(src, dst, true) == 0) 130 { 131 _dosmaperr(GetLastError()); 132 pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", 133 schemaName, relName, src, dst, strerror(errno)); 134 } 135 136 #endif /* WIN32 */ 137 } 138 139 140 /* 141 * linkFile() 142 * 143 * Hard-links a relation file from src to dst. 144 * schemaName/relName are relation's SQL name (used for error messages only). 145 */ 146 void 147 linkFile(const char *src, const char *dst, 148 const char *schemaName, const char *relName) 149 { 150 if (link(src, dst) < 0) 151 pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", 152 schemaName, relName, src, dst, strerror(errno)); 153 } 154 155 156 /* 157 * rewriteVisibilityMap() 158 * 159 * Transform a visibility map file, copying from src to dst. 160 * schemaName/relName are relation's SQL name (used for error messages only). 161 * 162 * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's 163 * visibility map included one bit per heap page; it now includes two. 164 * When upgrading a cluster from before that time to a current PostgreSQL 165 * version, we could refuse to copy visibility maps from the old cluster 166 * to the new cluster; the next VACUUM would recreate them, but at the 167 * price of scanning the entire table. So, instead, we rewrite the old 168 * visibility maps in the new format. That way, the all-visible bits 169 * remain set for the pages for which they were set previously. The 170 * all-frozen bits are never set by this conversion; we leave that to VACUUM. 171 */ 172 void 173 rewriteVisibilityMap(const char *fromfile, const char *tofile, 174 const char *schemaName, const char *relName) 175 { 176 int src_fd; 177 int dst_fd; 178 PGAlignedBlock buffer; 179 PGAlignedBlock new_vmbuf; 180 ssize_t totalBytesRead = 0; 181 ssize_t src_filesize; 182 int rewriteVmBytesPerPage; 183 BlockNumber new_blkno = 0; 184 struct stat statbuf; 185 186 /* Compute number of old-format bytes per new page */ 187 rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2; 188 189 if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0) 190 pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n", 191 schemaName, relName, fromfile, strerror(errno)); 192 193 if (fstat(src_fd, &statbuf) != 0) 194 pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n", 195 schemaName, relName, fromfile, strerror(errno)); 196 197 if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 198 pg_file_create_mode)) < 0) 199 pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n", 200 schemaName, relName, tofile, strerror(errno)); 201 202 /* Save old file size */ 203 src_filesize = statbuf.st_size; 204 205 /* 206 * Turn each visibility map page into 2 pages one by one. Each new page 207 * has the same page header as the old one. If the last section of the 208 * last page is empty, we skip it, mostly to avoid turning one-page 209 * visibility maps for small relations into two pages needlessly. 210 */ 211 while (totalBytesRead < src_filesize) 212 { 213 ssize_t bytesRead; 214 char *old_cur; 215 char *old_break; 216 char *old_blkend; 217 PageHeaderData pageheader; 218 bool old_lastblk; 219 220 if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ) 221 { 222 if (bytesRead < 0) 223 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n", 224 schemaName, relName, fromfile, strerror(errno)); 225 else 226 pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n", 227 schemaName, relName, fromfile); 228 } 229 230 totalBytesRead += BLCKSZ; 231 old_lastblk = (totalBytesRead == src_filesize); 232 233 /* Save the page header data */ 234 memcpy(&pageheader, buffer.data, SizeOfPageHeaderData); 235 236 /* 237 * These old_* variables point to old visibility map page. old_cur 238 * points to current position on old page. old_blkend points to end of 239 * old block. old_break is the end+1 position on the old page for the 240 * data that will be transferred to the current new page. 241 */ 242 old_cur = buffer.data + SizeOfPageHeaderData; 243 old_blkend = buffer.data + bytesRead; 244 old_break = old_cur + rewriteVmBytesPerPage; 245 246 while (old_break <= old_blkend) 247 { 248 char *new_cur; 249 bool empty = true; 250 bool old_lastpart; 251 252 /* First, copy old page header to new page */ 253 memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData); 254 255 /* Rewriting the last part of the last old page? */ 256 old_lastpart = old_lastblk && (old_break == old_blkend); 257 258 new_cur = new_vmbuf.data + SizeOfPageHeaderData; 259 260 /* Process old page bytes one by one, and turn it into new page. */ 261 while (old_cur < old_break) 262 { 263 uint8 byte = *(uint8 *) old_cur; 264 uint16 new_vmbits = 0; 265 int i; 266 267 /* Generate new format bits while keeping old information */ 268 for (i = 0; i < BITS_PER_BYTE; i++) 269 { 270 if (byte & (1 << i)) 271 { 272 empty = false; 273 new_vmbits |= 274 VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i); 275 } 276 } 277 278 /* Copy new visibility map bytes to new-format page */ 279 new_cur[0] = (char) (new_vmbits & 0xFF); 280 new_cur[1] = (char) (new_vmbits >> 8); 281 282 old_cur++; 283 new_cur += BITS_PER_HEAPBLOCK; 284 } 285 286 /* If the last part of the last page is empty, skip writing it */ 287 if (old_lastpart && empty) 288 break; 289 290 /* Set new checksum for visibility map page, if enabled */ 291 if (new_cluster.controldata.data_checksum_version != 0) 292 ((PageHeader) new_vmbuf.data)->pd_checksum = 293 pg_checksum_page(new_vmbuf.data, new_blkno); 294 295 errno = 0; 296 if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ) 297 { 298 /* if write didn't set errno, assume problem is no disk space */ 299 if (errno == 0) 300 errno = ENOSPC; 301 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n", 302 schemaName, relName, tofile, strerror(errno)); 303 } 304 305 /* Advance for next new page */ 306 old_break += rewriteVmBytesPerPage; 307 new_blkno++; 308 } 309 } 310 311 /* Clean up */ 312 close(dst_fd); 313 close(src_fd); 314 } 315 316 void 317 check_file_clone(void) 318 { 319 char existing_file[MAXPGPATH]; 320 char new_link_file[MAXPGPATH]; 321 322 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata); 323 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata); 324 unlink(new_link_file); /* might fail */ 325 326 #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) 327 if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0) 328 pg_fatal("could not clone file between old and new data directories: %s\n", 329 strerror(errno)); 330 #elif defined(__linux__) && defined(FICLONE) 331 { 332 int src_fd; 333 int dest_fd; 334 335 if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0) 336 pg_fatal("could not open file \"%s\": %s\n", 337 existing_file, strerror(errno)); 338 339 if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 340 pg_file_create_mode)) < 0) 341 pg_fatal("could not create file \"%s\": %s\n", 342 new_link_file, strerror(errno)); 343 344 if (ioctl(dest_fd, FICLONE, src_fd) < 0) 345 pg_fatal("could not clone file between old and new data directories: %s\n", 346 strerror(errno)); 347 348 close(src_fd); 349 close(dest_fd); 350 } 351 #else 352 pg_fatal("file cloning not supported on this platform\n"); 353 #endif 354 355 unlink(new_link_file); 356 } 357 358 void 359 check_hard_link(void) 360 { 361 char existing_file[MAXPGPATH]; 362 char new_link_file[MAXPGPATH]; 363 364 snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata); 365 snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata); 366 unlink(new_link_file); /* might fail */ 367 368 if (link(existing_file, new_link_file) < 0) 369 pg_fatal("could not create hard link between old and new data directories: %s\n" 370 "In link mode the old and new data directories must be on the same file system.\n", 371 strerror(errno)); 372 373 unlink(new_link_file); 374 } 375