1 /*-------------------------------------------------------------------------
2 *
3 * reinit.c
4 * Reinitialization of unlogged relations
5 *
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/storage/file/reinit.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15 #include "postgres.h"
16
17 #include <unistd.h>
18
19 #include "catalog/catalog.h"
20 #include "common/relpath.h"
21 #include "storage/copydir.h"
22 #include "storage/fd.h"
23 #include "storage/reinit.h"
24 #include "utils/hsearch.h"
25 #include "utils/memutils.h"
26
27 static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
28 int op);
29 static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
30 int op);
31 static bool parse_filename_for_nontemp_relation(const char *name,
32 int *oidchars, ForkNumber *fork);
33
34 typedef struct
35 {
36 char oid[OIDCHARS + 1];
37 } unlogged_relation_entry;
38
39 /*
40 * Reset unlogged relations from before the last restart.
41 *
42 * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
43 * relation with an "init" fork, except for the "init" fork itself.
44 *
45 * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
46 * fork.
47 */
48 void
ResetUnloggedRelations(int op)49 ResetUnloggedRelations(int op)
50 {
51 char temp_path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
52 DIR *spc_dir;
53 struct dirent *spc_de;
54 MemoryContext tmpctx,
55 oldctx;
56
57 /* Log it. */
58 elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
59 (op & UNLOGGED_RELATION_CLEANUP) != 0,
60 (op & UNLOGGED_RELATION_INIT) != 0);
61
62 /*
63 * Just to be sure we don't leak any memory, let's create a temporary
64 * memory context for this operation.
65 */
66 tmpctx = AllocSetContextCreate(CurrentMemoryContext,
67 "ResetUnloggedRelations",
68 ALLOCSET_DEFAULT_SIZES);
69 oldctx = MemoryContextSwitchTo(tmpctx);
70
71 /*
72 * First process unlogged files in pg_default ($PGDATA/base)
73 */
74 ResetUnloggedRelationsInTablespaceDir("base", op);
75
76 /*
77 * Cycle through directories for all non-default tablespaces.
78 */
79 spc_dir = AllocateDir("pg_tblspc");
80
81 while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
82 {
83 if (strcmp(spc_de->d_name, ".") == 0 ||
84 strcmp(spc_de->d_name, "..") == 0)
85 continue;
86
87 snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
88 spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
89 ResetUnloggedRelationsInTablespaceDir(temp_path, op);
90 }
91
92 FreeDir(spc_dir);
93
94 /*
95 * Restore memory context.
96 */
97 MemoryContextSwitchTo(oldctx);
98 MemoryContextDelete(tmpctx);
99 }
100
101 /* Process one tablespace directory for ResetUnloggedRelations */
102 static void
ResetUnloggedRelationsInTablespaceDir(const char * tsdirname,int op)103 ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
104 {
105 DIR *ts_dir;
106 struct dirent *de;
107 char dbspace_path[MAXPGPATH * 2];
108
109 ts_dir = AllocateDir(tsdirname);
110 if (ts_dir == NULL)
111 {
112 /* anything except ENOENT is fishy */
113 if (errno != ENOENT)
114 elog(LOG,
115 "could not open tablespace directory \"%s\": %m",
116 tsdirname);
117 return;
118 }
119
120 while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
121 {
122 int i = 0;
123
124 /*
125 * We're only interested in the per-database directories, which have
126 * numeric names. Note that this code will also (properly) ignore "."
127 * and "..".
128 */
129 while (isdigit((unsigned char) de->d_name[i]))
130 ++i;
131 if (de->d_name[i] != '\0' || i == 0)
132 continue;
133
134 snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
135 tsdirname, de->d_name);
136 ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
137 }
138
139 FreeDir(ts_dir);
140 }
141
142 /* Process one per-dbspace directory for ResetUnloggedRelations */
143 static void
ResetUnloggedRelationsInDbspaceDir(const char * dbspacedirname,int op)144 ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
145 {
146 DIR *dbspace_dir;
147 struct dirent *de;
148 char rm_path[MAXPGPATH * 2];
149
150 /* Caller must specify at least one operation. */
151 Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0);
152
153 /*
154 * Cleanup is a two-pass operation. First, we go through and identify all
155 * the files with init forks. Then, we go through again and nuke
156 * everything with the same OID except the init fork.
157 */
158 if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
159 {
160 HTAB *hash = NULL;
161 HASHCTL ctl;
162
163 /* Open the directory. */
164 dbspace_dir = AllocateDir(dbspacedirname);
165 if (dbspace_dir == NULL)
166 {
167 elog(LOG,
168 "could not open dbspace directory \"%s\": %m",
169 dbspacedirname);
170 return;
171 }
172
173 /*
174 * It's possible that someone could create a ton of unlogged relations
175 * in the same database & tablespace, so we'd better use a hash table
176 * rather than an array or linked list to keep track of which files
177 * need to be reset. Otherwise, this cleanup operation would be
178 * O(n^2).
179 */
180 ctl.keysize = sizeof(unlogged_relation_entry);
181 ctl.entrysize = sizeof(unlogged_relation_entry);
182 hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
183
184 /* Scan the directory. */
185 while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
186 {
187 ForkNumber forkNum;
188 int oidchars;
189 unlogged_relation_entry ent;
190
191 /* Skip anything that doesn't look like a relation data file. */
192 if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
193 &forkNum))
194 continue;
195
196 /* Also skip it unless this is the init fork. */
197 if (forkNum != INIT_FORKNUM)
198 continue;
199
200 /*
201 * Put the OID portion of the name into the hash table, if it
202 * isn't already.
203 */
204 memset(ent.oid, 0, sizeof(ent.oid));
205 memcpy(ent.oid, de->d_name, oidchars);
206 hash_search(hash, &ent, HASH_ENTER, NULL);
207 }
208
209 /* Done with the first pass. */
210 FreeDir(dbspace_dir);
211
212 /*
213 * If we didn't find any init forks, there's no point in continuing;
214 * we can bail out now.
215 */
216 if (hash_get_num_entries(hash) == 0)
217 {
218 hash_destroy(hash);
219 return;
220 }
221
222 /*
223 * Now, make a second pass and remove anything that matches. First,
224 * reopen the directory.
225 */
226 dbspace_dir = AllocateDir(dbspacedirname);
227 if (dbspace_dir == NULL)
228 {
229 elog(LOG,
230 "could not open dbspace directory \"%s\": %m",
231 dbspacedirname);
232 hash_destroy(hash);
233 return;
234 }
235
236 /* Scan the directory. */
237 while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
238 {
239 ForkNumber forkNum;
240 int oidchars;
241 bool found;
242 unlogged_relation_entry ent;
243
244 /* Skip anything that doesn't look like a relation data file. */
245 if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
246 &forkNum))
247 continue;
248
249 /* We never remove the init fork. */
250 if (forkNum == INIT_FORKNUM)
251 continue;
252
253 /*
254 * See whether the OID portion of the name shows up in the hash
255 * table.
256 */
257 memset(ent.oid, 0, sizeof(ent.oid));
258 memcpy(ent.oid, de->d_name, oidchars);
259 hash_search(hash, &ent, HASH_FIND, &found);
260
261 /* If so, nuke it! */
262 if (found)
263 {
264 snprintf(rm_path, sizeof(rm_path), "%s/%s",
265 dbspacedirname, de->d_name);
266
267 /*
268 * It's tempting to actually throw an error here, but since
269 * this code gets run during database startup, that could
270 * result in the database failing to start. (XXX Should we do
271 * it anyway?)
272 */
273 if (unlink(rm_path))
274 elog(LOG, "could not unlink file \"%s\": %m", rm_path);
275 else
276 elog(DEBUG2, "unlinked file \"%s\"", rm_path);
277 }
278 }
279
280 /* Cleanup is complete. */
281 FreeDir(dbspace_dir);
282 hash_destroy(hash);
283 }
284
285 /*
286 * Initialization happens after cleanup is complete: we copy each init
287 * fork file to the corresponding main fork file. Note that if we are
288 * asked to do both cleanup and init, we may never get here: if the
289 * cleanup code determines that there are no init forks in this dbspace,
290 * it will return before we get to this point.
291 */
292 if ((op & UNLOGGED_RELATION_INIT) != 0)
293 {
294 /* Open the directory. */
295 dbspace_dir = AllocateDir(dbspacedirname);
296 if (dbspace_dir == NULL)
297 {
298 /* we just saw this directory, so it really ought to be there */
299 elog(LOG,
300 "could not open dbspace directory \"%s\": %m",
301 dbspacedirname);
302 return;
303 }
304
305 /* Scan the directory. */
306 while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
307 {
308 ForkNumber forkNum;
309 int oidchars;
310 char oidbuf[OIDCHARS + 1];
311 char srcpath[MAXPGPATH * 2];
312 char dstpath[MAXPGPATH];
313
314 /* Skip anything that doesn't look like a relation data file. */
315 if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
316 &forkNum))
317 continue;
318
319 /* Also skip it unless this is the init fork. */
320 if (forkNum != INIT_FORKNUM)
321 continue;
322
323 /* Construct source pathname. */
324 snprintf(srcpath, sizeof(srcpath), "%s/%s",
325 dbspacedirname, de->d_name);
326
327 /* Construct destination pathname. */
328 memcpy(oidbuf, de->d_name, oidchars);
329 oidbuf[oidchars] = '\0';
330 snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
331 dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
332 strlen(forkNames[INIT_FORKNUM]));
333
334 /* OK, we're ready to perform the actual copy. */
335 elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
336 copy_file(srcpath, dstpath);
337 }
338
339 FreeDir(dbspace_dir);
340
341 /*
342 * copy_file() above has already called pg_flush_data() on the files
343 * it created. Now we need to fsync those files, because a checkpoint
344 * won't do it for us while we're in recovery. We do this in a
345 * separate pass to allow the kernel to perform all the flushes
346 * (especially the metadata ones) at once.
347 */
348 dbspace_dir = AllocateDir(dbspacedirname);
349 if (dbspace_dir == NULL)
350 {
351 /* we just saw this directory, so it really ought to be there */
352 elog(LOG,
353 "could not open dbspace directory \"%s\": %m",
354 dbspacedirname);
355 return;
356 }
357
358 while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
359 {
360 ForkNumber forkNum;
361 int oidchars;
362 char oidbuf[OIDCHARS + 1];
363 char mainpath[MAXPGPATH];
364
365 /* Skip anything that doesn't look like a relation data file. */
366 if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
367 &forkNum))
368 continue;
369
370 /* Also skip it unless this is the init fork. */
371 if (forkNum != INIT_FORKNUM)
372 continue;
373
374 /* Construct main fork pathname. */
375 memcpy(oidbuf, de->d_name, oidchars);
376 oidbuf[oidchars] = '\0';
377 snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
378 dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
379 strlen(forkNames[INIT_FORKNUM]));
380
381 fsync_fname(mainpath, false);
382 }
383
384 FreeDir(dbspace_dir);
385
386 fsync_fname(dbspacedirname, true);
387 }
388 }
389
390 /*
391 * Basic parsing of putative relation filenames.
392 *
393 * This function returns true if the file appears to be in the correct format
394 * for a non-temporary relation and false otherwise.
395 *
396 * NB: If this function returns true, the caller is entitled to assume that
397 * *oidchars has been set to the a value no more than OIDCHARS, and thus
398 * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID
399 * portion of the filename. This is critical to protect against a possible
400 * buffer overrun.
401 */
402 static bool
parse_filename_for_nontemp_relation(const char * name,int * oidchars,ForkNumber * fork)403 parse_filename_for_nontemp_relation(const char *name, int *oidchars,
404 ForkNumber *fork)
405 {
406 int pos;
407
408 /* Look for a non-empty string of digits (that isn't too long). */
409 for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
410 ;
411 if (pos == 0 || pos > OIDCHARS)
412 return false;
413 *oidchars = pos;
414
415 /* Check for a fork name. */
416 if (name[pos] != '_')
417 *fork = MAIN_FORKNUM;
418 else
419 {
420 int forkchar;
421
422 forkchar = forkname_chars(&name[pos + 1], fork);
423 if (forkchar <= 0)
424 return false;
425 pos += forkchar + 1;
426 }
427
428 /* Check for a segment number. */
429 if (name[pos] == '.')
430 {
431 int segchar;
432
433 for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
434 ;
435 if (segchar <= 1)
436 return false;
437 pos += segchar;
438 }
439
440 /* Now we should be at the end. */
441 if (name[pos] != '\0')
442 return false;
443 return true;
444 }
445