1 /*
2  * pg_archivecleanup.c
3  *
4  * To be used as archive_cleanup_command to clean an archive when using
5  * standby mode.
6  *
7  * src/bin/pg_archivecleanup/pg_archivecleanup.c
8  */
9 #include "postgres_fe.h"
10 
11 #include <ctype.h>
12 #include <dirent.h>
13 #include <sys/stat.h>
14 #include <fcntl.h>
15 #include <signal.h>
16 #include <sys/time.h>
17 
18 #include "pg_getopt.h"
19 
20 #include "common/logging.h"
21 
22 #include "access/xlog_internal.h"
23 
24 const char *progname;
25 
26 /* Options and defaults */
27 bool		dryrun = false;		/* are we performing a dry-run operation? */
28 char	   *additional_ext = NULL;	/* Extension to remove from filenames */
29 
30 char	   *archiveLocation;	/* where to find the archive? */
31 char	   *restartWALFileName; /* the file from which we can restart restore */
32 char		exclusiveCleanupFileName[MAXFNAMELEN];	/* the oldest file we want
33 													 * to remain in archive */
34 
35 
36 /* =====================================================================
37  *
38  *		  Customizable section
39  *
40  * =====================================================================
41  *
42  *	Currently, this section assumes that the Archive is a locally
43  *	accessible directory. If you want to make other assumptions,
44  *	such as using a vendor-specific archive and access API, these
45  *	routines are the ones you'll need to change. You're
46  *	encouraged to submit any changes to pgsql-hackers@lists.postgresql.org
47  *	or personally to the current maintainer. Those changes may be
48  *	folded in to later versions of this program.
49  */
50 
51 /*
52  *	Initialize allows customized commands into the archive cleanup program.
53  *
54  *	You may wish to add code to check for tape libraries, etc..
55  */
56 static void
Initialize(void)57 Initialize(void)
58 {
59 	/*
60 	 * This code assumes that archiveLocation is a directory, so we use stat
61 	 * to test if it's accessible.
62 	 */
63 	struct stat stat_buf;
64 
65 	if (stat(archiveLocation, &stat_buf) != 0 ||
66 		!S_ISDIR(stat_buf.st_mode))
67 	{
68 		pg_log_error("archive location \"%s\" does not exist",
69 					 archiveLocation);
70 		exit(2);
71 	}
72 }
73 
/*
 * TrimExtension
 *
 * Strip "extension" from the end of "filename", modifying filename in place.
 *
 * Nothing happens when extension is NULL, when filename does not end with
 * extension, or when filename is not strictly longer than extension (so a
 * name consisting of nothing but the extension is left alone).
 */
static void
TrimExtension(char *filename, const char *extension)
{
	size_t		flen;
	size_t		elen;

	if (extension == NULL)
		return;

	elen = strlen(extension);
	flen = strlen(filename);

	/* flen > elen also guarantees that flen - elen cannot wrap around */
	if (flen > elen && strcmp(filename + flen - elen, extension) == 0)
		filename[flen - elen] = '\0';
}
89 
90 static void
CleanupPriorWALFiles(void)91 CleanupPriorWALFiles(void)
92 {
93 	int			rc;
94 	DIR		   *xldir;
95 	struct dirent *xlde;
96 	char		walfile[MAXPGPATH];
97 
98 	if ((xldir = opendir(archiveLocation)) != NULL)
99 	{
100 		while (errno = 0, (xlde = readdir(xldir)) != NULL)
101 		{
102 			/*
103 			 * Truncation is essentially harmless, because we skip names of
104 			 * length other than XLOG_FNAME_LEN.  (In principle, one could use
105 			 * a 1000-character additional_ext and get trouble.)
106 			 */
107 			strlcpy(walfile, xlde->d_name, MAXPGPATH);
108 			TrimExtension(walfile, additional_ext);
109 
110 			/*
111 			 * We ignore the timeline part of the XLOG segment identifiers in
112 			 * deciding whether a segment is still needed.  This ensures that
113 			 * we won't prematurely remove a segment from a parent timeline.
114 			 * We could probably be a little more proactive about removing
115 			 * segments of non-parent timelines, but that would be a whole lot
116 			 * more complicated.
117 			 *
118 			 * We use the alphanumeric sorting property of the filenames to
119 			 * decide which ones are earlier than the exclusiveCleanupFileName
120 			 * file. Note that this means files are not removed in the order
121 			 * they were originally written, in case this worries you.
122 			 */
123 			if ((IsXLogFileName(walfile) || IsPartialXLogFileName(walfile)) &&
124 				strcmp(walfile + 8, exclusiveCleanupFileName + 8) < 0)
125 			{
126 				char		WALFilePath[MAXPGPATH * 2]; /* the file path
127 														 * including archive */
128 
129 				/*
130 				 * Use the original file name again now, including any
131 				 * extension that might have been chopped off before testing
132 				 * the sequence.
133 				 */
134 				snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s",
135 						 archiveLocation, xlde->d_name);
136 
137 				if (dryrun)
138 				{
139 					/*
140 					 * Prints the name of the file to be removed and skips the
141 					 * actual removal.  The regular printout is so that the
142 					 * user can pipe the output into some other program.
143 					 */
144 					printf("%s\n", WALFilePath);
145 					pg_log_debug("file \"%s\" would be removed", WALFilePath);
146 					continue;
147 				}
148 
149 				pg_log_debug("removing file \"%s\"", WALFilePath);
150 
151 				rc = unlink(WALFilePath);
152 				if (rc != 0)
153 				{
154 					pg_log_error("could not remove file \"%s\": %m",
155 								 WALFilePath);
156 					break;
157 				}
158 			}
159 		}
160 
161 		if (errno)
162 			pg_log_error("could not read archive location \"%s\": %m",
163 						 archiveLocation);
164 		if (closedir(xldir))
165 			pg_log_error("could not close archive location \"%s\": %m",
166 						 archiveLocation);
167 	}
168 	else
169 		pg_log_error("could not open archive location \"%s\": %m",
170 					 archiveLocation);
171 }
172 
173 /*
174  * SetWALFileNameForCleanup()
175  *
176  *	  Set the earliest WAL filename that we want to keep on the archive
177  *	  and decide whether we need cleanup
178  */
179 static void
SetWALFileNameForCleanup(void)180 SetWALFileNameForCleanup(void)
181 {
182 	bool		fnameOK = false;
183 
184 	TrimExtension(restartWALFileName, additional_ext);
185 
186 	/*
187 	 * If restartWALFileName is a WAL file name then just use it directly. If
188 	 * restartWALFileName is a .partial or .backup filename, make sure we use
189 	 * the prefix of the filename, otherwise we will remove wrong files since
190 	 * 000000010000000000000010.partial and
191 	 * 000000010000000000000010.00000020.backup are after
192 	 * 000000010000000000000010.
193 	 */
194 	if (IsXLogFileName(restartWALFileName))
195 	{
196 		strcpy(exclusiveCleanupFileName, restartWALFileName);
197 		fnameOK = true;
198 	}
199 	else if (IsPartialXLogFileName(restartWALFileName))
200 	{
201 		int			args;
202 		uint32		tli = 1,
203 					log = 0,
204 					seg = 0;
205 
206 		args = sscanf(restartWALFileName, "%08X%08X%08X.partial",
207 					  &tli, &log, &seg);
208 		if (args == 3)
209 		{
210 			fnameOK = true;
211 
212 			/*
213 			 * Use just the prefix of the filename, ignore everything after
214 			 * first period
215 			 */
216 			XLogFileNameById(exclusiveCleanupFileName, tli, log, seg);
217 		}
218 	}
219 	else if (IsBackupHistoryFileName(restartWALFileName))
220 	{
221 		int			args;
222 		uint32		tli = 1,
223 					log = 0,
224 					seg = 0,
225 					offset = 0;
226 
227 		args = sscanf(restartWALFileName, "%08X%08X%08X.%08X.backup", &tli, &log, &seg, &offset);
228 		if (args == 4)
229 		{
230 			fnameOK = true;
231 
232 			/*
233 			 * Use just the prefix of the filename, ignore everything after
234 			 * first period
235 			 */
236 			XLogFileNameById(exclusiveCleanupFileName, tli, log, seg);
237 		}
238 	}
239 
240 	if (!fnameOK)
241 	{
242 		pg_log_error("invalid file name argument");
243 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
244 		exit(2);
245 	}
246 }
247 
248 /* =====================================================================
249  *		  End of Customizable section
250  * =====================================================================
251  */
252 
253 static void
usage(void)254 usage(void)
255 {
256 	printf(_("%s removes older WAL files from PostgreSQL archives.\n\n"), progname);
257 	printf(_("Usage:\n"));
258 	printf(_("  %s [OPTION]... ARCHIVELOCATION OLDESTKEPTWALFILE\n"), progname);
259 	printf(_("\nOptions:\n"));
260 	printf(_("  -d             generate debug output (verbose mode)\n"));
261 	printf(_("  -n             dry run, show the names of the files that would be removed\n"));
262 	printf(_("  -V, --version  output version information, then exit\n"));
263 	printf(_("  -x EXT         clean up files if they have this extension\n"));
264 	printf(_("  -?, --help     show this help, then exit\n"));
265 	printf(_("\n"
266 			 "For use as archive_cleanup_command in postgresql.conf:\n"
267 			 "  archive_cleanup_command = 'pg_archivecleanup [OPTION]... ARCHIVELOCATION %%r'\n"
268 			 "e.g.\n"
269 			 "  archive_cleanup_command = 'pg_archivecleanup /mnt/server/archiverdir %%r'\n"));
270 	printf(_("\n"
271 			 "Or for use as a standalone archive cleaner:\n"
272 			 "e.g.\n"
273 			 "  pg_archivecleanup /mnt/server/archiverdir 000000010000000000000010.00000020.backup\n"));
274 	printf(_("\nReport bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
275 }
276 
277 /*------------ MAIN ----------------------------------------*/
278 int
main(int argc,char ** argv)279 main(int argc, char **argv)
280 {
281 	int			c;
282 
283 	pg_logging_init(argv[0]);
284 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_archivecleanup"));
285 	progname = get_progname(argv[0]);
286 
287 	if (argc > 1)
288 	{
289 		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
290 		{
291 			usage();
292 			exit(0);
293 		}
294 		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
295 		{
296 			puts("pg_archivecleanup (PostgreSQL) " PG_VERSION);
297 			exit(0);
298 		}
299 	}
300 
301 	while ((c = getopt(argc, argv, "x:dn")) != -1)
302 	{
303 		switch (c)
304 		{
305 			case 'd':			/* Debug mode */
306 				pg_logging_set_level(PG_LOG_DEBUG);
307 				break;
308 			case 'n':			/* Dry-Run mode */
309 				dryrun = true;
310 				break;
311 			case 'x':
312 				additional_ext = pg_strdup(optarg); /* Extension to remove
313 													 * from xlogfile names */
314 				break;
315 			default:
316 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
317 				exit(2);
318 				break;
319 		}
320 	}
321 
322 	/*
323 	 * We will go to the archiveLocation to check restartWALFileName.
324 	 * restartWALFileName may not exist anymore, which would not be an error,
325 	 * so we separate the archiveLocation and restartWALFileName so we can
326 	 * check separately whether archiveLocation exists, if not that is an
327 	 * error
328 	 */
329 	if (optind < argc)
330 	{
331 		archiveLocation = argv[optind];
332 		optind++;
333 	}
334 	else
335 	{
336 		pg_log_error("must specify archive location");
337 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
338 		exit(2);
339 	}
340 
341 	if (optind < argc)
342 	{
343 		restartWALFileName = argv[optind];
344 		optind++;
345 	}
346 	else
347 	{
348 		pg_log_error("must specify oldest kept WAL file");
349 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
350 		exit(2);
351 	}
352 
353 	if (optind < argc)
354 	{
355 		pg_log_error("too many command-line arguments");
356 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
357 		exit(2);
358 	}
359 
360 	/*
361 	 * Check archive exists and other initialization if required.
362 	 */
363 	Initialize();
364 
365 	/*
366 	 * Check filename is a valid name, then process to find cut-off
367 	 */
368 	SetWALFileNameForCleanup();
369 
370 	pg_log_debug("keeping WAL file \"%s/%s\" and later",
371 				 archiveLocation, exclusiveCleanupFileName);
372 
373 	/*
374 	 * Remove WAL files older than cut-off
375 	 */
376 	CleanupPriorWALFiles();
377 
378 	exit(0);
379 }
380