1 /*
2  * pg_archivecleanup.c
3  *
4  * To be used as archive_cleanup_command to clean an archive when using
5  * standby mode.
6  *
7  * src/bin/pg_archivecleanup/pg_archivecleanup.c
8  */
9 #include "postgres_fe.h"
10 
11 #include <ctype.h>
12 #include <dirent.h>
13 #include <sys/stat.h>
14 #include <fcntl.h>
15 #include <signal.h>
16 #include <sys/time.h>
17 
18 #include "access/xlog_internal.h"
19 #include "common/logging.h"
20 #include "pg_getopt.h"
21 
/* Program name, derived from argv[0] in main(); used in help and hint text */
const char *progname;

/* Options and defaults */
bool		dryrun = false;		/* -n: only print the files that would be
								 * removed, without unlinking them */
char	   *additional_ext = NULL;	/* -x: extension stripped from file names
									 * before they are tested as WAL names */

char	   *archiveLocation;	/* first non-option argument: where to find
								 * the archive */
char	   *restartWALFileName; /* second non-option argument: the file from
								 * which we can restart restore */
char		exclusiveCleanupFileName[MAXFNAMELEN];	/* the oldest file we want
													 * to remain in archive;
													 * filled in by
													 * SetWALFileNameForCleanup() */
32 
33 
34 /* =====================================================================
35  *
36  *		  Customizable section
37  *
38  * =====================================================================
39  *
40  *	Currently, this section assumes that the Archive is a locally
41  *	accessible directory. If you want to make other assumptions,
42  *	such as using a vendor-specific archive and access API, these
43  *	routines are the ones you'll need to change. You're
44  *	encouraged to submit any changes to pgsql-hackers@lists.postgresql.org
45  *	or personally to the current maintainer. Those changes may be
46  *	folded in to later versions of this program.
47  */
48 
49 /*
50  *	Initialize allows customized commands into the archive cleanup program.
51  *
52  *	You may wish to add code to check for tape libraries, etc..
53  */
54 static void
Initialize(void)55 Initialize(void)
56 {
57 	/*
58 	 * This code assumes that archiveLocation is a directory, so we use stat
59 	 * to test if it's accessible.
60 	 */
61 	struct stat stat_buf;
62 
63 	if (stat(archiveLocation, &stat_buf) != 0 ||
64 		!S_ISDIR(stat_buf.st_mode))
65 	{
66 		pg_log_error("archive location \"%s\" does not exist",
67 					 archiveLocation);
68 		exit(2);
69 	}
70 }
71 
/*
 * TrimExtension
 *
 *	  Strip "extension" from the end of "filename", in place.
 *
 * filename:  NUL-terminated, writable string; truncated in place on a match.
 * extension: suffix to remove, or NULL to leave filename untouched.
 *
 * Nothing is changed unless filename is strictly longer than extension and
 * ends with it, so a name consisting solely of the extension is kept as is.
 */
static void
TrimExtension(char *filename, const char *extension)
{
	size_t		flen;
	size_t		elen;

	if (extension == NULL)
		return;

	/* strlen() yields size_t; keep that type rather than narrowing to int */
	elen = strlen(extension);
	flen = strlen(filename);

	if (flen > elen && strcmp(filename + (flen - elen), extension) == 0)
		filename[flen - elen] = '\0';
}
87 
88 static void
CleanupPriorWALFiles(void)89 CleanupPriorWALFiles(void)
90 {
91 	int			rc;
92 	DIR		   *xldir;
93 	struct dirent *xlde;
94 	char		walfile[MAXPGPATH];
95 
96 	if ((xldir = opendir(archiveLocation)) != NULL)
97 	{
98 		while (errno = 0, (xlde = readdir(xldir)) != NULL)
99 		{
100 			/*
101 			 * Truncation is essentially harmless, because we skip names of
102 			 * length other than XLOG_FNAME_LEN.  (In principle, one could use
103 			 * a 1000-character additional_ext and get trouble.)
104 			 */
105 			strlcpy(walfile, xlde->d_name, MAXPGPATH);
106 			TrimExtension(walfile, additional_ext);
107 
108 			/*
109 			 * We ignore the timeline part of the XLOG segment identifiers in
110 			 * deciding whether a segment is still needed.  This ensures that
111 			 * we won't prematurely remove a segment from a parent timeline.
112 			 * We could probably be a little more proactive about removing
113 			 * segments of non-parent timelines, but that would be a whole lot
114 			 * more complicated.
115 			 *
116 			 * We use the alphanumeric sorting property of the filenames to
117 			 * decide which ones are earlier than the exclusiveCleanupFileName
118 			 * file. Note that this means files are not removed in the order
119 			 * they were originally written, in case this worries you.
120 			 */
121 			if ((IsXLogFileName(walfile) || IsPartialXLogFileName(walfile)) &&
122 				strcmp(walfile + 8, exclusiveCleanupFileName + 8) < 0)
123 			{
124 				char		WALFilePath[MAXPGPATH * 2]; /* the file path
125 														 * including archive */
126 
127 				/*
128 				 * Use the original file name again now, including any
129 				 * extension that might have been chopped off before testing
130 				 * the sequence.
131 				 */
132 				snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s",
133 						 archiveLocation, xlde->d_name);
134 
135 				if (dryrun)
136 				{
137 					/*
138 					 * Prints the name of the file to be removed and skips the
139 					 * actual removal.  The regular printout is so that the
140 					 * user can pipe the output into some other program.
141 					 */
142 					printf("%s\n", WALFilePath);
143 					pg_log_debug("file \"%s\" would be removed", WALFilePath);
144 					continue;
145 				}
146 
147 				pg_log_debug("removing file \"%s\"", WALFilePath);
148 
149 				rc = unlink(WALFilePath);
150 				if (rc != 0)
151 				{
152 					pg_log_error("could not remove file \"%s\": %m",
153 								 WALFilePath);
154 					break;
155 				}
156 			}
157 		}
158 
159 		if (errno)
160 			pg_log_error("could not read archive location \"%s\": %m",
161 						 archiveLocation);
162 		if (closedir(xldir))
163 			pg_log_error("could not close archive location \"%s\": %m",
164 						 archiveLocation);
165 	}
166 	else
167 		pg_log_error("could not open archive location \"%s\": %m",
168 					 archiveLocation);
169 }
170 
171 /*
172  * SetWALFileNameForCleanup()
173  *
174  *	  Set the earliest WAL filename that we want to keep on the archive
175  *	  and decide whether we need cleanup
176  */
177 static void
SetWALFileNameForCleanup(void)178 SetWALFileNameForCleanup(void)
179 {
180 	bool		fnameOK = false;
181 
182 	TrimExtension(restartWALFileName, additional_ext);
183 
184 	/*
185 	 * If restartWALFileName is a WAL file name then just use it directly. If
186 	 * restartWALFileName is a .partial or .backup filename, make sure we use
187 	 * the prefix of the filename, otherwise we will remove wrong files since
188 	 * 000000010000000000000010.partial and
189 	 * 000000010000000000000010.00000020.backup are after
190 	 * 000000010000000000000010.
191 	 */
192 	if (IsXLogFileName(restartWALFileName))
193 	{
194 		strcpy(exclusiveCleanupFileName, restartWALFileName);
195 		fnameOK = true;
196 	}
197 	else if (IsPartialXLogFileName(restartWALFileName))
198 	{
199 		int			args;
200 		uint32		tli = 1,
201 					log = 0,
202 					seg = 0;
203 
204 		args = sscanf(restartWALFileName, "%08X%08X%08X.partial",
205 					  &tli, &log, &seg);
206 		if (args == 3)
207 		{
208 			fnameOK = true;
209 
210 			/*
211 			 * Use just the prefix of the filename, ignore everything after
212 			 * first period
213 			 */
214 			XLogFileNameById(exclusiveCleanupFileName, tli, log, seg);
215 		}
216 	}
217 	else if (IsBackupHistoryFileName(restartWALFileName))
218 	{
219 		int			args;
220 		uint32		tli = 1,
221 					log = 0,
222 					seg = 0,
223 					offset = 0;
224 
225 		args = sscanf(restartWALFileName, "%08X%08X%08X.%08X.backup", &tli, &log, &seg, &offset);
226 		if (args == 4)
227 		{
228 			fnameOK = true;
229 
230 			/*
231 			 * Use just the prefix of the filename, ignore everything after
232 			 * first period
233 			 */
234 			XLogFileNameById(exclusiveCleanupFileName, tli, log, seg);
235 		}
236 	}
237 
238 	if (!fnameOK)
239 	{
240 		pg_log_error("invalid file name argument");
241 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
242 		exit(2);
243 	}
244 }
245 
246 /* =====================================================================
247  *		  End of Customizable section
248  * =====================================================================
249  */
250 
/*
 * usage
 *
 *	  Print the help text (synopsis, options, and examples) to stdout.
 *
 * Each message is passed through _() and uses progname where the program
 * name appears.
 */
static void
usage(void)
{
	/* Synopsis */
	printf(_("%s removes older WAL files from PostgreSQL archives.\n\n"), progname);
	printf(_("Usage:\n"));
	printf(_("  %s [OPTION]... ARCHIVELOCATION OLDESTKEPTWALFILE\n"), progname);

	/* Option summary */
	printf(_("\nOptions:\n"));
	printf(_("  -d             generate debug output (verbose mode)\n"));
	printf(_("  -n             dry run, show the names of the files that would be removed\n"));
	printf(_("  -V, --version  output version information, then exit\n"));
	printf(_("  -x EXT         clean up files if they have this extension\n"));
	printf(_("  -?, --help     show this help, then exit\n"));

	/* Examples; "%%r" is an escaped percent sign and prints as "%r" */
	printf(_("\n"
			 "For use as archive_cleanup_command in postgresql.conf:\n"
			 "  archive_cleanup_command = 'pg_archivecleanup [OPTION]... ARCHIVELOCATION %%r'\n"
			 "e.g.\n"
			 "  archive_cleanup_command = 'pg_archivecleanup /mnt/server/archiverdir %%r'\n"));
	printf(_("\n"
			 "Or for use as a standalone archive cleaner:\n"
			 "e.g.\n"
			 "  pg_archivecleanup /mnt/server/archiverdir 000000010000000000000010.00000020.backup\n"));

	/* Bug-report address and project home page */
	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
275 
276 /*------------ MAIN ----------------------------------------*/
277 int
main(int argc,char ** argv)278 main(int argc, char **argv)
279 {
280 	int			c;
281 
282 	pg_logging_init(argv[0]);
283 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_archivecleanup"));
284 	progname = get_progname(argv[0]);
285 
286 	if (argc > 1)
287 	{
288 		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
289 		{
290 			usage();
291 			exit(0);
292 		}
293 		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
294 		{
295 			puts("pg_archivecleanup (PostgreSQL) " PG_VERSION);
296 			exit(0);
297 		}
298 	}
299 
300 	while ((c = getopt(argc, argv, "x:dn")) != -1)
301 	{
302 		switch (c)
303 		{
304 			case 'd':			/* Debug mode */
305 				pg_logging_increase_verbosity();
306 				break;
307 			case 'n':			/* Dry-Run mode */
308 				dryrun = true;
309 				break;
310 			case 'x':
311 				additional_ext = pg_strdup(optarg); /* Extension to remove
312 													 * from xlogfile names */
313 				break;
314 			default:
315 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
316 				exit(2);
317 				break;
318 		}
319 	}
320 
321 	/*
322 	 * We will go to the archiveLocation to check restartWALFileName.
323 	 * restartWALFileName may not exist anymore, which would not be an error,
324 	 * so we separate the archiveLocation and restartWALFileName so we can
325 	 * check separately whether archiveLocation exists, if not that is an
326 	 * error
327 	 */
328 	if (optind < argc)
329 	{
330 		archiveLocation = argv[optind];
331 		optind++;
332 	}
333 	else
334 	{
335 		pg_log_error("must specify archive location");
336 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
337 		exit(2);
338 	}
339 
340 	if (optind < argc)
341 	{
342 		restartWALFileName = argv[optind];
343 		optind++;
344 	}
345 	else
346 	{
347 		pg_log_error("must specify oldest kept WAL file");
348 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
349 		exit(2);
350 	}
351 
352 	if (optind < argc)
353 	{
354 		pg_log_error("too many command-line arguments");
355 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
356 		exit(2);
357 	}
358 
359 	/*
360 	 * Check archive exists and other initialization if required.
361 	 */
362 	Initialize();
363 
364 	/*
365 	 * Check filename is a valid name, then process to find cut-off
366 	 */
367 	SetWALFileNameForCleanup();
368 
369 	pg_log_debug("keeping WAL file \"%s/%s\" and later",
370 				 archiveLocation, exclusiveCleanupFileName);
371 
372 	/*
373 	 * Remove WAL files older than cut-off
374 	 */
375 	CleanupPriorWALFiles();
376 
377 	exit(0);
378 }
379