1 /*
2  * psql - the PostgreSQL interactive terminal
3  *
4  * Copyright (c) 2000-2021, PostgreSQL Global Development Group
5  *
6  * src/bin/psql/copy.c
7  */
8 #include "postgres_fe.h"
9 
10 #include <signal.h>
11 #include <sys/stat.h>
12 #ifndef WIN32
13 #include <unistd.h>				/* for isatty */
14 #else
15 #include <io.h>					/* I think */
16 #endif
17 
18 #include "common.h"
19 #include "common/logging.h"
20 #include "copy.h"
21 #include "libpq-fe.h"
22 #include "pqexpbuffer.h"
23 #include "prompt.h"
24 #include "settings.h"
25 #include "stringutils.h"
26 
27 /*
28  * parse_slash_copy
29  * -- parses \copy command line
30  *
31  * The documented syntax is:
32  *	\copy tablename [(columnlist)] from|to filename [options]
33  *	\copy ( query stmt ) to filename [options]
34  *
35  * where 'filename' can be one of the following:
36  *	'<file path>' | PROGRAM '<command>' | stdin | stdout | pstdout | pstdout
37  * and 'query' can be one of the following:
38  *	SELECT | UPDATE | INSERT | DELETE
39  *
40  * An undocumented fact is that you can still write BINARY before the
41  * tablename; this is a hangover from the pre-7.3 syntax.  The options
42  * syntax varies across backend versions, but we avoid all that mess
43  * by just transmitting the stuff after the filename literally.
44  *
45  * table name can be double-quoted and can have a schema part.
46  * column names can be double-quoted.
47  * filename can be single-quoted like SQL literals.
48  * command must be single-quoted like SQL literals.
49  *
50  * returns a malloc'ed structure with the options, or NULL on parsing error
51  */
52 
53 struct copy_options
54 {
55 	char	   *before_tofrom;	/* COPY string before TO/FROM */
56 	char	   *after_tofrom;	/* COPY string after TO/FROM filename */
57 	char	   *file;			/* NULL = stdin/stdout */
58 	bool		program;		/* is 'file' a program to popen? */
59 	bool		psql_inout;		/* true = use psql stdin/stdout */
60 	bool		from;			/* true = FROM, false = TO */
61 };
62 
63 
64 static void
free_copy_options(struct copy_options * ptr)65 free_copy_options(struct copy_options *ptr)
66 {
67 	if (!ptr)
68 		return;
69 	free(ptr->before_tofrom);
70 	free(ptr->after_tofrom);
71 	free(ptr->file);
72 	free(ptr);
73 }
74 
75 
76 /* concatenate "more" onto "var", freeing the original value of *var */
77 static void
xstrcat(char ** var,const char * more)78 xstrcat(char **var, const char *more)
79 {
80 	char	   *newvar;
81 
82 	newvar = psprintf("%s%s", *var, more);
83 	free(*var);
84 	*var = newvar;
85 }
86 
87 
88 static struct copy_options *
parse_slash_copy(const char * args)89 parse_slash_copy(const char *args)
90 {
91 	struct copy_options *result;
92 	char	   *token;
93 	const char *whitespace = " \t\n\r";
94 	char		nonstd_backslash = standard_strings() ? 0 : '\\';
95 
96 	if (!args)
97 	{
98 		pg_log_error("\\copy: arguments required");
99 		return NULL;
100 	}
101 
102 	result = pg_malloc0(sizeof(struct copy_options));
103 
104 	result->before_tofrom = pg_strdup("");	/* initialize for appending */
105 
106 	token = strtokx(args, whitespace, ".,()", "\"",
107 					0, false, false, pset.encoding);
108 	if (!token)
109 		goto error;
110 
111 	/* The following can be removed when we drop 7.3 syntax support */
112 	if (pg_strcasecmp(token, "binary") == 0)
113 	{
114 		xstrcat(&result->before_tofrom, token);
115 		token = strtokx(NULL, whitespace, ".,()", "\"",
116 						0, false, false, pset.encoding);
117 		if (!token)
118 			goto error;
119 	}
120 
121 	/* Handle COPY (query) case */
122 	if (token[0] == '(')
123 	{
124 		int			parens = 1;
125 
126 		while (parens > 0)
127 		{
128 			xstrcat(&result->before_tofrom, " ");
129 			xstrcat(&result->before_tofrom, token);
130 			token = strtokx(NULL, whitespace, "()", "\"'",
131 							nonstd_backslash, true, false, pset.encoding);
132 			if (!token)
133 				goto error;
134 			if (token[0] == '(')
135 				parens++;
136 			else if (token[0] == ')')
137 				parens--;
138 		}
139 	}
140 
141 	xstrcat(&result->before_tofrom, " ");
142 	xstrcat(&result->before_tofrom, token);
143 	token = strtokx(NULL, whitespace, ".,()", "\"",
144 					0, false, false, pset.encoding);
145 	if (!token)
146 		goto error;
147 
148 	/*
149 	 * strtokx() will not have returned a multi-character token starting with
150 	 * '.', so we don't need strcmp() here.  Likewise for '(', etc, below.
151 	 */
152 	if (token[0] == '.')
153 	{
154 		/* handle schema . table */
155 		xstrcat(&result->before_tofrom, token);
156 		token = strtokx(NULL, whitespace, ".,()", "\"",
157 						0, false, false, pset.encoding);
158 		if (!token)
159 			goto error;
160 		xstrcat(&result->before_tofrom, token);
161 		token = strtokx(NULL, whitespace, ".,()", "\"",
162 						0, false, false, pset.encoding);
163 		if (!token)
164 			goto error;
165 	}
166 
167 	if (token[0] == '(')
168 	{
169 		/* handle parenthesized column list */
170 		for (;;)
171 		{
172 			xstrcat(&result->before_tofrom, " ");
173 			xstrcat(&result->before_tofrom, token);
174 			token = strtokx(NULL, whitespace, "()", "\"",
175 							0, false, false, pset.encoding);
176 			if (!token)
177 				goto error;
178 			if (token[0] == ')')
179 				break;
180 		}
181 		xstrcat(&result->before_tofrom, " ");
182 		xstrcat(&result->before_tofrom, token);
183 		token = strtokx(NULL, whitespace, ".,()", "\"",
184 						0, false, false, pset.encoding);
185 		if (!token)
186 			goto error;
187 	}
188 
189 	if (pg_strcasecmp(token, "from") == 0)
190 		result->from = true;
191 	else if (pg_strcasecmp(token, "to") == 0)
192 		result->from = false;
193 	else
194 		goto error;
195 
196 	/* { 'filename' | PROGRAM 'command' | STDIN | STDOUT | PSTDIN | PSTDOUT } */
197 	token = strtokx(NULL, whitespace, ";", "'",
198 					0, false, false, pset.encoding);
199 	if (!token)
200 		goto error;
201 
202 	if (pg_strcasecmp(token, "program") == 0)
203 	{
204 		int			toklen;
205 
206 		token = strtokx(NULL, whitespace, ";", "'",
207 						0, false, false, pset.encoding);
208 		if (!token)
209 			goto error;
210 
211 		/*
212 		 * The shell command must be quoted. This isn't fool-proof, but
213 		 * catches most quoting errors.
214 		 */
215 		toklen = strlen(token);
216 		if (token[0] != '\'' || toklen < 2 || token[toklen - 1] != '\'')
217 			goto error;
218 
219 		strip_quotes(token, '\'', 0, pset.encoding);
220 
221 		result->program = true;
222 		result->file = pg_strdup(token);
223 	}
224 	else if (pg_strcasecmp(token, "stdin") == 0 ||
225 			 pg_strcasecmp(token, "stdout") == 0)
226 	{
227 		result->file = NULL;
228 	}
229 	else if (pg_strcasecmp(token, "pstdin") == 0 ||
230 			 pg_strcasecmp(token, "pstdout") == 0)
231 	{
232 		result->psql_inout = true;
233 		result->file = NULL;
234 	}
235 	else
236 	{
237 		/* filename can be optionally quoted */
238 		strip_quotes(token, '\'', 0, pset.encoding);
239 		result->file = pg_strdup(token);
240 		expand_tilde(&result->file);
241 	}
242 
243 	/* Collect the rest of the line (COPY options) */
244 	token = strtokx(NULL, "", NULL, NULL,
245 					0, false, false, pset.encoding);
246 	if (token)
247 		result->after_tofrom = pg_strdup(token);
248 
249 	return result;
250 
251 error:
252 	if (token)
253 		pg_log_error("\\copy: parse error at \"%s\"", token);
254 	else
255 		pg_log_error("\\copy: parse error at end of line");
256 	free_copy_options(result);
257 
258 	return NULL;
259 }
260 
261 
262 /*
263  * Execute a \copy command (frontend copy). We have to open a file (or execute
264  * a command), then submit a COPY query to the backend and either feed it data
265  * from the file or route its response into the file.
266  */
267 bool
do_copy(const char * args)268 do_copy(const char *args)
269 {
270 	PQExpBufferData query;
271 	FILE	   *copystream;
272 	struct copy_options *options;
273 	bool		success;
274 
275 	/* parse options */
276 	options = parse_slash_copy(args);
277 
278 	if (!options)
279 		return false;
280 
281 	/* prepare to read or write the target file */
282 	if (options->file && !options->program)
283 		canonicalize_path(options->file);
284 
285 	if (options->from)
286 	{
287 		if (options->file)
288 		{
289 			if (options->program)
290 			{
291 				fflush(stdout);
292 				fflush(stderr);
293 				errno = 0;
294 				copystream = popen(options->file, PG_BINARY_R);
295 			}
296 			else
297 				copystream = fopen(options->file, PG_BINARY_R);
298 		}
299 		else if (!options->psql_inout)
300 			copystream = pset.cur_cmd_source;
301 		else
302 			copystream = stdin;
303 	}
304 	else
305 	{
306 		if (options->file)
307 		{
308 			if (options->program)
309 			{
310 				fflush(stdout);
311 				fflush(stderr);
312 				errno = 0;
313 				disable_sigpipe_trap();
314 				copystream = popen(options->file, PG_BINARY_W);
315 			}
316 			else
317 				copystream = fopen(options->file, PG_BINARY_W);
318 		}
319 		else if (!options->psql_inout)
320 			copystream = pset.queryFout;
321 		else
322 			copystream = stdout;
323 	}
324 
325 	if (!copystream)
326 	{
327 		if (options->program)
328 			pg_log_error("could not execute command \"%s\": %m",
329 						 options->file);
330 		else
331 			pg_log_error("%s: %m",
332 						 options->file);
333 		free_copy_options(options);
334 		return false;
335 	}
336 
337 	if (!options->program)
338 	{
339 		struct stat st;
340 		int			result;
341 
342 		/* make sure the specified file is not a directory */
343 		if ((result = fstat(fileno(copystream), &st)) < 0)
344 			pg_log_error("could not stat file \"%s\": %m",
345 						 options->file);
346 
347 		if (result == 0 && S_ISDIR(st.st_mode))
348 			pg_log_error("%s: cannot copy from/to a directory",
349 						 options->file);
350 
351 		if (result < 0 || S_ISDIR(st.st_mode))
352 		{
353 			fclose(copystream);
354 			free_copy_options(options);
355 			return false;
356 		}
357 	}
358 
359 	/* build the command we will send to the backend */
360 	initPQExpBuffer(&query);
361 	printfPQExpBuffer(&query, "COPY ");
362 	appendPQExpBufferStr(&query, options->before_tofrom);
363 	if (options->from)
364 		appendPQExpBufferStr(&query, " FROM STDIN ");
365 	else
366 		appendPQExpBufferStr(&query, " TO STDOUT ");
367 	if (options->after_tofrom)
368 		appendPQExpBufferStr(&query, options->after_tofrom);
369 
370 	/* run it like a user command, but with copystream as data source/sink */
371 	pset.copyStream = copystream;
372 	success = SendQuery(query.data);
373 	pset.copyStream = NULL;
374 	termPQExpBuffer(&query);
375 
376 	if (options->file != NULL)
377 	{
378 		if (options->program)
379 		{
380 			int			pclose_rc = pclose(copystream);
381 
382 			if (pclose_rc != 0)
383 			{
384 				if (pclose_rc < 0)
385 					pg_log_error("could not close pipe to external command: %m");
386 				else
387 				{
388 					char	   *reason = wait_result_to_str(pclose_rc);
389 
390 					pg_log_error("%s: %s", options->file,
391 								 reason ? reason : "");
392 					if (reason)
393 						free(reason);
394 				}
395 				success = false;
396 			}
397 			restore_sigpipe_trap();
398 		}
399 		else
400 		{
401 			if (fclose(copystream) != 0)
402 			{
403 				pg_log_error("%s: %m", options->file);
404 				success = false;
405 			}
406 		}
407 	}
408 	free_copy_options(options);
409 	return success;
410 }
411 
412 
413 /*
414  * Functions for handling COPY IN/OUT data transfer.
415  *
416  * If you want to use COPY TO STDOUT/FROM STDIN in your application,
417  * this is the code to steal ;)
418  */
419 
420 /*
421  * handleCopyOut
422  * receives data as a result of a COPY ... TO STDOUT command
423  *
424  * conn should be a database connection that you just issued COPY TO on
425  * and got back a PGRES_COPY_OUT result.
426  *
427  * copystream is the file stream for the data to go to.
428  * copystream can be NULL to eat the data without writing it anywhere.
429  *
430  * The final status for the COPY is returned into *res (but note
431  * we already reported the error, if it's not a success result).
432  *
433  * result is true if successful, false if not.
434  */
435 bool
handleCopyOut(PGconn * conn,FILE * copystream,PGresult ** res)436 handleCopyOut(PGconn *conn, FILE *copystream, PGresult **res)
437 {
438 	bool		OK = true;
439 	char	   *buf;
440 	int			ret;
441 
442 	for (;;)
443 	{
444 		ret = PQgetCopyData(conn, &buf, 0);
445 
446 		if (ret < 0)
447 			break;				/* done or server/connection error */
448 
449 		if (buf)
450 		{
451 			if (OK && copystream && fwrite(buf, 1, ret, copystream) != ret)
452 			{
453 				pg_log_error("could not write COPY data: %m");
454 				/* complain only once, keep reading data from server */
455 				OK = false;
456 			}
457 			PQfreemem(buf);
458 		}
459 	}
460 
461 	if (OK && copystream && fflush(copystream))
462 	{
463 		pg_log_error("could not write COPY data: %m");
464 		OK = false;
465 	}
466 
467 	if (ret == -2)
468 	{
469 		pg_log_error("COPY data transfer failed: %s", PQerrorMessage(conn));
470 		OK = false;
471 	}
472 
473 	/*
474 	 * Check command status and return to normal libpq state.
475 	 *
476 	 * If for some reason libpq is still reporting PGRES_COPY_OUT state, we
477 	 * would like to forcibly exit that state, since our caller would be
478 	 * unable to distinguish that situation from reaching the next COPY in a
479 	 * command string that happened to contain two consecutive COPY TO STDOUT
480 	 * commands.  However, libpq provides no API for doing that, and in
481 	 * principle it's a libpq bug anyway if PQgetCopyData() returns -1 or -2
482 	 * but hasn't exited COPY_OUT state internally.  So we ignore the
483 	 * possibility here.
484 	 */
485 	*res = PQgetResult(conn);
486 	if (PQresultStatus(*res) != PGRES_COMMAND_OK)
487 	{
488 		pg_log_info("%s", PQerrorMessage(conn));
489 		OK = false;
490 	}
491 
492 	return OK;
493 }
494 
495 /*
496  * handleCopyIn
497  * sends data to complete a COPY ... FROM STDIN command
498  *
499  * conn should be a database connection that you just issued COPY FROM on
500  * and got back a PGRES_COPY_IN result.
501  * copystream is the file stream to read the data from.
502  * isbinary can be set from PQbinaryTuples().
503  * The final status for the COPY is returned into *res (but note
504  * we already reported the error, if it's not a success result).
505  *
506  * result is true if successful, false if not.
507  */
508 
509 /* read chunk size for COPY IN - size is not critical */
510 #define COPYBUFSIZ 8192
511 
512 bool
handleCopyIn(PGconn * conn,FILE * copystream,bool isbinary,PGresult ** res)513 handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
514 {
515 	bool		OK;
516 	char		buf[COPYBUFSIZ];
517 	bool		showprompt;
518 
519 	/*
520 	 * Establish longjmp destination for exiting from wait-for-input. (This is
521 	 * only effective while sigint_interrupt_enabled is TRUE.)
522 	 */
523 	if (sigsetjmp(sigint_interrupt_jmp, 1) != 0)
524 	{
525 		/* got here with longjmp */
526 
527 		/* Terminate data transfer */
528 		PQputCopyEnd(conn,
529 					 (PQprotocolVersion(conn) < 3) ? NULL :
530 					 _("canceled by user"));
531 
532 		OK = false;
533 		goto copyin_cleanup;
534 	}
535 
536 	/* Prompt if interactive input */
537 	if (isatty(fileno(copystream)))
538 	{
539 		showprompt = true;
540 		if (!pset.quiet)
541 			puts(_("Enter data to be copied followed by a newline.\n"
542 				   "End with a backslash and a period on a line by itself, or an EOF signal."));
543 	}
544 	else
545 		showprompt = false;
546 
547 	OK = true;
548 
549 	if (isbinary)
550 	{
551 		/* interactive input probably silly, but give one prompt anyway */
552 		if (showprompt)
553 		{
554 			const char *prompt = get_prompt(PROMPT_COPY, NULL);
555 
556 			fputs(prompt, stdout);
557 			fflush(stdout);
558 		}
559 
560 		for (;;)
561 		{
562 			int			buflen;
563 
564 			/* enable longjmp while waiting for input */
565 			sigint_interrupt_enabled = true;
566 
567 			buflen = fread(buf, 1, COPYBUFSIZ, copystream);
568 
569 			sigint_interrupt_enabled = false;
570 
571 			if (buflen <= 0)
572 				break;
573 
574 			if (PQputCopyData(conn, buf, buflen) <= 0)
575 			{
576 				OK = false;
577 				break;
578 			}
579 		}
580 	}
581 	else
582 	{
583 		bool		copydone = false;
584 
585 		while (!copydone)
586 		{						/* for each input line ... */
587 			bool		firstload;
588 			bool		linedone;
589 
590 			if (showprompt)
591 			{
592 				const char *prompt = get_prompt(PROMPT_COPY, NULL);
593 
594 				fputs(prompt, stdout);
595 				fflush(stdout);
596 			}
597 
598 			firstload = true;
599 			linedone = false;
600 
601 			while (!linedone)
602 			{					/* for each bufferload in line ... */
603 				int			linelen;
604 				char	   *fgresult;
605 
606 				/* enable longjmp while waiting for input */
607 				sigint_interrupt_enabled = true;
608 
609 				fgresult = fgets(buf, sizeof(buf), copystream);
610 
611 				sigint_interrupt_enabled = false;
612 
613 				if (!fgresult)
614 				{
615 					copydone = true;
616 					break;
617 				}
618 
619 				linelen = strlen(buf);
620 
621 				/* current line is done? */
622 				if (linelen > 0 && buf[linelen - 1] == '\n')
623 					linedone = true;
624 
625 				/* check for EOF marker, but not on a partial line */
626 				if (firstload)
627 				{
628 					/*
629 					 * This code erroneously assumes '\.' on a line alone
630 					 * inside a quoted CSV string terminates the \copy.
631 					 * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
632 					 */
633 					if (strcmp(buf, "\\.\n") == 0 ||
634 						strcmp(buf, "\\.\r\n") == 0)
635 					{
636 						copydone = true;
637 						break;
638 					}
639 
640 					firstload = false;
641 				}
642 
643 				if (PQputCopyData(conn, buf, linelen) <= 0)
644 				{
645 					OK = false;
646 					copydone = true;
647 					break;
648 				}
649 			}
650 
651 			if (copystream == pset.cur_cmd_source)
652 			{
653 				pset.lineno++;
654 				pset.stmt_lineno++;
655 			}
656 		}
657 	}
658 
659 	/* Check for read error */
660 	if (ferror(copystream))
661 		OK = false;
662 
663 	/*
664 	 * Terminate data transfer.  We can't send an error message if we're using
665 	 * protocol version 2.  (libpq no longer supports protocol version 2, but
666 	 * keep the version checks just in case you're using a pre-v14 libpq.so at
667 	 * runtime)
668 	 */
669 	if (PQputCopyEnd(conn,
670 					 (OK || PQprotocolVersion(conn) < 3) ? NULL :
671 					 _("aborted because of read failure")) <= 0)
672 		OK = false;
673 
674 copyin_cleanup:
675 
676 	/*
677 	 * Clear the EOF flag on the stream, in case copying ended due to an EOF
678 	 * signal.  This allows an interactive TTY session to perform another COPY
679 	 * FROM STDIN later.  (In non-STDIN cases, we're about to close the file
680 	 * anyway, so it doesn't matter.)  Although we don't ever test the flag
681 	 * with feof(), some fread() implementations won't read more data if it's
682 	 * set.  This also clears the error flag, but we already checked that.
683 	 */
684 	clearerr(copystream);
685 
686 	/*
687 	 * Check command status and return to normal libpq state.
688 	 *
689 	 * We do not want to return with the status still PGRES_COPY_IN: our
690 	 * caller would be unable to distinguish that situation from reaching the
691 	 * next COPY in a command string that happened to contain two consecutive
692 	 * COPY FROM STDIN commands.  We keep trying PQputCopyEnd() in the hope
693 	 * it'll work eventually.  (What's actually likely to happen is that in
694 	 * attempting to flush the data, libpq will eventually realize that the
695 	 * connection is lost.  But that's fine; it will get us out of COPY_IN
696 	 * state, which is what we need.)
697 	 */
698 	while (*res = PQgetResult(conn), PQresultStatus(*res) == PGRES_COPY_IN)
699 	{
700 		OK = false;
701 		PQclear(*res);
702 		/* We can't send an error message if we're using protocol version 2 */
703 		PQputCopyEnd(conn,
704 					 (PQprotocolVersion(conn) < 3) ? NULL :
705 					 _("trying to exit copy mode"));
706 	}
707 	if (PQresultStatus(*res) != PGRES_COMMAND_OK)
708 	{
709 		pg_log_info("%s", PQerrorMessage(conn));
710 		OK = false;
711 	}
712 
713 	return OK;
714 }
715