1<!-- 2doc/src/sgml/ref/copy.sgml 3PostgreSQL documentation 4--> 5 6<refentry id="sql-copy"> 7 <indexterm zone="sql-copy"> 8 <primary>COPY</primary> 9 </indexterm> 10 11 <refmeta> 12 <refentrytitle>COPY</refentrytitle> 13 <manvolnum>7</manvolnum> 14 <refmiscinfo>SQL - Language Statements</refmiscinfo> 15 </refmeta> 16 17 <refnamediv> 18 <refname>COPY</refname> 19 <refpurpose>copy data between a file and a table</refpurpose> 20 </refnamediv> 21 22 <refsynopsisdiv> 23<synopsis> 24COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] 25 FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN } 26 [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ] 27 [ WHERE <replaceable class="parameter">condition</replaceable> ] 28 29COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) } 30 TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT } 31 [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ] 32 33<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase> 34 35 FORMAT <replaceable class="parameter">format_name</replaceable> 36 FREEZE [ <replaceable class="parameter">boolean</replaceable> ] 37 DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>' 38 NULL '<replaceable class="parameter">null_string</replaceable>' 39 HEADER [ <replaceable class="parameter">boolean</replaceable> ] 40 QUOTE '<replaceable class="parameter">quote_character</replaceable>' 41 ESCAPE '<replaceable class="parameter">escape_character</replaceable>' 42 FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } 43 FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) 44 FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) 45 ENCODING '<replaceable class="parameter">encoding_name</replaceable>' 46</synopsis> 47 </refsynopsisdiv> 48 49 <refsect1> 50 <title>Description</title> 51 52 <para> 53 <command>COPY</command> moves data between 54 <productname>PostgreSQL</productname> tables and standard file-system 55 files. <command>COPY TO</command> copies the contents of a table 56 <emphasis>to</emphasis> a file, while <command>COPY FROM</command> copies 57 data <emphasis>from</emphasis> a file to a table (appending the data to 58 whatever is in the table already). <command>COPY TO</command> 59 can also copy the results of a <command>SELECT</command> query. 60 </para> 61 62 <para> 63 If a column list is specified, <command>COPY TO</command> copies only 64 the data in the specified columns to the file. For <command>COPY 65 FROM</command>, each field in the file is inserted, in order, into the 66 specified column. Table columns not specified in the <command>COPY 67 FROM</command> column list will receive their default values. 68 </para> 69 70 <para> 71 <command>COPY</command> with a file name instructs the 72 <productname>PostgreSQL</productname> server to directly read from 73 or write to a file. The file must be accessible by the 74 <productname>PostgreSQL</productname> user (the user ID the server 75 runs as) and the name must be specified from the viewpoint of the 76 server. When <literal>PROGRAM</literal> is specified, the server 77 executes the given command and reads from the standard output of the 78 program, or writes to the standard input of the program. The command 79 must be specified from the viewpoint of the server, and be executable 80 by the <productname>PostgreSQL</productname> user. When 81 <literal>STDIN</literal> or <literal>STDOUT</literal> is 82 specified, data is transmitted via the connection between the 83 client and the server. 84 </para> 85 </refsect1> 86 87 <refsect1> 88 <title>Parameters</title> 89 90 <variablelist> 91 <varlistentry> 92 <term><replaceable class="parameter">table_name</replaceable></term> 93 <listitem> 94 <para> 95 The name (optionally schema-qualified) of an existing table. 96 </para> 97 </listitem> 98 </varlistentry> 99 100 <varlistentry> 101 <term><replaceable class="parameter">column_name</replaceable></term> 102 <listitem> 103 <para> 104 An optional list of columns to be copied. If no column list is 105 specified, all columns of the table except generated columns will be 106 copied. 107 </para> 108 </listitem> 109 </varlistentry> 110 111 <varlistentry> 112 <term><replaceable class="parameter">query</replaceable></term> 113 <listitem> 114 <para> 115 A <xref linkend="sql-select"/>, <xref linkend="sql-values"/>, 116 <xref linkend="sql-insert"/>, <xref linkend="sql-update"/> or 117 <xref linkend="sql-delete"/> command whose results are to be 118 copied. Note that parentheses are required around the query. 119 </para> 120 <para> 121 For <command>INSERT</command>, <command>UPDATE</command> and 122 <command>DELETE</command> queries a RETURNING clause must be provided, 123 and the target relation must not have a conditional rule, nor 124 an <literal>ALSO</literal> rule, nor an <literal>INSTEAD</literal> rule 125 that expands to multiple statements. 126 </para> 127 </listitem> 128 </varlistentry> 129 130 <varlistentry> 131 <term><replaceable class="parameter">filename</replaceable></term> 132 <listitem> 133 <para> 134 The path name of the input or output file. An input file name can be 135 an absolute or relative path, but an output file name must be an absolute 136 path. Windows users might need to use an <literal>E''</literal> string and 137 double any backslashes used in the path name. 138 </para> 139 </listitem> 140 </varlistentry> 141 142 <varlistentry> 143 <term><literal>PROGRAM</literal></term> 144 <listitem> 145 <para> 146 A command to execute. In <command>COPY FROM</command>, the input is 147 read from standard output of the command, and in <command>COPY TO</command>, 148 the output is written to the standard input of the command. 149 </para> 150 <para> 151 Note that the command is invoked by the shell, so if you need to pass 152 any arguments to shell command that come from an untrusted source, you 153 must be careful to strip or escape any special characters that might 154 have a special meaning for the shell. For security reasons, it is best 155 to use a fixed command string, or at least avoid passing any user input 156 in it. 157 </para> 158 </listitem> 159 </varlistentry> 160 161 <varlistentry> 162 <term><literal>STDIN</literal></term> 163 <listitem> 164 <para> 165 Specifies that input comes from the client application. 166 </para> 167 </listitem> 168 </varlistentry> 169 170 <varlistentry> 171 <term><literal>STDOUT</literal></term> 172 <listitem> 173 <para> 174 Specifies that output goes to the client application. 175 </para> 176 </listitem> 177 </varlistentry> 178 179 <varlistentry> 180 <term><replaceable class="parameter">boolean</replaceable></term> 181 <listitem> 182 <para> 183 Specifies whether the selected option should be turned on or off. 184 You can write <literal>TRUE</literal>, <literal>ON</literal>, or 185 <literal>1</literal> to enable the option, and <literal>FALSE</literal>, 186 <literal>OFF</literal>, or <literal>0</literal> to disable it. The 187 <replaceable class="parameter">boolean</replaceable> value can also 188 be omitted, in which case <literal>TRUE</literal> is assumed. 189 </para> 190 </listitem> 191 </varlistentry> 192 193 <varlistentry> 194 <term><literal>FORMAT</literal></term> 195 <listitem> 196 <para> 197 Selects the data format to be read or written: 198 <literal>text</literal>, 199 <literal>csv</literal> (Comma Separated Values), 200 or <literal>binary</literal>. 201 The default is <literal>text</literal>. 202 </para> 203 </listitem> 204 </varlistentry> 205 206 <varlistentry> 207 <term><literal>FREEZE</literal></term> 208 <listitem> 209 <para> 210 Requests copying the data with rows already frozen, just as they 211 would be after running the <command>VACUUM FREEZE</command> command. 212 This is intended as a performance option for initial data loading. 213 Rows will be frozen only if the table being loaded has been created 214 or truncated in the current subtransaction, there are no cursors 215 open and there are no older snapshots held by this transaction. It is 216 currently not possible to perform a <command>COPY FREEZE</command> on 217 a partitioned table. 218 </para> 219 <para> 220 Note that all other sessions will immediately be able to see the data 221 once it has been successfully loaded. This violates the normal rules 222 of MVCC visibility and users specifying should be aware of the 223 potential problems this might cause. 224 </para> 225 </listitem> 226 </varlistentry> 227 228 <varlistentry> 229 <term><literal>DELIMITER</literal></term> 230 <listitem> 231 <para> 232 Specifies the character that separates columns within each row 233 (line) of the file. The default is a tab character in text format, 234 a comma in <literal>CSV</literal> format. 235 This must be a single one-byte character. 236 This option is not allowed when using <literal>binary</literal> format. 237 </para> 238 </listitem> 239 </varlistentry> 240 241 <varlistentry> 242 <term><literal>NULL</literal></term> 243 <listitem> 244 <para> 245 Specifies the string that represents a null value. The default is 246 <literal>\N</literal> (backslash-N) in text format, and an unquoted empty 247 string in <literal>CSV</literal> format. You might prefer an 248 empty string even in text format for cases where you don't want to 249 distinguish nulls from empty strings. 250 This option is not allowed when using <literal>binary</literal> format. 251 </para> 252 253 <note> 254 <para> 255 When using <command>COPY FROM</command>, any data item that matches 256 this string will be stored as a null value, so you should make 257 sure that you use the same string as you used with 258 <command>COPY TO</command>. 259 </para> 260 </note> 261 262 </listitem> 263 </varlistentry> 264 265 <varlistentry> 266 <term><literal>HEADER</literal></term> 267 <listitem> 268 <para> 269 Specifies that the file contains a header line with the names of each 270 column in the file. On output, the first line contains the column 271 names from the table, and on input, the first line is ignored. 272 This option is allowed only when using <literal>CSV</literal> format. 273 </para> 274 </listitem> 275 </varlistentry> 276 277 <varlistentry> 278 <term><literal>QUOTE</literal></term> 279 <listitem> 280 <para> 281 Specifies the quoting character to be used when a data value is quoted. 282 The default is double-quote. 283 This must be a single one-byte character. 284 This option is allowed only when using <literal>CSV</literal> format. 285 </para> 286 </listitem> 287 </varlistentry> 288 289 <varlistentry> 290 <term><literal>ESCAPE</literal></term> 291 <listitem> 292 <para> 293 Specifies the character that should appear before a 294 data character that matches the <literal>QUOTE</literal> value. 295 The default is the same as the <literal>QUOTE</literal> value (so that 296 the quoting character is doubled if it appears in the data). 297 This must be a single one-byte character. 298 This option is allowed only when using <literal>CSV</literal> format. 299 </para> 300 </listitem> 301 </varlistentry> 302 303 <varlistentry> 304 <term><literal>FORCE_QUOTE</literal></term> 305 <listitem> 306 <para> 307 Forces quoting to be 308 used for all non-<literal>NULL</literal> values in each specified column. 309 <literal>NULL</literal> output is never quoted. If <literal>*</literal> is specified, 310 non-<literal>NULL</literal> values will be quoted in all columns. 311 This option is allowed only in <command>COPY TO</command>, and only when 312 using <literal>CSV</literal> format. 313 </para> 314 </listitem> 315 </varlistentry> 316 317 <varlistentry> 318 <term><literal>FORCE_NOT_NULL</literal></term> 319 <listitem> 320 <para> 321 Do not match the specified columns' values against the null string. 322 In the default case where the null string is empty, this means that 323 empty values will be read as zero-length strings rather than nulls, 324 even when they are not quoted. 325 This option is allowed only in <command>COPY FROM</command>, and only when 326 using <literal>CSV</literal> format. 327 </para> 328 </listitem> 329 </varlistentry> 330 331 <varlistentry> 332 <term><literal>FORCE_NULL</literal></term> 333 <listitem> 334 <para> 335 Match the specified columns' values against the null string, even 336 if it has been quoted, and if a match is found set the value to 337 <literal>NULL</literal>. In the default case where the null string is empty, 338 this converts a quoted empty string into NULL. 339 This option is allowed only in <command>COPY FROM</command>, and only when 340 using <literal>CSV</literal> format. 341 </para> 342 </listitem> 343 </varlistentry> 344 345 <varlistentry> 346 <term><literal>ENCODING</literal></term> 347 <listitem> 348 <para> 349 Specifies that the file is encoded in the <replaceable 350 class="parameter">encoding_name</replaceable>. If this option is 351 omitted, the current client encoding is used. See the Notes below 352 for more details. 353 </para> 354 </listitem> 355 </varlistentry> 356 357 <varlistentry> 358 <term><literal>WHERE</literal></term> 359 <listitem> 360 <para> 361 The optional <literal>WHERE</literal> clause has the general form 362<synopsis> 363WHERE <replaceable class="parameter">condition</replaceable> 364</synopsis> 365 where <replaceable class="parameter">condition</replaceable> is 366 any expression that evaluates to a result of type 367 <type>boolean</type>. Any row that does not satisfy this 368 condition will not be inserted to the table. A row satisfies the 369 condition if it returns true when the actual row values are 370 substituted for any variable references. 371 </para> 372 373 <para> 374 Currently, subqueries are not allowed in <literal>WHERE</literal> 375 expressions, and the evaluation does not see any changes made by the 376 <command>COPY</command> itself (this matters when the expression 377 contains calls to <literal>VOLATILE</literal> functions). 378 </para> 379 380 </listitem> 381 </varlistentry> 382 383 </variablelist> 384 </refsect1> 385 386 <refsect1> 387 <title>Outputs</title> 388 389 <para> 390 On successful completion, a <command>COPY</command> command returns a command 391 tag of the form 392<screen> 393COPY <replaceable class="parameter">count</replaceable> 394</screen> 395 The <replaceable class="parameter">count</replaceable> is the number 396 of rows copied. 397 </para> 398 399 <note> 400 <para> 401 <application>psql</application> will print this command tag only if the command 402 was not <literal>COPY ... TO STDOUT</literal>, or the 403 equivalent <application>psql</application> meta-command 404 <literal>\copy ... to stdout</literal>. This is to prevent confusing the 405 command tag with the data that was just printed. 406 </para> 407 </note> 408 </refsect1> 409 410 <refsect1> 411 <title>Notes</title> 412 413 <para> 414 <command>COPY TO</command> can be used only with plain 415 tables, not views, and does not copy rows from child tables 416 or child partitions. For example, <literal>COPY <replaceable 417 class="parameter">table</replaceable> TO</literal> copies 418 the same rows as <literal>SELECT * FROM ONLY <replaceable 419 class="parameter">table</replaceable></literal>. 420 The syntax <literal>COPY (SELECT * FROM <replaceable 421 class="parameter">table</replaceable>) TO ...</literal> can be used to 422 dump all of the rows in an inheritance hierarchy, partitioned table, 423 or view. 424 </para> 425 426 <para> 427 <command>COPY FROM</command> can be used with plain, foreign, or 428 partitioned tables or with views that have 429 <literal>INSTEAD OF INSERT</literal> triggers. 430 </para> 431 432 <para> 433 You must have select privilege on the table 434 whose values are read by <command>COPY TO</command>, and 435 insert privilege on the table into which values 436 are inserted by <command>COPY FROM</command>. It is sufficient 437 to have column privileges on the column(s) listed in the command. 438 </para> 439 440 <para> 441 If row-level security is enabled for the table, the relevant 442 <command>SELECT</command> policies will apply to <literal>COPY 443 <replaceable class="parameter">table</replaceable> TO</literal> statements. 444 Currently, <command>COPY FROM</command> is not supported for tables 445 with row-level security. Use equivalent <command>INSERT</command> 446 statements instead. 447 </para> 448 449 <para> 450 Files named in a <command>COPY</command> command are read or written 451 directly by the server, not by the client application. Therefore, 452 they must reside on or be accessible to the database server machine, 453 not the client. They must be accessible to and readable or writable 454 by the <productname>PostgreSQL</productname> user (the user ID the 455 server runs as), not the client. Similarly, 456 the command specified with <literal>PROGRAM</literal> is executed directly 457 by the server, not by the client application, must be executable by the 458 <productname>PostgreSQL</productname> user. 459 <command>COPY</command> naming a file or command is only allowed to 460 database superusers or users who are granted one of the default roles 461 <literal>pg_read_server_files</literal>, 462 <literal>pg_write_server_files</literal>, 463 or <literal>pg_execute_server_program</literal>, since it allows reading 464 or writing any file or running a program that the server has privileges to 465 access. 466 </para> 467 468 <para> 469 Do not confuse <command>COPY</command> with the 470 <application>psql</application> instruction 471 <command><link linkend="app-psql-meta-commands-copy">\copy</link></command>. <command>\copy</command> invokes 472 <command>COPY FROM STDIN</command> or <command>COPY TO 473 STDOUT</command>, and then fetches/stores the data in a file 474 accessible to the <application>psql</application> client. Thus, 475 file accessibility and access rights depend on the client rather 476 than the server when <command>\copy</command> is used. 477 </para> 478 479 <para> 480 It is recommended that the file name used in <command>COPY</command> 481 always be specified as an absolute path. This is enforced by the 482 server in the case of <command>COPY TO</command>, but for 483 <command>COPY FROM</command> you do have the option of reading from 484 a file specified by a relative path. The path will be interpreted 485 relative to the working directory of the server process (normally 486 the cluster's data directory), not the client's working directory. 487 </para> 488 489 <para> 490 Executing a command with <literal>PROGRAM</literal> might be restricted 491 by the operating system's access control mechanisms, such as SELinux. 492 </para> 493 494 <para> 495 <command>COPY FROM</command> will invoke any triggers and check 496 constraints on the destination table. However, it will not invoke rules. 497 </para> 498 499 <para> 500 For identity columns, the <command>COPY FROM</command> command will always 501 write the column values provided in the input data, like 502 the <command>INSERT</command> option <literal>OVERRIDING SYSTEM 503 VALUE</literal>. 504 </para> 505 506 <para> 507 <command>COPY</command> input and output is affected by 508 <varname>DateStyle</varname>. To ensure portability to other 509 <productname>PostgreSQL</productname> installations that might use 510 non-default <varname>DateStyle</varname> settings, 511 <varname>DateStyle</varname> should be set to <literal>ISO</literal> before 512 using <command>COPY TO</command>. It is also a good idea to avoid dumping 513 data with <varname>IntervalStyle</varname> set to 514 <literal>sql_standard</literal>, because negative interval values might be 515 misinterpreted by a server that has a different setting for 516 <varname>IntervalStyle</varname>. 517 </para> 518 519 <para> 520 Input data is interpreted according to <literal>ENCODING</literal> 521 option or the current client encoding, and output data is encoded 522 in <literal>ENCODING</literal> or the current client encoding, even 523 if the data does not pass through the client but is read from or 524 written to a file directly by the server. 525 </para> 526 527 <para> 528 <command>COPY</command> stops operation at the first error. This 529 should not lead to problems in the event of a <command>COPY 530 TO</command>, but the target table will already have received 531 earlier rows in a <command>COPY FROM</command>. These rows will not 532 be visible or accessible, but they still occupy disk space. This might 533 amount to a considerable amount of wasted disk space if the failure 534 happened well into a large copy operation. You might wish to invoke 535 <command>VACUUM</command> to recover the wasted space. 536 </para> 537 538 <para> 539 <literal>FORCE_NULL</literal> and <literal>FORCE_NOT_NULL</literal> can be used 540 simultaneously on the same column. This results in converting quoted 541 null strings to null values and unquoted null strings to empty strings. 542 </para> 543 544 </refsect1> 545 546 <refsect1> 547 <title>File Formats</title> 548 549 <refsect2> 550 <title>Text Format</title> 551 552 <para> 553 When the <literal>text</literal> format is used, 554 the data read or written is a text file with one line per table row. 555 Columns in a row are separated by the delimiter character. 556 The column values themselves are strings generated by the 557 output function, or acceptable to the input function, of each 558 attribute's data type. The specified null string is used in 559 place of columns that are null. 560 <command>COPY FROM</command> will raise an error if any line of the 561 input file contains more or fewer columns than are expected. 562 </para> 563 564 <para> 565 End of data can be represented by a single line containing just 566 backslash-period (<literal>\.</literal>). An end-of-data marker is 567 not necessary when reading from a file, since the end of file 568 serves perfectly well; it is needed only when copying data to or from 569 client applications using pre-3.0 client protocol. 570 </para> 571 572 <para> 573 Backslash characters (<literal>\</literal>) can be used in the 574 <command>COPY</command> data to quote data characters that might 575 otherwise be taken as row or column delimiters. In particular, the 576 following characters <emphasis>must</emphasis> be preceded by a backslash if 577 they appear as part of a column value: backslash itself, 578 newline, carriage return, and the current delimiter character. 579 </para> 580 581 <para> 582 The specified null string is sent by <command>COPY TO</command> without 583 adding any backslashes; conversely, <command>COPY FROM</command> matches 584 the input against the null string before removing backslashes. Therefore, 585 a null string such as <literal>\N</literal> cannot be confused with 586 the actual data value <literal>\N</literal> (which would be represented 587 as <literal>\\N</literal>). 588 </para> 589 590 <para> 591 The following special backslash sequences are recognized by 592 <command>COPY FROM</command>: 593 594 <informaltable> 595 <tgroup cols="2"> 596 <thead> 597 <row> 598 <entry>Sequence</entry> 599 <entry>Represents</entry> 600 </row> 601 </thead> 602 603 <tbody> 604 <row> 605 <entry><literal>\b</literal></entry> 606 <entry>Backspace (ASCII 8)</entry> 607 </row> 608 <row> 609 <entry><literal>\f</literal></entry> 610 <entry>Form feed (ASCII 12)</entry> 611 </row> 612 <row> 613 <entry><literal>\n</literal></entry> 614 <entry>Newline (ASCII 10)</entry> 615 </row> 616 <row> 617 <entry><literal>\r</literal></entry> 618 <entry>Carriage return (ASCII 13)</entry> 619 </row> 620 <row> 621 <entry><literal>\t</literal></entry> 622 <entry>Tab (ASCII 9)</entry> 623 </row> 624 <row> 625 <entry><literal>\v</literal></entry> 626 <entry>Vertical tab (ASCII 11)</entry> 627 </row> 628 <row> 629 <entry><literal>\</literal><replaceable>digits</replaceable></entry> 630 <entry>Backslash followed by one to three octal digits specifies 631 the byte with that numeric code</entry> 632 </row> 633 <row> 634 <entry><literal>\x</literal><replaceable>digits</replaceable></entry> 635 <entry>Backslash <literal>x</literal> followed by one or two hex digits specifies 636 the byte with that numeric code</entry> 637 </row> 638 </tbody> 639 </tgroup> 640 </informaltable> 641 642 Presently, <command>COPY TO</command> will never emit an octal or 643 hex-digits backslash sequence, but it does use the other sequences 644 listed above for those control characters. 645 </para> 646 647 <para> 648 Any other backslashed character that is not mentioned in the above table 649 will be taken to represent itself. However, beware of adding backslashes 650 unnecessarily, since that might accidentally produce a string matching the 651 end-of-data marker (<literal>\.</literal>) or the null string (<literal>\N</literal> by 652 default). These strings will be recognized before any other backslash 653 processing is done. 654 </para> 655 656 <para> 657 It is strongly recommended that applications generating <command>COPY</command> data convert 658 data newlines and carriage returns to the <literal>\n</literal> and 659 <literal>\r</literal> sequences respectively. At present it is 660 possible to represent a data carriage return by a backslash and carriage 661 return, and to represent a data newline by a backslash and newline. 662 However, these representations might not be accepted in future releases. 663 They are also highly vulnerable to corruption if the <command>COPY</command> file is 664 transferred across different machines (for example, from Unix to Windows 665 or vice versa). 666 </para> 667 668 <para> 669 All backslash sequences are interpreted after encoding conversion. 670 The bytes specified with the octal and hex-digit backslash sequences must 671 form valid characters in the database encoding. 672 </para> 673 674 <para> 675 <command>COPY TO</command> will terminate each row with a Unix-style 676 newline (<quote><literal>\n</literal></quote>). Servers running on Microsoft Windows instead 677 output carriage return/newline (<quote><literal>\r\n</literal></quote>), but only for 678 <command>COPY</command> to a server file; for consistency across platforms, 679 <command>COPY TO STDOUT</command> always sends <quote><literal>\n</literal></quote> 680 regardless of server platform. 681 <command>COPY FROM</command> can handle lines ending with newlines, 682 carriage returns, or carriage return/newlines. To reduce the risk of 683 error due to un-backslashed newlines or carriage returns that were 684 meant as data, <command>COPY FROM</command> will complain if the line 685 endings in the input are not all alike. 686 </para> 687 </refsect2> 688 689 <refsect2> 690 <title>CSV Format</title> 691 692 <para> 693 This format option is used for importing and exporting the Comma 694 Separated Value (<literal>CSV</literal>) file format used by many other 695 programs, such as spreadsheets. Instead of the escaping rules used by 696 <productname>PostgreSQL</productname>'s standard text format, it 697 produces and recognizes the common CSV escaping mechanism. 698 </para> 699 700 <para> 701 The values in each record are separated by the <literal>DELIMITER</literal> 702 character. If the value contains the delimiter character, the 703 <literal>QUOTE</literal> character, the <literal>NULL</literal> string, a carriage 704 return, or line feed character, then the whole value is prefixed and 705 suffixed by the <literal>QUOTE</literal> character, and any occurrence 706 within the value of a <literal>QUOTE</literal> character or the 707 <literal>ESCAPE</literal> character is preceded by the escape character. 708 You can also use <literal>FORCE_QUOTE</literal> to force quotes when outputting 709 non-<literal>NULL</literal> values in specific columns. 710 </para> 711 712 <para> 713 The <literal>CSV</literal> format has no standard way to distinguish a 714 <literal>NULL</literal> value from an empty string. 715 <productname>PostgreSQL</productname>'s <command>COPY</command> handles this by quoting. 716 A <literal>NULL</literal> is output as the <literal>NULL</literal> parameter string 717 and is not quoted, while a non-<literal>NULL</literal> value matching the 718 <literal>NULL</literal> parameter string is quoted. For example, with the 719 default settings, a <literal>NULL</literal> is written as an unquoted empty 720 string, while an empty string data value is written with double quotes 721 (<literal>""</literal>). Reading values follows similar rules. You can 722 use <literal>FORCE_NOT_NULL</literal> to prevent <literal>NULL</literal> input 723 comparisons for specific columns. You can also use 724 <literal>FORCE_NULL</literal> to convert quoted null string data values to 725 <literal>NULL</literal>. 726 </para> 727 728 <para> 729 Because backslash is not a special character in the <literal>CSV</literal> 730 format, <literal>\.</literal>, the end-of-data marker, could also appear 731 as a data value. To avoid any misinterpretation, a <literal>\.</literal> 732 data value appearing as a lone entry on a line is automatically 733 quoted on output, and on input, if quoted, is not interpreted as the 734 end-of-data marker. If you are loading a file created by another 735 application that has a single unquoted column and might have a 736 value of <literal>\.</literal>, you might need to quote that value in the 737 input file. 738 </para> 739 740 <note> 741 <para> 742 In <literal>CSV</literal> format, all characters are significant. A quoted value 743 surrounded by white space, or any characters other than 744 <literal>DELIMITER</literal>, will include those characters. This can cause 745 errors if you import data from a system that pads <literal>CSV</literal> 746 lines with white space out to some fixed width. If such a situation 747 arises you might need to preprocess the <literal>CSV</literal> file to remove 748 the trailing white space, before importing the data into 749 <productname>PostgreSQL</productname>. 750 </para> 751 </note> 752 753 <note> 754 <para> 755 CSV format will both recognize and produce CSV files with quoted 756 values containing embedded carriage returns and line feeds. Thus 757 the files are not strictly one line per table row like text-format 758 files. 759 </para> 760 </note> 761 762 <note> 763 <para> 764 Many programs produce strange and occasionally perverse CSV files, 765 so the file format is more a convention than a standard. Thus you 766 might encounter some files that cannot be imported using this 767 mechanism, and <command>COPY</command> might produce files that other 768 programs cannot process. 769 </para> 770 </note> 771 772 </refsect2> 773 774 <refsect2> 775 <title>Binary Format</title> 776 777 <para> 778 The <literal>binary</literal> format option causes all data to be 779 stored/read as binary format rather than as text. It is 780 somewhat faster than the text and <literal>CSV</literal> formats, 781 but a binary-format file is less portable across machine architectures and 782 <productname>PostgreSQL</productname> versions. 783 Also, the binary format is very data type specific; for example 784 it will not work to output binary data from a <type>smallint</type> column 785 and read it into an <type>integer</type> column, even though that would work 786 fine in text format. 787 </para> 788 789 <para> 790 The <literal>binary</literal> file format consists 791 of a file header, zero or more tuples containing the row data, and 792 a file trailer. Headers and data are in network byte order. 793 </para> 794 795 <note> 796 <para> 797 <productname>PostgreSQL</productname> releases before 7.4 used a 798 different binary file format. 799 </para> 800 </note> 801 802 <refsect3> 803 <title>File Header</title> 804 805 <para> 806 The file header consists of 15 bytes of fixed fields, followed 807 by a variable-length header extension area. The fixed fields are: 808 809 <variablelist> 810 <varlistentry> 811 <term>Signature</term> 812 <listitem> 813 <para> 81411-byte sequence <literal>PGCOPY\n\377\r\n\0</literal> — note that the zero byte 815is a required part of the signature. (The signature is designed to allow 816easy identification of files that have been munged by a non-8-bit-clean 817transfer. This signature will be changed by end-of-line-translation 818filters, dropped zero bytes, dropped high bits, or parity changes.) 819 </para> 820 </listitem> 821 </varlistentry> 822 823 <varlistentry> 824 <term>Flags field</term> 825 <listitem> 826 <para> 82732-bit integer bit mask to denote important aspects of the file format. Bits 828are numbered from 0 (<acronym>LSB</acronym>) to 31 (<acronym>MSB</acronym>). Note that 829this field is stored in network byte order (most significant byte first), 830as are all the integer fields used in the file format. Bits 83116–31 are reserved to denote critical file format issues; a reader 832should abort if it finds an unexpected bit set in this range. Bits 0–15 833are reserved to signal backwards-compatible format issues; a reader 834should simply ignore any unexpected bits set in this range. Currently 835only one flag bit is defined, and the rest must be zero: 836 <variablelist> 837 <varlistentry> 838 <term>Bit 16</term> 839 <listitem> 840 <para> 841 If 1, OIDs are included in the data; if 0, not. Oid system columns 842 are not supported in <productname>PostgreSQL</productname> 843 anymore, but the format still contains the indicator. 844 </para> 845 </listitem> 846 </varlistentry> 847 </variablelist></para> 848 </listitem> 849 </varlistentry> 850 851 <varlistentry> 852 <term>Header extension area length</term> 853 <listitem> 854 <para> 85532-bit integer, length in bytes of remainder of header, not including self. 856Currently, this is zero, and the first tuple follows 857immediately. Future changes to the format might allow additional data 858to be present in the header. A reader should silently skip over any header 859extension data it does not know what to do with. 860 </para> 861 </listitem> 862 </varlistentry> 863 </variablelist> 864 </para> 865 866 <para> 867The header extension area is envisioned to contain a sequence of 868self-identifying chunks. The flags field is not intended to tell readers 869what is in the extension area. Specific design of header extension contents 870is left for a later release. 871 </para> 872 873 <para> 874 This design allows for both backwards-compatible header additions (add 875 header extension chunks, or set low-order flag bits) and 876 non-backwards-compatible changes (set high-order flag bits to signal such 877 changes, and add supporting data to the extension area if needed). 878 </para> 879 </refsect3> 880 881 <refsect3> 882 <title>Tuples</title> 883 <para> 884Each tuple begins with a 16-bit integer count of the number of fields in the 885tuple. (Presently, all tuples in a table will have the same count, but that 886might not always be true.) Then, repeated for each field in the tuple, there 887is a 32-bit length word followed by that many bytes of field data. (The 888length word does not include itself, and can be zero.) As a special case, 889-1 indicates a NULL field value. No value bytes follow in the NULL case. 890 </para> 891 892 <para> 893There is no alignment padding or any other extra data between fields. 894 </para> 895 896 <para> 897Presently, all data values in a binary-format file are 898assumed to be in binary format (format code one). It is anticipated that a 899future extension might add a header field that allows per-column format codes 900to be specified. 901 </para> 902 903 <para> 904To determine the appropriate binary format for the actual tuple data you 905should consult the <productname>PostgreSQL</productname> source, in 906particular the <function>*send</function> and <function>*recv</function> functions for 907each column's data type (typically these functions are found in the 908<filename>src/backend/utils/adt/</filename> directory of the source 909distribution). 910 </para> 911 912 <para> 913If OIDs are included in the file, the OID field immediately follows the 914field-count word. It is a normal field except that it's not included in the 915field-count. Note that oid system columns are not supported in current 916versions of <productname>PostgreSQL</productname>. 917 </para> 918 </refsect3> 919 920 <refsect3> 921 <title>File Trailer</title> 922 923 <para> 924 The file trailer consists of a 16-bit integer word containing -1. This 925 is easily distinguished from a tuple's field-count word. 926 </para> 927 928 <para> 929 A reader should report an error if a field-count word is neither -1 930 nor the expected number of columns. This provides an extra 931 check against somehow getting out of sync with the data. 932 </para> 933 </refsect3> 934 </refsect2> 935 </refsect1> 936 937 <refsect1> 938 <title>Examples</title> 939 940 <para> 941 The following example copies a table to the client 942 using the vertical bar (<literal>|</literal>) as the field delimiter: 943<programlisting> 944COPY country TO STDOUT (DELIMITER '|'); 945</programlisting> 946 </para> 947 948 <para> 949 To copy data from a file into the <literal>country</literal> table: 950<programlisting> 951COPY country FROM '/usr1/proj/bray/sql/country_data'; 952</programlisting> 953 </para> 954 955 <para> 956 To copy into a file just the countries whose names start with 'A': 957<programlisting> 958COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy'; 959</programlisting> 960 </para> 961 962 <para> 963 To copy into a compressed file, you can pipe the output through an external 964 compression program: 965<programlisting> 966COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz'; 967</programlisting> 968 </para> 969 970 <para> 971 Here is a sample of data suitable for copying into a table from 972 <literal>STDIN</literal>: 973<programlisting> 974AF AFGHANISTAN 975AL ALBANIA 976DZ ALGERIA 977ZM ZAMBIA 978ZW ZIMBABWE 979</programlisting> 980 Note that the white space on each line is actually a tab character. 981 </para> 982 983 <para> 984 The following is the same data, output in binary format. 985 The data is shown after filtering through the 986 Unix utility <command>od -c</command>. The table has three columns; 987 the first has type <type>char(2)</type>, the second has type <type>text</type>, 988 and the third has type <type>integer</type>. All the rows have a null value 989 in the third column. 990<programlisting> 9910000000 P G C O P Y \n 377 \r \n \0 \0 \0 \0 \0 \0 9920000020 \0 \0 \0 \0 003 \0 \0 \0 002 A F \0 \0 \0 013 A 9930000040 F G H A N I S T A N 377 377 377 377 \0 003 9940000060 \0 \0 \0 002 A L \0 \0 \0 007 A L B A N I 9950000100 A 377 377 377 377 \0 003 \0 \0 \0 002 D Z \0 \0 \0 9960000120 007 A L G E R I A 377 377 377 377 \0 003 \0 \0 9970000140 \0 002 Z M \0 \0 \0 006 Z A M B I A 377 377 9980000160 377 377 \0 003 \0 \0 \0 002 Z W \0 \0 \0 \b Z I 9990000200 M B A B W E 377 377 377 377 377 377 1000</programlisting></para> 1001 </refsect1> 1002 1003 <refsect1> 1004 <title>Compatibility</title> 1005 1006 <para> 1007 There is no <command>COPY</command> statement in the SQL standard. 1008 </para> 1009 1010 <para> 1011 The following syntax was used before <productname>PostgreSQL</productname> 1012 version 9.0 and is still supported: 1013 1014<synopsis> 1015COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] 1016 FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN } 1017 [ [ WITH ] 1018 [ BINARY ] 1019 [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ] 1020 [ NULL [ AS ] '<replaceable class="parameter">null_string</replaceable>' ] 1021 [ CSV [ HEADER ] 1022 [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ] 1023 [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ] 1024 [ FORCE NOT NULL <replaceable class="parameter">column_name</replaceable> [, ...] ] ] ] 1025 1026COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) } 1027 TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT } 1028 [ [ WITH ] 1029 [ BINARY ] 1030 [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ] 1031 [ NULL [ AS ] '<replaceable class="parameter">null_string</replaceable>' ] 1032 [ CSV [ HEADER ] 1033 [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ] 1034 [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ] 1035 [ FORCE QUOTE { <replaceable class="parameter">column_name</replaceable> [, ...] | * } ] ] ] 1036</synopsis> 1037 1038 Note that in this syntax, <literal>BINARY</literal> and <literal>CSV</literal> are 1039 treated as independent keywords, not as arguments of a <literal>FORMAT</literal> 1040 option. 1041 </para> 1042 1043 <para> 1044 The following syntax was used before <productname>PostgreSQL</productname> 1045 version 7.3 and is still supported: 1046 1047<synopsis> 1048COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> 1049 FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN } 1050 [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ] 1051 [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ] 1052 1053COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> 1054 TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT } 1055 [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ] 1056 [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ] 1057</synopsis></para> 1058 </refsect1> 1059</refentry> 1060