1<!-- 2doc/src/sgml/ref/copy.sgml 3PostgreSQL documentation 4--> 5 6<refentry id="SQL-COPY"> 7 <indexterm zone="sql-copy"> 8 <primary>COPY</primary> 9 </indexterm> 10 11 <refmeta> 12 <refentrytitle>COPY</refentrytitle> 13 <manvolnum>7</manvolnum> 14 <refmiscinfo>SQL - Language Statements</refmiscinfo> 15 </refmeta> 16 17 <refnamediv> 18 <refname>COPY</refname> 19 <refpurpose>copy data between a file and a table</refpurpose> 20 </refnamediv> 21 22 <refsynopsisdiv> 23<synopsis> 24COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] 25 FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN } 26 [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ] 27 28COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) } 29 TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT } 30 [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ] 31 32<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase> 33 34 FORMAT <replaceable class="parameter">format_name</replaceable> 35 OIDS [ <replaceable class="parameter">boolean</replaceable> ] 36 FREEZE [ <replaceable class="parameter">boolean</replaceable> ] 37 DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>' 38 NULL '<replaceable class="parameter">null_string</replaceable>' 39 HEADER [ <replaceable class="parameter">boolean</replaceable> ] 40 QUOTE '<replaceable class="parameter">quote_character</replaceable>' 41 ESCAPE '<replaceable class="parameter">escape_character</replaceable>' 42 FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * } 43 FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) 44 FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) 45 ENCODING '<replaceable class="parameter">encoding_name</replaceable>' 46</synopsis> 47 </refsynopsisdiv> 48 49 <refsect1> 50 <title>Description</title> 51 52 <para> 53 <command>COPY</command> moves data between 54 <productname>PostgreSQL</productname> tables and standard file-system 55 files. <command>COPY TO</command> copies the contents of a table 56 <emphasis>to</> a file, while <command>COPY FROM</command> copies 57 data <emphasis>from</> a file to a table (appending the data to 58 whatever is in the table already). <command>COPY TO</command> 59 can also copy the results of a <command>SELECT</> query. 60 </para> 61 62 <para> 63 If a column list is specified, <command>COPY TO</command> copies only 64 the data in the specified columns to the file. For <command>COPY 65 FROM</command>, each field in the file is inserted, in order, into the 66 specified column. Table columns not specified in the <command>COPY 67 FROM</command> column list will receive their default values. 68 </para> 69 70 <para> 71 <command>COPY</command> with a file name instructs the 72 <productname>PostgreSQL</productname> server to directly read from 73 or write to a file. The file must be accessible by the 74 <productname>PostgreSQL</productname> user (the user ID the server 75 runs as) and the name must be specified from the viewpoint of the 76 server. When <literal>PROGRAM</literal> is specified, the server 77 executes the given command and reads from the standard output of the 78 program, or writes to the standard input of the program. The command 79 must be specified from the viewpoint of the server, and be executable 80 by the <productname>PostgreSQL</productname> user. When 81 <literal>STDIN</literal> or <literal>STDOUT</literal> is 82 specified, data is transmitted via the connection between the 83 client and the server. 84 </para> 85 </refsect1> 86 87 <refsect1> 88 <title>Parameters</title> 89 90 <variablelist> 91 <varlistentry> 92 <term><replaceable class="parameter">table_name</replaceable></term> 93 <listitem> 94 <para> 95 The name (optionally schema-qualified) of an existing table. 96 </para> 97 </listitem> 98 </varlistentry> 99 100 <varlistentry> 101 <term><replaceable class="parameter">column_name</replaceable></term> 102 <listitem> 103 <para> 104 An optional list of columns to be copied. If no column list is 105 specified, all columns of the table will be copied. 106 </para> 107 </listitem> 108 </varlistentry> 109 110 <varlistentry> 111 <term><replaceable class="parameter">query</replaceable></term> 112 <listitem> 113 <para> 114 A <xref linkend="sql-select">, <xref linkend="sql-values">, 115 <xref linkend="sql-insert">, <xref linkend="sql-update"> or 116 <xref linkend="sql-delete"> command whose results are to be 117 copied. Note that parentheses are required around the query. 118 </para> 119 <para> 120 For <command>INSERT</>, <command>UPDATE</> and 121 <command>DELETE</> queries a RETURNING clause must be provided, 122 and the target relation must not have a conditional rule, nor 123 an <literal>ALSO</> rule, nor an <literal>INSTEAD</> rule 124 that expands to multiple statements. 125 </para> 126 </listitem> 127 </varlistentry> 128 129 <varlistentry> 130 <term><replaceable class="parameter">filename</replaceable></term> 131 <listitem> 132 <para> 133 The path name of the input or output file. An input file name can be 134 an absolute or relative path, but an output file name must be an absolute 135 path. Windows users might need to use an <literal>E''</> string and 136 double any backslashes used in the path name. 137 </para> 138 </listitem> 139 </varlistentry> 140 141 <varlistentry> 142 <term><literal>PROGRAM</literal></term> 143 <listitem> 144 <para> 145 A command to execute. In <command>COPY FROM</command>, the input is 146 read from standard output of the command, and in <command>COPY TO</>, 147 the output is written to the standard input of the command. 148 </para> 149 <para> 150 Note that the command is invoked by the shell, so if you need to pass 151 any arguments to shell command that come from an untrusted source, you 152 must be careful to strip or escape any special characters that might 153 have a special meaning for the shell. For security reasons, it is best 154 to use a fixed command string, or at least avoid passing any user input 155 in it. 156 </para> 157 </listitem> 158 </varlistentry> 159 160 <varlistentry> 161 <term><literal>STDIN</literal></term> 162 <listitem> 163 <para> 164 Specifies that input comes from the client application. 165 </para> 166 </listitem> 167 </varlistentry> 168 169 <varlistentry> 170 <term><literal>STDOUT</literal></term> 171 <listitem> 172 <para> 173 Specifies that output goes to the client application. 174 </para> 175 </listitem> 176 </varlistentry> 177 178 <varlistentry> 179 <term><replaceable class="parameter">boolean</replaceable></term> 180 <listitem> 181 <para> 182 Specifies whether the selected option should be turned on or off. 183 You can write <literal>TRUE</literal>, <literal>ON</>, or 184 <literal>1</literal> to enable the option, and <literal>FALSE</literal>, 185 <literal>OFF</>, or <literal>0</literal> to disable it. The 186 <replaceable class="parameter">boolean</replaceable> value can also 187 be omitted, in which case <literal>TRUE</literal> is assumed. 188 </para> 189 </listitem> 190 </varlistentry> 191 192 <varlistentry> 193 <term><literal>FORMAT</literal></term> 194 <listitem> 195 <para> 196 Selects the data format to be read or written: 197 <literal>text</>, 198 <literal>csv</> (Comma Separated Values), 199 or <literal>binary</>. 200 The default is <literal>text</>. 201 </para> 202 </listitem> 203 </varlistentry> 204 205 <varlistentry> 206 <term><literal>OIDS</literal></term> 207 <listitem> 208 <para> 209 Specifies copying the OID for each row. (An error is raised if 210 <literal>OIDS</literal> is specified for a table that does not 211 have OIDs, or in the case of copying a <replaceable 212 class="parameter">query</replaceable>.) 213 </para> 214 </listitem> 215 </varlistentry> 216 217 <varlistentry> 218 <term><literal>FREEZE</literal></term> 219 <listitem> 220 <para> 221 Requests copying the data with rows already frozen, just as they 222 would be after running the <command>VACUUM FREEZE</> command. 223 This is intended as a performance option for initial data loading. 224 Rows will be frozen only if the table being loaded has been created 225 or truncated in the current subtransaction, there are no cursors 226 open and there are no older snapshots held by this transaction. It is 227 currently not possible to perform a <command>COPY FREEZE</command> on 228 a partitioned table. 229 </para> 230 <para> 231 Note that all other sessions will immediately be able to see the data 232 once it has been successfully loaded. This violates the normal rules 233 of MVCC visibility and users specifying should be aware of the 234 potential problems this might cause. 235 </para> 236 </listitem> 237 </varlistentry> 238 239 <varlistentry> 240 <term><literal>DELIMITER</literal></term> 241 <listitem> 242 <para> 243 Specifies the character that separates columns within each row 244 (line) of the file. The default is a tab character in text format, 245 a comma in <literal>CSV</> format. 246 This must be a single one-byte character. 247 This option is not allowed when using <literal>binary</> format. 248 </para> 249 </listitem> 250 </varlistentry> 251 252 <varlistentry> 253 <term><literal>NULL</literal></term> 254 <listitem> 255 <para> 256 Specifies the string that represents a null value. The default is 257 <literal>\N</literal> (backslash-N) in text format, and an unquoted empty 258 string in <literal>CSV</> format. You might prefer an 259 empty string even in text format for cases where you don't want to 260 distinguish nulls from empty strings. 261 This option is not allowed when using <literal>binary</> format. 262 </para> 263 264 <note> 265 <para> 266 When using <command>COPY FROM</command>, any data item that matches 267 this string will be stored as a null value, so you should make 268 sure that you use the same string as you used with 269 <command>COPY TO</command>. 270 </para> 271 </note> 272 273 </listitem> 274 </varlistentry> 275 276 <varlistentry> 277 <term><literal>HEADER</literal></term> 278 <listitem> 279 <para> 280 Specifies that the file contains a header line with the names of each 281 column in the file. On output, the first line contains the column 282 names from the table, and on input, the first line is ignored. 283 This option is allowed only when using <literal>CSV</> format. 284 </para> 285 </listitem> 286 </varlistentry> 287 288 <varlistentry> 289 <term><literal>QUOTE</literal></term> 290 <listitem> 291 <para> 292 Specifies the quoting character to be used when a data value is quoted. 293 The default is double-quote. 294 This must be a single one-byte character. 295 This option is allowed only when using <literal>CSV</> format. 296 </para> 297 </listitem> 298 </varlistentry> 299 300 <varlistentry> 301 <term><literal>ESCAPE</literal></term> 302 <listitem> 303 <para> 304 Specifies the character that should appear before a 305 data character that matches the <literal>QUOTE</> value. 306 The default is the same as the <literal>QUOTE</> value (so that 307 the quoting character is doubled if it appears in the data). 308 This must be a single one-byte character. 309 This option is allowed only when using <literal>CSV</> format. 310 </para> 311 </listitem> 312 </varlistentry> 313 314 <varlistentry> 315 <term><literal>FORCE_QUOTE</></term> 316 <listitem> 317 <para> 318 Forces quoting to be 319 used for all non-<literal>NULL</> values in each specified column. 320 <literal>NULL</> output is never quoted. If <literal>*</> is specified, 321 non-<literal>NULL</> values will be quoted in all columns. 322 This option is allowed only in <command>COPY TO</>, and only when 323 using <literal>CSV</> format. 324 </para> 325 </listitem> 326 </varlistentry> 327 328 <varlistentry> 329 <term><literal>FORCE_NOT_NULL</></term> 330 <listitem> 331 <para> 332 Do not match the specified columns' values against the null string. 333 In the default case where the null string is empty, this means that 334 empty values will be read as zero-length strings rather than nulls, 335 even when they are not quoted. 336 This option is allowed only in <command>COPY FROM</>, and only when 337 using <literal>CSV</> format. 338 </para> 339 </listitem> 340 </varlistentry> 341 342 <varlistentry> 343 <term><literal>FORCE_NULL</></term> 344 <listitem> 345 <para> 346 Match the specified columns' values against the null string, even 347 if it has been quoted, and if a match is found set the value to 348 <literal>NULL</>. In the default case where the null string is empty, 349 this converts a quoted empty string into NULL. 350 This option is allowed only in <command>COPY FROM</>, and only when 351 using <literal>CSV</> format. 352 </para> 353 </listitem> 354 </varlistentry> 355 356 <varlistentry> 357 <term><literal>ENCODING</></term> 358 <listitem> 359 <para> 360 Specifies that the file is encoded in the <replaceable 361 class="parameter">encoding_name</replaceable>. If this option is 362 omitted, the current client encoding is used. See the Notes below 363 for more details. 364 </para> 365 </listitem> 366 </varlistentry> 367 368 </variablelist> 369 </refsect1> 370 371 <refsect1> 372 <title>Outputs</title> 373 374 <para> 375 On successful completion, a <command>COPY</> command returns a command 376 tag of the form 377<screen> 378COPY <replaceable class="parameter">count</replaceable> 379</screen> 380 The <replaceable class="parameter">count</replaceable> is the number 381 of rows copied. 382 </para> 383 384 <note> 385 <para> 386 <application>psql</> will print this command tag only if the command 387 was not <literal>COPY ... TO STDOUT</>, or the 388 equivalent <application>psql</> meta-command 389 <literal>\copy ... to stdout</>. This is to prevent confusing the 390 command tag with the data that was just printed. 391 </para> 392 </note> 393 </refsect1> 394 395 <refsect1> 396 <title>Notes</title> 397 398 <para> 399 <command>COPY TO</command> can be used only with plain 400 tables, not views, and does not copy rows from child tables 401 or child partitions. For example, <literal>COPY <replaceable 402 class="parameter">table</replaceable> TO</literal> copies 403 the same rows as <literal>SELECT * FROM ONLY <replaceable 404 class="parameter">table</replaceable></literal>. 405 The syntax <literal>COPY (SELECT * FROM <replaceable 406 class="parameter">table</replaceable>) TO ...</literal> can be used to 407 dump all of the rows in an inheritance hierarchy, partitioned table, 408 or view. 409 </para> 410 411 <para> 412 <command>COPY FROM</command> can be used with plain tables and with views 413 that have <literal>INSTEAD OF INSERT</> triggers. 414 </para> 415 416 <para> 417 You must have select privilege on the table 418 whose values are read by <command>COPY TO</command>, and 419 insert privilege on the table into which values 420 are inserted by <command>COPY FROM</command>. It is sufficient 421 to have column privileges on the column(s) listed in the command. 422 </para> 423 424 <para> 425 If row-level security is enabled for the table, the relevant 426 <command>SELECT</command> policies will apply to <literal>COPY 427 <replaceable class="parameter">table</> TO</literal> statements. 428 Currently, <command>COPY FROM</command> is not supported for tables 429 with row-level security. Use equivalent <command>INSERT</command> 430 statements instead. 431 </para> 432 433 <para> 434 Files named in a <command>COPY</command> command are read or written 435 directly by the server, not by the client application. Therefore, 436 they must reside on or be accessible to the database server machine, 437 not the client. They must be accessible to and readable or writable 438 by the <productname>PostgreSQL</productname> user (the user ID the 439 server runs as), not the client. Similarly, 440 the command specified with <literal>PROGRAM</literal> is executed directly 441 by the server, not by the client application, must be executable by the 442 <productname>PostgreSQL</productname> user. 443 <command>COPY</command> naming a file or command is only allowed to 444 database superusers, since it allows reading or writing any file that the 445 server has privileges to access. 446 </para> 447 448 <para> 449 Do not confuse <command>COPY</command> with the 450 <application>psql</application> instruction 451 <command><link linkend="APP-PSQL-meta-commands-copy">\copy</link></command>. <command>\copy</command> invokes 452 <command>COPY FROM STDIN</command> or <command>COPY TO 453 STDOUT</command>, and then fetches/stores the data in a file 454 accessible to the <application>psql</application> client. Thus, 455 file accessibility and access rights depend on the client rather 456 than the server when <command>\copy</command> is used. 457 </para> 458 459 <para> 460 It is recommended that the file name used in <command>COPY</command> 461 always be specified as an absolute path. This is enforced by the 462 server in the case of <command>COPY TO</command>, but for 463 <command>COPY FROM</command> you do have the option of reading from 464 a file specified by a relative path. The path will be interpreted 465 relative to the working directory of the server process (normally 466 the cluster's data directory), not the client's working directory. 467 </para> 468 469 <para> 470 Executing a command with <literal>PROGRAM</literal> might be restricted 471 by the operating system's access control mechanisms, such as SELinux. 472 </para> 473 474 <para> 475 <command>COPY FROM</command> will invoke any triggers and check 476 constraints on the destination table. However, it will not invoke rules. 477 </para> 478 479 <para> 480 For identity columns, the <command>COPY FROM</command> command will always 481 write the column values provided in the input data, like 482 the <command>INSERT</command> option <literal>OVERRIDING SYSTEM 483 VALUE</literal>. 484 </para> 485 486 <para> 487 <command>COPY</command> input and output is affected by 488 <varname>DateStyle</varname>. To ensure portability to other 489 <productname>PostgreSQL</productname> installations that might use 490 non-default <varname>DateStyle</varname> settings, 491 <varname>DateStyle</varname> should be set to <literal>ISO</> before 492 using <command>COPY TO</>. It is also a good idea to avoid dumping 493 data with <varname>IntervalStyle</varname> set to 494 <literal>sql_standard</>, because negative interval values might be 495 misinterpreted by a server that has a different setting for 496 <varname>IntervalStyle</varname>. 497 </para> 498 499 <para> 500 Input data is interpreted according to <literal>ENCODING</literal> 501 option or the current client encoding, and output data is encoded 502 in <literal>ENCODING</literal> or the current client encoding, even 503 if the data does not pass through the client but is read from or 504 written to a file directly by the server. 505 </para> 506 507 <para> 508 <command>COPY</command> stops operation at the first error. This 509 should not lead to problems in the event of a <command>COPY 510 TO</command>, but the target table will already have received 511 earlier rows in a <command>COPY FROM</command>. These rows will not 512 be visible or accessible, but they still occupy disk space. This might 513 amount to a considerable amount of wasted disk space if the failure 514 happened well into a large copy operation. You might wish to invoke 515 <command>VACUUM</command> to recover the wasted space. 516 </para> 517 518 <para> 519 <literal>FORCE_NULL</> and <literal>FORCE_NOT_NULL</> can be used 520 simultaneously on the same column. This results in converting quoted 521 null strings to null values and unquoted null strings to empty strings. 522 </para> 523 524 </refsect1> 525 526 <refsect1> 527 <title>File Formats</title> 528 529 <refsect2> 530 <title>Text Format</title> 531 532 <para> 533 When the <literal>text</> format is used, 534 the data read or written is a text file with one line per table row. 535 Columns in a row are separated by the delimiter character. 536 The column values themselves are strings generated by the 537 output function, or acceptable to the input function, of each 538 attribute's data type. The specified null string is used in 539 place of columns that are null. 540 <command>COPY FROM</command> will raise an error if any line of the 541 input file contains more or fewer columns than are expected. 542 If <literal>OIDS</literal> is specified, the OID is read or written as the first column, 543 preceding the user data columns. 544 </para> 545 546 <para> 547 End of data can be represented by a single line containing just 548 backslash-period (<literal>\.</>). An end-of-data marker is 549 not necessary when reading from a file, since the end of file 550 serves perfectly well; it is needed only when copying data to or from 551 client applications using pre-3.0 client protocol. 552 </para> 553 554 <para> 555 Backslash characters (<literal>\</>) can be used in the 556 <command>COPY</command> data to quote data characters that might 557 otherwise be taken as row or column delimiters. In particular, the 558 following characters <emphasis>must</> be preceded by a backslash if 559 they appear as part of a column value: backslash itself, 560 newline, carriage return, and the current delimiter character. 561 </para> 562 563 <para> 564 The specified null string is sent by <command>COPY TO</command> without 565 adding any backslashes; conversely, <command>COPY FROM</command> matches 566 the input against the null string before removing backslashes. Therefore, 567 a null string such as <literal>\N</literal> cannot be confused with 568 the actual data value <literal>\N</literal> (which would be represented 569 as <literal>\\N</literal>). 570 </para> 571 572 <para> 573 The following special backslash sequences are recognized by 574 <command>COPY FROM</command>: 575 576 <informaltable> 577 <tgroup cols="2"> 578 <thead> 579 <row> 580 <entry>Sequence</entry> 581 <entry>Represents</entry> 582 </row> 583 </thead> 584 585 <tbody> 586 <row> 587 <entry><literal>\b</></entry> 588 <entry>Backspace (ASCII 8)</entry> 589 </row> 590 <row> 591 <entry><literal>\f</></entry> 592 <entry>Form feed (ASCII 12)</entry> 593 </row> 594 <row> 595 <entry><literal>\n</></entry> 596 <entry>Newline (ASCII 10)</entry> 597 </row> 598 <row> 599 <entry><literal>\r</></entry> 600 <entry>Carriage return (ASCII 13)</entry> 601 </row> 602 <row> 603 <entry><literal>\t</></entry> 604 <entry>Tab (ASCII 9)</entry> 605 </row> 606 <row> 607 <entry><literal>\v</></entry> 608 <entry>Vertical tab (ASCII 11)</entry> 609 </row> 610 <row> 611 <entry><literal>\</><replaceable>digits</></entry> 612 <entry>Backslash followed by one to three octal digits specifies 613 the byte with that numeric code</entry> 614 </row> 615 <row> 616 <entry><literal>\x</><replaceable>digits</></entry> 617 <entry>Backslash <literal>x</> followed by one or two hex digits specifies 618 the byte with that numeric code</entry> 619 </row> 620 </tbody> 621 </tgroup> 622 </informaltable> 623 624 Presently, <command>COPY TO</command> will never emit an octal or 625 hex-digits backslash sequence, but it does use the other sequences 626 listed above for those control characters. 627 </para> 628 629 <para> 630 Any other backslashed character that is not mentioned in the above table 631 will be taken to represent itself. However, beware of adding backslashes 632 unnecessarily, since that might accidentally produce a string matching the 633 end-of-data marker (<literal>\.</>) or the null string (<literal>\N</> by 634 default). These strings will be recognized before any other backslash 635 processing is done. 636 </para> 637 638 <para> 639 It is strongly recommended that applications generating <command>COPY</command> data convert 640 data newlines and carriage returns to the <literal>\n</> and 641 <literal>\r</> sequences respectively. At present it is 642 possible to represent a data carriage return by a backslash and carriage 643 return, and to represent a data newline by a backslash and newline. 644 However, these representations might not be accepted in future releases. 645 They are also highly vulnerable to corruption if the <command>COPY</command> file is 646 transferred across different machines (for example, from Unix to Windows 647 or vice versa). 648 </para> 649 650 <para> 651 All backslash sequences are interpreted after encoding conversion. 652 The bytes specified with the octal and hex-digit backslash sequences must 653 form valid characters in the database encoding. 654 </para> 655 656 <para> 657 <command>COPY TO</command> will terminate each row with a Unix-style 658 newline (<quote><literal>\n</></>). Servers running on Microsoft Windows instead 659 output carriage return/newline (<quote><literal>\r\n</></>), but only for 660 <command>COPY</> to a server file; for consistency across platforms, 661 <command>COPY TO STDOUT</> always sends <quote><literal>\n</></> 662 regardless of server platform. 663 <command>COPY FROM</command> can handle lines ending with newlines, 664 carriage returns, or carriage return/newlines. To reduce the risk of 665 error due to un-backslashed newlines or carriage returns that were 666 meant as data, <command>COPY FROM</command> will complain if the line 667 endings in the input are not all alike. 668 </para> 669 </refsect2> 670 671 <refsect2> 672 <title>CSV Format</title> 673 674 <para> 675 This format option is used for importing and exporting the Comma 676 Separated Value (<literal>CSV</>) file format used by many other 677 programs, such as spreadsheets. Instead of the escaping rules used by 678 <productname>PostgreSQL</productname>'s standard text format, it 679 produces and recognizes the common CSV escaping mechanism. 680 </para> 681 682 <para> 683 The values in each record are separated by the <literal>DELIMITER</> 684 character. If the value contains the delimiter character, the 685 <literal>QUOTE</> character, the <literal>NULL</> string, a carriage 686 return, or line feed character, then the whole value is prefixed and 687 suffixed by the <literal>QUOTE</> character, and any occurrence 688 within the value of a <literal>QUOTE</> character or the 689 <literal>ESCAPE</> character is preceded by the escape character. 690 You can also use <literal>FORCE_QUOTE</> to force quotes when outputting 691 non-<literal>NULL</> values in specific columns. 692 </para> 693 694 <para> 695 The <literal>CSV</> format has no standard way to distinguish a 696 <literal>NULL</> value from an empty string. 697 <productname>PostgreSQL</>'s <command>COPY</> handles this by quoting. 698 A <literal>NULL</> is output as the <literal>NULL</> parameter string 699 and is not quoted, while a non-<literal>NULL</> value matching the 700 <literal>NULL</> parameter string is quoted. For example, with the 701 default settings, a <literal>NULL</> is written as an unquoted empty 702 string, while an empty string data value is written with double quotes 703 (<literal>""</>). Reading values follows similar rules. You can 704 use <literal>FORCE_NOT_NULL</> to prevent <literal>NULL</> input 705 comparisons for specific columns. You can also use 706 <literal>FORCE_NULL</> to convert quoted null string data values to 707 <literal>NULL</>. 708 </para> 709 710 <para> 711 Because backslash is not a special character in the <literal>CSV</> 712 format, <literal>\.</>, the end-of-data marker, could also appear 713 as a data value. To avoid any misinterpretation, a <literal>\.</> 714 data value appearing as a lone entry on a line is automatically 715 quoted on output, and on input, if quoted, is not interpreted as the 716 end-of-data marker. If you are loading a file created by another 717 application that has a single unquoted column and might have a 718 value of <literal>\.</>, you might need to quote that value in the 719 input file. 720 </para> 721 722 <note> 723 <para> 724 In <literal>CSV</> format, all characters are significant. A quoted value 725 surrounded by white space, or any characters other than 726 <literal>DELIMITER</>, will include those characters. This can cause 727 errors if you import data from a system that pads <literal>CSV</> 728 lines with white space out to some fixed width. If such a situation 729 arises you might need to preprocess the <literal>CSV</> file to remove 730 the trailing white space, before importing the data into 731 <productname>PostgreSQL</>. 732 </para> 733 </note> 734 735 <note> 736 <para> 737 CSV format will both recognize and produce CSV files with quoted 738 values containing embedded carriage returns and line feeds. Thus 739 the files are not strictly one line per table row like text-format 740 files. 741 </para> 742 </note> 743 744 <note> 745 <para> 746 Many programs produce strange and occasionally perverse CSV files, 747 so the file format is more a convention than a standard. Thus you 748 might encounter some files that cannot be imported using this 749 mechanism, and <command>COPY</> might produce files that other 750 programs cannot process. 751 </para> 752 </note> 753 754 </refsect2> 755 756 <refsect2> 757 <title>Binary Format</title> 758 759 <para> 760 The <literal>binary</literal> format option causes all data to be 761 stored/read as binary format rather than as text. It is 762 somewhat faster than the text and <literal>CSV</> formats, 763 but a binary-format file is less portable across machine architectures and 764 <productname>PostgreSQL</productname> versions. 765 Also, the binary format is very data type specific; for example 766 it will not work to output binary data from a <type>smallint</> column 767 and read it into an <type>integer</> column, even though that would work 768 fine in text format. 769 </para> 770 771 <para> 772 The <literal>binary</> file format consists 773 of a file header, zero or more tuples containing the row data, and 774 a file trailer. Headers and data are in network byte order. 775 </para> 776 777 <note> 778 <para> 779 <productname>PostgreSQL</productname> releases before 7.4 used a 780 different binary file format. 781 </para> 782 </note> 783 784 <refsect3> 785 <title>File Header</title> 786 787 <para> 788 The file header consists of 15 bytes of fixed fields, followed 789 by a variable-length header extension area. The fixed fields are: 790 791 <variablelist> 792 <varlistentry> 793 <term>Signature</term> 794 <listitem> 795 <para> 79611-byte sequence <literal>PGCOPY\n\377\r\n\0</> — note that the zero byte 797is a required part of the signature. (The signature is designed to allow 798easy identification of files that have been munged by a non-8-bit-clean 799transfer. This signature will be changed by end-of-line-translation 800filters, dropped zero bytes, dropped high bits, or parity changes.) 801 </para> 802 </listitem> 803 </varlistentry> 804 805 <varlistentry> 806 <term>Flags field</term> 807 <listitem> 808 <para> 80932-bit integer bit mask to denote important aspects of the file format. Bits 810are numbered from 0 (<acronym>LSB</>) to 31 (<acronym>MSB</>). Note that 811this field is stored in network byte order (most significant byte first), 812as are all the integer fields used in the file format. Bits 81316-31 are reserved to denote critical file format issues; a reader 814should abort if it finds an unexpected bit set in this range. Bits 0-15 815are reserved to signal backwards-compatible format issues; a reader 816should simply ignore any unexpected bits set in this range. Currently 817only one flag bit is defined, and the rest must be zero: 818 <variablelist> 819 <varlistentry> 820 <term>Bit 16</term> 821 <listitem> 822 <para> 823 if 1, OIDs are included in the data; if 0, not 824 </para> 825 </listitem> 826 </varlistentry> 827 </variablelist></para> 828 </listitem> 829 </varlistentry> 830 831 <varlistentry> 832 <term>Header extension area length</term> 833 <listitem> 834 <para> 83532-bit integer, length in bytes of remainder of header, not including self. 836Currently, this is zero, and the first tuple follows 837immediately. Future changes to the format might allow additional data 838to be present in the header. A reader should silently skip over any header 839extension data it does not know what to do with. 840 </para> 841 </listitem> 842 </varlistentry> 843 </variablelist> 844 </para> 845 846 <para> 847The header extension area is envisioned to contain a sequence of 848self-identifying chunks. The flags field is not intended to tell readers 849what is in the extension area. Specific design of header extension contents 850is left for a later release. 851 </para> 852 853 <para> 854 This design allows for both backwards-compatible header additions (add 855 header extension chunks, or set low-order flag bits) and 856 non-backwards-compatible changes (set high-order flag bits to signal such 857 changes, and add supporting data to the extension area if needed). 858 </para> 859 </refsect3> 860 861 <refsect3> 862 <title>Tuples</title> 863 <para> 864Each tuple begins with a 16-bit integer count of the number of fields in the 865tuple. (Presently, all tuples in a table will have the same count, but that 866might not always be true.) Then, repeated for each field in the tuple, there 867is a 32-bit length word followed by that many bytes of field data. (The 868length word does not include itself, and can be zero.) As a special case, 869-1 indicates a NULL field value. No value bytes follow in the NULL case. 870 </para> 871 872 <para> 873There is no alignment padding or any other extra data between fields. 874 </para> 875 876 <para> 877Presently, all data values in a binary-format file are 878assumed to be in binary format (format code one). It is anticipated that a 879future extension might add a header field that allows per-column format codes 880to be specified. 881 </para> 882 883 <para> 884To determine the appropriate binary format for the actual tuple data you 885should consult the <productname>PostgreSQL</productname> source, in 886particular the <function>*send</> and <function>*recv</> functions for 887each column's data type (typically these functions are found in the 888<filename>src/backend/utils/adt/</filename> directory of the source 889distribution). 890 </para> 891 892 <para> 893If OIDs are included in the file, the OID field immediately follows the 894field-count word. It is a normal field except that it's not included 895in the field-count. In particular it has a length word — this will allow 896handling of 4-byte vs. 8-byte OIDs without too much pain, and will allow 897OIDs to be shown as null if that ever proves desirable. 898 </para> 899 </refsect3> 900 901 <refsect3> 902 <title>File Trailer</title> 903 904 <para> 905 The file trailer consists of a 16-bit integer word containing -1. This 906 is easily distinguished from a tuple's field-count word. 907 </para> 908 909 <para> 910 A reader should report an error if a field-count word is neither -1 911 nor the expected number of columns. This provides an extra 912 check against somehow getting out of sync with the data. 913 </para> 914 </refsect3> 915 </refsect2> 916 </refsect1> 917 918 <refsect1> 919 <title>Examples</title> 920 921 <para> 922 The following example copies a table to the client 923 using the vertical bar (<literal>|</literal>) as the field delimiter: 924<programlisting> 925COPY country TO STDOUT (DELIMITER '|'); 926</programlisting> 927 </para> 928 929 <para> 930 To copy data from a file into the <literal>country</> table: 931<programlisting> 932COPY country FROM '/usr1/proj/bray/sql/country_data'; 933</programlisting> 934 </para> 935 936 <para> 937 To copy into a file just the countries whose names start with 'A': 938<programlisting> 939COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy'; 940</programlisting> 941 </para> 942 943 <para> 944 To copy into a compressed file, you can pipe the output through an external 945 compression program: 946<programlisting> 947COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz'; 948</programlisting> 949 </para> 950 951 <para> 952 Here is a sample of data suitable for copying into a table from 953 <literal>STDIN</literal>: 954<programlisting> 955AF AFGHANISTAN 956AL ALBANIA 957DZ ALGERIA 958ZM ZAMBIA 959ZW ZIMBABWE 960</programlisting> 961 Note that the white space on each line is actually a tab character. 962 </para> 963 964 <para> 965 The following is the same data, output in binary format. 966 The data is shown after filtering through the 967 Unix utility <command>od -c</command>. The table has three columns; 968 the first has type <type>char(2)</type>, the second has type <type>text</type>, 969 and the third has type <type>integer</type>. All the rows have a null value 970 in the third column. 971<programlisting> 9720000000 P G C O P Y \n 377 \r \n \0 \0 \0 \0 \0 \0 9730000020 \0 \0 \0 \0 003 \0 \0 \0 002 A F \0 \0 \0 013 A 9740000040 F G H A N I S T A N 377 377 377 377 \0 003 9750000060 \0 \0 \0 002 A L \0 \0 \0 007 A L B A N I 9760000100 A 377 377 377 377 \0 003 \0 \0 \0 002 D Z \0 \0 \0 9770000120 007 A L G E R I A 377 377 377 377 \0 003 \0 \0 9780000140 \0 002 Z M \0 \0 \0 006 Z A M B I A 377 377 9790000160 377 377 \0 003 \0 \0 \0 002 Z W \0 \0 \0 \b Z I 9800000200 M B A B W E 377 377 377 377 377 377 981</programlisting></para> 982 </refsect1> 983 984 <refsect1> 985 <title>Compatibility</title> 986 987 <para> 988 There is no <command>COPY</command> statement in the SQL standard. 989 </para> 990 991 <para> 992 The following syntax was used before <productname>PostgreSQL</> 993 version 9.0 and is still supported: 994 995<synopsis> 996COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] 997 FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN } 998 [ [ WITH ] 999 [ BINARY ] 1000 [ OIDS ] 1001 [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ] 1002 [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] 1003 [ CSV [ HEADER ] 1004 [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ] 1005 [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ] 1006 [ FORCE NOT NULL <replaceable class="parameter">column_name</replaceable> [, ...] ] ] ] 1007 1008COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) } 1009 TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT } 1010 [ [ WITH ] 1011 [ BINARY ] 1012 [ OIDS ] 1013 [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ] 1014 [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] 1015 [ CSV [ HEADER ] 1016 [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ] 1017 [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ] 1018 [ FORCE QUOTE { <replaceable class="parameter">column_name</replaceable> [, ...] | * } ] ] ] 1019</synopsis> 1020 1021 Note that in this syntax, <literal>BINARY</> and <literal>CSV</> are 1022 treated as independent keywords, not as arguments of a <literal>FORMAT</> 1023 option. 1024 </para> 1025 1026 <para> 1027 The following syntax was used before <productname>PostgreSQL</> 1028 version 7.3 and is still supported: 1029 1030<synopsis> 1031COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ] 1032 FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN } 1033 [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ] 1034 [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ] 1035 1036COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ] 1037 TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT } 1038 [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ] 1039 [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ] 1040</synopsis></para> 1041 </refsect1> 1042</refentry> 1043