1<!--
2doc/src/sgml/ref/copy.sgml
3PostgreSQL documentation
4-->
5
6<refentry id="SQL-COPY">
7 <indexterm zone="sql-copy">
8  <primary>COPY</primary>
9 </indexterm>
10
11 <refmeta>
12  <refentrytitle>COPY</refentrytitle>
13  <manvolnum>7</manvolnum>
14  <refmiscinfo>SQL - Language Statements</refmiscinfo>
15 </refmeta>
16
17 <refnamediv>
18  <refname>COPY</refname>
19  <refpurpose>copy data between a file and a table</refpurpose>
20 </refnamediv>
21
22 <refsynopsisdiv>
23<synopsis>
24COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
25    FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN }
26    [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
27
28COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
29    TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT }
30    [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
31
32<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase>
33
34    FORMAT <replaceable class="parameter">format_name</replaceable>
35    OIDS [ <replaceable class="parameter">boolean</replaceable> ]
36    FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
37    DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
38    NULL '<replaceable class="parameter">null_string</replaceable>'
39    HEADER [ <replaceable class="parameter">boolean</replaceable> ]
40    QUOTE '<replaceable class="parameter">quote_character</replaceable>'
41    ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
42    FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
43    FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
44    FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
45    ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
46</synopsis>
47 </refsynopsisdiv>
48
49 <refsect1>
50  <title>Description</title>
51
52  <para>
53   <command>COPY</command> moves data between
54   <productname>PostgreSQL</productname> tables and standard file-system
55   files. <command>COPY TO</command> copies the contents of a table
56   <emphasis>to</> a file, while <command>COPY FROM</command> copies
57   data <emphasis>from</> a file to a table (appending the data to
58   whatever is in the table already).  <command>COPY TO</command>
59   can also copy the results of a <command>SELECT</> query.
60  </para>
61
62  <para>
63   If a column list is specified, <command>COPY TO</command> copies only
64   the data in the specified columns to the file.  For <command>COPY
65   FROM</command>, each field in the file is inserted, in order, into the
66   specified column.  Table columns not specified in the <command>COPY
67   FROM</command> column list will receive their default values.
68  </para>
69
70  <para>
71   <command>COPY</command> with a file name instructs the
72   <productname>PostgreSQL</productname> server to directly read from
73   or write to a file. The file must be accessible by the
74   <productname>PostgreSQL</productname> user (the user ID the server
75   runs as) and the name must be specified from the viewpoint of the
76   server. When <literal>PROGRAM</literal> is specified, the server
77   executes the given command and reads from the standard output of the
78   program, or writes to the standard input of the program. The command
79   must be specified from the viewpoint of the server, and be executable
80   by the <productname>PostgreSQL</productname> user.  When
81   <literal>STDIN</literal> or <literal>STDOUT</literal> is
82   specified, data is transmitted via the connection between the
83   client and the server.
84  </para>
85 </refsect1>
86
87 <refsect1>
88  <title>Parameters</title>
89
90  <variablelist>
91   <varlistentry>
92    <term><replaceable class="parameter">table_name</replaceable></term>
93    <listitem>
94     <para>
95      The name (optionally schema-qualified) of an existing table.
96     </para>
97    </listitem>
98   </varlistentry>
99
100   <varlistentry>
101    <term><replaceable class="parameter">column_name</replaceable></term>
102     <listitem>
103     <para>
104      An optional list of columns to be copied.  If no column list is
105      specified, all columns of the table will be copied.
106     </para>
107    </listitem>
108   </varlistentry>
109
110   <varlistentry>
111    <term><replaceable class="parameter">query</replaceable></term>
112    <listitem>
113     <para>
114      A <xref linkend="sql-select">, <xref linkend="sql-values">,
115      <xref linkend="sql-insert">, <xref linkend="sql-update"> or
116      <xref linkend="sql-delete"> command whose results are to be
117      copied.  Note that parentheses are required around the query.
118     </para>
119     <para>
120      For <command>INSERT</>, <command>UPDATE</> and
121      <command>DELETE</> queries a RETURNING clause must be provided,
122      and the target relation must not have a conditional rule, nor
123      an <literal>ALSO</> rule, nor an <literal>INSTEAD</> rule
124      that expands to multiple statements.
125     </para>
126    </listitem>
127   </varlistentry>
128
129   <varlistentry>
130    <term><replaceable class="parameter">filename</replaceable></term>
131    <listitem>
132     <para>
133      The path name of the input or output file.  An input file name can be
134      an absolute or relative path, but an output file name must be an absolute
135      path.  Windows users might need to use an <literal>E''</> string and
136      double any backslashes used in the path name.
137     </para>
138    </listitem>
139   </varlistentry>
140
141   <varlistentry>
142    <term><literal>PROGRAM</literal></term>
143    <listitem>
144     <para>
145      A command to execute. In <command>COPY FROM</command>, the input is
146      read from standard output of the command, and in <command>COPY TO</>,
147      the output is written to the standard input of the command.
148     </para>
149     <para>
150      Note that the command is invoked by the shell, so if you need to pass
151      any arguments to shell command that come from an untrusted source, you
152      must be careful to strip or escape any special characters that might
153      have a special meaning for the shell. For security reasons, it is best
154      to use a fixed command string, or at least avoid passing any user input
155      in it.
156     </para>
157    </listitem>
158   </varlistentry>
159
160   <varlistentry>
161    <term><literal>STDIN</literal></term>
162    <listitem>
163     <para>
164      Specifies that input comes from the client application.
165     </para>
166    </listitem>
167   </varlistentry>
168
169   <varlistentry>
170    <term><literal>STDOUT</literal></term>
171    <listitem>
172     <para>
173      Specifies that output goes to the client application.
174     </para>
175    </listitem>
176   </varlistentry>
177
178   <varlistentry>
179    <term><replaceable class="parameter">boolean</replaceable></term>
180    <listitem>
181     <para>
182      Specifies whether the selected option should be turned on or off.
183      You can write <literal>TRUE</literal>, <literal>ON</>, or
184      <literal>1</literal> to enable the option, and <literal>FALSE</literal>,
185      <literal>OFF</>, or <literal>0</literal> to disable it.  The
186      <replaceable class="parameter">boolean</replaceable> value can also
187      be omitted, in which case <literal>TRUE</literal> is assumed.
188     </para>
189    </listitem>
190   </varlistentry>
191
192   <varlistentry>
193    <term><literal>FORMAT</literal></term>
194    <listitem>
195     <para>
196      Selects the data format to be read or written:
197      <literal>text</>,
198      <literal>csv</> (Comma Separated Values),
199      or <literal>binary</>.
200      The default is <literal>text</>.
201     </para>
202    </listitem>
203   </varlistentry>
204
205   <varlistentry>
206    <term><literal>OIDS</literal></term>
207    <listitem>
208     <para>
209      Specifies copying the OID for each row.  (An error is raised if
210      <literal>OIDS</literal> is specified for a table that does not
211      have OIDs, or in the case of copying a <replaceable
212      class="parameter">query</replaceable>.)
213     </para>
214    </listitem>
215   </varlistentry>
216
217   <varlistentry>
218    <term><literal>FREEZE</literal></term>
219    <listitem>
220     <para>
221      Requests copying the data with rows already frozen, just as they
222      would be after running the <command>VACUUM FREEZE</> command.
223      This is intended as a performance option for initial data loading.
224      Rows will be frozen only if the table being loaded has been created
225      or truncated in the current subtransaction, there are no cursors
226      open and there are no older snapshots held by this transaction.  It is
227      currently not possible to perform a <command>COPY FREEZE</command> on
228      a partitioned table.
229     </para>
230     <para>
231      Note that all other sessions will immediately be able to see the data
232      once it has been successfully loaded. This violates the normal rules
233      of MVCC visibility and users specifying should be aware of the
234      potential problems this might cause.
235     </para>
236    </listitem>
237   </varlistentry>
238
239   <varlistentry>
240    <term><literal>DELIMITER</literal></term>
241    <listitem>
242     <para>
243      Specifies the character that separates columns within each row
244      (line) of the file.  The default is a tab character in text format,
245      a comma in <literal>CSV</> format.
246      This must be a single one-byte character.
247      This option is not allowed when using <literal>binary</> format.
248     </para>
249    </listitem>
250   </varlistentry>
251
252   <varlistentry>
253    <term><literal>NULL</literal></term>
254    <listitem>
255     <para>
256      Specifies the string that represents a null value. The default is
257      <literal>\N</literal> (backslash-N) in text format, and an unquoted empty
258      string in <literal>CSV</> format. You might prefer an
259      empty string even in text format for cases where you don't want to
260      distinguish nulls from empty strings.
261      This option is not allowed when using <literal>binary</> format.
262     </para>
263
264     <note>
265      <para>
266       When using <command>COPY FROM</command>, any data item that matches
267       this string will be stored as a null value, so you should make
268       sure that you use the same string as you used with
269       <command>COPY TO</command>.
270      </para>
271     </note>
272
273    </listitem>
274   </varlistentry>
275
276   <varlistentry>
277    <term><literal>HEADER</literal></term>
278    <listitem>
279     <para>
280      Specifies that the file contains a header line with the names of each
281      column in the file.  On output, the first line contains the column
282      names from the table, and on input, the first line is ignored.
283      This option is allowed only when using <literal>CSV</> format.
284     </para>
285    </listitem>
286   </varlistentry>
287
288   <varlistentry>
289    <term><literal>QUOTE</literal></term>
290    <listitem>
291     <para>
292      Specifies the quoting character to be used when a data value is quoted.
293      The default is double-quote.
294      This must be a single one-byte character.
295      This option is allowed only when using <literal>CSV</> format.
296     </para>
297    </listitem>
298   </varlistentry>
299
300   <varlistentry>
301    <term><literal>ESCAPE</literal></term>
302    <listitem>
303     <para>
304      Specifies the character that should appear before a
305      data character that matches the <literal>QUOTE</> value.
306      The default is the same as the <literal>QUOTE</> value (so that
307      the quoting character is doubled if it appears in the data).
308      This must be a single one-byte character.
309      This option is allowed only when using <literal>CSV</> format.
310     </para>
311    </listitem>
312   </varlistentry>
313
314   <varlistentry>
315    <term><literal>FORCE_QUOTE</></term>
316    <listitem>
317     <para>
318      Forces quoting to be
319      used for all non-<literal>NULL</> values in each specified column.
320      <literal>NULL</> output is never quoted. If <literal>*</> is specified,
321      non-<literal>NULL</> values will be quoted in all columns.
322      This option is allowed only in <command>COPY TO</>, and only when
323      using <literal>CSV</> format.
324     </para>
325    </listitem>
326   </varlistentry>
327
328   <varlistentry>
329    <term><literal>FORCE_NOT_NULL</></term>
330    <listitem>
331     <para>
332      Do not match the specified columns' values against the null string.
333      In the default case where the null string is empty, this means that
334      empty values will be read as zero-length strings rather than nulls,
335      even when they are not quoted.
336      This option is allowed only in <command>COPY FROM</>, and only when
337      using <literal>CSV</> format.
338     </para>
339    </listitem>
340   </varlistentry>
341
342   <varlistentry>
343    <term><literal>FORCE_NULL</></term>
344    <listitem>
345     <para>
346      Match the specified columns' values against the null string, even
347      if it has been quoted, and if a match is found set the value to
348      <literal>NULL</>. In the default case where the null string is empty,
349      this converts a quoted empty string into NULL.
350      This option is allowed only in <command>COPY FROM</>, and only when
351      using <literal>CSV</> format.
352     </para>
353    </listitem>
354   </varlistentry>
355
356   <varlistentry>
357    <term><literal>ENCODING</></term>
358    <listitem>
359     <para>
360      Specifies that the file is encoded in the <replaceable
361      class="parameter">encoding_name</replaceable>.  If this option is
362      omitted, the current client encoding is used. See the Notes below
363      for more details.
364     </para>
365    </listitem>
366   </varlistentry>
367
368  </variablelist>
369 </refsect1>
370
371 <refsect1>
372  <title>Outputs</title>
373
374  <para>
375   On successful completion, a <command>COPY</> command returns a command
376   tag of the form
377<screen>
378COPY <replaceable class="parameter">count</replaceable>
379</screen>
380   The <replaceable class="parameter">count</replaceable> is the number
381   of rows copied.
382  </para>
383
384  <note>
385   <para>
386    <application>psql</> will print this command tag only if the command
387    was not <literal>COPY ... TO STDOUT</>, or the
388    equivalent <application>psql</> meta-command
389    <literal>\copy ... to stdout</>.  This is to prevent confusing the
390    command tag with the data that was just printed.
391   </para>
392  </note>
393 </refsect1>
394
395 <refsect1>
396  <title>Notes</title>
397
398   <para>
399    <command>COPY TO</command> can be used only with plain
400    tables, not views, and does not copy rows from child tables
401    or child partitions.  For example, <literal>COPY <replaceable
402    class="parameter">table</replaceable> TO</literal> copies
403    the same rows as <literal>SELECT * FROM ONLY <replaceable
404    class="parameter">table</replaceable></literal>.
405    The syntax <literal>COPY (SELECT * FROM <replaceable
406    class="parameter">table</replaceable>) TO ...</literal> can be used to
407    dump all of the rows in an inheritance hierarchy, partitioned table,
408    or view.
409   </para>
410
411   <para>
412    <command>COPY FROM</command> can be used with plain tables and with views
413    that have <literal>INSTEAD OF INSERT</> triggers.
414   </para>
415
416   <para>
417    You must have select privilege on the table
418    whose values are read by <command>COPY TO</command>, and
419    insert privilege on the table into which values
420    are inserted by <command>COPY FROM</command>.  It is sufficient
421    to have column privileges on the column(s) listed in the command.
422   </para>
423
424   <para>
425    If row-level security is enabled for the table, the relevant
426    <command>SELECT</command> policies will apply to <literal>COPY
427    <replaceable class="parameter">table</> TO</literal> statements.
428    Currently, <command>COPY FROM</command> is not supported for tables
429    with row-level security. Use equivalent <command>INSERT</command>
430    statements instead.
431   </para>
432
433   <para>
434    Files named in a <command>COPY</command> command are read or written
435    directly by the server, not by the client application. Therefore,
436    they must reside on or be accessible to the database server machine,
437    not the client. They must be accessible to and readable or writable
438    by the <productname>PostgreSQL</productname> user (the user ID the
439    server runs as), not the client. Similarly,
440    the command specified with <literal>PROGRAM</literal> is executed directly
441    by the server, not by the client application, must be executable by the
442    <productname>PostgreSQL</productname> user.
443    <command>COPY</command> naming a file or command is only allowed to
444    database superusers, since it allows reading or writing any file that the
445    server has privileges to access.
446   </para>
447
448   <para>
449    Do not confuse <command>COPY</command> with the
450    <application>psql</application> instruction
451    <command><link linkend="APP-PSQL-meta-commands-copy">\copy</link></command>. <command>\copy</command> invokes
452    <command>COPY FROM STDIN</command> or <command>COPY TO
453    STDOUT</command>, and then fetches/stores the data in a file
454    accessible to the <application>psql</application> client. Thus,
455    file accessibility and access rights depend on the client rather
456    than the server when <command>\copy</command> is used.
457   </para>
458
459   <para>
460    It is recommended that the file name used in <command>COPY</command>
461    always be specified as an absolute path. This is enforced by the
462    server in the case of <command>COPY TO</command>, but for
463    <command>COPY FROM</command> you do have the option of reading from
464    a file specified by a relative path. The path will be interpreted
465    relative to the working directory of the server process (normally
466    the cluster's data directory), not the client's working directory.
467   </para>
468
469   <para>
470    Executing a command with <literal>PROGRAM</literal> might be restricted
471    by the operating system's access control mechanisms, such as SELinux.
472   </para>
473
474   <para>
475    <command>COPY FROM</command> will invoke any triggers and check
476    constraints on the destination table. However, it will not invoke rules.
477   </para>
478
479   <para>
480    For identity columns, the <command>COPY FROM</command> command will always
481    write the column values provided in the input data, like
482    the <command>INSERT</command> option <literal>OVERRIDING SYSTEM
483    VALUE</literal>.
484   </para>
485
486   <para>
487    <command>COPY</command> input and output is affected by
488    <varname>DateStyle</varname>. To ensure portability to other
489    <productname>PostgreSQL</productname> installations that might use
490    non-default <varname>DateStyle</varname> settings,
491    <varname>DateStyle</varname> should be set to <literal>ISO</> before
492    using <command>COPY TO</>.  It is also a good idea to avoid dumping
493    data with <varname>IntervalStyle</varname> set to
494    <literal>sql_standard</>, because negative interval values might be
495    misinterpreted by a server that has a different setting for
496    <varname>IntervalStyle</varname>.
497   </para>
498
499   <para>
500    Input data is interpreted according to <literal>ENCODING</literal>
501    option or the current client encoding, and output data is encoded
502    in <literal>ENCODING</literal> or the current client encoding, even
503    if the data does not pass through the client but is read from or
504    written to a file directly by the server.
505   </para>
506
507   <para>
508    <command>COPY</command> stops operation at the first error. This
509    should not lead to problems in the event of a <command>COPY
510    TO</command>, but the target table will already have received
511    earlier rows in a <command>COPY FROM</command>. These rows will not
512    be visible or accessible, but they still occupy disk space. This might
513    amount to a considerable amount of wasted disk space if the failure
514    happened well into a large copy operation. You might wish to invoke
515    <command>VACUUM</command> to recover the wasted space.
516   </para>
517
518   <para>
519    <literal>FORCE_NULL</> and <literal>FORCE_NOT_NULL</> can be used
520    simultaneously on the same column. This results in converting quoted
521    null strings to null values and unquoted null strings to empty strings.
522   </para>
523
524 </refsect1>
525
526 <refsect1>
527  <title>File Formats</title>
528
529  <refsect2>
530   <title>Text Format</title>
531
532   <para>
533    When the <literal>text</> format is used,
534    the data read or written is a text file with one line per table row.
535    Columns in a row are separated by the delimiter character.
536    The column values themselves are strings generated by the
537    output function, or acceptable to the input function, of each
538    attribute's data type.  The specified null string is used in
539    place of columns that are null.
540    <command>COPY FROM</command> will raise an error if any line of the
541    input file contains more or fewer columns than are expected.
542    If <literal>OIDS</literal> is specified, the OID is read or written as the first column,
543    preceding the user data columns.
544   </para>
545
546   <para>
547    End of data can be represented by a single line containing just
548    backslash-period (<literal>\.</>).  An end-of-data marker is
549    not necessary when reading from a file, since the end of file
550    serves perfectly well; it is needed only when copying data to or from
551    client applications using pre-3.0 client protocol.
552   </para>
553
554   <para>
555    Backslash characters (<literal>\</>) can be used in the
556    <command>COPY</command> data to quote data characters that might
557    otherwise be taken as row or column delimiters. In particular, the
558    following characters <emphasis>must</> be preceded by a backslash if
559    they appear as part of a column value: backslash itself,
560    newline, carriage return, and the current delimiter character.
561   </para>
562
563   <para>
564    The specified null string is sent by <command>COPY TO</command> without
565    adding any backslashes; conversely, <command>COPY FROM</command> matches
566    the input against the null string before removing backslashes.  Therefore,
567    a null string such as <literal>\N</literal> cannot be confused with
568    the actual data value <literal>\N</literal> (which would be represented
569    as <literal>\\N</literal>).
570   </para>
571
572   <para>
573    The following special backslash sequences are recognized by
574    <command>COPY FROM</command>:
575
576   <informaltable>
577    <tgroup cols="2">
578     <thead>
579      <row>
580       <entry>Sequence</entry>
581       <entry>Represents</entry>
582      </row>
583     </thead>
584
585     <tbody>
586      <row>
587       <entry><literal>\b</></entry>
588       <entry>Backspace (ASCII 8)</entry>
589      </row>
590      <row>
591       <entry><literal>\f</></entry>
592       <entry>Form feed (ASCII 12)</entry>
593      </row>
594      <row>
595       <entry><literal>\n</></entry>
596       <entry>Newline (ASCII 10)</entry>
597      </row>
598      <row>
599       <entry><literal>\r</></entry>
600       <entry>Carriage return (ASCII 13)</entry>
601      </row>
602      <row>
603       <entry><literal>\t</></entry>
604       <entry>Tab (ASCII 9)</entry>
605      </row>
606      <row>
607       <entry><literal>\v</></entry>
608       <entry>Vertical tab (ASCII 11)</entry>
609      </row>
610      <row>
611       <entry><literal>\</><replaceable>digits</></entry>
612       <entry>Backslash followed by one to three octal digits specifies
613       the byte with that numeric code</entry>
614      </row>
615      <row>
616       <entry><literal>\x</><replaceable>digits</></entry>
617       <entry>Backslash <literal>x</> followed by one or two hex digits specifies
618       the byte with that numeric code</entry>
619      </row>
620     </tbody>
621    </tgroup>
622   </informaltable>
623
624    Presently, <command>COPY TO</command> will never emit an octal or
625    hex-digits backslash sequence, but it does use the other sequences
626    listed above for those control characters.
627   </para>
628
629   <para>
630    Any other backslashed character that is not mentioned in the above table
631    will be taken to represent itself.  However, beware of adding backslashes
632    unnecessarily, since that might accidentally produce a string matching the
633    end-of-data marker (<literal>\.</>) or the null string (<literal>\N</> by
634    default).  These strings will be recognized before any other backslash
635    processing is done.
636   </para>
637
638   <para>
639    It is strongly recommended that applications generating <command>COPY</command> data convert
640    data newlines and carriage returns to the <literal>\n</> and
641    <literal>\r</> sequences respectively.  At present it is
642    possible to represent a data carriage return by a backslash and carriage
643    return, and to represent a data newline by a backslash and newline.
644    However, these representations might not be accepted in future releases.
645    They are also highly vulnerable to corruption if the <command>COPY</command> file is
646    transferred across different machines (for example, from Unix to Windows
647    or vice versa).
648   </para>
649
650   <para>
651     All backslash sequences are interpreted after encoding conversion.
652     The bytes specified with the octal and hex-digit backslash sequences must
653     form valid characters in the database encoding.
654   </para>
655
656   <para>
657    <command>COPY TO</command> will terminate each row with a Unix-style
658    newline (<quote><literal>\n</></>).  Servers running on Microsoft Windows instead
659    output carriage return/newline (<quote><literal>\r\n</></>), but only for
660    <command>COPY</> to a server file; for consistency across platforms,
661    <command>COPY TO STDOUT</> always sends <quote><literal>\n</></>
662    regardless of server platform.
663    <command>COPY FROM</command> can handle lines ending with newlines,
664    carriage returns, or carriage return/newlines.  To reduce the risk of
665    error due to un-backslashed newlines or carriage returns that were
666    meant as data, <command>COPY FROM</command> will complain if the line
667    endings in the input are not all alike.
668   </para>
669  </refsect2>
670
671  <refsect2>
672   <title>CSV Format</title>
673
674   <para>
675    This format option is used for importing and exporting the Comma
676    Separated Value (<literal>CSV</>) file format used by many other
677    programs, such as spreadsheets. Instead of the escaping rules used by
678    <productname>PostgreSQL</productname>'s standard text format, it
679    produces and recognizes the common CSV escaping mechanism.
680   </para>
681
682   <para>
683    The values in each record are separated by the <literal>DELIMITER</>
684    character. If the value contains the delimiter character, the
685    <literal>QUOTE</> character, the <literal>NULL</> string, a carriage
686    return, or line feed character, then the whole value is prefixed and
687    suffixed by the <literal>QUOTE</> character, and any occurrence
688    within the value of a <literal>QUOTE</> character or the
689    <literal>ESCAPE</> character is preceded by the escape character.
690    You can also use <literal>FORCE_QUOTE</> to force quotes when outputting
691    non-<literal>NULL</> values in specific columns.
692   </para>
693
694   <para>
695    The <literal>CSV</> format has no standard way to distinguish a
696    <literal>NULL</> value from an empty string.
697    <productname>PostgreSQL</>'s <command>COPY</> handles this by quoting.
698    A <literal>NULL</> is output as the <literal>NULL</> parameter string
699    and is not quoted, while a non-<literal>NULL</> value matching the
700    <literal>NULL</> parameter string is quoted.  For example, with the
701    default settings, a <literal>NULL</> is written as an unquoted empty
702    string, while an empty string data value is written with double quotes
703    (<literal>""</>). Reading values follows similar rules. You can
704    use <literal>FORCE_NOT_NULL</> to prevent <literal>NULL</> input
705    comparisons for specific columns. You can also use
706    <literal>FORCE_NULL</> to convert quoted null string data values to
707    <literal>NULL</>.
708   </para>
709
710   <para>
711    Because backslash is not a special character in the <literal>CSV</>
712    format, <literal>\.</>, the end-of-data marker, could also appear
713    as a data value.  To avoid any misinterpretation, a <literal>\.</>
714    data value appearing as a lone entry on a line is automatically
715    quoted on output, and on input, if quoted, is not interpreted as the
716    end-of-data marker.  If you are loading a file created by another
717    application that has a single unquoted column and might have a
718    value of <literal>\.</>, you might need to quote that value in the
719    input file.
720   </para>
721
722   <note>
723    <para>
724     In <literal>CSV</> format, all characters are significant. A quoted value
725     surrounded by white space, or any characters other than
726     <literal>DELIMITER</>, will include those characters. This can cause
727     errors if you import data from a system that pads <literal>CSV</>
728     lines with white space out to some fixed width. If such a situation
729     arises you might need to preprocess the <literal>CSV</> file to remove
730     the trailing white space, before importing the data into
731     <productname>PostgreSQL</>.
732    </para>
733   </note>
734
735   <note>
736    <para>
737     CSV format will both recognize and produce CSV files with quoted
738     values containing embedded carriage returns and line feeds. Thus
739     the files are not strictly one line per table row like text-format
740     files.
741    </para>
742   </note>
743
744   <note>
745    <para>
746     Many programs produce strange and occasionally perverse CSV files,
747     so the file format is more a convention than a standard. Thus you
748     might encounter some files that cannot be imported using this
749     mechanism, and <command>COPY</> might produce files that other
750     programs cannot process.
751    </para>
752   </note>
753
754  </refsect2>
755
756  <refsect2>
757   <title>Binary Format</title>
758
759   <para>
760    The <literal>binary</literal> format option causes all data to be
761    stored/read as binary format rather than as text.  It is
762    somewhat faster than the text and <literal>CSV</> formats,
763    but a binary-format file is less portable across machine architectures and
764    <productname>PostgreSQL</productname> versions.
765    Also, the binary format is very data type specific; for example
766    it will not work to output binary data from a <type>smallint</> column
767    and read it into an <type>integer</> column, even though that would work
768    fine in text format.
769   </para>
770
771   <para>
772    The <literal>binary</> file format consists
773    of a file header, zero or more tuples containing the row data, and
774    a file trailer.  Headers and data are in network byte order.
775   </para>
776
777   <note>
778    <para>
779     <productname>PostgreSQL</productname> releases before 7.4 used a
780     different binary file format.
781    </para>
782   </note>
783
784   <refsect3>
785    <title>File Header</title>
786
787    <para>
788     The file header consists of 15 bytes of fixed fields, followed
789     by a variable-length header extension area.  The fixed fields are:
790
791    <variablelist>
792     <varlistentry>
793      <term>Signature</term>
794      <listitem>
795       <para>
79611-byte sequence <literal>PGCOPY\n\377\r\n\0</> &mdash; note that the zero byte
797is a required part of the signature.  (The signature is designed to allow
798easy identification of files that have been munged by a non-8-bit-clean
799transfer.  This signature will be changed by end-of-line-translation
800filters, dropped zero bytes, dropped high bits, or parity changes.)
801       </para>
802      </listitem>
803     </varlistentry>
804
805     <varlistentry>
806      <term>Flags field</term>
807      <listitem>
808       <para>
80932-bit integer bit mask to denote important aspects of the file format. Bits
810are numbered from 0 (<acronym>LSB</>) to 31 (<acronym>MSB</>).  Note that
811this field is stored in network byte order (most significant byte first),
812as are all the integer fields used in the file format.  Bits
81316-31 are reserved to denote critical file format issues; a reader
814should abort if it finds an unexpected bit set in this range. Bits 0-15
815are reserved to signal backwards-compatible format issues; a reader
816should simply ignore any unexpected bits set in this range. Currently
817only one flag bit is defined, and the rest must be zero:
818        <variablelist>
819         <varlistentry>
820          <term>Bit 16</term>
821          <listitem>
822           <para>
823            if 1, OIDs are included in the data; if 0, not
824           </para>
825          </listitem>
826         </varlistentry>
827        </variablelist></para>
828      </listitem>
829     </varlistentry>
830
831     <varlistentry>
832      <term>Header extension area length</term>
833      <listitem>
834       <para>
83532-bit integer, length in bytes of remainder of header, not including self.
836Currently, this is zero, and the first tuple follows
837immediately.  Future changes to the format might allow additional data
838to be present in the header.  A reader should silently skip over any header
839extension data it does not know what to do with.
840       </para>
841      </listitem>
842     </varlistentry>
843    </variablelist>
844    </para>
845
846    <para>
847The header extension area is envisioned to contain a sequence of
848self-identifying chunks.  The flags field is not intended to tell readers
849what is in the extension area.  Specific design of header extension contents
850is left for a later release.
851    </para>
852
853    <para>
854     This design allows for both backwards-compatible header additions (add
855     header extension chunks, or set low-order flag bits) and
856     non-backwards-compatible changes (set high-order flag bits to signal such
857     changes, and add supporting data to the extension area if needed).
858    </para>
859   </refsect3>
860
861   <refsect3>
862    <title>Tuples</title>
863    <para>
864Each tuple begins with a 16-bit integer count of the number of fields in the
865tuple.  (Presently, all tuples in a table will have the same count, but that
866might not always be true.)  Then, repeated for each field in the tuple, there
867is a 32-bit length word followed by that many bytes of field data.  (The
868length word does not include itself, and can be zero.)  As a special case,
869-1 indicates a NULL field value.  No value bytes follow in the NULL case.
870    </para>
871
872    <para>
873There is no alignment padding or any other extra data between fields.
874    </para>
875
876    <para>
877Presently, all data values in a binary-format file are
878assumed to be in binary format (format code one).  It is anticipated that a
879future extension might add a header field that allows per-column format codes
880to be specified.
881    </para>
882
883    <para>
884To determine the appropriate binary format for the actual tuple data you
885should consult the <productname>PostgreSQL</productname> source, in
886particular the <function>*send</> and <function>*recv</> functions for
887each column's data type (typically these functions are found in the
888<filename>src/backend/utils/adt/</filename> directory of the source
889distribution).
890    </para>
891
892    <para>
893If OIDs are included in the file, the OID field immediately follows the
894field-count word.  It is a normal field except that it's not included
895in the field-count.  In particular it has a length word &mdash; this will allow
896handling of 4-byte vs. 8-byte OIDs without too much pain, and will allow
897OIDs to be shown as null if that ever proves desirable.
898    </para>
899   </refsect3>
900
901   <refsect3>
902    <title>File Trailer</title>
903
904    <para>
905     The file trailer consists of a 16-bit integer word containing -1.  This
906     is easily distinguished from a tuple's field-count word.
907    </para>
908
909    <para>
910     A reader should report an error if a field-count word is neither -1
911     nor the expected number of columns.  This provides an extra
912     check against somehow getting out of sync with the data.
913    </para>
914   </refsect3>
915  </refsect2>
916 </refsect1>
917
918 <refsect1>
919  <title>Examples</title>
920
921  <para>
922   The following example copies a table to the client
923   using the vertical bar (<literal>|</literal>) as the field delimiter:
924<programlisting>
925COPY country TO STDOUT (DELIMITER '|');
926</programlisting>
927  </para>
928
929  <para>
930   To copy data from a file into the <literal>country</> table:
931<programlisting>
932COPY country FROM '/usr1/proj/bray/sql/country_data';
933</programlisting>
934  </para>
935
936  <para>
937   To copy into a file just the countries whose names start with 'A':
938<programlisting>
939COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy';
940</programlisting>
941  </para>
942
943  <para>
944   To copy into a compressed file, you can pipe the output through an external
945   compression program:
946<programlisting>
947COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz';
948</programlisting>
949  </para>
950
951  <para>
952   Here is a sample of data suitable for copying into a table from
953   <literal>STDIN</literal>:
954<programlisting>
955AF      AFGHANISTAN
956AL      ALBANIA
957DZ      ALGERIA
958ZM      ZAMBIA
959ZW      ZIMBABWE
960</programlisting>
961   Note that the white space on each line is actually a tab character.
962  </para>
963
964  <para>
965   The following is the same data, output in binary format.
966   The data is shown after filtering through the
967   Unix utility <command>od -c</command>. The table has three columns;
968   the first has type <type>char(2)</type>, the second has type <type>text</type>,
969   and the third has type <type>integer</type>. All the rows have a null value
970   in the third column.
971<programlisting>
9720000000   P   G   C   O   P   Y  \n 377  \r  \n  \0  \0  \0  \0  \0  \0
9730000020  \0  \0  \0  \0 003  \0  \0  \0 002   A   F  \0  \0  \0 013   A
9740000040   F   G   H   A   N   I   S   T   A   N 377 377 377 377  \0 003
9750000060  \0  \0  \0 002   A   L  \0  \0  \0 007   A   L   B   A   N   I
9760000100   A 377 377 377 377  \0 003  \0  \0  \0 002   D   Z  \0  \0  \0
9770000120 007   A   L   G   E   R   I   A 377 377 377 377  \0 003  \0  \0
9780000140  \0 002   Z   M  \0  \0  \0 006   Z   A   M   B   I   A 377 377
9790000160 377 377  \0 003  \0  \0  \0 002   Z   W  \0  \0  \0  \b   Z   I
9800000200   M   B   A   B   W   E 377 377 377 377 377 377
981</programlisting></para>
982 </refsect1>
983
984 <refsect1>
985  <title>Compatibility</title>
986
987  <para>
988   There is no <command>COPY</command> statement in the SQL standard.
989  </para>
990
991  <para>
992   The following syntax was used before <productname>PostgreSQL</>
993   version 9.0 and is still supported:
994
995<synopsis>
996COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
997    FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
998    [ [ WITH ]
999          [ BINARY ]
1000          [ OIDS ]
1001          [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ]
1002          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
1003          [ CSV [ HEADER ]
1004                [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ]
1005                [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ]
1006                [ FORCE NOT NULL <replaceable class="parameter">column_name</replaceable> [, ...] ] ] ]
1007
1008COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
1009    TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
1010    [ [ WITH ]
1011          [ BINARY ]
1012          [ OIDS ]
1013          [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ]
1014          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
1015          [ CSV [ HEADER ]
1016                [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ]
1017                [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ]
1018                [ FORCE QUOTE { <replaceable class="parameter">column_name</replaceable> [, ...] | * } ] ] ]
1019</synopsis>
1020
1021   Note that in this syntax, <literal>BINARY</> and <literal>CSV</> are
1022   treated as independent keywords, not as arguments of a <literal>FORMAT</>
1023   option.
1024  </para>
1025
1026  <para>
1027   The following syntax was used before <productname>PostgreSQL</>
1028   version 7.3 and is still supported:
1029
1030<synopsis>
1031COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ]
1032    FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
1033    [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ]
1034    [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ]
1035
1036COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ]
1037    TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
1038    [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ]
1039    [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ]
1040</synopsis></para>
1041 </refsect1>
1042</refentry>
1043