1<!--
2doc/src/sgml/ref/copy.sgml
3PostgreSQL documentation
4-->
5
6<refentry id="sql-copy">
7 <indexterm zone="sql-copy">
8  <primary>COPY</primary>
9 </indexterm>
10
11 <refmeta>
12  <refentrytitle>COPY</refentrytitle>
13  <manvolnum>7</manvolnum>
14  <refmiscinfo>SQL - Language Statements</refmiscinfo>
15 </refmeta>
16
17 <refnamediv>
18  <refname>COPY</refname>
19  <refpurpose>copy data between a file and a table</refpurpose>
20 </refnamediv>
21
22 <refsynopsisdiv>
23<synopsis>
24COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
25    FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN }
26    [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
27    [ WHERE <replaceable class="parameter">condition</replaceable> ]
28
29COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
30    TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT }
31    [ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
32
33<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase>
34
35    FORMAT <replaceable class="parameter">format_name</replaceable>
36    FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
37    DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
38    NULL '<replaceable class="parameter">null_string</replaceable>'
39    HEADER [ <replaceable class="parameter">boolean</replaceable> ]
40    QUOTE '<replaceable class="parameter">quote_character</replaceable>'
41    ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
42    FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
43    FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
44    FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
45    ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
46</synopsis>
47 </refsynopsisdiv>
48
49 <refsect1>
50  <title>Description</title>
51
52  <para>
53   <command>COPY</command> moves data between
54   <productname>PostgreSQL</productname> tables and standard file-system
55   files. <command>COPY TO</command> copies the contents of a table
56   <emphasis>to</emphasis> a file, while <command>COPY FROM</command> copies
57   data <emphasis>from</emphasis> a file to a table (appending the data to
58   whatever is in the table already).  <command>COPY TO</command>
59   can also copy the results of a <command>SELECT</command> query.
60  </para>
61
62  <para>
63   If a column list is specified, <command>COPY TO</command> copies only
64   the data in the specified columns to the file.  For <command>COPY
65   FROM</command>, each field in the file is inserted, in order, into the
66   specified column.  Table columns not specified in the <command>COPY
67   FROM</command> column list will receive their default values.
68  </para>
69
70  <para>
71   <command>COPY</command> with a file name instructs the
72   <productname>PostgreSQL</productname> server to directly read from
73   or write to a file. The file must be accessible by the
74   <productname>PostgreSQL</productname> user (the user ID the server
75   runs as) and the name must be specified from the viewpoint of the
76   server. When <literal>PROGRAM</literal> is specified, the server
77   executes the given command and reads from the standard output of the
78   program, or writes to the standard input of the program. The command
79   must be specified from the viewpoint of the server, and be executable
80   by the <productname>PostgreSQL</productname> user.  When
81   <literal>STDIN</literal> or <literal>STDOUT</literal> is
82   specified, data is transmitted via the connection between the
83   client and the server.
84  </para>
85 </refsect1>
86
87 <refsect1>
88  <title>Parameters</title>
89
90  <variablelist>
91   <varlistentry>
92    <term><replaceable class="parameter">table_name</replaceable></term>
93    <listitem>
94     <para>
95      The name (optionally schema-qualified) of an existing table.
96     </para>
97    </listitem>
98   </varlistentry>
99
100   <varlistentry>
101    <term><replaceable class="parameter">column_name</replaceable></term>
102     <listitem>
103     <para>
104      An optional list of columns to be copied.  If no column list is
105      specified, all columns of the table except generated columns will be
106      copied.
107     </para>
108    </listitem>
109   </varlistentry>
110
111   <varlistentry>
112    <term><replaceable class="parameter">query</replaceable></term>
113    <listitem>
114     <para>
115      A <xref linkend="sql-select"/>, <xref linkend="sql-values"/>,
116      <xref linkend="sql-insert"/>, <xref linkend="sql-update"/> or
117      <xref linkend="sql-delete"/> command whose results are to be
118      copied.  Note that parentheses are required around the query.
119     </para>
120     <para>
121      For <command>INSERT</command>, <command>UPDATE</command> and
122      <command>DELETE</command> queries a RETURNING clause must be provided,
123      and the target relation must not have a conditional rule, nor
124      an <literal>ALSO</literal> rule, nor an <literal>INSTEAD</literal> rule
125      that expands to multiple statements.
126     </para>
127    </listitem>
128   </varlistentry>
129
130   <varlistentry>
131    <term><replaceable class="parameter">filename</replaceable></term>
132    <listitem>
133     <para>
134      The path name of the input or output file.  An input file name can be
135      an absolute or relative path, but an output file name must be an absolute
136      path.  Windows users might need to use an <literal>E''</literal> string and
137      double any backslashes used in the path name.
138     </para>
139    </listitem>
140   </varlistentry>
141
142   <varlistentry>
143    <term><literal>PROGRAM</literal></term>
144    <listitem>
145     <para>
146      A command to execute. In <command>COPY FROM</command>, the input is
147      read from standard output of the command, and in <command>COPY TO</command>,
148      the output is written to the standard input of the command.
149     </para>
150     <para>
151      Note that the command is invoked by the shell, so if you need to pass
152      any arguments to shell command that come from an untrusted source, you
153      must be careful to strip or escape any special characters that might
154      have a special meaning for the shell. For security reasons, it is best
155      to use a fixed command string, or at least avoid passing any user input
156      in it.
157     </para>
158    </listitem>
159   </varlistentry>
160
161   <varlistentry>
162    <term><literal>STDIN</literal></term>
163    <listitem>
164     <para>
165      Specifies that input comes from the client application.
166     </para>
167    </listitem>
168   </varlistentry>
169
170   <varlistentry>
171    <term><literal>STDOUT</literal></term>
172    <listitem>
173     <para>
174      Specifies that output goes to the client application.
175     </para>
176    </listitem>
177   </varlistentry>
178
179   <varlistentry>
180    <term><replaceable class="parameter">boolean</replaceable></term>
181    <listitem>
182     <para>
183      Specifies whether the selected option should be turned on or off.
184      You can write <literal>TRUE</literal>, <literal>ON</literal>, or
185      <literal>1</literal> to enable the option, and <literal>FALSE</literal>,
186      <literal>OFF</literal>, or <literal>0</literal> to disable it.  The
187      <replaceable class="parameter">boolean</replaceable> value can also
188      be omitted, in which case <literal>TRUE</literal> is assumed.
189     </para>
190    </listitem>
191   </varlistentry>
192
193   <varlistentry>
194    <term><literal>FORMAT</literal></term>
195    <listitem>
196     <para>
197      Selects the data format to be read or written:
198      <literal>text</literal>,
199      <literal>csv</literal> (Comma Separated Values),
200      or <literal>binary</literal>.
201      The default is <literal>text</literal>.
202     </para>
203    </listitem>
204   </varlistentry>
205
206   <varlistentry>
207    <term><literal>FREEZE</literal></term>
208    <listitem>
209     <para>
210      Requests copying the data with rows already frozen, just as they
211      would be after running the <command>VACUUM FREEZE</command> command.
212      This is intended as a performance option for initial data loading.
213      Rows will be frozen only if the table being loaded has been created
214      or truncated in the current subtransaction, there are no cursors
215      open and there are no older snapshots held by this transaction.  It is
216      currently not possible to perform a <command>COPY FREEZE</command> on
217      a partitioned table.
218     </para>
219     <para>
220      Note that all other sessions will immediately be able to see the data
221      once it has been successfully loaded. This violates the normal rules
222      of MVCC visibility and users specifying should be aware of the
223      potential problems this might cause.
224     </para>
225    </listitem>
226   </varlistentry>
227
228   <varlistentry>
229    <term><literal>DELIMITER</literal></term>
230    <listitem>
231     <para>
232      Specifies the character that separates columns within each row
233      (line) of the file.  The default is a tab character in text format,
234      a comma in <literal>CSV</literal> format.
235      This must be a single one-byte character.
236      This option is not allowed when using <literal>binary</literal> format.
237     </para>
238    </listitem>
239   </varlistentry>
240
241   <varlistentry>
242    <term><literal>NULL</literal></term>
243    <listitem>
244     <para>
245      Specifies the string that represents a null value. The default is
246      <literal>\N</literal> (backslash-N) in text format, and an unquoted empty
247      string in <literal>CSV</literal> format. You might prefer an
248      empty string even in text format for cases where you don't want to
249      distinguish nulls from empty strings.
250      This option is not allowed when using <literal>binary</literal> format.
251     </para>
252
253     <note>
254      <para>
255       When using <command>COPY FROM</command>, any data item that matches
256       this string will be stored as a null value, so you should make
257       sure that you use the same string as you used with
258       <command>COPY TO</command>.
259      </para>
260     </note>
261
262    </listitem>
263   </varlistentry>
264
265   <varlistentry>
266    <term><literal>HEADER</literal></term>
267    <listitem>
268     <para>
269      Specifies that the file contains a header line with the names of each
270      column in the file.  On output, the first line contains the column
271      names from the table, and on input, the first line is ignored.
272      This option is allowed only when using <literal>CSV</literal> format.
273     </para>
274    </listitem>
275   </varlistentry>
276
277   <varlistentry>
278    <term><literal>QUOTE</literal></term>
279    <listitem>
280     <para>
281      Specifies the quoting character to be used when a data value is quoted.
282      The default is double-quote.
283      This must be a single one-byte character.
284      This option is allowed only when using <literal>CSV</literal> format.
285     </para>
286    </listitem>
287   </varlistentry>
288
289   <varlistentry>
290    <term><literal>ESCAPE</literal></term>
291    <listitem>
292     <para>
293      Specifies the character that should appear before a
294      data character that matches the <literal>QUOTE</literal> value.
295      The default is the same as the <literal>QUOTE</literal> value (so that
296      the quoting character is doubled if it appears in the data).
297      This must be a single one-byte character.
298      This option is allowed only when using <literal>CSV</literal> format.
299     </para>
300    </listitem>
301   </varlistentry>
302
303   <varlistentry>
304    <term><literal>FORCE_QUOTE</literal></term>
305    <listitem>
306     <para>
307      Forces quoting to be
308      used for all non-<literal>NULL</literal> values in each specified column.
309      <literal>NULL</literal> output is never quoted. If <literal>*</literal> is specified,
310      non-<literal>NULL</literal> values will be quoted in all columns.
311      This option is allowed only in <command>COPY TO</command>, and only when
312      using <literal>CSV</literal> format.
313     </para>
314    </listitem>
315   </varlistentry>
316
317   <varlistentry>
318    <term><literal>FORCE_NOT_NULL</literal></term>
319    <listitem>
320     <para>
321      Do not match the specified columns' values against the null string.
322      In the default case where the null string is empty, this means that
323      empty values will be read as zero-length strings rather than nulls,
324      even when they are not quoted.
325      This option is allowed only in <command>COPY FROM</command>, and only when
326      using <literal>CSV</literal> format.
327     </para>
328    </listitem>
329   </varlistentry>
330
331   <varlistentry>
332    <term><literal>FORCE_NULL</literal></term>
333    <listitem>
334     <para>
335      Match the specified columns' values against the null string, even
336      if it has been quoted, and if a match is found set the value to
337      <literal>NULL</literal>. In the default case where the null string is empty,
338      this converts a quoted empty string into NULL.
339      This option is allowed only in <command>COPY FROM</command>, and only when
340      using <literal>CSV</literal> format.
341     </para>
342    </listitem>
343   </varlistentry>
344
345   <varlistentry>
346    <term><literal>ENCODING</literal></term>
347    <listitem>
348     <para>
349      Specifies that the file is encoded in the <replaceable
350      class="parameter">encoding_name</replaceable>.  If this option is
351      omitted, the current client encoding is used. See the Notes below
352      for more details.
353     </para>
354    </listitem>
355   </varlistentry>
356
357   <varlistentry>
358    <term><literal>WHERE</literal></term>
359    <listitem>
360   <para>
361    The optional <literal>WHERE</literal> clause has the general form
362<synopsis>
363WHERE <replaceable class="parameter">condition</replaceable>
364</synopsis>
365    where <replaceable class="parameter">condition</replaceable> is
366    any expression that evaluates to a result of type
367    <type>boolean</type>.  Any row that does not satisfy this
368    condition will not be inserted to the table.  A row satisfies the
369    condition if it returns true when the actual row values are
370    substituted for any variable references.
371   </para>
372
373   <para>
374    Currently, subqueries are not allowed in <literal>WHERE</literal>
375    expressions, and the evaluation does not see any changes made by the
376    <command>COPY</command> itself (this matters when the expression
377    contains calls to <literal>VOLATILE</literal> functions).
378   </para>
379
380    </listitem>
381   </varlistentry>
382
383  </variablelist>
384 </refsect1>
385
386 <refsect1>
387  <title>Outputs</title>
388
389  <para>
390   On successful completion, a <command>COPY</command> command returns a command
391   tag of the form
392<screen>
393COPY <replaceable class="parameter">count</replaceable>
394</screen>
395   The <replaceable class="parameter">count</replaceable> is the number
396   of rows copied.
397  </para>
398
399  <note>
400   <para>
401    <application>psql</application> will print this command tag only if the command
402    was not <literal>COPY ... TO STDOUT</literal>, or the
403    equivalent <application>psql</application> meta-command
404    <literal>\copy ... to stdout</literal>.  This is to prevent confusing the
405    command tag with the data that was just printed.
406   </para>
407  </note>
408 </refsect1>
409
410 <refsect1>
411  <title>Notes</title>
412
413   <para>
414    <command>COPY TO</command> can be used only with plain
415    tables, not views, and does not copy rows from child tables
416    or child partitions.  For example, <literal>COPY <replaceable
417    class="parameter">table</replaceable> TO</literal> copies
418    the same rows as <literal>SELECT * FROM ONLY <replaceable
419    class="parameter">table</replaceable></literal>.
420    The syntax <literal>COPY (SELECT * FROM <replaceable
421    class="parameter">table</replaceable>) TO ...</literal> can be used to
422    dump all of the rows in an inheritance hierarchy, partitioned table,
423    or view.
424   </para>
425
426   <para>
427    <command>COPY FROM</command> can be used with plain, foreign, or
428    partitioned tables or with views that have
429    <literal>INSTEAD OF INSERT</literal> triggers.
430   </para>
431
432   <para>
433    You must have select privilege on the table
434    whose values are read by <command>COPY TO</command>, and
435    insert privilege on the table into which values
436    are inserted by <command>COPY FROM</command>.  It is sufficient
437    to have column privileges on the column(s) listed in the command.
438   </para>
439
440   <para>
441    If row-level security is enabled for the table, the relevant
442    <command>SELECT</command> policies will apply to <literal>COPY
443    <replaceable class="parameter">table</replaceable> TO</literal> statements.
444    Currently, <command>COPY FROM</command> is not supported for tables
445    with row-level security. Use equivalent <command>INSERT</command>
446    statements instead.
447   </para>
448
449   <para>
450    Files named in a <command>COPY</command> command are read or written
451    directly by the server, not by the client application. Therefore,
452    they must reside on or be accessible to the database server machine,
453    not the client. They must be accessible to and readable or writable
454    by the <productname>PostgreSQL</productname> user (the user ID the
455    server runs as), not the client. Similarly,
456    the command specified with <literal>PROGRAM</literal> is executed directly
457    by the server, not by the client application, must be executable by the
458    <productname>PostgreSQL</productname> user.
459    <command>COPY</command> naming a file or command is only allowed to
460    database superusers or users who are granted one of the default roles
461    <literal>pg_read_server_files</literal>,
462    <literal>pg_write_server_files</literal>,
463    or <literal>pg_execute_server_program</literal>, since it allows reading
464    or writing any file or running a program that the server has privileges to
465    access.
466   </para>
467
468   <para>
469    Do not confuse <command>COPY</command> with the
470    <application>psql</application> instruction
471    <command><link linkend="app-psql-meta-commands-copy">\copy</link></command>. <command>\copy</command> invokes
472    <command>COPY FROM STDIN</command> or <command>COPY TO
473    STDOUT</command>, and then fetches/stores the data in a file
474    accessible to the <application>psql</application> client. Thus,
475    file accessibility and access rights depend on the client rather
476    than the server when <command>\copy</command> is used.
477   </para>
478
479   <para>
480    It is recommended that the file name used in <command>COPY</command>
481    always be specified as an absolute path. This is enforced by the
482    server in the case of <command>COPY TO</command>, but for
483    <command>COPY FROM</command> you do have the option of reading from
484    a file specified by a relative path. The path will be interpreted
485    relative to the working directory of the server process (normally
486    the cluster's data directory), not the client's working directory.
487   </para>
488
489   <para>
490    Executing a command with <literal>PROGRAM</literal> might be restricted
491    by the operating system's access control mechanisms, such as SELinux.
492   </para>
493
494   <para>
495    <command>COPY FROM</command> will invoke any triggers and check
496    constraints on the destination table. However, it will not invoke rules.
497   </para>
498
499   <para>
500    For identity columns, the <command>COPY FROM</command> command will always
501    write the column values provided in the input data, like
502    the <command>INSERT</command> option <literal>OVERRIDING SYSTEM
503    VALUE</literal>.
504   </para>
505
506   <para>
507    <command>COPY</command> input and output is affected by
508    <varname>DateStyle</varname>. To ensure portability to other
509    <productname>PostgreSQL</productname> installations that might use
510    non-default <varname>DateStyle</varname> settings,
511    <varname>DateStyle</varname> should be set to <literal>ISO</literal> before
512    using <command>COPY TO</command>.  It is also a good idea to avoid dumping
513    data with <varname>IntervalStyle</varname> set to
514    <literal>sql_standard</literal>, because negative interval values might be
515    misinterpreted by a server that has a different setting for
516    <varname>IntervalStyle</varname>.
517   </para>
518
519   <para>
520    Input data is interpreted according to <literal>ENCODING</literal>
521    option or the current client encoding, and output data is encoded
522    in <literal>ENCODING</literal> or the current client encoding, even
523    if the data does not pass through the client but is read from or
524    written to a file directly by the server.
525   </para>
526
527   <para>
528    <command>COPY</command> stops operation at the first error. This
529    should not lead to problems in the event of a <command>COPY
530    TO</command>, but the target table will already have received
531    earlier rows in a <command>COPY FROM</command>. These rows will not
532    be visible or accessible, but they still occupy disk space. This might
533    amount to a considerable amount of wasted disk space if the failure
534    happened well into a large copy operation. You might wish to invoke
535    <command>VACUUM</command> to recover the wasted space.
536   </para>
537
538   <para>
539    <literal>FORCE_NULL</literal> and <literal>FORCE_NOT_NULL</literal> can be used
540    simultaneously on the same column. This results in converting quoted
541    null strings to null values and unquoted null strings to empty strings.
542   </para>
543
544 </refsect1>
545
546 <refsect1>
547  <title>File Formats</title>
548
549  <refsect2>
550   <title>Text Format</title>
551
552   <para>
553    When the <literal>text</literal> format is used,
554    the data read or written is a text file with one line per table row.
555    Columns in a row are separated by the delimiter character.
556    The column values themselves are strings generated by the
557    output function, or acceptable to the input function, of each
558    attribute's data type.  The specified null string is used in
559    place of columns that are null.
560    <command>COPY FROM</command> will raise an error if any line of the
561    input file contains more or fewer columns than are expected.
562   </para>
563
564   <para>
565    End of data can be represented by a single line containing just
566    backslash-period (<literal>\.</literal>).  An end-of-data marker is
567    not necessary when reading from a file, since the end of file
568    serves perfectly well; it is needed only when copying data to or from
569    client applications using pre-3.0 client protocol.
570   </para>
571
572   <para>
573    Backslash characters (<literal>\</literal>) can be used in the
574    <command>COPY</command> data to quote data characters that might
575    otherwise be taken as row or column delimiters. In particular, the
576    following characters <emphasis>must</emphasis> be preceded by a backslash if
577    they appear as part of a column value: backslash itself,
578    newline, carriage return, and the current delimiter character.
579   </para>
580
581   <para>
582    The specified null string is sent by <command>COPY TO</command> without
583    adding any backslashes; conversely, <command>COPY FROM</command> matches
584    the input against the null string before removing backslashes.  Therefore,
585    a null string such as <literal>\N</literal> cannot be confused with
586    the actual data value <literal>\N</literal> (which would be represented
587    as <literal>\\N</literal>).
588   </para>
589
590   <para>
591    The following special backslash sequences are recognized by
592    <command>COPY FROM</command>:
593
594   <informaltable>
595    <tgroup cols="2">
596     <thead>
597      <row>
598       <entry>Sequence</entry>
599       <entry>Represents</entry>
600      </row>
601     </thead>
602
603     <tbody>
604      <row>
605       <entry><literal>\b</literal></entry>
606       <entry>Backspace (ASCII 8)</entry>
607      </row>
608      <row>
609       <entry><literal>\f</literal></entry>
610       <entry>Form feed (ASCII 12)</entry>
611      </row>
612      <row>
613       <entry><literal>\n</literal></entry>
614       <entry>Newline (ASCII 10)</entry>
615      </row>
616      <row>
617       <entry><literal>\r</literal></entry>
618       <entry>Carriage return (ASCII 13)</entry>
619      </row>
620      <row>
621       <entry><literal>\t</literal></entry>
622       <entry>Tab (ASCII 9)</entry>
623      </row>
624      <row>
625       <entry><literal>\v</literal></entry>
626       <entry>Vertical tab (ASCII 11)</entry>
627      </row>
628      <row>
629       <entry><literal>\</literal><replaceable>digits</replaceable></entry>
630       <entry>Backslash followed by one to three octal digits specifies
631       the byte with that numeric code</entry>
632      </row>
633      <row>
634       <entry><literal>\x</literal><replaceable>digits</replaceable></entry>
635       <entry>Backslash <literal>x</literal> followed by one or two hex digits specifies
636       the byte with that numeric code</entry>
637      </row>
638     </tbody>
639    </tgroup>
640   </informaltable>
641
642    Presently, <command>COPY TO</command> will never emit an octal or
643    hex-digits backslash sequence, but it does use the other sequences
644    listed above for those control characters.
645   </para>
646
647   <para>
648    Any other backslashed character that is not mentioned in the above table
649    will be taken to represent itself.  However, beware of adding backslashes
650    unnecessarily, since that might accidentally produce a string matching the
651    end-of-data marker (<literal>\.</literal>) or the null string (<literal>\N</literal> by
652    default).  These strings will be recognized before any other backslash
653    processing is done.
654   </para>
655
656   <para>
657    It is strongly recommended that applications generating <command>COPY</command> data convert
658    data newlines and carriage returns to the <literal>\n</literal> and
659    <literal>\r</literal> sequences respectively.  At present it is
660    possible to represent a data carriage return by a backslash and carriage
661    return, and to represent a data newline by a backslash and newline.
662    However, these representations might not be accepted in future releases.
663    They are also highly vulnerable to corruption if the <command>COPY</command> file is
664    transferred across different machines (for example, from Unix to Windows
665    or vice versa).
666   </para>
667
668   <para>
669     All backslash sequences are interpreted after encoding conversion.
670     The bytes specified with the octal and hex-digit backslash sequences must
671     form valid characters in the database encoding.
672   </para>
673
674   <para>
675    <command>COPY TO</command> will terminate each row with a Unix-style
676    newline (<quote><literal>\n</literal></quote>).  Servers running on Microsoft Windows instead
677    output carriage return/newline (<quote><literal>\r\n</literal></quote>), but only for
678    <command>COPY</command> to a server file; for consistency across platforms,
679    <command>COPY TO STDOUT</command> always sends <quote><literal>\n</literal></quote>
680    regardless of server platform.
681    <command>COPY FROM</command> can handle lines ending with newlines,
682    carriage returns, or carriage return/newlines.  To reduce the risk of
683    error due to un-backslashed newlines or carriage returns that were
684    meant as data, <command>COPY FROM</command> will complain if the line
685    endings in the input are not all alike.
686   </para>
687  </refsect2>
688
689  <refsect2>
690   <title>CSV Format</title>
691
692   <para>
693    This format option is used for importing and exporting the Comma
694    Separated Value (<literal>CSV</literal>) file format used by many other
695    programs, such as spreadsheets. Instead of the escaping rules used by
696    <productname>PostgreSQL</productname>'s standard text format, it
697    produces and recognizes the common CSV escaping mechanism.
698   </para>
699
700   <para>
701    The values in each record are separated by the <literal>DELIMITER</literal>
702    character. If the value contains the delimiter character, the
703    <literal>QUOTE</literal> character, the <literal>NULL</literal> string, a carriage
704    return, or line feed character, then the whole value is prefixed and
705    suffixed by the <literal>QUOTE</literal> character, and any occurrence
706    within the value of a <literal>QUOTE</literal> character or the
707    <literal>ESCAPE</literal> character is preceded by the escape character.
708    You can also use <literal>FORCE_QUOTE</literal> to force quotes when outputting
709    non-<literal>NULL</literal> values in specific columns.
710   </para>
711
712   <para>
713    The <literal>CSV</literal> format has no standard way to distinguish a
714    <literal>NULL</literal> value from an empty string.
715    <productname>PostgreSQL</productname>'s <command>COPY</command> handles this by quoting.
716    A <literal>NULL</literal> is output as the <literal>NULL</literal> parameter string
717    and is not quoted, while a non-<literal>NULL</literal> value matching the
718    <literal>NULL</literal> parameter string is quoted.  For example, with the
719    default settings, a <literal>NULL</literal> is written as an unquoted empty
720    string, while an empty string data value is written with double quotes
721    (<literal>""</literal>). Reading values follows similar rules. You can
722    use <literal>FORCE_NOT_NULL</literal> to prevent <literal>NULL</literal> input
723    comparisons for specific columns. You can also use
724    <literal>FORCE_NULL</literal> to convert quoted null string data values to
725    <literal>NULL</literal>.
726   </para>
727
728   <para>
729    Because backslash is not a special character in the <literal>CSV</literal>
730    format, <literal>\.</literal>, the end-of-data marker, could also appear
731    as a data value.  To avoid any misinterpretation, a <literal>\.</literal>
732    data value appearing as a lone entry on a line is automatically
733    quoted on output, and on input, if quoted, is not interpreted as the
734    end-of-data marker.  If you are loading a file created by another
735    application that has a single unquoted column and might have a
736    value of <literal>\.</literal>, you might need to quote that value in the
737    input file.
738   </para>
739
740   <note>
741    <para>
742     In <literal>CSV</literal> format, all characters are significant. A quoted value
743     surrounded by white space, or any characters other than
744     <literal>DELIMITER</literal>, will include those characters. This can cause
745     errors if you import data from a system that pads <literal>CSV</literal>
746     lines with white space out to some fixed width. If such a situation
747     arises you might need to preprocess the <literal>CSV</literal> file to remove
748     the trailing white space, before importing the data into
749     <productname>PostgreSQL</productname>.
750    </para>
751   </note>
752
753   <note>
754    <para>
755     CSV format will both recognize and produce CSV files with quoted
756     values containing embedded carriage returns and line feeds. Thus
757     the files are not strictly one line per table row like text-format
758     files.
759    </para>
760   </note>
761
762   <note>
763    <para>
764     Many programs produce strange and occasionally perverse CSV files,
765     so the file format is more a convention than a standard. Thus you
766     might encounter some files that cannot be imported using this
767     mechanism, and <command>COPY</command> might produce files that other
768     programs cannot process.
769    </para>
770   </note>
771
772  </refsect2>
773
774  <refsect2>
775   <title>Binary Format</title>
776
777   <para>
778    The <literal>binary</literal> format option causes all data to be
779    stored/read as binary format rather than as text.  It is
780    somewhat faster than the text and <literal>CSV</literal> formats,
781    but a binary-format file is less portable across machine architectures and
782    <productname>PostgreSQL</productname> versions.
783    Also, the binary format is very data type specific; for example
784    it will not work to output binary data from a <type>smallint</type> column
785    and read it into an <type>integer</type> column, even though that would work
786    fine in text format.
787   </para>
788
789   <para>
790    The <literal>binary</literal> file format consists
791    of a file header, zero or more tuples containing the row data, and
792    a file trailer.  Headers and data are in network byte order.
793   </para>
794
795   <note>
796    <para>
797     <productname>PostgreSQL</productname> releases before 7.4 used a
798     different binary file format.
799    </para>
800   </note>
801
802   <refsect3>
803    <title>File Header</title>
804
805    <para>
806     The file header consists of 15 bytes of fixed fields, followed
807     by a variable-length header extension area.  The fixed fields are:
808
809    <variablelist>
810     <varlistentry>
811      <term>Signature</term>
812      <listitem>
813       <para>
81411-byte sequence <literal>PGCOPY\n\377\r\n\0</literal> &mdash; note that the zero byte
815is a required part of the signature.  (The signature is designed to allow
816easy identification of files that have been munged by a non-8-bit-clean
817transfer.  This signature will be changed by end-of-line-translation
818filters, dropped zero bytes, dropped high bits, or parity changes.)
819       </para>
820      </listitem>
821     </varlistentry>
822
823     <varlistentry>
824      <term>Flags field</term>
825      <listitem>
826       <para>
82732-bit integer bit mask to denote important aspects of the file format. Bits
828are numbered from 0 (<acronym>LSB</acronym>) to 31 (<acronym>MSB</acronym>).  Note that
829this field is stored in network byte order (most significant byte first),
830as are all the integer fields used in the file format.  Bits
83116&ndash;31 are reserved to denote critical file format issues; a reader
832should abort if it finds an unexpected bit set in this range. Bits 0&ndash;15
833are reserved to signal backwards-compatible format issues; a reader
834should simply ignore any unexpected bits set in this range. Currently
835only one flag bit is defined, and the rest must be zero:
836        <variablelist>
837         <varlistentry>
838          <term>Bit 16</term>
839          <listitem>
840           <para>
841            If 1, OIDs are included in the data; if 0, not. Oid system columns
842            are not supported in <productname>PostgreSQL</productname>
843            anymore, but the format still contains the indicator.
844           </para>
845          </listitem>
846         </varlistentry>
847        </variablelist></para>
848      </listitem>
849     </varlistentry>
850
851     <varlistentry>
852      <term>Header extension area length</term>
853      <listitem>
854       <para>
85532-bit integer, length in bytes of remainder of header, not including self.
856Currently, this is zero, and the first tuple follows
857immediately.  Future changes to the format might allow additional data
858to be present in the header.  A reader should silently skip over any header
859extension data it does not know what to do with.
860       </para>
861      </listitem>
862     </varlistentry>
863    </variablelist>
864    </para>
865
866    <para>
867The header extension area is envisioned to contain a sequence of
868self-identifying chunks.  The flags field is not intended to tell readers
869what is in the extension area.  Specific design of header extension contents
870is left for a later release.
871    </para>
872
873    <para>
874     This design allows for both backwards-compatible header additions (add
875     header extension chunks, or set low-order flag bits) and
876     non-backwards-compatible changes (set high-order flag bits to signal such
877     changes, and add supporting data to the extension area if needed).
878    </para>
879   </refsect3>
880
881   <refsect3>
882    <title>Tuples</title>
883    <para>
884Each tuple begins with a 16-bit integer count of the number of fields in the
885tuple.  (Presently, all tuples in a table will have the same count, but that
886might not always be true.)  Then, repeated for each field in the tuple, there
887is a 32-bit length word followed by that many bytes of field data.  (The
888length word does not include itself, and can be zero.)  As a special case,
889-1 indicates a NULL field value.  No value bytes follow in the NULL case.
890    </para>
891
892    <para>
893There is no alignment padding or any other extra data between fields.
894    </para>
895
896    <para>
897Presently, all data values in a binary-format file are
898assumed to be in binary format (format code one).  It is anticipated that a
899future extension might add a header field that allows per-column format codes
900to be specified.
901    </para>
902
903    <para>
904To determine the appropriate binary format for the actual tuple data you
905should consult the <productname>PostgreSQL</productname> source, in
906particular the <function>*send</function> and <function>*recv</function> functions for
907each column's data type (typically these functions are found in the
908<filename>src/backend/utils/adt/</filename> directory of the source
909distribution).
910    </para>
911
912    <para>
913If OIDs are included in the file, the OID field immediately follows the
914field-count word.  It is a normal field except that it's not included in the
915field-count.  Note that oid system columns are not supported in current
916versions of <productname>PostgreSQL</productname>.
917    </para>
918   </refsect3>
919
920   <refsect3>
921    <title>File Trailer</title>
922
923    <para>
924     The file trailer consists of a 16-bit integer word containing -1.  This
925     is easily distinguished from a tuple's field-count word.
926    </para>
927
928    <para>
929     A reader should report an error if a field-count word is neither -1
930     nor the expected number of columns.  This provides an extra
931     check against somehow getting out of sync with the data.
932    </para>
933   </refsect3>
934  </refsect2>
935 </refsect1>
936
937 <refsect1>
938  <title>Examples</title>
939
940  <para>
941   The following example copies a table to the client
942   using the vertical bar (<literal>|</literal>) as the field delimiter:
943<programlisting>
944COPY country TO STDOUT (DELIMITER '|');
945</programlisting>
946  </para>
947
948  <para>
949   To copy data from a file into the <literal>country</literal> table:
950<programlisting>
951COPY country FROM '/usr1/proj/bray/sql/country_data';
952</programlisting>
953  </para>
954
955  <para>
956   To copy into a file just the countries whose names start with 'A':
957<programlisting>
958COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy';
959</programlisting>
960  </para>
961
962  <para>
963   To copy into a compressed file, you can pipe the output through an external
964   compression program:
965<programlisting>
966COPY country TO PROGRAM 'gzip > /usr1/proj/bray/sql/country_data.gz';
967</programlisting>
968  </para>
969
970  <para>
971   Here is a sample of data suitable for copying into a table from
972   <literal>STDIN</literal>:
973<programlisting>
974AF      AFGHANISTAN
975AL      ALBANIA
976DZ      ALGERIA
977ZM      ZAMBIA
978ZW      ZIMBABWE
979</programlisting>
980   Note that the white space on each line is actually a tab character.
981  </para>
982
983  <para>
984   The following is the same data, output in binary format.
985   The data is shown after filtering through the
986   Unix utility <command>od -c</command>. The table has three columns;
987   the first has type <type>char(2)</type>, the second has type <type>text</type>,
988   and the third has type <type>integer</type>. All the rows have a null value
989   in the third column.
990<programlisting>
9910000000   P   G   C   O   P   Y  \n 377  \r  \n  \0  \0  \0  \0  \0  \0
9920000020  \0  \0  \0  \0 003  \0  \0  \0 002   A   F  \0  \0  \0 013   A
9930000040   F   G   H   A   N   I   S   T   A   N 377 377 377 377  \0 003
9940000060  \0  \0  \0 002   A   L  \0  \0  \0 007   A   L   B   A   N   I
9950000100   A 377 377 377 377  \0 003  \0  \0  \0 002   D   Z  \0  \0  \0
9960000120 007   A   L   G   E   R   I   A 377 377 377 377  \0 003  \0  \0
9970000140  \0 002   Z   M  \0  \0  \0 006   Z   A   M   B   I   A 377 377
9980000160 377 377  \0 003  \0  \0  \0 002   Z   W  \0  \0  \0  \b   Z   I
9990000200   M   B   A   B   W   E 377 377 377 377 377 377
1000</programlisting></para>
1001 </refsect1>
1002
1003 <refsect1>
1004  <title>Compatibility</title>
1005
1006  <para>
1007   There is no <command>COPY</command> statement in the SQL standard.
1008  </para>
1009
1010  <para>
1011   The following syntax was used before <productname>PostgreSQL</productname>
1012   version 9.0 and is still supported:
1013
1014<synopsis>
1015COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
1016    FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
1017    [ [ WITH ]
1018          [ BINARY ]
1019          [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ]
1020          [ NULL [ AS ] '<replaceable class="parameter">null_string</replaceable>' ]
1021          [ CSV [ HEADER ]
1022                [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ]
1023                [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ]
1024                [ FORCE NOT NULL <replaceable class="parameter">column_name</replaceable> [, ...] ] ] ]
1025
1026COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
1027    TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
1028    [ [ WITH ]
1029          [ BINARY ]
1030          [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter_character</replaceable>' ]
1031          [ NULL [ AS ] '<replaceable class="parameter">null_string</replaceable>' ]
1032          [ CSV [ HEADER ]
1033                [ QUOTE [ AS ] '<replaceable class="parameter">quote_character</replaceable>' ]
1034                [ ESCAPE [ AS ] '<replaceable class="parameter">escape_character</replaceable>' ]
1035                [ FORCE QUOTE { <replaceable class="parameter">column_name</replaceable> [, ...] | * } ] ] ]
1036</synopsis>
1037
1038   Note that in this syntax, <literal>BINARY</literal> and <literal>CSV</literal> are
1039   treated as independent keywords, not as arguments of a <literal>FORMAT</literal>
1040   option.
1041  </para>
1042
1043  <para>
1044   The following syntax was used before <productname>PostgreSQL</productname>
1045   version 7.3 and is still supported:
1046
1047<synopsis>
1048COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable>
1049    FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
1050    [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ]
1051    [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ]
1052
1053COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable>
1054    TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
1055    [ [USING] DELIMITERS '<replaceable class="parameter">delimiter_character</replaceable>' ]
1056    [ WITH NULL AS '<replaceable class="parameter">null_string</replaceable>' ]
1057</synopsis></para>
1058 </refsect1>
1059</refentry>
1060