1 /*-------------------------------------------------------------------------
2  *
3  * copyfrom_internal.h
4  *	  Internal definitions for COPY FROM command.
5  *
6  *
7  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/commands/copyfrom_internal.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef COPYFROM_INTERNAL_H
15 #define COPYFROM_INTERNAL_H
16 
17 #include "commands/copy.h"
18 #include "commands/trigger.h"
19 
20 /*
21  * Represents the different source cases we need to worry about at
22  * the bottom level; the active case is kept in CopyFromStateData.copy_src.
23  */
24 typedef enum CopySource
25 {
26 	COPY_FILE,					/* from file (or a piped program) */
27 	COPY_FRONTEND,				/* from frontend */
28 	COPY_CALLBACK				/* from callback function (see data_source_cb) */
29 } CopySource;
30 
31 /*
32  *	Represents the end-of-line terminator type of the input (see eol_type)
33  */
34 typedef enum EolType
35 {
36 	EOL_UNKNOWN,				/* terminator type not known */
37 	EOL_NL,						/* newline (\n) */
38 	EOL_CR,						/* carriage return (\r) */
39 	EOL_CRNL					/* carriage return followed by newline (\r\n) */
40 } EolType;
41 
42 /*
43  * Represents the heap insert method (single-tuple or multi-insert) to be used during COPY FROM.
44  */
45 typedef enum CopyInsertMethod
46 {
47 	CIM_SINGLE,					/* use table_tuple_insert or fdw routine */
48 	CIM_MULTI,					/* always use table_multi_insert */
49 	CIM_MULTI_CONDITIONAL		/* use table_multi_insert only if valid; NOTE(review): validity test is not defined in this header */
50 } CopyInsertMethod;
51 
52 /*
53  * This struct contains all the state variables used throughout a COPY FROM
54  * operation.  The #defines interleaved with the fields are ordinary preprocessor macros, placed next to the fields they describe.
55  */
56 typedef struct CopyFromStateData
57 {
58 	/* low-level state data */
59 	CopySource	copy_src;		/* type of copy source */
60 	FILE	   *copy_file;		/* used if copy_src == COPY_FILE */
61 	StringInfo	fe_msgbuf;		/* used if copy_src == COPY_FRONTEND */
62 
63 	EolType		eol_type;		/* EOL type of input */
64 	int			file_encoding;	/* file or remote side's character encoding */
65 	bool		need_transcoding;	/* does file encoding differ from server? */
66 	Oid			conversion_proc;	/* encoding conversion function */
67 
68 	/* parameters from the COPY command */
69 	Relation	rel;			/* relation to copy to */
70 	List	   *attnumlist;		/* integer list of attnums to copy */
71 	char	   *filename;		/* filename, or NULL for STDIN */
72 	bool		is_program;		/* is 'filename' a program to popen? */
73 	copy_data_source_cb data_source_cb; /* function for reading data */
74 
75 	CopyFormatOptions opts;
76 	bool	   *convert_select_flags;	/* per-column CSV/TEXT convert-select (CS) flags */
77 	Node	   *whereClause;	/* WHERE condition (or NULL) */
78 
79 	/* these are just for error messages, see CopyFromErrorCallback */
80 	const char *cur_relname;	/* table name for error messages */
81 	uint64		cur_lineno;		/* line number for error messages */
82 	const char *cur_attname;	/* current att for error messages */
83 	const char *cur_attval;		/* current att value for error messages */
84 
85 	/*
86 	 * Working state
87 	 */
88 	MemoryContext copycontext;	/* per-copy execution context */
89 
90 	AttrNumber	num_defaults;	/* NOTE(review): presumably the length of defmap/defexprs -- confirm */
91 	FmgrInfo   *in_functions;	/* array of input functions, one per attribute */
92 	Oid		   *typioparams;	/* array of element types for in_functions */
93 	int		   *defmap;			/* array of default att numbers */
94 	ExprState **defexprs;		/* array of default att expressions */
95 	bool		volatile_defexprs;	/* are any of defexprs volatile? */
96 	List	   *range_table;	/* NOTE(review): contents not described in this header -- confirm against users */
97 	ExprState  *qualexpr;		/* NOTE(review): presumably the executable form of whereClause -- confirm */
98 
99 	TransitionCaptureState *transition_capture;
100 
101 	/*
102 	 * These variables are used to reduce overhead in COPY FROM.
103 	 *
104 	 * attribute_buf holds the separated, de-escaped text for each field of
105 	 * the current line.  The CopyReadAttributes functions return arrays of
106 	 * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
107 	 * the buffer on each cycle.
108 	 *
109 	 * In binary COPY FROM, attribute_buf holds the binary data for the
110 	 * current field, but the usage is otherwise similar.
111 	 */
112 	StringInfoData attribute_buf;
113 
114 	/* field raw data pointers found by COPY FROM */
115 
116 	int			max_fields;
117 	char	  **raw_fields;
118 
119 	/*
120 	 * Similarly, line_buf holds the whole input line being processed. The
121 	 * input cycle is first to read the whole line into line_buf, and then
122 	 * extract the individual attribute fields into attribute_buf.  line_buf
123 	 * is preserved unmodified so that we can display it in error messages if
124 	 * appropriate.  (In binary mode, line_buf is not used.)
125 	 */
126 	StringInfoData line_buf;
127 	bool		line_buf_valid; /* contains the row being processed? */
128 
129 	/*
130 	 * input_buf holds input data, already converted to database encoding.
131 	 *
132 	 * In text mode, CopyReadLine parses this data sufficiently to locate line
133 	 * boundaries, then transfers the data to line_buf. We guarantee that
134 	 * there is a \0 at input_buf[input_buf_len] at all times.  (In binary
135 	 * mode, input_buf is not used.)
136 	 *
137 	 * If encoding conversion is not required, input_buf is not a separate
138 	 * buffer but points directly to raw_buf.  In that case, input_buf_len
139 	 * tracks the number of bytes that have been verified as valid in the
140 	 * database encoding, and raw_buf_len is the total number of bytes stored
141 	 * in the buffer.
142 	 */
143 #define INPUT_BUF_SIZE 65536	/* we palloc INPUT_BUF_SIZE+1 bytes */
144 	char	   *input_buf;
145 	int			input_buf_index;	/* next byte to process */
146 	int			input_buf_len;	/* total # of bytes stored */
147 	bool		input_reached_eof;	/* true if we reached EOF */
148 	bool		input_reached_error;	/* true if a conversion error happened */
149 	/* Shorthand for number of unconsumed bytes available in input_buf */
150 #define INPUT_BUF_BYTES(cstate) ((cstate)->input_buf_len - (cstate)->input_buf_index)
151 
152 	/*
153 	 * raw_buf holds raw input data read from the data source (file or client
154 	 * connection), not yet converted to the database encoding.  Like with
155 	 * 'input_buf', we guarantee that there is a \0 at raw_buf[raw_buf_len].
156 	 */
157 #define RAW_BUF_SIZE 65536		/* we palloc RAW_BUF_SIZE+1 bytes */
158 	char	   *raw_buf;
159 	int			raw_buf_index;	/* next byte to process */
160 	int			raw_buf_len;	/* total # of bytes stored */
161 	bool		raw_reached_eof;	/* true if we reached EOF */
162 
163 	/* Shorthand for number of unconsumed bytes available in raw_buf */
164 #define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
165 
166 	uint64		bytes_processed;	/* number of bytes processed so far */
167 } CopyFromStateData;
168 
169 extern void ReceiveCopyBegin(CopyFromState cstate);	/* declaration only; NOTE(review): presumably starts receiving COPY data from the frontend -- confirm */
170 extern void ReceiveCopyBinaryHeader(CopyFromState cstate);	/* declaration only; NOTE(review): presumably reads the binary-format file header -- confirm */
171 
172 #endif							/* COPYFROM_INTERNAL_H */
173