Skip to content

Commit 072132f

Browse files
committed
Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the header line from a CSV or text file. It is possible to load by mistake a file that matches the expected format, for example, if two text columns have been swapped, resulting in garbage in the database. This adds a new option value HEADER MATCH that checks the column names in the header line against the actual column names and errors out if they do not match. Author: Rémi Lapeyre <remi.lapeyre@lenstra.fr> Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Discussion: https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
1 parent edcedcc commit 072132f

File tree

8 files changed

+210
-8
lines changed

8 files changed

+210
-8
lines changed

contrib/file_fdw/expected/file_fdw.out

+18-1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,21 @@ CREATE FOREIGN TABLE agg_bad (
113113
) SERVER file_server
114114
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
115115
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
116+
-- test header matching
117+
\set filename :abs_srcdir '/data/list1.csv'
118+
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
119+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
120+
SELECT * FROM header_match;
121+
1 | foo
122+
---+-----
123+
1 | bar
124+
(1 row)
125+
126+
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
127+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
128+
SELECT * FROM header_doesnt_match; -- ERROR
129+
ERROR: column name mismatch in header line field 1: got "1", expected "a"
130+
CONTEXT: COPY header_doesnt_match, line 1: "1,foo"
116131
-- per-column options tests
117132
\set filename :abs_srcdir '/data/text.csv'
118133
CREATE FOREIGN TABLE text_csv (
@@ -464,12 +479,14 @@ SET ROLE regress_file_fdw_superuser;
464479
-- cleanup
465480
RESET ROLE;
466481
DROP EXTENSION file_fdw CASCADE;
467-
NOTICE: drop cascades to 7 other objects
482+
NOTICE: drop cascades to 9 other objects
468483
DETAIL: drop cascades to server file_server
469484
drop cascades to user mapping for regress_file_fdw_superuser on server file_server
470485
drop cascades to user mapping for regress_no_priv_user on server file_server
471486
drop cascades to foreign table agg_text
472487
drop cascades to foreign table agg_csv
473488
drop cascades to foreign table agg_bad
489+
drop cascades to foreign table header_match
490+
drop cascades to foreign table header_doesnt_match
474491
drop cascades to foreign table text_csv
475492
DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;

contrib/file_fdw/sql/file_fdw.sql

+9
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,15 @@ CREATE FOREIGN TABLE agg_bad (
103103
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
104104
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
105105
106+
-- test header matching
107+
\set filename :abs_srcdir '/data/list1.csv'
108+
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
109+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
110+
SELECT * FROM header_match;
111+
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
112+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
113+
SELECT * FROM header_doesnt_match; -- ERROR
114+
106115
-- per-column options tests
107116
\set filename :abs_srcdir '/data/text.csv'
108117
CREATE FOREIGN TABLE text_csv (

doc/src/sgml/ref/copy.sgml

+6-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
3636
FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
3737
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
3838
NULL '<replaceable class="parameter">null_string</replaceable>'
39-
HEADER [ <replaceable class="parameter">boolean</replaceable> ]
39+
HEADER [ <replaceable class="parameter">boolean</replaceable> | <literal>match</literal> ]
4040
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
4141
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
4242
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -276,7 +276,11 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
276276
<para>
277277
Specifies that the file contains a header line with the names of each
278278
column in the file. On output, the first line contains the column
279-
names from the table, and on input, the first line is ignored.
279+
names from the table. On input, the first line is discarded when this
280+
option is set to <literal>true</literal> (or equivalent Boolean value).
281+
If this option is set to <literal>match</literal>, the number and names
282+
of the columns in the header line must match the actual column names of
283+
the table, otherwise an error is raised.
280284
This option is not allowed when using <literal>binary</literal> format.
281285
</para>
282286
</listitem>

src/backend/commands/copy.c

+59-1
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,64 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
313313
table_close(rel, NoLock);
314314
}
315315

316+
/*
317+
* Extract a CopyHeaderChoice value from a DefElem. This is like
318+
* defGetBoolean() but also accepts the special value "match".
319+
*/
320+
static CopyHeaderChoice
321+
defGetCopyHeaderChoice(DefElem *def)
322+
{
323+
/*
324+
* If no parameter given, assume "true" is meant.
325+
*/
326+
if (def->arg == NULL)
327+
return COPY_HEADER_TRUE;
328+
329+
/*
330+
* Allow 0, 1, "true", "false", "on", "off", or "match".
331+
*/
332+
switch (nodeTag(def->arg))
333+
{
334+
case T_Integer:
335+
switch (intVal(def->arg))
336+
{
337+
case 0:
338+
return COPY_HEADER_FALSE;
339+
case 1:
340+
return COPY_HEADER_TRUE;
341+
default:
342+
/* otherwise, error out below */
343+
break;
344+
}
345+
break;
346+
default:
347+
{
348+
char *sval = defGetString(def);
349+
350+
/*
351+
* The set of strings accepted here should match up with the
352+
* grammar's opt_boolean_or_string production.
353+
*/
354+
if (pg_strcasecmp(sval, "true") == 0)
355+
return COPY_HEADER_TRUE;
356+
if (pg_strcasecmp(sval, "false") == 0)
357+
return COPY_HEADER_FALSE;
358+
if (pg_strcasecmp(sval, "on") == 0)
359+
return COPY_HEADER_TRUE;
360+
if (pg_strcasecmp(sval, "off") == 0)
361+
return COPY_HEADER_FALSE;
362+
if (pg_strcasecmp(sval, "match") == 0)
363+
return COPY_HEADER_MATCH;
364+
}
365+
break;
366+
}
367+
ereport(ERROR,
368+
(errcode(ERRCODE_SYNTAX_ERROR),
369+
errmsg("%s requires a Boolean value or \"match\"",
370+
def->defname)));
371+
return COPY_HEADER_FALSE; /* keep compiler quiet */
372+
}
373+
316374
/*
317375
* Process the statement option list for COPY.
318376
*
@@ -394,7 +452,7 @@ ProcessCopyOptions(ParseState *pstate,
394452
if (header_specified)
395453
errorConflictingDefElem(defel, pstate);
396454
header_specified = true;
397-
opts_out->header_line = defGetBoolean(defel);
455+
opts_out->header_line = defGetCopyHeaderChoice(defel);
398456
}
399457
else if (strcmp(defel->defname, "quote") == 0)
400458
{

src/backend/commands/copyfromparse.c

+50-3
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
#include "miscadmin.h"
7373
#include "pgstat.h"
7474
#include "port/pg_bswap.h"
75+
#include "utils/builtins.h"
7576
#include "utils/memutils.h"
7677
#include "utils/rel.h"
7778

@@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
758759
/* only available for text or csv input */
759760
Assert(!cstate->opts.binary);
760761

761-
/* on input just throw the header line away */
762+
/* on input check that the header line is correct if needed */
762763
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
763764
{
765+
ListCell *cur;
766+
TupleDesc tupDesc;
767+
768+
tupDesc = RelationGetDescr(cstate->rel);
769+
764770
cstate->cur_lineno++;
765-
if (CopyReadLine(cstate))
766-
return false; /* done */
771+
done = CopyReadLine(cstate);
772+
773+
if (cstate->opts.header_line == COPY_HEADER_MATCH)
774+
{
775+
int fldnum;
776+
777+
if (cstate->opts.csv_mode)
778+
fldct = CopyReadAttributesCSV(cstate);
779+
else
780+
fldct = CopyReadAttributesText(cstate);
781+
782+
if (fldct != list_length(cstate->attnumlist))
783+
ereport(ERROR,
784+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
785+
errmsg("wrong number of fields in header line: field count is %d, expected %d",
786+
fldct, list_length(cstate->attnumlist))));
787+
788+
fldnum = 0;
789+
foreach(cur, cstate->attnumlist)
790+
{
791+
int attnum = lfirst_int(cur);
792+
char *colName = cstate->raw_fields[attnum - 1];
793+
Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
794+
795+
fldnum++;
796+
797+
if (colName == NULL)
798+
ereport(ERROR,
799+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
800+
errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
801+
fldnum, cstate->opts.null_print, NameStr(attr->attname))));
802+
803+
if (namestrcmp(&attr->attname, colName) != 0) {
804+
ereport(ERROR,
805+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
806+
errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
807+
fldnum, colName, NameStr(attr->attname))));
808+
}
809+
}
810+
}
811+
812+
if (done)
813+
return false;
767814
}
768815

769816
cstate->cur_lineno++;

src/include/commands/copy.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,17 @@
1919
#include "parser/parse_node.h"
2020
#include "tcop/dest.h"
2121

22+
/*
23+
* Represents whether a header line should be present, and whether it must
24+
* match the actual names (which implies "true").
25+
*/
26+
typedef enum CopyHeaderChoice
27+
{
28+
COPY_HEADER_FALSE = 0,
29+
COPY_HEADER_TRUE,
30+
COPY_HEADER_MATCH,
31+
} CopyHeaderChoice;
32+
2233
/*
2334
* A struct to hold COPY options, in a parsed form. All of these are related
2435
* to formatting, except for 'freeze', which doesn't really belong here, but
@@ -32,7 +43,7 @@ typedef struct CopyFormatOptions
3243
bool binary; /* binary format? */
3344
bool freeze; /* freeze rows on loading? */
3445
bool csv_mode; /* Comma Separated Value format? */
35-
bool header_line; /* header line? */
46+
CopyHeaderChoice header_line; /* header line? */
3647
char *null_print; /* NULL marker string (server encoding!) */
3748
int null_print_len; /* length of same */
3849
char *null_print_client; /* same converted to file encoding */

src/test/regress/expected/copy.out

+23
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,26 @@ INFO: progress: {"type": "FILE", "command": "COPY FROM", "relname": "tab_progre
176176
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
177177
drop function notice_after_tab_progress_reporting();
178178
drop table tab_progress_reporting;
179+
-- Test header matching feature
180+
create table header_copytest (
181+
a int,
182+
b int,
183+
c text
184+
);
185+
copy header_copytest from stdin with (header wrong_choice);
186+
ERROR: header requires a Boolean value or "match"
187+
copy header_copytest from stdin with (header match);
188+
copy header_copytest from stdin with (header match);
189+
ERROR: column name mismatch in header line field 3: got null value ("\N"), expected "c"
190+
CONTEXT: COPY header_copytest, line 1: "a b \N"
191+
copy header_copytest from stdin with (header match);
192+
ERROR: wrong number of fields in header line: field count is 2, expected 3
193+
CONTEXT: COPY header_copytest, line 1: "a b"
194+
copy header_copytest from stdin with (header match);
195+
ERROR: wrong number of fields in header line: field count is 4, expected 3
196+
CONTEXT: COPY header_copytest, line 1: "a b c d"
197+
copy header_copytest from stdin with (header match);
198+
ERROR: column name mismatch in header line field 3: got "d", expected "c"
199+
CONTEXT: COPY header_copytest, line 1: "a b d"
200+
copy header_copytest from stdin with (header match, format csv);
201+
drop table header_copytest;

src/test/regress/sql/copy.sql

+33
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,36 @@ copy tab_progress_reporting from :'filename'
197197
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
198198
drop function notice_after_tab_progress_reporting();
199199
drop table tab_progress_reporting;
200+
201+
-- Test header matching feature
202+
create table header_copytest (
203+
a int,
204+
b int,
205+
c text
206+
);
207+
copy header_copytest from stdin with (header wrong_choice);
208+
copy header_copytest from stdin with (header match);
209+
a b c
210+
1 2 foo
211+
\.
212+
copy header_copytest from stdin with (header match);
213+
a b \N
214+
1 2 foo
215+
\.
216+
copy header_copytest from stdin with (header match);
217+
a b
218+
1 2
219+
\.
220+
copy header_copytest from stdin with (header match);
221+
a b c d
222+
1 2 foo bar
223+
\.
224+
copy header_copytest from stdin with (header match);
225+
a b d
226+
1 2 foo
227+
\.
228+
copy header_copytest from stdin with (header match, format csv);
229+
a,b,c
230+
1,2,foo
231+
\.
232+
drop table header_copytest;

0 commit comments

Comments
 (0)