Skip to content

Commit e0bd601

Browse files
committed
Rearrange coding in COPY so that the expansible string buffer for data being
read is reused for successive attributes, instead of being deleted and recreated from scratch for each value read in. This reduces palloc/pfree overhead a lot. COPY IN still seems to be noticeably slower than it was in 6.5 --- we need to figure out why. This change takes care of the only major performance loss I can see in copy.c itself, so the remaining performance problem is at a lower level somewhere.
1 parent d00391e commit e0bd601

File tree

1 file changed

+95
-73
lines changed

1 file changed

+95
-73
lines changed

src/backend/commands/copy.c

Lines changed: 95 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*
77
*
88
* IDENTIFICATION
9-
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.95 2000/01/14 22:11:33 petere Exp $
9+
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.96 2000/01/16 21:37:50 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -66,6 +66,21 @@ static int CountTuples(Relation relation);
6666
static int lineno;
6767
static bool fe_eof;
6868

69+
/*
70+
* These static variables are used to avoid incurring overhead for each
71+
* attribute processed. attribute_buf is reused on each CopyReadAttribute
72+
* call to hold the string being read in. Under normal use it will soon
73+
* grow to a suitable size, and then we will avoid palloc/pfree overhead
74+
* for subsequent attributes. Note that CopyReadAttribute returns a pointer
75+
* to attribute_buf's data buffer!
76+
* encoding, if needed, can be set once at the start of the copy operation.
77+
*/
78+
static StringInfoData attribute_buf;
79+
#ifdef MULTIBYTE
80+
static int encoding;
81+
#endif
82+
83+
6984
/*
7085
* Internal communications functions
7186
*/
@@ -276,78 +291,88 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
276291
"directly to or from a file. Anyone can COPY to stdout or "
277292
"from stdin. Psql's \\copy command also works for anyone.");
278293

279-
if (from)
280-
{ /* copy from file to database */
281-
if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
282-
elog(ERROR, "You can't change sequence relation %s", relname);
283-
if (pipe)
294+
/*
295+
* Set up variables to avoid per-attribute overhead.
296+
*/
297+
initStringInfo(&attribute_buf);
298+
#ifdef MULTIBYTE
299+
encoding = pg_get_client_encoding();
300+
#endif
301+
302+
if (from)
303+
{ /* copy from file to database */
304+
if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
305+
elog(ERROR, "You can't change sequence relation %s", relname);
306+
if (pipe)
307+
{
308+
if (IsUnderPostmaster)
284309
{
285-
if (IsUnderPostmaster)
286-
{
287-
ReceiveCopyBegin();
288-
fp = NULL;
289-
}
290-
else
291-
fp = stdin;
310+
ReceiveCopyBegin();
311+
fp = NULL;
292312
}
293313
else
294-
{
314+
fp = stdin;
315+
}
316+
else
317+
{
295318
#ifndef __CYGWIN32__
296-
fp = AllocateFile(filename, "r");
319+
fp = AllocateFile(filename, "r");
297320
#else
298-
fp = AllocateFile(filename, "rb");
321+
fp = AllocateFile(filename, "rb");
299322
#endif
300-
if (fp == NULL)
301-
elog(ERROR, "COPY command, running in backend with "
302-
"effective uid %d, could not open file '%s' for "
303-
"reading. Errno = %s (%d).",
304-
geteuid(), filename, strerror(errno), errno);
305-
}
306-
CopyFrom(rel, binary, oids, fp, delim, null_print);
323+
if (fp == NULL)
324+
elog(ERROR, "COPY command, running in backend with "
325+
"effective uid %d, could not open file '%s' for "
326+
"reading. Errno = %s (%d).",
327+
geteuid(), filename, strerror(errno), errno);
307328
}
308-
else
309-
{ /* copy from database to file */
310-
if (pipe)
329+
CopyFrom(rel, binary, oids, fp, delim, null_print);
330+
}
331+
else
332+
{ /* copy from database to file */
333+
if (pipe)
334+
{
335+
if (IsUnderPostmaster)
311336
{
312-
if (IsUnderPostmaster)
313-
{
314-
SendCopyBegin();
315-
pq_startcopyout();
316-
fp = NULL;
317-
}
318-
else
319-
fp = stdout;
337+
SendCopyBegin();
338+
pq_startcopyout();
339+
fp = NULL;
320340
}
321341
else
322-
{
323-
mode_t oumask; /* Pre-existing umask value */
342+
fp = stdout;
343+
}
344+
else
345+
{
346+
mode_t oumask; /* Pre-existing umask value */
324347

325348
oumask = umask((mode_t) 022);
326349
#ifndef __CYGWIN32__
327-
fp = AllocateFile(filename, "w");
350+
fp = AllocateFile(filename, "w");
328351
#else
329-
fp = AllocateFile(filename, "wb");
352+
fp = AllocateFile(filename, "wb");
330353
#endif
331-
umask(oumask);
332-
if (fp == NULL)
333-
elog(ERROR, "COPY command, running in backend with "
334-
"effective uid %d, could not open file '%s' for "
335-
"writing. Errno = %s (%d).",
336-
geteuid(), filename, strerror(errno), errno);
337-
}
338-
CopyTo(rel, binary, oids, fp, delim, null_print);
339-
}
340-
if (!pipe)
341-
{
342-
FreeFile(fp);
343-
}
344-
else if (!from)
345-
{
346-
if (!binary)
347-
CopySendData("\\.\n", 3, fp);
348-
if (IsUnderPostmaster)
349-
pq_endcopyout(false);
354+
umask(oumask);
355+
if (fp == NULL)
356+
elog(ERROR, "COPY command, running in backend with "
357+
"effective uid %d, could not open file '%s' for "
358+
"writing. Errno = %s (%d).",
359+
geteuid(), filename, strerror(errno), errno);
350360
}
361+
CopyTo(rel, binary, oids, fp, delim, null_print);
362+
}
363+
364+
if (!pipe)
365+
{
366+
FreeFile(fp);
367+
}
368+
else if (!from)
369+
{
370+
if (!binary)
371+
CopySendData("\\.\n", 3, fp);
372+
if (IsUnderPostmaster)
373+
pq_endcopyout(false);
374+
}
375+
pfree(attribute_buf.data);
351376

352377
/*
353378
* Close the relation. If reading, we can release the AccessShareLock
@@ -717,7 +742,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null
717742
loaded_oid = oidin(string);
718743
if (loaded_oid < BootstrapObjectIdData)
719744
elog(ERROR, "COPY TEXT: Invalid Oid. line: %d", lineno);
720-
pfree(string);
721745
}
722746
}
723747
for (i = 0; i < attr_count && !done; i++)
@@ -727,8 +751,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null
727751
{
728752
values[i] = PointerGetDatum(NULL);
729753
nulls[i] = 'n';
730-
if (string)
731-
pfree(string);
732754
}
733755
else if (string == NULL)
734756
done = 1;
@@ -745,7 +767,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null
745767
if (!PointerIsValid(values[i]) &&
746768
!(rel->rd_att->attrs[i]->attbyval))
747769
elog(ERROR, "copy from line %d: Bad file format", lineno);
748-
pfree(string);
749770
}
750771
}
751772
if (!done)
@@ -1115,9 +1136,10 @@ CopyReadNewline(FILE *fp, int *newline)
11151136
/*
11161137
* Read the value of a single attribute.
11171138
*
1118-
* Result is either a palloc'd string, or NULL (if EOF or a null attribute).
1119-
* *isnull is set true if a null attribute, else false.
1139+
* Result is either a string, or NULL (if EOF or a null attribute).
1140+
* Note that the caller should not pfree the string!
11201141
*
1142+
* *isnull is set true if a null attribute, else false.
11211143
* delim is the string of acceptable delimiter character(s).
11221144
* *newline remembers whether we've seen a newline ending this tuple.
11231145
* null_print says how NULL values are represented
@@ -1126,19 +1148,20 @@ CopyReadNewline(FILE *fp, int *newline)
11261148
static char *
11271149
CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_print)
11281150
{
1129-
StringInfoData attribute_buf;
11301151
char c;
11311152
#ifdef MULTIBYTE
11321153
int mblen;
1133-
int encoding;
11341154
unsigned char s[2];
11351155
char *cvt;
11361156
int j;
11371157

1138-
encoding = pg_get_client_encoding();
11391158
s[1] = 0;
11401159
#endif
11411160

1161+
/* reset attribute_buf to empty */
1162+
attribute_buf.len = 0;
1163+
attribute_buf.data[0] = '\0';
1164+
11421165
/* if last delimiter was a newline return a NULL attribute */
11431166
if (*newline)
11441167
{
@@ -1148,8 +1171,6 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_
11481171

11491172
*isnull = (bool) false; /* set default */
11501173

1151-
initStringInfo(&attribute_buf);
1152-
11531174
if (CopyGetEof(fp))
11541175
goto endOfFile;
11551176

@@ -1265,17 +1286,20 @@ CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_
12651286
attribute_buf.len);
12661287
if (cvt != attribute_buf.data)
12671288
{
1268-
pfree(attribute_buf.data);
1269-
return cvt;
1289+
/* transfer converted data back to attribute_buf */
1290+
attribute_buf.len = 0;
1291+
attribute_buf.data[0] = '\0';
1292+
appendBinaryStringInfo(&attribute_buf, cvt, strlen(cvt));
1293+
pfree(cvt);
12701294
}
12711295
#endif
1296+
12721297
if (strcmp(attribute_buf.data, null_print)==0)
12731298
*isnull = true;
12741299

12751300
return attribute_buf.data;
12761301

12771302
endOfFile:
1278-
pfree(attribute_buf.data);
12791303
return NULL;
12801304
}
12811305

@@ -1286,13 +1310,11 @@ CopyAttributeOut(FILE *fp, char *server_string, char *delim)
12861310
char c;
12871311
#ifdef MULTIBYTE
12881312
char *string_start;
1289-
int encoding;
12901313
int mblen;
12911314
int i;
12921315
#endif
12931316

12941317
#ifdef MULTIBYTE
1295-
encoding = pg_get_client_encoding();
12961318
string = (char *) pg_server_to_client((unsigned char *) server_string,
12971319
strlen(server_string));
12981320
string_start = string;

0 commit comments

Comments
 (0)