Skip to content

Commit db2dcf5

Browse files
committed
Make some marginal performance improvements in reportErrorPosition(), which turns out to be a dominant part of the runtime in scenarios involving lots of parse-time warnings (such as Stephen Frost's example of an INSERT with a lot of backslash-containing strings). There's not a whole lot we can do about the character-at-a-time scanning, but we can at least avoid traversing the query twice.
1 parent 8e4fe3b commit db2dcf5

File tree

1 file changed

+86
-66
lines changed

1 file changed

+86
-66
lines changed

src/interfaces/libpq/fe-protocol3.c

Lines changed: 86 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.27 2006/08/18 19:52:39 tgl Exp $
11+
* $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.28 2006/10/01 22:25:48 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -883,20 +883,25 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
883883
#define MIN_RIGHT_CUT 10 /* try to keep this far away from EOL */
884884

885885
char *wquery;
886-
int clen,
887-
slen,
886+
int slen,
887+
cno,
888888
i,
889-
w,
890889
*qidx,
891890
*scridx,
892891
qoffset,
893892
scroffset,
894893
ibeg,
895894
iend,
896895
loc_line;
897-
bool beg_trunc,
896+
bool mb_encoding,
897+
beg_trunc,
898898
end_trunc;
899899

900+
/* Convert loc from 1-based to 0-based; no-op if out of range */
901+
loc--;
902+
if (loc < 0)
903+
return;
904+
900905
/* Need a writable copy of the query */
901906
wquery = strdup(query);
902907
if (wquery == NULL)
@@ -905,13 +910,13 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
905910
/*
906911
* Each character might occupy multiple physical bytes in the string, and
907912
* in some Far Eastern character sets it might take more than one screen
908-
* column as well. We compute the starting byte offset and starting
913+
* column as well. We compute the starting byte offset and starting
909914
* screen column of each logical character, and store these in qidx[] and
910915
* scridx[] respectively.
911916
*/
912917

913918
/* we need a safe allocation size... */
914-
slen = strlen(query) + 1;
919+
slen = strlen(wquery) + 1;
915920

916921
qidx = (int *) malloc(slen * sizeof(int));
917922
if (qidx == NULL)
@@ -927,79 +932,93 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
927932
return;
928933
}
929934

935+
/* We can optimize a bit if it's a single-byte encoding */
936+
mb_encoding = (pg_encoding_max_length(encoding) != 1);
937+
938+
/*
939+
* Within the scanning loop, cno is the current character's logical number,
940+
* qoffset is its offset in wquery, and scroffset is its starting logical
941+
* screen column (all indexed from 0). "loc" is the logical character
942+
* number of the error location. We scan to determine loc_line (the
943+
* 1-based line number containing loc) and ibeg/iend (first character
944+
* number and last+1 character number of the line containing loc).
945+
* Note that qidx[] and scridx[] are filled only as far as iend.
946+
*/
930947
qoffset = 0;
931948
scroffset = 0;
932-
for (i = 0; query[qoffset] != '\0'; i++)
933-
{
934-
qidx[i] = qoffset;
935-
scridx[i] = scroffset;
936-
w = pg_encoding_dsplen(encoding, &query[qoffset]);
937-
/* treat control chars as width 1; see tab hack below */
938-
if (w <= 0)
939-
w = 1;
940-
scroffset += w;
941-
qoffset += pg_encoding_mblen(encoding, &query[qoffset]);
942-
}
943-
qidx[i] = qoffset;
944-
scridx[i] = scroffset;
945-
clen = i;
949+
loc_line = 1;
950+
ibeg = 0;
951+
iend = -1; /* -1 means not set yet */
946952

947-
/* convert loc to zero-based offset in qidx/scridx arrays */
948-
loc--;
949-
950-
/* do we have something to show? */
951-
if (loc >= 0 && loc <= clen)
953+
for (cno = 0; wquery[qoffset] != '\0'; cno++)
952954
{
953-
/* input line number of our syntax error. */
954-
loc_line = 1;
955-
/* first included char of extract. */
956-
ibeg = 0;
957-
/* last-plus-1 included char of extract. */
958-
iend = clen;
955+
char ch = wquery[qoffset];
956+
957+
qidx[cno] = qoffset;
958+
scridx[cno] = scroffset;
959959

960960
/*
961961
* Replace tabs with spaces in the writable copy. (Later we might
962962
* want to think about coping with their variable screen width, but
963963
* not today.)
964-
*
965-
* Extract line number and begin and end indexes of line containing
966-
* error location. There will not be any newlines or carriage returns
967-
* in the selected extract.
968964
*/
969-
for (i = 0; i < clen; i++)
965+
if (ch == '\t')
966+
wquery[qoffset] = ' ';
967+
968+
/*
969+
* If end-of-line, count lines and mark positions. Each \r or \n counts
970+
* as a line except when \r \n appear together.
971+
*/
972+
else if (ch == '\r' || ch == '\n')
970973
{
971-
/* character length must be 1 or it's not ASCII */
972-
if ((qidx[i + 1] - qidx[i]) == 1)
974+
if (cno < loc)
973975
{
974-
if (wquery[qidx[i]] == '\t')
975-
wquery[qidx[i]] = ' ';
976-
else if (wquery[qidx[i]] == '\r' || wquery[qidx[i]] == '\n')
977-
{
978-
if (i < loc)
979-
{
980-
/*
981-
* count lines before loc. Each \r or \n counts
982-
* as a line except when \r \n appear together.
983-
*/
984-
if (wquery[qidx[i]] == '\r' ||
985-
i == 0 ||
986-
(qidx[i] - qidx[i - 1]) != 1 ||
987-
wquery[qidx[i - 1]] != '\r')
988-
loc_line++;
989-
/* extract beginning = last line start before loc. */
990-
ibeg = i + 1;
991-
}
992-
else
993-
{
994-
/* set extract end. */
995-
iend = i;
996-
/* done scanning. */
997-
break;
998-
}
999-
}
976+
if (ch == '\r' ||
977+
cno == 0 ||
978+
wquery[qidx[cno - 1]] != '\r')
979+
loc_line++;
980+
/* extract beginning = last line start before loc. */
981+
ibeg = cno + 1;
982+
}
983+
else
984+
{
985+
/* set extract end. */
986+
iend = cno;
987+
/* done scanning. */
988+
break;
1000989
}
1001990
}
1002991

992+
/* Advance */
993+
if (mb_encoding)
994+
{
995+
int w;
996+
997+
w = pg_encoding_dsplen(encoding, &wquery[qoffset]);
998+
/* treat any non-tab control chars as width 1 */
999+
if (w <= 0)
1000+
w = 1;
1001+
scroffset += w;
1002+
qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]);
1003+
}
1004+
else
1005+
{
1006+
/* We assume wide chars only exist in multibyte encodings */
1007+
scroffset++;
1008+
qoffset++;
1009+
}
1010+
}
1011+
/* Fix up if we didn't find an end-of-line after loc */
1012+
if (iend < 0)
1013+
{
1014+
iend = cno; /* query length in chars, +1 */
1015+
qidx[iend] = qoffset;
1016+
scridx[iend] = scroffset;
1017+
}
1018+
1019+
/* Print only if loc is within computed query length */
1020+
if (loc <= cno)
1021+
{
10031022
/* If the line extracted is too long, we truncate it. */
10041023
beg_trunc = false;
10051024
end_trunc = false;
@@ -1050,7 +1069,8 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
10501069
scroffset = 0;
10511070
for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i]))
10521071
{
1053-
w = pg_encoding_dsplen(encoding, &msg->data[i]);
1072+
int w = pg_encoding_dsplen(encoding, &msg->data[i]);
1073+
10541074
if (w <= 0)
10551075
w = 1;
10561076
scroffset += w;

0 commit comments

Comments
 (0)