Skip to content

Commit cd19848

Browse files
committed
Be more careful to not lose sync in the FE/BE protocol.
If any error occurred while we were in the middle of reading a protocol message from the client, we could lose sync, and incorrectly try to interpret a part of another message as a new protocol message. That will usually lead to an "invalid frontend message" error that terminates the connection. However, this is a security issue because an attacker might be able to deliberately cause an error, inject a Query message in what's supposed to be just user data, and have the server execute it. We were quite careful to not have CHECK_FOR_INTERRUPTS() calls or other operations that could ereport(ERROR) in the middle of processing a message, but a query cancel interrupt or statement timeout could nevertheless cause it to happen. Also, the V2 fastpath and COPY handling were not so careful. It's very difficult to recover in the V2 COPY protocol, so we will just terminate the connection on error. In practice, that's what happened previously anyway, as we lost protocol sync. To fix, add a new variable in pqcomm.c, PqCommReadingMsg, that is set whenever we're in the middle of reading a message. When it's set, we cannot safely ERROR out and continue running, because we might've read only part of a message. PqCommReadingMsg acts somewhat similarly to critical sections in that if an error occurs while it's set, the error handler will force the connection to be terminated, as if the error was FATAL. It's not implemented by promoting ERROR to FATAL in elog.c, like ERROR is promoted to PANIC in critical sections, because we want to be able to use PG_TRY/CATCH to recover and regain protocol sync. pq_getmessage() takes advantage of that to prevent an OOM error from terminating the connection. To prevent unnecessary connection terminations, add a holdoff mechanism similar to HOLD/RESUME_INTERRUPTS() that can be used hold off query cancel interrupts, but still allow die interrupts. The rules on which interrupts are processed when are now a bit more complicated, so refactor ProcessInterrupts() and the calls to it in signal handlers so that the signal handlers always call it if ImmediateInterruptOK is set, and ProcessInterrupts() can decide to not do anything if the other conditions are not met. Reported by Emil Lenngren. Patch reviewed by Noah Misch and Andres Freund. Backpatch to all supported versions. Security: CVE-2015-0244
1 parent a558ad3 commit cd19848

File tree

13 files changed

+244
-107
lines changed

13 files changed

+244
-107
lines changed

src/backend/commands/copy.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,8 @@ ReceiveCopyBegin(CopyState cstate)
404404
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
405405
errmsg("COPY BINARY is not supported to stdout or from stdin")));
406406
pq_putemptymessage('G');
407+
/* any error in old protocol will make us lose sync */
408+
pq_startmsgread();
407409
cstate->copy_dest = COPY_OLD_FE;
408410
}
409411
else
@@ -414,6 +416,8 @@ ReceiveCopyBegin(CopyState cstate)
414416
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
415417
errmsg("COPY BINARY is not supported to stdout or from stdin")));
416418
pq_putemptymessage('D');
419+
/* any error in old protocol will make us lose sync */
420+
pq_startmsgread();
417421
cstate->copy_dest = COPY_OLD_FE;
418422
}
419423
/* We *must* flush here to ensure FE knows it can send. */
@@ -600,6 +604,8 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
600604
int mtype;
601605

602606
readmessage:
607+
HOLD_CANCEL_INTERRUPTS();
608+
pq_startmsgread();
603609
mtype = pq_getbyte();
604610
if (mtype == EOF)
605611
ereport(ERROR,
@@ -609,6 +615,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
609615
ereport(ERROR,
610616
(errcode(ERRCODE_CONNECTION_FAILURE),
611617
errmsg("unexpected EOF on client connection with an open transaction")));
618+
RESUME_CANCEL_INTERRUPTS();
612619
switch (mtype)
613620
{
614621
case 'd': /* CopyData */
@@ -2315,6 +2322,13 @@ CopyFrom(CopyState cstate)
23152322

23162323
MemoryContextSwitchTo(oldcontext);
23172324

2325+
/*
2326+
* In the old protocol, tell pqcomm that we can process normal protocol
2327+
* messages again.
2328+
*/
2329+
if (cstate->copy_dest == COPY_OLD_FE)
2330+
pq_endmsgread();
2331+
23182332
/* Execute AFTER STATEMENT insertion triggers */
23192333
ExecASInsertTriggers(estate, resultRelInfo);
23202334

src/backend/libpq/auth.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,7 @@ recv_password_packet(Port *port)
656656
{
657657
StringInfoData buf;
658658

659+
pq_startmsgread();
659660
if (PG_PROTOCOL_MAJOR(port->proto) >= 3)
660661
{
661662
/* Expect 'p' message type */
@@ -1061,6 +1062,7 @@ pg_GSS_recvauth(Port *port)
10611062
*/
10621063
do
10631064
{
1065+
pq_startmsgread();
10641066
mtype = pq_getbyte();
10651067
if (mtype != 'p')
10661068
{
@@ -1295,6 +1297,7 @@ pg_SSPI_recvauth(Port *port)
12951297
*/
12961298
do
12971299
{
1300+
pq_startmsgread();
12981301
mtype = pq_getbyte();
12991302
if (mtype != 'p')
13001303
{

src/backend/libpq/pqcomm.c

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,9 @@ static int PqRecvLength; /* End of data available in PqRecvBuffer */
129129
/*
130130
* Message status
131131
*/
132-
static bool PqCommBusy;
133-
static bool DoingCopyOut;
132+
static bool PqCommBusy; /* busy sending data to the client */
133+
static bool PqCommReadingMsg; /* in the middle of reading a message */
134+
static bool DoingCopyOut; /* in old-protocol COPY OUT processing */
134135

135136

136137
/* Internal functions */
@@ -156,6 +157,7 @@ pq_init(void)
156157
PqSendBuffer = MemoryContextAlloc(TopMemoryContext, PqSendBufferSize);
157158
PqSendPointer = PqSendStart = PqRecvPointer = PqRecvLength = 0;
158159
PqCommBusy = false;
160+
PqCommReadingMsg = false;
159161
DoingCopyOut = false;
160162
on_proc_exit(pq_close, 0);
161163
}
@@ -890,6 +892,8 @@ pq_recvbuf(void)
890892
int
891893
pq_getbyte(void)
892894
{
895+
Assert(PqCommReadingMsg);
896+
893897
while (PqRecvPointer >= PqRecvLength)
894898
{
895899
if (pq_recvbuf()) /* If nothing in buffer, then recv some */
@@ -928,6 +932,8 @@ pq_getbyte_if_available(unsigned char *c)
928932
{
929933
int r;
930934

935+
Assert(PqCommReadingMsg);
936+
931937
if (PqRecvPointer < PqRecvLength)
932938
{
933939
*c = PqRecvBuffer[PqRecvPointer++];
@@ -980,6 +986,8 @@ pq_getbytes(char *s, size_t len)
980986
{
981987
size_t amount;
982988

989+
Assert(PqCommReadingMsg);
990+
983991
while (len > 0)
984992
{
985993
while (PqRecvPointer >= PqRecvLength)
@@ -1012,6 +1020,8 @@ pq_discardbytes(size_t len)
10121020
{
10131021
size_t amount;
10141022

1023+
Assert(PqCommReadingMsg);
1024+
10151025
while (len > 0)
10161026
{
10171027
while (PqRecvPointer >= PqRecvLength)
@@ -1048,6 +1058,8 @@ pq_getstring(StringInfo s)
10481058
{
10491059
int i;
10501060

1061+
Assert(PqCommReadingMsg);
1062+
10511063
resetStringInfo(s);
10521064

10531065
/* Read until we get the terminating '\0' */
@@ -1079,6 +1091,58 @@ pq_getstring(StringInfo s)
10791091
}
10801092

10811093

1094+
/* --------------------------------
1095+
* pq_startmsgread - begin reading a message from the client.
1096+
*
1097+
* This must be called before any of the pq_get* functions.
1098+
* --------------------------------
1099+
*/
1100+
void
1101+
pq_startmsgread(void)
1102+
{
1103+
/*
1104+
* There shouldn't be a read active already, but let's check just to be
1105+
* sure.
1106+
*/
1107+
if (PqCommReadingMsg)
1108+
ereport(FATAL,
1109+
(errcode(ERRCODE_PROTOCOL_VIOLATION),
1110+
errmsg("terminating connection because protocol sync was lost")));
1111+
1112+
PqCommReadingMsg = true;
1113+
}
1114+
1115+
1116+
/* --------------------------------
1117+
* pq_endmsgread - finish reading message.
1118+
*
1119+
* This must be called after reading a V2 protocol message with
1120+
* pq_getstring() and friends, to indicate that we have read the whole
1121+
* message. In V3 protocol, pq_getmessage() does this implicitly.
1122+
* --------------------------------
1123+
*/
1124+
void
1125+
pq_endmsgread(void)
1126+
{
1127+
Assert(PqCommReadingMsg);
1128+
1129+
PqCommReadingMsg = false;
1130+
}
1131+
1132+
/* --------------------------------
1133+
* pq_is_reading_msg - are we currently reading a message?
1134+
*
1135+
* This is used in error recovery at the outer idle loop to detect if we have
1136+
* lost protocol sync, and need to terminate the connection. pq_startmsgread()
1137+
* will check for that too, but it's nicer to detect it earlier.
1138+
* --------------------------------
1139+
*/
1140+
bool
1141+
pq_is_reading_msg(void)
1142+
{
1143+
return PqCommReadingMsg;
1144+
}
1145+
10821146
/* --------------------------------
10831147
* pq_getmessage - get a message with length word from connection
10841148
*
@@ -1100,6 +1164,8 @@ pq_getmessage(StringInfo s, int maxlen)
11001164
{
11011165
int32 len;
11021166

1167+
Assert(PqCommReadingMsg);
1168+
11031169
resetStringInfo(s);
11041170

11051171
/* Read message length word */
@@ -1141,6 +1207,9 @@ pq_getmessage(StringInfo s, int maxlen)
11411207
ereport(COMMERROR,
11421208
(errcode(ERRCODE_PROTOCOL_VIOLATION),
11431209
errmsg("incomplete message from client")));
1210+
1211+
/* we discarded the rest of the message so we're back in sync. */
1212+
PqCommReadingMsg = false;
11441213
PG_RE_THROW();
11451214
}
11461215
PG_END_TRY();
@@ -1158,6 +1227,9 @@ pq_getmessage(StringInfo s, int maxlen)
11581227
s->data[len] = '\0';
11591228
}
11601229

1230+
/* finished reading the message. */
1231+
PqCommReadingMsg = false;
1232+
11611233
return 0;
11621234
}
11631235

src/backend/postmaster/postmaster.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,6 +1755,7 @@ ProcessStartupPacket(Port *port, bool SSLdone)
17551755
ProtocolVersion proto;
17561756
MemoryContext oldcontext;
17571757

1758+
pq_startmsgread();
17581759
if (pq_getbytes((char *) &len, 4) == EOF)
17591760
{
17601761
/*
@@ -1799,6 +1800,7 @@ ProcessStartupPacket(Port *port, bool SSLdone)
17991800
errmsg("incomplete startup packet")));
18001801
return STATUS_ERROR;
18011802
}
1803+
pq_endmsgread();
18021804

18031805
/*
18041806
* The first field is either a protocol version number or a special

src/backend/replication/walsender.c

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ ProcessRepliesIfAny(void)
698698

699699
for (;;)
700700
{
701+
pq_startmsgread();
701702
r = pq_getbyte_if_available(&firstchar);
702703
if (r < 0)
703704
{
@@ -710,9 +711,20 @@ ProcessRepliesIfAny(void)
710711
if (r == 0)
711712
{
712713
/* no data available without blocking */
714+
pq_endmsgread();
713715
break;
714716
}
715717

718+
/* Read the message contents */
719+
resetStringInfo(&reply_message);
720+
if (pq_getmessage(&reply_message, 0))
721+
{
722+
ereport(COMMERROR,
723+
(errcode(ERRCODE_PROTOCOL_VIOLATION),
724+
errmsg("unexpected EOF on standby connection")));
725+
proc_exit(0);
726+
}
727+
716728
/*
717729
* If we already received a CopyDone from the frontend, the frontend
718730
* should not send us anything until we've closed our end of the COPY.
@@ -748,16 +760,6 @@ ProcessRepliesIfAny(void)
748760
streamingDoneSending = true;
749761
}
750762

751-
/* consume the CopyData message */
752-
resetStringInfo(&reply_message);
753-
if (pq_getmessage(&reply_message, 0))
754-
{
755-
ereport(COMMERROR,
756-
(errcode(ERRCODE_PROTOCOL_VIOLATION),
757-
errmsg("unexpected EOF on standby connection")));
758-
proc_exit(0);
759-
}
760-
761763
streamingDoneReceiving = true;
762764
received = true;
763765
break;
@@ -794,19 +796,6 @@ ProcessStandbyMessage(void)
794796
{
795797
char msgtype;
796798

797-
resetStringInfo(&reply_message);
798-
799-
/*
800-
* Read the message contents.
801-
*/
802-
if (pq_getmessage(&reply_message, 0))
803-
{
804-
ereport(COMMERROR,
805-
(errcode(ERRCODE_PROTOCOL_VIOLATION),
806-
errmsg("unexpected EOF on standby connection")));
807-
proc_exit(0);
808-
}
809-
810799
/*
811800
* Check message type from the first byte.
812801
*/

src/backend/storage/lmgr/proc.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,11 +668,16 @@ LockErrorCleanup(void)
668668
LWLockId partitionLock;
669669
DisableTimeoutParams timeouts[2];
670670

671+
HOLD_INTERRUPTS();
672+
671673
AbortStrongLockAcquire();
672674

673675
/* Nothing to do if we weren't waiting for a lock */
674676
if (lockAwaited == NULL)
677+
{
678+
RESUME_INTERRUPTS();
675679
return;
680+
}
676681

677682
/*
678683
* Turn off the deadlock and lock timeout timers, if they are still
@@ -722,6 +727,8 @@ LockErrorCleanup(void)
722727
* wakeup signal isn't harmful, and it seems not worth expending cycles to
723728
* get rid of a signal that most likely isn't there.
724729
*/
730+
731+
RESUME_INTERRUPTS();
725732
}
726733

727734

src/backend/tcop/fastpath.c

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ static int16 parse_fcall_arguments_20(StringInfo msgBuf, struct fp_info * fip,
7575
* The caller should already have initialized buf to empty.
7676
* ----------------
7777
*/
78-
static int
78+
int
7979
GetOldFunctionMessage(StringInfo buf)
8080
{
8181
int32 ibuf;
@@ -280,33 +280,6 @@ HandleFunctionRequest(StringInfo msgBuf)
280280
bool was_logged = false;
281281
char msec_str[32];
282282

283-
/*
284-
* Read message contents if not already done.
285-
*/
286-
if (PG_PROTOCOL_MAJOR(FrontendProtocol) < 3)
287-
{
288-
if (GetOldFunctionMessage(msgBuf))
289-
{
290-
if (IsTransactionState())
291-
ereport(COMMERROR,
292-
(errcode(ERRCODE_CONNECTION_FAILURE),
293-
errmsg("unexpected EOF on client connection with an open transaction")));
294-
else
295-
{
296-
/*
297-
* Can't send DEBUG log messages to client at this point.
298-
* Since we're disconnecting right away, we don't need to
299-
* restore whereToSendOutput.
300-
*/
301-
whereToSendOutput = DestNone;
302-
ereport(DEBUG1,
303-
(errcode(ERRCODE_CONNECTION_DOES_NOT_EXIST),
304-
errmsg("unexpected EOF on client connection")));
305-
}
306-
return EOF;
307-
}
308-
}
309-
310283
/*
311284
* Now that we've eaten the input message, check to see if we actually
312285
* want to do the function call or not. It's now safe to ereport(); we

0 commit comments

Comments
 (0)