Skip to content

Commit 35ea98f

Browse files
committed
Fix past pd_upper write in ginRedoRecompress()
ginRedoRecompress() replays actions over compressed segments of posting list in-place. However, it might lead to write past pg_upper, because intermediate state during playing the changes can take more space than both original state and final state. This commit fixes that by refuse from in-place modification. Instead page tail is copied once modification is started, and then it's used as the source of original segments. Backpatch to 9.4 where posting list compression was introduced. Reported-by: Sivasubramanian Ramasubramanian Discussion: https://postgr.es/m/1536091151804.6588%40amazon.com Author: Alexander Korotkov based on patch from and ideas by Sivasubramanian Ramasubramanian Review: Sivasubramanian Ramasubramanian Backpatch-through: 9.4
1 parent d200333 commit 35ea98f

File tree

1 file changed

+59
-21
lines changed

1 file changed

+59
-21
lines changed

src/backend/access/gin/ginxlog.c

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,14 @@ ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rda
142142
}
143143
}
144144

145+
/*
146+
* Redo recompression of posting list. Doing all the changes in-place is not
147+
* always possible, because it might require more space than we've on the page.
148+
* Instead, once modification is required we copy unprocessed tail of the page
149+
* into separately allocated chunk of memory for further reading original
150+
* versions of segments. Thanks to that we don't bother about moving page data
151+
* in-place.
152+
*/
145153
static void
146154
ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
147155
{
@@ -151,6 +159,9 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
151159
Pointer segmentend;
152160
char *walbuf;
153161
int totalsize;
162+
Pointer tailCopy = NULL;
163+
Pointer writePtr;
164+
Pointer segptr;
154165

155166
/*
156167
* If the page is in pre-9.4 format, convert to new format first.
@@ -190,6 +201,7 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
190201
}
191202

192203
oldseg = GinDataLeafPageGetPostingList(page);
204+
writePtr = (Pointer) oldseg;
193205
segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
194206
segno = 0;
195207

@@ -207,8 +219,6 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
207219
ItemPointerData *newitems;
208220
int nnewitems;
209221
int segsize;
210-
Pointer segptr;
211-
int szleft;
212222

213223
/* Extract all the information we need from the WAL record */
214224
if (a_action == GIN_SEGMENT_INSERT ||
@@ -231,6 +241,17 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
231241
Assert(segno <= a_segno);
232242
while (segno < a_segno)
233243
{
244+
/*
245+
* Once modification is started and page tail is copied, we've
246+
* to copy unmodified segments.
247+
*/
248+
segsize = SizeOfGinPostingList(oldseg);
249+
if (tailCopy)
250+
{
251+
Assert(writePtr + segsize < PageGetSpecialPointer(page));
252+
memcpy(writePtr, (Pointer) oldseg, segsize);
253+
}
254+
writePtr += segsize;
234255
oldseg = GinNextPostingListSegment(oldseg);
235256
segno++;
236257
}
@@ -271,36 +292,42 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
271292
Assert(a_action == GIN_SEGMENT_INSERT);
272293
segsize = 0;
273294
}
274-
szleft = segmentend - segptr;
295+
296+
/*
297+
* We're about to start modification of the page. So, copy tail of the
298+
* page if it's not done already.
299+
*/
300+
if (!tailCopy && segptr != segmentend)
301+
{
302+
int tailSize = segmentend - segptr;
303+
304+
tailCopy = (Pointer) palloc(tailSize);
305+
memcpy(tailCopy, segptr, tailSize);
306+
segptr = tailCopy;
307+
oldseg = (GinPostingList *) segptr;
308+
segmentend = segptr + tailSize;
309+
}
275310

276311
switch (a_action)
277312
{
278313
case GIN_SEGMENT_DELETE:
279-
memmove(segptr, segptr + segsize, szleft - segsize);
280-
segmentend -= segsize;
281-
314+
segptr += segsize;
282315
segno++;
283316
break;
284317

285318
case GIN_SEGMENT_INSERT:
286-
/* make room for the new segment */
287-
memmove(segptr + newsegsize, segptr, szleft);
288319
/* copy the new segment in place */
289-
memcpy(segptr, newseg, newsegsize);
290-
segmentend += newsegsize;
291-
segptr += newsegsize;
320+
Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
321+
memcpy(writePtr, newseg, newsegsize);
322+
writePtr += newsegsize;
292323
break;
293324

294325
case GIN_SEGMENT_REPLACE:
295-
/* shift the segments that follow */
296-
memmove(segptr + newsegsize,
297-
segptr + segsize,
298-
szleft - segsize);
299-
/* copy the replacement segment in place */
300-
memcpy(segptr, newseg, newsegsize);
301-
segmentend -= segsize;
302-
segmentend += newsegsize;
303-
segptr += newsegsize;
326+
/* copy the new version of segment in place */
327+
Assert(writePtr + newsegsize <= PageGetSpecialPointer(page));
328+
memcpy(writePtr, newseg, newsegsize);
329+
writePtr += newsegsize;
330+
segptr += segsize;
304331
segno++;
305332
break;
306333

@@ -310,7 +337,18 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
310337
oldseg = (GinPostingList *) segptr;
311338
}
312339

313-
totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
340+
/* Copy the rest of unmodified segments if any. */
341+
segptr = (Pointer) oldseg;
342+
if (segptr != segmentend && tailCopy)
343+
{
344+
int restSize = segmentend - segptr;
345+
346+
Assert(writePtr + restSize <= PageGetSpecialPointer(page));
347+
memcpy(writePtr, segptr, restSize);
348+
writePtr += restSize;
349+
}
350+
351+
totalsize = writePtr - (Pointer) GinDataLeafPageGetPostingList(page);
314352
GinDataPageSetDataSize(page, totalsize);
315353
}
316354

0 commit comments

Comments
 (0)