Skip to content

Commit 5800c6b

Browse files
committed
Btree WAL logging.
1 parent 831e78e commit 5800c6b

File tree

4 files changed

+223
-55
lines changed

4 files changed

+223
-55
lines changed

src/backend/access/heap/heapam.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.85 2000/09/07 09:58:34 vadim Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.86 2000/10/04 00:04:41 vadim Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -86,8 +86,8 @@
8686
#include "utils/inval.h"
8787
#include "utils/relcache.h"
8888

89-
#ifdef XLOG /* comments are in _heap_update */
90-
static ItemPointerData _locked_tuple;
89+
#ifdef XLOG /* comments are in heap_update */
90+
static xl_heaptid _locked_tuple_;
9191
#endif
9292

9393

@@ -1650,8 +1650,9 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
16501650
* In the event of a crash prior to logging, TQUAL routines will see
16511651
* HEAP_XMAX_UNLOGGED flag...
16521652
*/
1653-
_locked_tuple = *otid;
1654-
XactPushRollback(_heap_unlock_tuple, (void*) &_locked_tuple);
1653+
_locked_tuple_.node = relation->rd_node;
1654+
_locked_tuple_.tid = *otid;
1655+
XactPushRollback(_heap_unlock_tuple, (void*) &_locked_tuple_);
16551656
#endif
16561657
TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));
16571658
oldtup.t_data->t_cmax = GetCurrentCommandId();

src/backend/access/nbtree/nbtinsert.c

Lines changed: 132 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.62 2000/08/25 23:13:33 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.63 2000/10/04 00:04:42 vadim Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -33,6 +33,7 @@ typedef struct
3333
int best_delta; /* best size delta so far */
3434
} FindSplitData;
3535

36+
void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
3637

3738
static TransactionId _bt_check_unique(Relation rel, BTItem btitem,
3839
Relation heapRel, Buffer buf,
@@ -54,7 +55,6 @@ static void _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
5455
int leftfree, int rightfree,
5556
bool newitemonleft, Size firstrightitemsz);
5657
static Buffer _bt_getstackbuf(Relation rel, BTStack stack);
57-
static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
5858
static void _bt_pgaddtup(Relation rel, Page page,
5959
Size itemsize, BTItem btitem,
6060
OffsetNumber itup_off, const char *where);
@@ -514,6 +514,29 @@ _bt_insertonpg(Relation rel,
514514
}
515515
else
516516
{
517+
#ifdef XLOG
518+
/* XLOG stuff */
519+
{
520+
char xlbuf[sizeof(xl_btree_insert) + 2 * sizeof(CommandId)];
521+
xl_btree_insert *xlrec = xlbuf;
522+
int hsize = SizeOfBtreeInsert;
523+
524+
xlrec->target.node = rel->rd_node;
525+
ItemPointerSet(&(xlrec->target.tid), BufferGetBlockNumber(buf), newitemoff);
526+
if (P_ISLEAF(lpageop))
527+
{
528+
CommandId cid = GetCurrentCommandId();
529+
memcpy(xlbuf + SizeOfBtreeInsert, &(char*)cid, sizeof(CommandId));
530+
hsize += sizeof(CommandId);
531+
}
532+
533+
XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_INSERT,
534+
xlbuf, hsize, (char*) btitem, itemsz);
535+
536+
PageSetLSN(page, recptr);
537+
PageSetSUI(page, ThisStartUpID);
538+
}
539+
#endif
517540
_bt_pgaddtup(rel, page, itemsz, btitem, newitemoff, "page");
518541
itup_off = newitemoff;
519542
itup_blkno = BufferGetBlockNumber(buf);
@@ -578,8 +601,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
578601
ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage);
579602

580603
/* if we're splitting this page, it won't be the root when we're done */
581-
oopaque->btpo_flags &= ~BTP_ROOT;
582-
lopaque->btpo_flags = ropaque->btpo_flags = oopaque->btpo_flags;
604+
lopaque->btpo_flags = oopaque->btpo_flags;
605+
lopaque->btpo_flags &= ~BTP_ROOT;
606+
ropaque->btpo_flags = lopaque->btpo_flags;
583607
lopaque->btpo_prev = oopaque->btpo_prev;
584608
lopaque->btpo_next = BufferGetBlockNumber(rbuf);
585609
ropaque->btpo_prev = BufferGetBlockNumber(buf);
@@ -608,7 +632,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
608632
item = (BTItem) PageGetItem(origpage, itemid);
609633
if (PageAddItem(rightpage, (Item) item, itemsz, rightoff,
610634
LP_USED) == InvalidOffsetNumber)
611-
elog(FATAL, "btree: failed to add hikey to the right sibling");
635+
elog(STOP, "btree: failed to add hikey to the right sibling");
612636
rightoff = OffsetNumberNext(rightoff);
613637
}
614638

@@ -633,7 +657,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
633657
}
634658
if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
635659
LP_USED) == InvalidOffsetNumber)
636-
elog(FATAL, "btree: failed to add hikey to the left sibling");
660+
elog(STOP, "btree: failed to add hikey to the left sibling");
637661
leftoff = OffsetNumberNext(leftoff);
638662

639663
/*
@@ -704,6 +728,75 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
704728
}
705729
}
706730

731+
/*
732+
* We have to grab the right sibling (if any) and fix the prev
733+
* pointer there. We are guaranteed that this is deadlock-free
734+
* since no other writer will be holding a lock on that page
735+
* and trying to move left, and all readers release locks on a page
736+
* before trying to fetch its neighbors.
737+
*/
738+
739+
if (!P_RIGHTMOST(ropaque))
740+
{
741+
sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE);
742+
spage = BufferGetPage(sbuf);
743+
}
744+
745+
#ifdef XLOG
746+
/*
747+
* Right sibling is locked, new siblings are prepared, but original
748+
* page is not updated yet. Log changes before continuing.
749+
*
750+
* NO ELOG(ERROR) till right sibling is updated.
751+
*
752+
*/
753+
{
754+
char xlbuf[sizeof(xl_btree_split) +
755+
2 * sizeof(CommandId) + BLCKSZ];
756+
xl_btree_split *xlrec = xlbuf;
757+
int hsize = SizeOfBtreeSplit;
758+
int flag = (newitemonleft) ?
759+
XLOG_BTREE_SPLEFT : XLOG_BTREE_SPLIT;
760+
761+
xlrec->target.node = rel->rd_node;
762+
ItemPointerSet(&(xlrec->target.tid), itup_blkno, itup_off);
763+
if (P_ISLEAF(lopaque))
764+
{
765+
CommandId cid = GetCurrentCommandId();
766+
memcpy(xlbuf + hsize, &(char*)cid, sizeof(CommandId));
767+
hsize += sizeof(CommandId);
768+
}
769+
if (newitemonleft)
770+
{
771+
memcpy(xlbuf + hsize, (char*) newitem, newitemsz);
772+
hsize += newitemsz;
773+
xlrec->otherblk = BufferGetBlockNumber(rbuf);
774+
}
775+
else
776+
xlrec->otherblk = BufferGetBlockNumber(buf);
777+
778+
xlrec->rightblk = ropaque->btpo_next;
779+
780+
/*
781+
* Direct access to the page is not good, but it is faster - we should
782+
* implement some new func in page API.
783+
*/
784+
XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, flag, xlbuf,
785+
hsize, (char*)rightpage + (PageHeader) rightpage)->pd_upper,
786+
((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->upper);
787+
788+
PageSetLSN(leftpage, recptr);
789+
PageSetSUI(leftpage, ThisStartUpID);
790+
PageSetLSN(rightpage, recptr);
791+
PageSetSUI(rightpage, ThisStartUpID);
792+
if (!P_RIGHTMOST(ropaque))
793+
{
794+
PageSetLSN(spage, recptr);
795+
PageSetSUI(spage, ThisStartUpID);
796+
}
797+
}
798+
#endif
799+
707800
/*
708801
* By here, the original data page has been split into two new halves,
709802
* and these are correct. The algorithm requires that the left page
@@ -716,18 +809,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
716809

717810
PageRestoreTempPage(leftpage, origpage);
718811

719-
/*
720-
* Finally, we need to grab the right sibling (if any) and fix the
721-
* prev pointer there. We are guaranteed that this is deadlock-free
722-
* since no other writer will be holding a lock on that page
723-
* and trying to move left, and all readers release locks on a page
724-
* before trying to fetch its neighbors.
725-
*/
726-
727812
if (!P_RIGHTMOST(ropaque))
728813
{
729-
sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE);
730-
spage = BufferGetPage(sbuf);
731814
sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
732815
sopaque->btpo_prev = BufferGetBlockNumber(rbuf);
733816

@@ -1002,7 +1085,7 @@ _bt_getstackbuf(Relation rel, BTStack stack)
10021085
* two new children. The new root page is neither pinned nor locked, and
10031086
* we have also written out lbuf and rbuf and dropped their pins/locks.
10041087
*/
1005-
static void
1088+
void
10061089
_bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
10071090
{
10081091
Buffer rootbuf;
@@ -1011,7 +1094,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
10111094
rootpage;
10121095
BlockNumber lbkno,
10131096
rbkno;
1014-
BlockNumber rootbknum;
1097+
BlockNumber rootblknum;
10151098
BTPageOpaque rootopaque;
10161099
ItemId itemid;
10171100
BTItem item;
@@ -1021,12 +1104,16 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
10211104
/* get a new root page */
10221105
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
10231106
rootpage = BufferGetPage(rootbuf);
1024-
rootbknum = BufferGetBlockNumber(rootbuf);
1107+
rootblknum = BufferGetBlockNumber(rootbuf);
1108+
1109+
1110+
/* NO ELOG(ERROR) from here till newroot op is logged */
10251111

10261112
/* set btree special data */
10271113
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
10281114
rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
10291115
rootopaque->btpo_flags |= BTP_ROOT;
1116+
rootopaque->btpo_parent = BTREE_METAPAGE;
10301117

10311118
lbkno = BufferGetBlockNumber(lbuf);
10321119
rbkno = BufferGetBlockNumber(rbuf);
@@ -1040,7 +1127,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
10401127
*/
10411128
((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo_parent =
10421129
((BTPageOpaque) PageGetSpecialPointer(rpage))->btpo_parent =
1043-
rootbknum;
1130+
rootblknum;
10441131

10451132
/*
10461133
* Create downlink item for left page (old root). Since this will be
@@ -1058,7 +1145,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
10581145
* the two items will go into positions P_HIKEY and P_FIRSTKEY.
10591146
*/
10601147
if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber)
1061-
elog(FATAL, "btree: failed to add leftkey to new root page");
1148+
elog(STOP, "btree: failed to add leftkey to new root page");
10621149
pfree(new_item);
10631150

10641151
/*
@@ -1075,14 +1162,35 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
10751162
* insert the right page pointer into the new root page.
10761163
*/
10771164
if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, LP_USED) == InvalidOffsetNumber)
1078-
elog(FATAL, "btree: failed to add rightkey to new root page");
1165+
elog(STOP, "btree: failed to add rightkey to new root page");
10791166
pfree(new_item);
10801167

1168+
#ifdef XLOG
1169+
/* XLOG stuff */
1170+
{
1171+
xl_btree_newroot xlrec;
1172+
xlrec.node = rel->rd_node;
1173+
xlrec.rootblk = rootblknum;
1174+
1175+
/*
1176+
* Direct access to the page is not good, but it is faster - we should
1177+
* implement some new func in page API.
1178+
*/
1179+
XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT,
1180+
&xlrec, SizeOfBtreeNewroot,
1181+
(char*)rootpage + (PageHeader) rootpage)->pd_upper,
1182+
((PageHeader) rootpage)->pd_special - ((PageHeader) rootpage)->upper);
1183+
1184+
PageSetLSN(rootpage, recptr);
1185+
PageSetSUI(rootpage, ThisStartUpID);
1186+
}
1187+
#endif
1188+
10811189
/* write and let go of the new root buffer */
10821190
_bt_wrtbuf(rel, rootbuf);
10831191

10841192
/* update metadata page with new root block number */
1085-
_bt_metaproot(rel, rootbknum, 0);
1193+
_bt_metaproot(rel, rootblknum, 0);
10861194

10871195
/* update and release new sibling, and finally the old root */
10881196
_bt_wrtbuf(rel, rbuf);
@@ -1125,7 +1233,7 @@ _bt_pgaddtup(Relation rel,
11251233

11261234
if (PageAddItem(page, (Item) btitem, itemsize, itup_off,
11271235
LP_USED) == InvalidOffsetNumber)
1128-
elog(FATAL, "btree: failed to add item to the %s for %s",
1236+
elog(STOP, "btree: failed to add item to the %s for %s",
11291237
where, RelationGetRelationName(rel));
11301238
}
11311239

src/backend/access/nbtree/nbtpage.c

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.37 2000/07/21 06:42:32 tgl Exp $
12+
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.38 2000/10/04 00:04:42 vadim Exp $
1313
*
1414
* NOTES
1515
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -128,7 +128,7 @@ _bt_getroot(Relation rel, int access)
128128
Page metapg;
129129
BTPageOpaque metaopaque;
130130
Buffer rootbuf;
131-
Page rootpg;
131+
Page rootpage;
132132
BTPageOpaque rootopaque;
133133
BlockNumber rootblkno;
134134
BTMetaPageData *metad;
@@ -177,14 +177,31 @@ _bt_getroot(Relation rel, int access)
177177
*/
178178
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
179179
rootblkno = BufferGetBlockNumber(rootbuf);
180-
rootpg = BufferGetPage(rootbuf);
180+
rootpage = BufferGetPage(rootbuf);
181+
182+
/* NO ELOG(ERROR) till meta is updated */
183+
184+
_bt_pageinit(rootpage, BufferGetPageSize(rootbuf));
185+
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
186+
rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
187+
188+
#ifdef XLOG
189+
/* XLOG stuff */
190+
{
191+
xl_btree_insert xlrec;
192+
xlrec.node = rel->rd_node;
193+
194+
XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT,
195+
&xlrec, SizeOfBtreeNewroot, NULL, 0);
196+
197+
PageSetLSN(rootpage, recptr);
198+
PageSetSUI(rootpage, ThisStartUpID);
199+
}
200+
#endif
181201

182202
metad->btm_root = rootblkno;
183203
metad->btm_level = 1;
184204

185-
_bt_pageinit(rootpg, BufferGetPageSize(rootbuf));
186-
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
187-
rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
188205
_bt_wrtnorelbuf(rel, rootbuf);
189206

190207
/* swap write lock for read lock */
@@ -218,8 +235,8 @@ _bt_getroot(Relation rel, int access)
218235
* at the metadata page and got the root buffer, then we got the wrong
219236
* buffer. Release it and try again.
220237
*/
221-
rootpg = BufferGetPage(rootbuf);
222-
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
238+
rootpage = BufferGetPage(rootbuf);
239+
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
223240

224241
if (! P_ISROOT(rootopaque))
225242
{
@@ -396,6 +413,20 @@ _bt_pagedel(Relation rel, ItemPointer tid)
396413
buf = _bt_getbuf(rel, blkno, BT_WRITE);
397414
page = BufferGetPage(buf);
398415

416+
#ifdef XLOG
417+
/* XLOG stuff */
418+
{
419+
xl_btree_delete xlrec;
420+
xlrec.target.node = rel->rd_node;
421+
xlrec.target.tid = *tid;
422+
XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE,
423+
(char*) xlrec, SizeOfBtreeDelete, NULL, 0);
424+
425+
PageSetLSN(page, recptr);
426+
PageSetSUI(page, ThisStartUpID);
427+
}
428+
#endif
429+
399430
PageIndexTupleDelete(page, offno);
400431

401432
/* write the buffer and release the lock */

0 commit comments

Comments
 (0)