PostgreSQL Source Code git master
origin.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * origin.c
4 * Logical replication progress tracking support.
5 *
6 * Copyright (c) 2013-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/logical/origin.c
10 *
11 * NOTES
12 *
13 * This file provides the following:
14 * * An infrastructure to name nodes in a replication setup
15 * * A facility to store and persist replication progress in an efficient
16 * and durable manner.
17 *
18 * A replication origin consists of a descriptive, user-defined, external
19 * name and a short, thus space-efficient, internal 2 byte one. This split
20 * exists because replication origins have to be stored in WAL and shared
21 * memory and long descriptors would be inefficient. For now only use 2 bytes
22 * for the internal id of a replication origin as it seems unlikely that there
23 * soon will be more than 65k nodes in one replication setup; and using only
24 * two bytes allows us to be more space efficient.
25 *
26 * Replication progress is tracked in a shared memory table
27 * (ReplicationState) that's dumped to disk every checkpoint. Entries
28 * ('slots') in this table are identified by the internal id. That's the case
29 * because it allows us to advance replication progress during crash
30 * recovery. To allow doing so we store the original LSN (from the originating
31 * system) of a transaction in the commit record. That allows us to recover the
32 * precise replayed state after crash recovery, without requiring synchronous
33 * commits. Allowing logical replication to use asynchronous commit is
34 * generally good for performance, but especially important as it allows a
35 * single threaded replay process to keep up with a source that has multiple
36 * backends generating changes concurrently. For efficiency and simplicity
37 * reasons a backend can set up one replication origin that's from then on used
38 * as the source of changes produced by the backend, until reset again.
39 *
40 * This infrastructure is intended to be used in cooperation with logical
41 * decoding. When replaying from a remote system the configured origin is
42 * provided to output plugins, allowing prevention of replication loops and
43 * other filtering.
44 *
45 * There are several levels of locking at work:
46 *
47 * * To create and drop replication origins an exclusive lock on
48 * pg_replication_origin is required for the duration. That allows us to
49 * safely and conflict free assign new origins using a dirty snapshot.
50 *
51 * * When creating an in-memory replication progress slot the ReplicationOrigin
52 * LWLock has to be held exclusively; when iterating over the replication
53 * progress a shared lock has to be held, the same when advancing the
54 * replication progress of an individual backend that has not setup as the
55 * session's replication origin.
56 *
57 * * When manipulating or looking at the remote_lsn and local_lsn fields of a
58 * replication progress slot that slot's lwlock has to be held. That's
59 * primarily because we do not assume 8 byte writes (the LSN) are atomic on
60 * all our platforms, but it also simplifies memory ordering concerns
61 * between the remote and local lsn. We use a lwlock instead of a spinlock
62 * so it's less harmful to hold the lock over a WAL write
63 * (cf. replorigin_advance).
64 *
65 * ---------------------------------------------------------------------------
66 */
67
68#include "postgres.h"
69
70#include <unistd.h>
71#include <sys/stat.h>
72
73#include "access/genam.h"
74#include "access/htup_details.h"
75#include "access/table.h"
76#include "access/xact.h"
77#include "access/xloginsert.h"
78#include "catalog/catalog.h"
79#include "catalog/indexing.h"
81#include "funcapi.h"
82#include "miscadmin.h"
83#include "nodes/execnodes.h"
84#include "pgstat.h"
85#include "replication/origin.h"
86#include "replication/slot.h"
88#include "storage/fd.h"
89#include "storage/ipc.h"
90#include "storage/lmgr.h"
91#include "utils/builtins.h"
92#include "utils/fmgroids.h"
93#include "utils/guc.h"
94#include "utils/pg_lsn.h"
95#include "utils/rel.h"
96#include "utils/snapmgr.h"
97#include "utils/syscache.h"
98
99/* paths for replication origin checkpoint files */
100#define PG_REPLORIGIN_CHECKPOINT_FILENAME PG_LOGICAL_DIR "/replorigin_checkpoint"
101#define PG_REPLORIGIN_CHECKPOINT_TMPFILE PG_REPLORIGIN_CHECKPOINT_FILENAME ".tmp"
102
103/* GUC variables */
105
106/*
107 * Replay progress of a single remote node.
108 */
109typedef struct ReplicationState
110{
111 /*
112 * Local identifier for the remote node.
113 */
115
116 /*
117 * Location of the latest commit from the remote side.
118 */
120
121 /*
122 * Remember the local lsn of the commit record so we can XLogFlush() to it
123 * during a checkpoint so we know the commit record actually is safe on
124 * disk.
125 */
127
128 /*
129 * PID of backend that's acquired slot, or 0 if none.
130 */
132
133 /*
134 * Condition variable that's signaled when acquired_by changes.
135 */
137
138 /*
139 * Lock protecting remote_lsn and local_lsn.
140 */
143
144/*
145 * On disk version of ReplicationState.
146 */
148{
152
153
155{
156 /* Tranche to use for per-origin LWLocks */
158 /* Array of length max_active_replication_origins */
161
162/* external variables */
166
167/*
168 * Base address into a shared memory array of replication states of size
169 * max_active_replication_origins.
170 */
172
173/*
174 * Actual shared memory block (replication_states[] is now part of this).
175 */
177
178/*
179 * We keep a pointer to this backend's ReplicationState to avoid having to
180 * search the replication_states array in replorigin_session_advance for each
181 * remote commit. (Ownership of a backend's own entry can only be changed by
182 * that backend.)
183 */
185
186/* Magic for on disk files. */
187#define REPLICATION_STATE_MAGIC ((uint32) 0x1257DADE)
188
189static void
190replorigin_check_prerequisites(bool check_origins, bool recoveryOK)
191{
192 if (check_origins && max_active_replication_origins == 0)
194 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
195 errmsg("cannot query or manipulate replication origin when \"max_active_replication_origins\" is 0")));
196
197 if (!recoveryOK && RecoveryInProgress())
199 (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
200 errmsg("cannot manipulate replication origins during recovery")));
201}
202
203
204/*
205 * IsReservedOriginName
206 * True iff name is either "none" or "any".
207 */
208static bool
210{
211 return ((pg_strcasecmp(name, LOGICALREP_ORIGIN_NONE) == 0) ||
212 (pg_strcasecmp(name, LOGICALREP_ORIGIN_ANY) == 0));
213}
214
215/* ---------------------------------------------------------------------------
216 * Functions for working with replication origins themselves.
217 * ---------------------------------------------------------------------------
218 */
219
220/*
221 * Check for a persistent replication origin identified by name.
222 *
223 * Returns InvalidOid if the node isn't known yet and missing_ok is true.
224 */
226replorigin_by_name(const char *roname, bool missing_ok)
227{
229 Oid roident = InvalidOid;
230 HeapTuple tuple;
231 Datum roname_d;
232
233 roname_d = CStringGetTextDatum(roname);
234
235 tuple = SearchSysCache1(REPLORIGNAME, roname_d);
236 if (HeapTupleIsValid(tuple))
237 {
239 roident = ident->roident;
240 ReleaseSysCache(tuple);
241 }
242 else if (!missing_ok)
244 (errcode(ERRCODE_UNDEFINED_OBJECT),
245 errmsg("replication origin \"%s\" does not exist",
246 roname)));
247
248 return roident;
249}
250
251/*
252 * Create a replication origin.
253 *
254 * Needs to be called in a transaction.
255 */
257replorigin_create(const char *roname)
258{
259 Oid roident;
260 HeapTuple tuple = NULL;
261 Relation rel;
262 Datum roname_d;
263 SnapshotData SnapshotDirty;
264 SysScanDesc scan;
266
267 /*
268 * To avoid needing a TOAST table for pg_replication_origin, we limit
269 * replication origin names to 512 bytes. This should be more than enough
270 * for all practical use.
271 */
272 if (strlen(roname) > MAX_RONAME_LEN)
274 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
275 errmsg("replication origin name is too long"),
276 errdetail("Replication origin names must be no longer than %d bytes.",
278
279 roname_d = CStringGetTextDatum(roname);
280
282
283 /*
284 * We need the numeric replication origin to be 16bit wide, so we cannot
285 * rely on the normal oid allocation. Instead we simply scan
286 * pg_replication_origin for the first unused id. That's not particularly
287 * efficient, but this should be a fairly infrequent operation - we can
288 * easily spend a bit more code on this when it turns out it needs to be
289 * faster.
290 *
291 * We handle concurrency by taking an exclusive lock (allowing reads!)
292 * over the table for the duration of the search. Because we use a "dirty
293 * snapshot" we can read rows that other in-progress sessions have
294 * written, even though they would be invisible with normal snapshots. Due
295 * to the exclusive lock there's no danger that new rows can appear while
296 * we're checking.
297 */
298 InitDirtySnapshot(SnapshotDirty);
299
300 rel = table_open(ReplicationOriginRelationId, ExclusiveLock);
301
302 /*
303 * We want to be able to access pg_replication_origin without setting up a
304 * snapshot. To make that safe, it needs to not have a TOAST table, since
305 * TOASTed data cannot be fetched without a snapshot. As of this writing,
306 * its only varlena column is roname, which we limit to 512 bytes to avoid
307 * needing out-of-line storage. If you add a TOAST table to this catalog,
308 * be sure to set up a snapshot everywhere it might be needed. For more
309 * information, see https://postgr.es/m/ZvMSUPOqUU-VNADN%40nathan.
310 */
311 Assert(!OidIsValid(rel->rd_rel->reltoastrelid));
312
313 for (roident = InvalidOid + 1; roident < PG_UINT16_MAX; roident++)
314 {
315 bool nulls[Natts_pg_replication_origin];
316 Datum values[Natts_pg_replication_origin];
317 bool collides;
318
320
322 Anum_pg_replication_origin_roident,
323 BTEqualStrategyNumber, F_OIDEQ,
324 ObjectIdGetDatum(roident));
325
326 scan = systable_beginscan(rel, ReplicationOriginIdentIndex,
327 true /* indexOK */ ,
328 &SnapshotDirty,
329 1, &key);
330
331 collides = HeapTupleIsValid(systable_getnext(scan));
332
333 systable_endscan(scan);
334
335 if (!collides)
336 {
337 /*
338 * Ok, found an unused roident, insert the new row and do a CCI,
339 * so our callers can look it up if they want to.
340 */
341 memset(&nulls, 0, sizeof(nulls));
342
343 values[Anum_pg_replication_origin_roident - 1] = ObjectIdGetDatum(roident);
344 values[Anum_pg_replication_origin_roname - 1] = roname_d;
345
346 tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
347 CatalogTupleInsert(rel, tuple);
349 break;
350 }
351 }
352
353 /* now release lock again, */
355
356 if (tuple == NULL)
358 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
359 errmsg("could not find free replication origin ID")));
360
361 heap_freetuple(tuple);
362 return roident;
363}
364
365/*
366 * Helper function to drop a replication origin.
367 */
368static void
370{
371 int i;
372
373 /*
374 * Clean up the slot state info, if there is any matching slot.
375 */
376restart:
377 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
378
379 for (i = 0; i < max_active_replication_origins; i++)
380 {
382
383 if (state->roident == roident)
384 {
385 /* found our slot, is it busy? */
386 if (state->acquired_by != 0)
387 {
389
390 if (nowait)
392 (errcode(ERRCODE_OBJECT_IN_USE),
393 errmsg("could not drop replication origin with ID %d, in use by PID %d",
394 state->roident,
395 state->acquired_by)));
396
397 /*
398 * We must wait and then retry. Since we don't know which CV
399 * to wait on until here, we can't readily use
400 * ConditionVariablePrepareToSleep (calling it here would be
401 * wrong, since we could miss the signal if we did so); just
402 * use ConditionVariableSleep directly.
403 */
404 cv = &state->origin_cv;
405
406 LWLockRelease(ReplicationOriginLock);
407
408 ConditionVariableSleep(cv, WAIT_EVENT_REPLICATION_ORIGIN_DROP);
409 goto restart;
410 }
411
412 /* first make a WAL log entry */
413 {
414 xl_replorigin_drop xlrec;
415
416 xlrec.node_id = roident;
418 XLogRegisterData(&xlrec, sizeof(xlrec));
419 XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP);
420 }
421
422 /* then clear the in-memory slot */
423 state->roident = InvalidRepOriginId;
424 state->remote_lsn = InvalidXLogRecPtr;
425 state->local_lsn = InvalidXLogRecPtr;
426 break;
427 }
428 }
429 LWLockRelease(ReplicationOriginLock);
431}
432
433/*
434 * Drop replication origin (by name).
435 *
436 * Needs to be called in a transaction.
437 */
438void
439replorigin_drop_by_name(const char *name, bool missing_ok, bool nowait)
440{
441 RepOriginId roident;
442 Relation rel;
443 HeapTuple tuple;
444
446
447 rel = table_open(ReplicationOriginRelationId, RowExclusiveLock);
448
449 roident = replorigin_by_name(name, missing_ok);
450
451 /* Lock the origin to prevent concurrent drops. */
452 LockSharedObject(ReplicationOriginRelationId, roident, 0,
454
455 tuple = SearchSysCache1(REPLORIGIDENT, ObjectIdGetDatum(roident));
456 if (!HeapTupleIsValid(tuple))
457 {
458 if (!missing_ok)
459 elog(ERROR, "cache lookup failed for replication origin with ID %d",
460 roident);
461
462 /*
463 * We don't need to retain the locks if the origin is already dropped.
464 */
465 UnlockSharedObject(ReplicationOriginRelationId, roident, 0,
468 return;
469 }
470
471 replorigin_state_clear(roident, nowait);
472
473 /*
474 * Now, we can delete the catalog entry.
475 */
476 CatalogTupleDelete(rel, &tuple->t_self);
477 ReleaseSysCache(tuple);
478
480
481 /* We keep the lock on pg_replication_origin until commit */
482 table_close(rel, NoLock);
483}
484
485/*
486 * Look up a replication origin via its oid and return the name.
487 *
488 * The external name is palloc'd in the calling context.
489 *
490 * Returns true if the origin is known, false otherwise.
491 */
492bool
493replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
494{
495 HeapTuple tuple;
497
498 Assert(OidIsValid((Oid) roident));
499 Assert(roident != InvalidRepOriginId);
500 Assert(roident != DoNotReplicateId);
501
502 tuple = SearchSysCache1(REPLORIGIDENT,
503 ObjectIdGetDatum((Oid) roident));
504
505 if (HeapTupleIsValid(tuple))
506 {
508 *roname = text_to_cstring(&ric->roname);
509 ReleaseSysCache(tuple);
510
511 return true;
512 }
513 else
514 {
515 *roname = NULL;
516
517 if (!missing_ok)
519 (errcode(ERRCODE_UNDEFINED_OBJECT),
520 errmsg("replication origin with ID %d does not exist",
521 roident)));
522
523 return false;
524 }
525}
526
527
528/* ---------------------------------------------------------------------------
529 * Functions for handling replication progress.
530 * ---------------------------------------------------------------------------
531 */
532
533Size
535{
536 Size size = 0;
537
539 return size;
540
541 size = add_size(size, offsetof(ReplicationStateCtl, states));
542
543 size = add_size(size,
545 return size;
546}
547
548void
550{
551 bool found;
552
554 return;
555
557 ShmemInitStruct("ReplicationOriginState",
559 &found);
561
562 if (!found)
563 {
564 int i;
565
567
569
570 for (i = 0; i < max_active_replication_origins; i++)
571 {
575 }
576 }
577}
578
579/* ---------------------------------------------------------------------------
580 * Perform a checkpoint of each replication origin's progress with respect to
581 * the replayed remote_lsn. Make sure that all transactions we refer to in the
582 * checkpoint (local_lsn) are actually on-disk. This might not yet be the case
583 * if the transactions were originally committed asynchronously.
584 *
585 * We store checkpoints in the following format:
586 * +-------+------------------------+------------------+-----+--------+
587 * | MAGIC | ReplicationStateOnDisk | struct Replic... | ... | CRC32C | EOF
588 * +-------+------------------------+------------------+-----+--------+
589 *
590 * So it's just the magic, followed by the statically sized
591 * ReplicationStateOnDisk structs. Note that the maximum number of
592 * ReplicationState is determined by max_active_replication_origins.
593 * ---------------------------------------------------------------------------
594 */
595void
597{
598 const char *tmppath = PG_REPLORIGIN_CHECKPOINT_TMPFILE;
599 const char *path = PG_REPLORIGIN_CHECKPOINT_FILENAME;
600 int tmpfd;
601 int i;
604
606 return;
607
609
610 /* make sure no old temp file is remaining */
611 if (unlink(tmppath) < 0 && errno != ENOENT)
614 errmsg("could not remove file \"%s\": %m",
615 tmppath)));
616
617 /*
618 * no other backend can perform this at the same time; only one checkpoint
619 * can happen at a time.
620 */
621 tmpfd = OpenTransientFile(tmppath,
622 O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
623 if (tmpfd < 0)
626 errmsg("could not create file \"%s\": %m",
627 tmppath)));
628
629 /* write magic */
630 errno = 0;
631 if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
632 {
633 /* if write didn't set errno, assume problem is no disk space */
634 if (errno == 0)
635 errno = ENOSPC;
638 errmsg("could not write to file \"%s\": %m",
639 tmppath)));
640 }
641 COMP_CRC32C(crc, &magic, sizeof(magic));
642
643 /* prevent concurrent creations/drops */
644 LWLockAcquire(ReplicationOriginLock, LW_SHARED);
645
646 /* write actual data */
647 for (i = 0; i < max_active_replication_origins; i++)
648 {
649 ReplicationStateOnDisk disk_state;
651 XLogRecPtr local_lsn;
652
653 if (curstate->roident == InvalidRepOriginId)
654 continue;
655
656 /* zero, to avoid uninitialized padding bytes */
657 memset(&disk_state, 0, sizeof(disk_state));
658
659 LWLockAcquire(&curstate->lock, LW_SHARED);
660
661 disk_state.roident = curstate->roident;
662
663 disk_state.remote_lsn = curstate->remote_lsn;
664 local_lsn = curstate->local_lsn;
665
666 LWLockRelease(&curstate->lock);
667
668 /* make sure we only write out a commit that's persistent */
669 XLogFlush(local_lsn);
670
671 errno = 0;
672 if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
673 sizeof(disk_state))
674 {
675 /* if write didn't set errno, assume problem is no disk space */
676 if (errno == 0)
677 errno = ENOSPC;
680 errmsg("could not write to file \"%s\": %m",
681 tmppath)));
682 }
683
684 COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
685 }
686
687 LWLockRelease(ReplicationOriginLock);
688
689 /* write out the CRC */
691 errno = 0;
692 if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
693 {
694 /* if write didn't set errno, assume problem is no disk space */
695 if (errno == 0)
696 errno = ENOSPC;
699 errmsg("could not write to file \"%s\": %m",
700 tmppath)));
701 }
702
703 if (CloseTransientFile(tmpfd) != 0)
706 errmsg("could not close file \"%s\": %m",
707 tmppath)));
708
709 /* fsync, rename to permanent file, fsync file and directory */
710 durable_rename(tmppath, path, PANIC);
711}
712
713/*
714 * Recover replication replay status from checkpoint data saved earlier by
715 * CheckPointReplicationOrigin.
716 *
717 * This only needs to be called at startup and *not* during every checkpoint
718 * read during recovery (e.g. in HS or PITR from a base backup) afterwards. All
719 * state thereafter can be recovered by looking at commit records.
720 */
721void
723{
724 const char *path = PG_REPLORIGIN_CHECKPOINT_FILENAME;
725 int fd;
726 int readBytes;
728 int last_state = 0;
729 pg_crc32c file_crc;
731
732 /* don't want to overwrite already existing state */
733#ifdef USE_ASSERT_CHECKING
734 static bool already_started = false;
735
736 Assert(!already_started);
737 already_started = true;
738#endif
739
741 return;
742
744
745 elog(DEBUG2, "starting up replication origin progress state");
746
747 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
748
749 /*
750 * might have had max_active_replication_origins == 0 last run, or we just
751 * brought up a standby.
752 */
753 if (fd < 0 && errno == ENOENT)
754 return;
755 else if (fd < 0)
758 errmsg("could not open file \"%s\": %m",
759 path)));
760
761 /* verify magic, that is written even if nothing was active */
762 readBytes = read(fd, &magic, sizeof(magic));
763 if (readBytes != sizeof(magic))
764 {
765 if (readBytes < 0)
768 errmsg("could not read file \"%s\": %m",
769 path)));
770 else
773 errmsg("could not read file \"%s\": read %d of %zu",
774 path, readBytes, sizeof(magic))));
775 }
776 COMP_CRC32C(crc, &magic, sizeof(magic));
777
778 if (magic != REPLICATION_STATE_MAGIC)
780 (errmsg("replication checkpoint has wrong magic %u instead of %u",
781 magic, REPLICATION_STATE_MAGIC)));
782
783 /* we can skip locking here, no other access is possible */
784
785 /* recover individual states, until there are no more to be found */
786 while (true)
787 {
788 ReplicationStateOnDisk disk_state;
789
790 readBytes = read(fd, &disk_state, sizeof(disk_state));
791
792 /* no further data */
793 if (readBytes == sizeof(crc))
794 {
795 /* not pretty, but simple ... */
796 file_crc = *(pg_crc32c *) &disk_state;
797 break;
798 }
799
800 if (readBytes < 0)
801 {
804 errmsg("could not read file \"%s\": %m",
805 path)));
806 }
807
808 if (readBytes != sizeof(disk_state))
809 {
812 errmsg("could not read file \"%s\": read %d of %zu",
813 path, readBytes, sizeof(disk_state))));
814 }
815
816 COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
817
818 if (last_state == max_active_replication_origins)
820 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
821 errmsg("could not find free replication state, increase \"max_active_replication_origins\"")));
822
823 /* copy data to shared memory */
824 replication_states[last_state].roident = disk_state.roident;
825 replication_states[last_state].remote_lsn = disk_state.remote_lsn;
826 last_state++;
827
828 ereport(LOG,
829 (errmsg("recovered replication state of node %d to %X/%X",
830 disk_state.roident,
831 LSN_FORMAT_ARGS(disk_state.remote_lsn))));
832 }
833
834 /* now check checksum */
836 if (file_crc != crc)
839 errmsg("replication slot checkpoint has wrong checksum %u, expected %u",
840 crc, file_crc)));
841
842 if (CloseTransientFile(fd) != 0)
845 errmsg("could not close file \"%s\": %m",
846 path)));
847}
848
849void
851{
852 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
853
854 switch (info)
855 {
857 {
858 xl_replorigin_set *xlrec =
860
862 xlrec->remote_lsn, record->EndRecPtr,
863 xlrec->force /* backward */ ,
864 false /* WAL log */ );
865 break;
866 }
868 {
869 xl_replorigin_drop *xlrec;
870 int i;
871
872 xlrec = (xl_replorigin_drop *) XLogRecGetData(record);
873
874 for (i = 0; i < max_active_replication_origins; i++)
875 {
877
878 /* found our slot */
879 if (state->roident == xlrec->node_id)
880 {
881 /* reset entry */
882 state->roident = InvalidRepOriginId;
883 state->remote_lsn = InvalidXLogRecPtr;
884 state->local_lsn = InvalidXLogRecPtr;
885 break;
886 }
887 }
888 break;
889 }
890 default:
891 elog(PANIC, "replorigin_redo: unknown op code %u", info);
892 }
893}
894
895
896/*
897 * Tell the replication origin progress machinery that a commit from 'node'
898 * that originated at the LSN remote_commit on the remote node was replayed
899 * successfully and that we don't need to do so again. In combination with
900 * setting up replorigin_session_origin_lsn and replorigin_session_origin
901 * that ensures we won't lose knowledge about that after a crash if the
902 * transaction had a persistent effect (think of asynchronous commits).
903 *
904 * local_commit needs to be a local LSN of the commit so that we can make sure
905 * upon a checkpoint that enough WAL has been persisted to disk.
906 *
907 * Needs to be called with a RowExclusiveLock on pg_replication_origin,
908 * unless running in recovery.
909 */
910void
912 XLogRecPtr remote_commit, XLogRecPtr local_commit,
913 bool go_backward, bool wal_log)
914{
915 int i;
916 ReplicationState *replication_state = NULL;
917 ReplicationState *free_state = NULL;
918
919 Assert(node != InvalidRepOriginId);
920
921 /* we don't track DoNotReplicateId */
922 if (node == DoNotReplicateId)
923 return;
924
925 /*
926 * XXX: For the case where this is called by WAL replay, it'd be more
927 * efficient to restore into a backend local hashtable and only dump into
928 * shmem after recovery is finished. Let's wait with implementing that
929 * till it's shown to be a measurable expense
930 */
931
932 /* Lock exclusively, as we may have to create a new table entry. */
933 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
934
935 /*
936 * Search for either an existing slot for the origin, or a free one we can
937 * use.
938 */
939 for (i = 0; i < max_active_replication_origins; i++)
940 {
942
943 /* remember where to insert if necessary */
944 if (curstate->roident == InvalidRepOriginId &&
945 free_state == NULL)
946 {
947 free_state = curstate;
948 continue;
949 }
950
951 /* not our slot */
952 if (curstate->roident != node)
953 {
954 continue;
955 }
956
957 /* ok, found slot */
958 replication_state = curstate;
959
960 LWLockAcquire(&replication_state->lock, LW_EXCLUSIVE);
961
962 /* Make sure it's not used by somebody else */
963 if (replication_state->acquired_by != 0)
964 {
966 (errcode(ERRCODE_OBJECT_IN_USE),
967 errmsg("replication origin with ID %d is already active for PID %d",
968 replication_state->roident,
969 replication_state->acquired_by)));
970 }
971
972 break;
973 }
974
975 if (replication_state == NULL && free_state == NULL)
977 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
978 errmsg("could not find free replication state slot for replication origin with ID %d",
979 node),
980 errhint("Increase \"max_active_replication_origins\" and try again.")));
981
982 if (replication_state == NULL)
983 {
984 /* initialize new slot */
985 LWLockAcquire(&free_state->lock, LW_EXCLUSIVE);
986 replication_state = free_state;
987 Assert(replication_state->remote_lsn == InvalidXLogRecPtr);
988 Assert(replication_state->local_lsn == InvalidXLogRecPtr);
989 replication_state->roident = node;
990 }
991
992 Assert(replication_state->roident != InvalidRepOriginId);
993
994 /*
995 * If somebody "forcefully" sets this slot, WAL log it, so it's durable
996 * and the standby gets the message. Primarily this will be called during
997 * WAL replay (of commit records) where no WAL logging is necessary.
998 */
999 if (wal_log)
1000 {
1001 xl_replorigin_set xlrec;
1002
1003 xlrec.remote_lsn = remote_commit;
1004 xlrec.node_id = node;
1005 xlrec.force = go_backward;
1006
1008 XLogRegisterData(&xlrec, sizeof(xlrec));
1009
1010 XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET);
1011 }
1012
1013 /*
1014 * Due to - harmless - race conditions during a checkpoint we could see
1015 * values here that are older than the ones we already have in memory. We
1016 * could also see older values for prepared transactions when the prepare
1017 * is sent at a later point of time along with commit prepared and there
1018 * are other transactions commits between prepare and commit prepared. See
1019 * ReorderBufferFinishPrepared. Don't overwrite those.
1020 */
1021 if (go_backward || replication_state->remote_lsn < remote_commit)
1022 replication_state->remote_lsn = remote_commit;
1023 if (local_commit != InvalidXLogRecPtr &&
1024 (go_backward || replication_state->local_lsn < local_commit))
1025 replication_state->local_lsn = local_commit;
1026 LWLockRelease(&replication_state->lock);
1027
1028 /*
1029 * Release *after* changing the LSNs, slot isn't acquired and thus could
1030 * otherwise be dropped anytime.
1031 */
1032 LWLockRelease(ReplicationOriginLock);
1033}
1034
1035
1038{
1039 int i;
1040 XLogRecPtr local_lsn = InvalidXLogRecPtr;
1041 XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1042
1043 /* prevent slots from being concurrently dropped */
1044 LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1045
1046 for (i = 0; i < max_active_replication_origins; i++)
1047 {
1049
1051
1052 if (state->roident == node)
1053 {
1054 LWLockAcquire(&state->lock, LW_SHARED);
1055
1056 remote_lsn = state->remote_lsn;
1057 local_lsn = state->local_lsn;
1058
1059 LWLockRelease(&state->lock);
1060
1061 break;
1062 }
1063 }
1064
1065 LWLockRelease(ReplicationOriginLock);
1066
1067 if (flush && local_lsn != InvalidXLogRecPtr)
1068 XLogFlush(local_lsn);
1069
1070 return remote_lsn;
1071}
1072
1073/*
1074 * Tear down a (possibly) configured session replication origin during process
1075 * exit.
1076 */
1077static void
1079{
1080 ConditionVariable *cv = NULL;
1081
1082 if (session_replication_state == NULL)
1083 return;
1084
1085 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1086
1088 {
1090
1093 }
1094
1095 LWLockRelease(ReplicationOriginLock);
1096
1097 if (cv)
1099}
1100
1101/*
1102 * Set up a replication origin in the shared memory struct if it doesn't
1103 * already exist and cache access to the specific ReplicationState so the
1104 * array doesn't have to be searched when calling
1105 * replorigin_session_advance().
1106 *
1107 * Normally only one such cached origin can exist per process so the cached
1108 * value can only be set again after the previous value is torn down with
1109 * replorigin_session_reset(). For this normal case pass acquired_by = 0
1110 * (meaning the slot is not allowed to be already acquired by another process).
1111 *
1112 * However, sometimes multiple processes can safely re-use the same origin slot
1113 * (for example, multiple parallel apply processes can safely use the same
1114 * origin, provided they maintain commit order by allowing only one process to
1115 * commit at a time). For this case the first process must pass acquired_by =
1116 * 0, and then the other processes sharing that same origin can pass
1117 * acquired_by = PID of the first process.
1118 */
1119void
1121{
1122 static bool registered_cleanup;
1123 int i;
1124 int free_slot = -1;
1125
1126 if (!registered_cleanup)
1127 {
1129 registered_cleanup = true;
1130 }
1131
1133
1134 if (session_replication_state != NULL)
1135 ereport(ERROR,
1136 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1137 errmsg("cannot setup replication origin when one is already setup")));
1138
1139 /* Lock exclusively, as we may have to create a new table entry. */
1140 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1141
1142 /*
1143 * Search for either an existing slot for the origin, or a free one we can
1144 * use.
1145 */
1146 for (i = 0; i < max_active_replication_origins; i++)
1147 {
1149
1150 /* remember where to insert if necessary */
1151 if (curstate->roident == InvalidRepOriginId &&
1152 free_slot == -1)
1153 {
1154 free_slot = i;
1155 continue;
1156 }
1157
1158 /* not our slot */
1159 if (curstate->roident != node)
1160 continue;
1161
1162 else if (curstate->acquired_by != 0 && acquired_by == 0)
1163 {
1164 ereport(ERROR,
1165 (errcode(ERRCODE_OBJECT_IN_USE),
1166 errmsg("replication origin with ID %d is already active for PID %d",
1167 curstate->roident, curstate->acquired_by)));
1168 }
1169
1170 /* ok, found slot */
1171 session_replication_state = curstate;
1172 break;
1173 }
1174
1175
1176 if (session_replication_state == NULL && free_slot == -1)
1177 ereport(ERROR,
1178 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
1179 errmsg("could not find free replication state slot for replication origin with ID %d",
1180 node),
1181 errhint("Increase \"max_active_replication_origins\" and try again.")));
1182 else if (session_replication_state == NULL)
1183 {
1184 /* initialize new slot */
1189 }
1190
1191
1193
1194 if (acquired_by == 0)
1196 else if (session_replication_state->acquired_by != acquired_by)
1197 elog(ERROR, "could not find replication state slot for replication origin with OID %u which was acquired by %d",
1198 node, acquired_by);
1199
1200 LWLockRelease(ReplicationOriginLock);
1201
1202 /* probably this one is pointless */
1204}
1205
1206/*
1207 * Reset replay state previously set up in this session.
1208 *
1209 * This function may only be called if an origin was set up with
1210 * replorigin_session_setup().
1211 */
1212void
1214{
1216
1218
1219 if (session_replication_state == NULL)
1220 ereport(ERROR,
1221 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1222 errmsg("no replication origin is configured")));
1223
1224 LWLockAcquire(ReplicationOriginLock, LW_EXCLUSIVE);
1225
1229
1230 LWLockRelease(ReplicationOriginLock);
1231
1233}
1234
1235/*
1236 * Do the same work replorigin_advance() does, just on the session's
1237 * configured origin.
1238 *
1239 * This is noticeably cheaper than using replorigin_advance().
 *
 * Progress only moves forward here: local_lsn and remote_lsn of the
 * session's state are each overwritten only when the passed-in value is
 * greater than the one currently stored.
1240 */
1241void
1243{
1246
 /* Never move either LSN backwards. */
1248 if (session_replication_state->local_lsn < local_commit)
1249 session_replication_state->local_lsn = local_commit;
1250 if (session_replication_state->remote_lsn < remote_commit)
1251 session_replication_state->remote_lsn = remote_commit;
1253}
1254
1255/*
1256 * Ask the machinery about the point up to which we successfully replayed
1257 * changes from an already set up replication origin.
 *
 * If 'flush' is true and the local LSN is valid, WAL is flushed up to that
 * local LSN before returning.
 *
 * Returns the remote LSN.
1258 */
1261{
1262 XLogRecPtr remote_lsn;
1263 XLogRecPtr local_lsn;
1264
1266
1271
 /* An invalid local LSN means there is nothing to flush. */
1272 if (flush && local_lsn != InvalidXLogRecPtr)
1273 XLogFlush(local_lsn);
1274
1275 return remote_lsn;
1276}
1277
1278
1279
1280/* ---------------------------------------------------------------------------
1281 * SQL functions for working with replication origin.
1282 *
1283 * These mostly should be fairly short wrappers around more generic functions.
1284 * ---------------------------------------------------------------------------
1285 */
1286
1287/*
1288 * Create replication origin for the passed in name, and return the assigned
1289 * oid.
 *
 * A reserved name is rejected with ERROR (ERRCODE_RESERVED_NAME).
1290 */
1291Datum
1293{
1294 char *name;
1295 RepOriginId roident;
1296
1297 replorigin_check_prerequisites(false, false);
1298
1300
1301 /*
1302 * Replication origins "any" and "none" are reserved for system options.
1303 * The origins "pg_xxx" are reserved for internal use.
1304 */
1306 ereport(ERROR,
1307 (errcode(ERRCODE_RESERVED_NAME),
1308 errmsg("replication origin name \"%s\" is reserved",
1309 name),
1310 errdetail("Origin names \"%s\", \"%s\", and names starting with \"pg_\" are reserved.",
1311 LOGICALREP_ORIGIN_ANY, LOGICALREP_ORIGIN_NONE)));
1312
1313 /*
1314 * If built with appropriate switch, whine when regression-testing
1315 * conventions for replication origin names are violated.
1316 */
1317#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
1318 if (strncmp(name, "regress_", 8) != 0)
1319 elog(WARNING, "replication origins created by regression test cases should have names starting with \"regress_\"");
1320#endif
1321
1322 roident = replorigin_create(name);
1323
 /* The name string is no longer needed once the origin exists. */
1324 pfree(name);
1325
1326 PG_RETURN_OID(roident);
1327}
1328
1329/*
1330 * Drop the replication origin with the given name.
1331 */
1332Datum
1334{
1335 char *name;
1336
1337 replorigin_check_prerequisites(false, false);
1338
1340
 /* missing_ok = false, nowait = true */
1341 replorigin_drop_by_name(name, false, true);
1342
1343 pfree(name);
1344
1346}
1347
1348/*
1349 * Return oid of a replication origin, looked up by name.
 *
 * The lookup passes missing_ok = true; the oid is returned only when
 * OidIsValid() accepts it.
 * NOTE(review): presumably an unknown name yields an invalid oid instead of
 * an error -- confirm against replorigin_by_name().
1350 */
1351Datum
1353{
1354 char *name;
1355 RepOriginId roident;
1356
1357 replorigin_check_prerequisites(false, false);
1358
 /* missing_ok = true */
1360 roident = replorigin_by_name(name, true);
1361
1362 pfree(name);
1363
1364 if (OidIsValid(roident))
1365 PG_RETURN_OID(roident);
1367}
1368
1369/*
1370 * Set up a replication origin for this session.
 *
 * The origin is looked up by name with missing_ok = false and then handed
 * to replorigin_session_setup() with acquired_by = 0.
1371 */
1372Datum
1374{
1375 char *name;
1376 RepOriginId origin;
1377
1378 replorigin_check_prerequisites(true, false);
1379
 /* missing_ok = false */
1381 origin = replorigin_by_name(name, false);
 /* acquired_by = 0 */
1382 replorigin_session_setup(origin, 0);
1383
1385
1386 pfree(name);
1387
1389}
1390
1391/*
1392 * Reset the origin previously set up in this session.
 *
 * Runs replorigin_check_prerequisites(true, false) first, i.e. with
 * check_origins = true and recoveryOK = false.
1393 */
1394Datum
1396{
1397 replorigin_check_prerequisites(true, false);
1398
1400
1404
1406}
1407
1408/*
1409 * Report whether a replication origin has been set up for this session.
 *
 * Runs replorigin_check_prerequisites(false, false) first, i.e. with
 * check_origins = false and recoveryOK = false.
1410 */
1411Datum
1413{
1414 replorigin_check_prerequisites(false, false);
1415
1417}
1418
1419
1420/*
1421 * Return the replication progress for the origin set up in the current
 * session.
1422 *
1423 * If 'flush' is set to true it is ensured that the returned value corresponds
1424 * to a local transaction that has been flushed. This is useful if asynchronous
1425 * commits are used when replaying replicated transactions.
 *
 * Raises ERROR (ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE) when no origin is
 * configured for the session.
1426 */
1427Datum
1429{
1430 XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1431 bool flush = PG_GETARG_BOOL(0);
1432
1433 replorigin_check_prerequisites(true, false);
1434
 /* Progress can only be reported for a configured session origin. */
1435 if (session_replication_state == NULL)
1436 ereport(ERROR,
1437 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1438 errmsg("no replication origin is configured")));
1439
1440 remote_lsn = replorigin_session_get_progress(flush);
1441
1442 if (remote_lsn == InvalidXLogRecPtr)
1444
1445 PG_RETURN_LSN(remote_lsn);
1446}
1447
/*
 * SQL-callable transaction-level setup; takes an LSN as its first argument.
 *
 * Raises ERROR (ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE) when no
 * replication origin is configured for the session.
 *
 * NOTE(review): presumably records the given LSN (and a timestamp) as the
 * origin position of the current transaction -- confirm.
 */
1448Datum
1450{
1451 XLogRecPtr location = PG_GETARG_LSN(0);
1452
1453 replorigin_check_prerequisites(true, false);
1454
 /* A session origin must already be configured. */
1455 if (session_replication_state == NULL)
1456 ereport(ERROR,
1457 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1458 errmsg("no replication origin is configured")));
1459
1462
1464}
1465
/*
 * SQL-callable counterpart to pg_replication_origin_xact_setup().
 *
 * Runs replorigin_check_prerequisites(true, false) first, i.e. with
 * check_origins = true and recoveryOK = false.
 *
 * NOTE(review): presumably clears the per-transaction origin LSN and
 * timestamp -- confirm.
 */
1466Datum
1468{
1469 replorigin_check_prerequisites(true, false);
1470
1473
1475}
1476
1477
/*
 * SQL-callable: set the replication progress of a named origin to the LSN
 * passed as the second argument.
 *
 * The origin is looked up by name with missing_ok = false.  The progress is
 * then set through replorigin_advance() with go_backward = true and
 * wal_log = true.  RowExclusiveLock on ReplicationOriginRelationId is held
 * around the lookup and the update.
 */
1478Datum
1480{
1482 XLogRecPtr remote_commit = PG_GETARG_LSN(1);
1483 RepOriginId node;
1484
1485 replorigin_check_prerequisites(true, false);
1486
1487 /* lock to prevent the replication origin from vanishing */
1488 LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1489
 /* missing_ok = false */
1490 node = replorigin_by_name(text_to_cstring(name), false);
1491
1492 /*
1493 * Can't sensibly pass a local commit to be flushed at checkpoint - this
1494 * xact hasn't committed yet. This is why this function should be used to
1495 * set up the initial replication state, but not for replay.
1496 */
1497 replorigin_advance(node, remote_commit, InvalidXLogRecPtr,
1498 true /* go backward */ , true /* WAL log */ );
1499
1500 UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1501
1503}
1504
1505
1506/*
1507 * Return the replication progress for an individual replication origin.
1508 *
1509 * If 'flush' is set to true it is ensured that the returned value corresponds
1510 * to a local transaction that has been flushed. This is useful if asynchronous
1511 * commits are used when replaying replicated transactions.
 *
 * 'flush' is the second SQL argument.  The origin is looked up by name with
 * missing_ok = false.
1512 */
1513Datum
1515{
1516 char *name;
1517 bool flush;
1518 RepOriginId roident;
1519 XLogRecPtr remote_lsn = InvalidXLogRecPtr;
1520
1522
1524 flush = PG_GETARG_BOOL(1);
1525
 /* missing_ok = false; the Assert below expects a valid id back */
1526 roident = replorigin_by_name(name, false);
1527 Assert(OidIsValid(roident));
1528
1529 remote_lsn = replorigin_get_progress(roident, flush);
1530
1531 if (remote_lsn == InvalidXLogRecPtr)
1533
1534 PG_RETURN_LSN(remote_lsn);
1535}
1536
1537
/*
 * Set-returning function that emits one row per in-use replication state
 * slot.
 *
 * Each row holds four columns: the origin id, the origin name (NULL when
 * the name lookup fails), the remote LSN and the local LSN.  Rows are put
 * into the materialized tuplestore set up by InitMaterializedSRF().
 */
1538Datum
1540{
1541 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1542 int i;
1544
1545 /* we want to return 0 rows if slot is set to zero */
1546 replorigin_check_prerequisites(false, true);
1547
1548 InitMaterializedSRF(fcinfo, 0);
1549
1550 /* prevent slots from being concurrently dropped */
1551 LWLockAcquire(ReplicationOriginLock, LW_SHARED);
1552
1553 /*
1554 * Iterate through all possible replication_states, display if they are
1555 * filled. ReplicationOriginLock is held in shared mode for the whole
1556 * scan, and each state's own lock is taken in shared mode while its
 * LSNs are read.
1557 */
1558 for (i = 0; i < max_active_replication_origins; i++)
1559 {
1563 char *roname;
1564
1566
1567 /* unused slot, nothing to display */
1568 if (state->roident == InvalidRepOriginId)
1569 continue;
1570
 /* Start with every column NULL; filled columns are flipped below. */
1571 memset(values, 0, sizeof(values));
1572 memset(nulls, 1, sizeof(nulls));
1573
1574 values[0] = ObjectIdGetDatum(state->roident);
1575 nulls[0] = false;
1576
1577 /*
1578 * We're not preventing the origin from being dropped concurrently, so
1579 * silently accept that it might be gone.
1580 */
1581 if (replorigin_by_oid(state->roident, true,
1582 &roname))
1583 {
1584 values[1] = CStringGetTextDatum(roname);
1585 nulls[1] = false;
1586 }
1587
 /* Read both LSNs under the per-state lock. */
1588 LWLockAcquire(&state->lock, LW_SHARED);
1589
1590 values[2] = LSNGetDatum(state->remote_lsn);
1591 nulls[2] = false;
1592
1593 values[3] = LSNGetDatum(state->local_lsn);
1594 nulls[3] = false;
1595
1596 LWLockRelease(&state->lock);
1597
1598 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1599 values, nulls);
1600 }
1601
1602 LWLockRelease(ReplicationOriginLock);
1603
1604#undef REPLICATION_ORIGIN_PROGRESS_COLS
1605
1606 return (Datum) 0;
1607}
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define CStringGetTextDatum(s)
Definition: builtins.h:97
uint8_t uint8
Definition: c.h:500
#define PG_BINARY
Definition: c.h:1244
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:434
uint32_t uint32
Definition: c.h:502
#define PG_UINT16_MAX
Definition: c.h:558
#define MemSet(start, val, len)
Definition: c.h:991
#define OidIsValid(objectId)
Definition: c.h:746
size_t Size
Definition: c.h:576
bool IsReservedName(const char *name)
Definition: catalog.c:278
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
int64 TimestampTz
Definition: timestamp.h:39
int errcode_for_file_access(void)
Definition: elog.c:877
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define LOG
Definition: elog.h:31
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define PANIC
Definition: elog.h:42
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int CloseTransientFile(int fd)
Definition: fd.c:2871
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_OID(x)
Definition: fmgr.h:360
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:603
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:514
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:388
int MyProcPid
Definition: globals.c:48
Assert(PointerIsAligned(start, uint64))
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1435
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
#define ident
Definition: indent_codes.h:47
void CatalogTupleInsert(Relation heapRel, HeapTuple tup)
Definition: indexing.c:233
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:365
#define write(a, b, c)
Definition: win32.h:14
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
int i
Definition: isn.c:77
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1082
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:229
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:107
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1142
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define ExclusiveLock
Definition: lockdefs.h:42
#define RowExclusiveLock
Definition: lockdefs.h:38
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:721
@ LWTRANCHE_REPLICATION_ORIGIN_STATE
Definition: lwlock.h:192
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void pfree(void *pointer)
Definition: mcxt.c:2152
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
TimestampTz replorigin_session_origin_timestamp
Definition: origin.c:165
static ReplicationStateCtl * replication_states_ctl
Definition: origin.c:176
RepOriginId replorigin_by_name(const char *roname, bool missing_ok)
Definition: origin.c:226
Size ReplicationOriginShmemSize(void)
Definition: origin.c:534
RepOriginId replorigin_create(const char *roname)
Definition: origin.c:257
Datum pg_replication_origin_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1514
void replorigin_session_reset(void)
Definition: origin.c:1213
struct ReplicationState ReplicationState
static bool IsReservedOriginName(const char *name)
Definition: origin.c:209
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition: origin.c:1242
bool replorigin_by_oid(RepOriginId roident, bool missing_ok, char **roname)
Definition: origin.c:493
int max_active_replication_origins
Definition: origin.c:104
Datum pg_replication_origin_advance(PG_FUNCTION_ARGS)
Definition: origin.c:1479
XLogRecPtr replorigin_get_progress(RepOriginId node, bool flush)
Definition: origin.c:1037
#define PG_REPLORIGIN_CHECKPOINT_TMPFILE
Definition: origin.c:101
Datum pg_replication_origin_session_progress(PG_FUNCTION_ARGS)
Definition: origin.c:1428
static ReplicationState * replication_states
Definition: origin.c:171
#define PG_REPLORIGIN_CHECKPOINT_FILENAME
Definition: origin.c:100
Datum pg_replication_origin_session_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1395
Datum pg_replication_origin_xact_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1449
Datum pg_replication_origin_session_is_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1412
Datum pg_replication_origin_oid(PG_FUNCTION_ARGS)
Definition: origin.c:1352
Datum pg_replication_origin_session_setup(PG_FUNCTION_ARGS)
Definition: origin.c:1373
static void ReplicationOriginExitCleanup(int code, Datum arg)
Definition: origin.c:1078
void StartupReplicationOrigin(void)
Definition: origin.c:722
void replorigin_drop_by_name(const char *name, bool missing_ok, bool nowait)
Definition: origin.c:439
RepOriginId replorigin_session_origin
Definition: origin.c:163
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition: origin.c:911
static void replorigin_state_clear(RepOriginId roident, bool nowait)
Definition: origin.c:369
void replorigin_session_setup(RepOriginId node, int acquired_by)
Definition: origin.c:1120
void CheckPointReplicationOrigin(void)
Definition: origin.c:596
static void replorigin_check_prerequisites(bool check_origins, bool recoveryOK)
Definition: origin.c:190
static ReplicationState * session_replication_state
Definition: origin.c:184
Datum pg_replication_origin_drop(PG_FUNCTION_ARGS)
Definition: origin.c:1333
#define REPLICATION_ORIGIN_PROGRESS_COLS
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1260
void ReplicationOriginShmemInit(void)
Definition: origin.c:549
Datum pg_show_replication_origin_status(PG_FUNCTION_ARGS)
Definition: origin.c:1539
#define REPLICATION_STATE_MAGIC
Definition: origin.c:187
XLogRecPtr replorigin_session_origin_lsn
Definition: origin.c:164
Datum pg_replication_origin_create(PG_FUNCTION_ARGS)
Definition: origin.c:1292
Datum pg_replication_origin_xact_reset(PG_FUNCTION_ARGS)
Definition: origin.c:1467
void replorigin_redo(XLogReaderState *record)
Definition: origin.c:850
struct ReplicationStateCtl ReplicationStateCtl
struct ReplicationStateOnDisk ReplicationStateOnDisk
#define DoNotReplicateId
Definition: origin.h:34
#define InvalidRepOriginId
Definition: origin.h:33
#define XLOG_REPLORIGIN_DROP
Definition: origin.h:31
#define MAX_RONAME_LEN
Definition: origin.h:41
#define XLOG_REPLORIGIN_SET
Definition: origin.h:30
void * arg
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
return crc
#define PG_GETARG_LSN(n)
Definition: pg_lsn.h:33
static Datum LSNGetDatum(XLogRecPtr X)
Definition: pg_lsn.h:28
#define PG_RETURN_LSN(x)
Definition: pg_lsn.h:34
FormData_pg_replication_origin * Form_pg_replication_origin
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
uintptr_t Datum
Definition: postgres.h:69
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
#define InvalidOid
Definition: postgres_ext.h:35
unsigned int Oid
Definition: postgres_ext.h:30
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define RelationGetDescr(relation)
Definition: rel.h:542
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define InitDirtySnapshot(snapshotdata)
Definition: snapmgr.h:42
#define BTEqualStrategyNumber
Definition: stratnum.h:31
ItemPointerData t_self
Definition: htup.h:65
Definition: lwlock.h:42
Form_pg_class rd_rel
Definition: rel.h:111
ReplicationState states[FLEXIBLE_ARRAY_MEMBER]
Definition: origin.c:159
XLogRecPtr remote_lsn
Definition: origin.c:150
RepOriginId roident
Definition: origin.c:149
XLogRecPtr remote_lsn
Definition: origin.c:119
XLogRecPtr local_lsn
Definition: origin.c:126
ConditionVariable origin_cv
Definition: origin.c:136
RepOriginId roident
Definition: origin.c:114
LWLock lock
Definition: origin.c:141
TupleDesc setDesc
Definition: execnodes.h:359
Tuplestorestate * setResult
Definition: execnodes.h:358
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
Definition: regguts.h:323
Definition: c.h:658
RepOriginId node_id
Definition: origin.h:27
RepOriginId node_id
Definition: origin.h:21
XLogRecPtr remote_lsn
Definition: origin.h:20
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:221
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
#define PG_GETARG_TIMESTAMPTZ(n)
Definition: timestamp.h:64
char * text_to_cstring(const text *t)
Definition: varlena.c:225
const char * name
bool IsTransactionState(void)
Definition: xact.c:387
void CommandCounterIncrement(void)
Definition: xact.c:1100
bool RecoveryInProgress(void)
Definition: xlog.c:6522
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2923
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint16 RepOriginId
Definition: xlogdefs.h:65
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415