Skip to content

Commit 5513c09

Browse files
committed
Fix data loss in wal_level=minimal crash recovery of CREATE TABLESPACE.
If the system crashed between CREATE TABLESPACE and the next checkpoint, the result could be some files in the tablespace unexpectedly containing no rows. Affected files would be those for which the system did not write WAL; see the wal_skip_threshold documentation. Before v13, a different set of conditions governed the writing of WAL; see v12's <sect2 id="populate-pitr">. (The v12 conditions were broader in some ways and narrower in others.) Users may want to audit non-default tablespaces for unexpected short files. The bug could have truncated an index without affecting the associated table, and reindexing the index would fix that particular problem.

This fixes the bug by making create_tablespace_directories() more like TablespaceCreateDbspace(). create_tablespace_directories() was recursively removing tablespace contents, reasoning that WAL redo would recreate everything removed that way. That assumption holds for other wal_level values. Under wal_level=minimal, the old approach could delete files for which no other copy existed. Back-patch to 9.6 (all supported versions).

Reviewed by Robert Haas and Prabhat Sahu. Reported by Robert Haas.

Discussion: https://postgr.es/m/CA+TgmoaLO9ncuwvr2nN-J4VEP5XyAcy=zKiHxQzBbFRxxGxm0w@mail.gmail.com
1 parent e84d481 commit 5513c09

File tree

2 files changed

+46
-27
lines changed

2 files changed

+46
-27
lines changed

src/backend/commands/tablespace.c

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -616,40 +616,36 @@ create_tablespace_directories(const char *location, const Oid tablespaceoid)
616616
location)));
617617
}
618618

619-
if (InRecovery)
620-
{
621-
/*
622-
* Our theory for replaying a CREATE is to forcibly drop the target
623-
* subdirectory if present, and then recreate it. This may be more
624-
* work than needed, but it is simple to implement.
625-
*/
626-
if (stat(location_with_version_dir, &st) == 0 && S_ISDIR(st.st_mode))
627-
{
628-
if (!rmtree(location_with_version_dir, true))
629-
/* If this failed, MakePGDirectory() below is going to error. */
630-
ereport(WARNING,
631-
(errmsg("some useless files may be left behind in old database directory \"%s\"",
632-
location_with_version_dir)));
633-
}
634-
}
635-
636619
/*
637620
* The creation of the version directory prevents more than one tablespace
638-
* in a single location.
621+
* in a single location. This imitates TablespaceCreateDbspace(), but it
622+
* ignores concurrency and missing parent directories. The chmod() would
623+
* have failed in the absence of a parent. pg_tablespace_spcname_index
624+
* prevents concurrency.
639625
*/
640-
if (MakePGDirectory(location_with_version_dir) < 0)
626+
if (stat(location_with_version_dir, &st) < 0)
641627
{
642-
if (errno == EEXIST)
628+
if (errno != ENOENT)
643629
ereport(ERROR,
644-
(errcode(ERRCODE_OBJECT_IN_USE),
645-
errmsg("directory \"%s\" already in use as a tablespace",
630+
(errcode_for_file_access(),
631+
errmsg("could not stat directory \"%s\": %m",
646632
location_with_version_dir)));
647-
else
633+
else if (MakePGDirectory(location_with_version_dir) < 0)
648634
ereport(ERROR,
649635
(errcode_for_file_access(),
650636
errmsg("could not create directory \"%s\": %m",
651637
location_with_version_dir)));
652638
}
639+
else if (!S_ISDIR(st.st_mode))
640+
ereport(ERROR,
641+
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
642+
errmsg("\"%s\" exists but is not a directory",
643+
location_with_version_dir)));
644+
else if (!InRecovery)
645+
ereport(ERROR,
646+
(errcode(ERRCODE_OBJECT_IN_USE),
647+
errmsg("directory \"%s\" already in use as a tablespace",
648+
location_with_version_dir)));
653649

654650
/*
655651
* In recovery, remove old symlink, in case it points to the wrong place.

src/test/recovery/t/018_wal_optimize.pl

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
use PostgresNode;
1616
use TestLib;
17-
use Test::More tests => 34;
17+
use Test::More tests => 38;
1818

1919
sub check_orphan_relfilenodes
2020
{
@@ -59,8 +59,31 @@ sub run_wal_optimize
5959
my $tablespace_dir = $node->basedir . '/tablespace_other';
6060
mkdir($tablespace_dir);
6161
$tablespace_dir = TestLib::perl2host($tablespace_dir);
62-
$node->safe_psql('postgres',
63-
"CREATE TABLESPACE other LOCATION '$tablespace_dir';");
62+
my $result;
63+
64+
# Test redo of CREATE TABLESPACE.
65+
$node->safe_psql(
66+
'postgres', "
67+
CREATE TABLE moved (id int);
68+
INSERT INTO moved VALUES (1);
69+
CREATE TABLESPACE other LOCATION '$tablespace_dir';
70+
BEGIN;
71+
ALTER TABLE moved SET TABLESPACE other;
72+
CREATE TABLE originated (id int);
73+
INSERT INTO originated VALUES (1);
74+
CREATE UNIQUE INDEX ON originated(id) TABLESPACE other;
75+
COMMIT;");
76+
$node->stop('immediate');
77+
$node->start;
78+
$result = $node->safe_psql('postgres', "SELECT count(*) FROM moved;");
79+
is($result, qq(1), "wal_level = $wal_level, CREATE+SET TABLESPACE");
80+
$result = $node->safe_psql(
81+
'postgres', "
82+
INSERT INTO originated VALUES (1) ON CONFLICT (id)
83+
DO UPDATE set id = originated.id + 1
84+
RETURNING id;");
85+
is($result, qq(2),
86+
"wal_level = $wal_level, CREATE TABLESPACE, CREATE INDEX");
6487

6588
# Test direct truncation optimization. No tuples.
6689
$node->safe_psql(
@@ -71,7 +94,7 @@ sub run_wal_optimize
7194
COMMIT;");
7295
$node->stop('immediate');
7396
$node->start;
74-
my $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
97+
$result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
7598
is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table");
7699

77100
# Test truncation with inserted tuples within the same transaction.

0 commit comments

Comments
 (0)