Skip to content

Commit d7598ae

Browse files
committed
Close un-owned SMgrRelations at transaction end.
If an SMgrRelation is not "owned" by a relcache entry, don't allow it to live past transaction end. This design allows the same SMgrRelation to be used for blind writes of multiple blocks during a transaction, but ensures that we don't hold onto such an SMgrRelation indefinitely. Because an SMgrRelation typically corresponds to open file descriptors at the fd.c level, leaving it open when there's no corresponding relcache entry can mean that we prevent the kernel from reclaiming deleted disk space. (While CacheInvalidateSmgr messages usually fix that, there are cases where they're not issued, such as DROP DATABASE. We might want to add some more sinval messaging for that, but I'd be inclined to keep this type of logic anyway, since allowing VFDs to accumulate indefinitely for blind-written relations doesn't seem like a good idea.) This code replaces a previous attempt towards the same goal that proved to be unreliable. Back-patch to 9.1 where the previous patch was added.
1 parent a1f064f commit d7598ae

File tree

6 files changed

+92
-3
lines changed

6 files changed

+92
-3
lines changed

src/backend/access/transam/xact.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,7 +1969,7 @@ CommitTransaction(void)
19691969
AtEOXact_SPI(true);
19701970
AtEOXact_on_commit_actions(true);
19711971
AtEOXact_Namespace(true);
1972-
/* smgrcommit already done */
1972+
AtEOXact_SMgr();
19731973
AtEOXact_Files();
19741974
AtEOXact_ComboCid();
19751975
AtEOXact_HashTables(true);
@@ -2222,7 +2222,7 @@ PrepareTransaction(void)
22222222
AtEOXact_SPI(true);
22232223
AtEOXact_on_commit_actions(true);
22242224
AtEOXact_Namespace(true);
2225-
/* smgrcommit already done */
2225+
AtEOXact_SMgr();
22262226
AtEOXact_Files();
22272227
AtEOXact_ComboCid();
22282228
AtEOXact_HashTables(true);
@@ -2368,6 +2368,7 @@ AbortTransaction(void)
23682368
AtEOXact_SPI(false);
23692369
AtEOXact_on_commit_actions(false);
23702370
AtEOXact_Namespace(false);
2371+
AtEOXact_SMgr();
23712372
AtEOXact_Files();
23722373
AtEOXact_ComboCid();
23732374
AtEOXact_HashTables(false);

src/backend/postmaster/bgwriter.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ BackgroundWriterMain(void)
182182
false, true);
183183
/* we needn't bother with the other ResourceOwnerRelease phases */
184184
AtEOXact_Buffers(false);
185+
AtEOXact_SMgr();
185186
AtEOXact_Files();
186187
AtEOXact_HashTables(false);
187188

src/backend/postmaster/checkpointer.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ CheckpointerMain(void)
290290
false, true);
291291
/* we needn't bother with the other ResourceOwnerRelease phases */
292292
AtEOXact_Buffers(false);
293+
AtEOXact_SMgr();
293294
AtEOXact_Files();
294295
AtEOXact_HashTables(false);
295296

src/backend/postmaster/walwriter.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ WalWriterMain(void)
187187
false, true);
188188
/* we needn't bother with the other ResourceOwnerRelease phases */
189189
AtEOXact_Buffers(false);
190+
AtEOXact_SMgr();
190191
AtEOXact_Files();
191192
AtEOXact_HashTables(false);
192193

src/backend/storage/smgr/smgr.c

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,15 @@ static const int NSmgr = lengthof(smgrsw);
7676

7777
/*
7878
* Each backend has a hashtable that stores all extant SMgrRelation objects.
79+
* In addition, "unowned" SMgrRelation objects are chained together in a list.
7980
*/
8081
static HTAB *SMgrRelationHash = NULL;
8182

83+
static SMgrRelation first_unowned_reln = NULL;
84+
8285
/* local function prototypes */
8386
static void smgrshutdown(int code, Datum arg);
87+
static void remove_from_unowned_list(SMgrRelation reln);
8488

8589

8690
/*
@@ -124,7 +128,7 @@ smgrshutdown(int code, Datum arg)
124128
/*
125129
* smgropen() -- Return an SMgrRelation object, creating it if need be.
126130
*
127-
* This does not attempt to actually open the object.
131+
* This does not attempt to actually open the underlying file.
128132
*/
129133
SMgrRelation
130134
smgropen(RelFileNode rnode, BackendId backend)
@@ -144,6 +148,7 @@ smgropen(RelFileNode rnode, BackendId backend)
144148
ctl.hash = tag_hash;
145149
SMgrRelationHash = hash_create("smgr relation table", 400,
146150
&ctl, HASH_ELEM | HASH_FUNCTION);
151+
first_unowned_reln = NULL;
147152
}
148153

149154
/* Look up or create an entry */
@@ -168,6 +173,10 @@ smgropen(RelFileNode rnode, BackendId backend)
168173
/* mark it not open */
169174
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
170175
reln->md_fd[forknum] = NULL;
176+
177+
/* place it at head of unowned list (to make smgrsetowner cheap) */
178+
reln->next_unowned_reln = first_unowned_reln;
179+
first_unowned_reln = reln;
171180
}
172181

173182
return reln;
@@ -182,20 +191,60 @@ smgropen(RelFileNode rnode, BackendId backend)
182191
void
183192
smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
184193
{
194+
/* We don't currently support "disowning" an SMgrRelation here */
195+
Assert(owner != NULL);
196+
185197
/*
186198
* First, unhook any old owner. (Normally there shouldn't be any, but it
187199
* seems possible that this can happen during swap_relation_files()
188200
* depending on the order of processing. It's ok to close the old
189201
* relcache entry early in that case.)
202+
*
203+
* If there isn't an old owner, then the reln should be in the unowned
204+
* list, and we need to remove it.
190205
*/
191206
if (reln->smgr_owner)
192207
*(reln->smgr_owner) = NULL;
208+
else
209+
remove_from_unowned_list(reln);
193210

194211
/* Now establish the ownership relationship. */
195212
reln->smgr_owner = owner;
196213
*owner = reln;
197214
}
198215

216+
/*
217+
* remove_from_unowned_list -- unlink an SMgrRelation from the unowned list
218+
*
219+
* If the reln is not present in the list, nothing happens. Typically this
220+
* would be caller error, but there seems no reason to throw an error.
221+
*
222+
* In the worst case this could be rather slow; but in all the cases that seem
223+
* likely to be performance-critical, the reln being sought will actually be
224+
* first in the list. Furthermore, the number of unowned relns touched in any
225+
* one transaction shouldn't be all that high typically. So it doesn't seem
226+
* worth expending the additional space and management logic needed for a
227+
* doubly-linked list.
228+
*/
229+
static void
230+
remove_from_unowned_list(SMgrRelation reln)
231+
{
232+
SMgrRelation *link;
233+
SMgrRelation cur;
234+
235+
for (link = &first_unowned_reln, cur = *link;
236+
cur != NULL;
237+
link = &cur->next_unowned_reln, cur = *link)
238+
{
239+
if (cur == reln)
240+
{
241+
*link = cur->next_unowned_reln;
242+
cur->next_unowned_reln = NULL;
243+
break;
244+
}
245+
}
246+
}
247+
199248
/*
200249
* smgrexists() -- Does the underlying file for a fork exist?
201250
*/
@@ -219,6 +268,9 @@ smgrclose(SMgrRelation reln)
219268

220269
owner = reln->smgr_owner;
221270

271+
if (!owner)
272+
remove_from_unowned_list(reln);
273+
222274
if (hash_search(SMgrRelationHash,
223275
(void *) &(reln->smgr_rnode),
224276
HASH_REMOVE, NULL) == NULL)
@@ -600,3 +652,29 @@ smgrpostckpt(void)
600652
(*(smgrsw[i].smgr_post_ckpt)) ();
601653
}
602654
}
655+
656+
/*
657+
* AtEOXact_SMgr
658+
*
659+
* This routine is called during transaction commit or abort (it doesn't
660+
* particularly care which). All transient SMgrRelation objects are closed.
661+
*
662+
* We do this as a compromise between wanting transient SMgrRelations to
663+
* live awhile (to amortize the costs of blind writes of multiple blocks)
664+
* and needing them to not live forever (since we're probably holding open
665+
* a kernel file descriptor for the underlying file, and we need to ensure
666+
* that gets closed reasonably soon if the file gets deleted).
667+
*/
668+
void
669+
AtEOXact_SMgr(void)
670+
{
671+
/*
672+
* Zap all unowned SMgrRelations. We rely on smgrclose() to remove each
673+
* one from the list.
674+
*/
675+
while (first_unowned_reln != NULL)
676+
{
677+
Assert(first_unowned_reln->smgr_owner == NULL);
678+
smgrclose(first_unowned_reln);
679+
}
680+
}

src/include/storage/smgr.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
* without having to make the smgr explicitly aware of relcache. There
3434
* can't be more than one "owner" pointer per SMgrRelation, but that's
3535
* all we need.
36+
*
37+
* SMgrRelations that do not have an "owner" are considered to be transient,
38+
* and are deleted at end of transaction.
3639
*/
3740
typedef struct SMgrRelationData
3841
{
@@ -63,6 +66,9 @@ typedef struct SMgrRelationData
6366

6467
/* for md.c; NULL for forks that are not open */
6568
struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
69+
70+
/* if unowned, list link in list of all unowned SMgrRelations */
71+
struct SMgrRelationData *next_unowned_reln;
6672
} SMgrRelationData;
6773

6874
typedef SMgrRelationData *SMgrRelation;
@@ -95,6 +101,7 @@ extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
95101
extern void smgrpreckpt(void);
96102
extern void smgrsync(void);
97103
extern void smgrpostckpt(void);
104+
extern void AtEOXact_SMgr(void);
98105

99106

100107
/* internals: move me elsewhere -- ay 7/94 */

0 commit comments

Comments
 (0)