Commit 84b1c63

Preallocate some DSM space at startup.
Create an optional region in the main shared memory segment that can be used to acquire and release "fast" DSM segments, and can benefit from huge pages allocated at cluster startup time, if configured.  Fall back to the existing mechanisms when that space is full.  The size is controlled by a new GUC min_dynamic_shared_memory, defaulting to 0.

Main region DSM segments initially contain whatever garbage the memory held last time they were used, rather than zeroes.  That change revealed that DSA areas failed to initialize themselves correctly in memory that wasn't zeroed first, so fix that problem.

Discussion: https://postgr.es/m/CA%2BhUKGLAE2QBv-WgGp%2BD9P_J-%3Dyne3zof9nfMaqq1h3EGHFXYQ%40mail.gmail.com
1 parent 7b1110d commit 84b1c63
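For callers, nothing changes at the API level: backends still go through dsm_create(), dsm_attach() and dsm_detach(), and a request is simply carved out of the preallocated region when it fits, falling back to dynamic_shared_memory_type otherwise. A minimal caller-side sketch under those assumptions (the segment size, flags and explicit zeroing below are illustrative, not taken from this commit):

/* Illustrative caller; the DSM API itself is unchanged by this commit. */
#include "postgres.h"
#include "storage/dsm.h"

static void
leader_setup_example(void)
{
    dsm_segment *seg;
    dsm_handle   handle;
    char        *space;

    /*
     * Served from the preallocated main-region pool when
     * min_dynamic_shared_memory > 0 and a large enough run of pages is
     * free; otherwise created via dynamic_shared_memory_type as before.
     */
    seg = dsm_create(65536, 0);
    handle = dsm_segment_handle(seg);   /* hand this to workers */
    space = dsm_segment_address(seg);

    /* Main-region segments are not zero-initialized, so clear explicitly. */
    memset(space, 0, 65536);

    /* ... workers call dsm_attach(handle) and dsm_segment_address() ... */

    dsm_detach(seg);
}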

File tree

9 files changed (+216, -26 lines)


doc/src/sgml/config.sgml (+24)

@@ -1906,6 +1906,30 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-min-dynamic-shared-memory" xreflabel="min_dynamic_shared_memory">
+      <term><varname>min_dynamic_shared_memory</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>min_dynamic_shared_memory</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory that should be allocated at server
+        startup time for use by parallel queries.  When this memory region is
+        insufficient or exhausted by concurrent queries, new parallel queries
+        try to allocate extra shared memory temporarily from the operating
+        system using the method configured with
+        <varname>dynamic_shared_memory_type</varname>, which may be slower due
+        to memory management overheads.  Memory that is allocated at startup
+        time with <varname>min_dynamic_shared_memory</varname> is affected by
+        the <varname>huge_pages</varname> setting on operating systems where
+        that is supported, and may be more likely to benefit from larger pages
+        on operating systems where that is managed automatically.
+        The default value is <literal>0</literal> (none).
+       </para>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
    </sect2>
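Expressed as code, the documented rule is a capacity check: the setting is an integer number of megabytes reserved when the server starts, a request is rounded up to whole pages, and it is served from that pool only if a large enough run of pages is free; otherwise the server falls back to dynamic_shared_memory_type. A hypothetical sketch of that rule (the names and page size here are illustrative, not PostgreSQL's real implementation, which appears in dsm.c below):

/* Hypothetical sketch of the documented pool-or-fallback behaviour. */
#include <stdbool.h>
#include <stddef.h>

#define SKETCH_PAGE_SIZE 4096   /* stand-in for the real FPM page size */

/* min_dynamic_shared_memory is given in megabytes; 0 disables the pool. */
static size_t
preallocated_pool_bytes(int min_dynamic_shared_memory_mb)
{
    return (size_t) min_dynamic_shared_memory_mb * 1024 * 1024;
}

/* True if a request of this size should be carved from the startup pool. */
static bool
request_fits_pool(size_t request_bytes, size_t largest_free_run_bytes)
{
    size_t  npages = (request_bytes + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE;

    return largest_free_run_bytes >= npages * SKETCH_PAGE_SIZE;
}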

src/backend/storage/ipc/dsm.c (+169, -22)

@@ -35,10 +35,12 @@
 
 #include "lib/ilist.h"
 #include "miscadmin.h"
+#include "port/pg_bitutils.h"
 #include "storage/dsm.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
 #include "storage/pg_shmem.h"
+#include "utils/freepage.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner_private.h"
@@ -76,6 +78,8 @@ typedef struct dsm_control_item
 {
     dsm_handle  handle;
     uint32      refcnt;         /* 2+ = active, 1 = moribund, 0 = gone */
+    size_t      first_page;
+    size_t      npages;
     void       *impl_private_pm_handle;    /* only needed on Windows */
     bool        pinned;
 } dsm_control_item;
@@ -95,10 +99,15 @@ static dsm_segment *dsm_create_descriptor(void);
 static bool dsm_control_segment_sane(dsm_control_header *control,
                                      Size mapped_size);
 static uint64 dsm_control_bytes_needed(uint32 nitems);
+static inline dsm_handle make_main_region_dsm_handle(int slot);
+static inline bool is_main_region_dsm_handle(dsm_handle handle);
 
 /* Has this backend initialized the dynamic shared memory system yet? */
 static bool dsm_init_done = false;
 
+/* Preallocated DSM space in the main shared memory region. */
+static void *dsm_main_space_begin = NULL;
+
 /*
  * List of dynamic shared memory segments used by this backend.
  *
@@ -171,7 +180,7 @@ dsm_postmaster_startup(PGShmemHeader *shim)
     {
         Assert(dsm_control_address == NULL);
         Assert(dsm_control_mapped_size == 0);
-        dsm_control_handle = random();
+        dsm_control_handle = random() << 1;    /* Even numbers only */
         if (dsm_control_handle == DSM_HANDLE_INVALID)
             continue;
         if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
@@ -247,8 +256,12 @@ dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
         if (refcnt == 0)
             continue;
 
-        /* Log debugging information. */
+        /* If it was using the main shmem area, there is nothing to do. */
         handle = old_control->item[i].handle;
+        if (is_main_region_dsm_handle(handle))
+            continue;
+
+        /* Log debugging information. */
         elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
              handle, refcnt);
 
@@ -348,8 +361,11 @@ dsm_postmaster_shutdown(int code, Datum arg)
         if (dsm_control->item[i].refcnt == 0)
             continue;
 
-        /* Log debugging information. */
         handle = dsm_control->item[i].handle;
+        if (is_main_region_dsm_handle(handle))
+            continue;
+
+        /* Log debugging information. */
         elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
              handle);
 
@@ -418,6 +434,45 @@ dsm_set_control_handle(dsm_handle h)
 }
 #endif
 
+/*
+ * Reserve some space in the main shared memory segment for DSM segments.
+ */
+size_t
+dsm_estimate_size(void)
+{
+    return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
+}
+
+/*
+ * Initialize space in the main shared memory segment for DSM segments.
+ */
+void
+dsm_shmem_init(void)
+{
+    size_t      size = dsm_estimate_size();
+    bool        found;
+
+    if (size == 0)
+        return;
+
+    dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
+    if (!found)
+    {
+        FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin;
+        size_t      first_page = 0;
+        size_t      pages;
+
+        /* Reserve space for the FreePageManager. */
+        while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
+            ++first_page;
+
+        /* Initialize it and give it all the rest of the space. */
+        FreePageManagerInitialize(fpm, dsm_main_space_begin);
+        pages = (size / FPM_PAGE_SIZE) - first_page;
+        FreePageManagerPut(fpm, first_page, pages);
+    }
+}
+
 /*
  * Create a new dynamic shared memory segment.
  *
@@ -434,6 +489,10 @@ dsm_create(Size size, int flags)
     dsm_segment *seg;
     uint32      i;
     uint32      nitems;
+    size_t      npages = 0;
+    size_t      first_page = 0;
+    FreePageManager *dsm_main_space_fpm = dsm_main_space_begin;
+    bool        using_main_dsm_region = false;
 
     /* Unsafe in postmaster (and pointless in a stand-alone backend). */
     Assert(IsUnderPostmaster);
@@ -444,27 +503,63 @@ dsm_create(Size size, int flags)
     /* Create a new segment descriptor. */
     seg = dsm_create_descriptor();
 
-    /* Loop until we find an unused segment identifier. */
-    for (;;)
+    /*
+     * Lock the control segment while we try to allocate from the main shared
+     * memory area, if configured.
+     */
+    if (dsm_main_space_fpm)
     {
-        Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
-        seg->handle = random();
-        if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
-            continue;
-        if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
-                        &seg->mapped_address, &seg->mapped_size, ERROR))
-            break;
+        npages = size / FPM_PAGE_SIZE;
+        if (size % FPM_PAGE_SIZE > 0)
+            ++npages;
+
+        LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+        if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
+        {
+            /* We can carve out a piece of the main shared memory segment. */
+            seg->mapped_address = (char *) dsm_main_space_begin +
+                first_page * FPM_PAGE_SIZE;
+            seg->mapped_size = npages * FPM_PAGE_SIZE;
+            using_main_dsm_region = true;
+            /* We'll choose a handle below. */
+        }
     }
 
-    /* Lock the control segment so we can register the new segment. */
-    LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+    if (!using_main_dsm_region)
+    {
+        /*
+         * We need to create a new memory segment.  Loop until we find an
+         * unused segment identifier.
+         */
+        if (dsm_main_space_fpm)
+            LWLockRelease(DynamicSharedMemoryControlLock);
+        for (;;)
+        {
+            Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
+            seg->handle = random() << 1;    /* Even numbers only */
+            if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
+                continue;
+            if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
+                            &seg->mapped_address, &seg->mapped_size, ERROR))
+                break;
+        }
+        LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+    }
 
     /* Search the control segment for an unused slot. */
     nitems = dsm_control->nitems;
     for (i = 0; i < nitems; ++i)
     {
         if (dsm_control->item[i].refcnt == 0)
         {
+            if (using_main_dsm_region)
+            {
+                seg->handle = make_main_region_dsm_handle(i);
+                dsm_control->item[i].first_page = first_page;
+                dsm_control->item[i].npages = npages;
+            }
+            else
+                Assert(!is_main_region_dsm_handle(seg->handle));
             dsm_control->item[i].handle = seg->handle;
             /* refcnt of 1 triggers destruction, so start at 2 */
             dsm_control->item[i].refcnt = 2;
@@ -479,9 +574,12 @@
     /* Verify that we can support an additional mapping. */
     if (nitems >= dsm_control->maxitems)
     {
+        if (using_main_dsm_region)
+            FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
         LWLockRelease(DynamicSharedMemoryControlLock);
-        dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
-                    &seg->mapped_address, &seg->mapped_size, WARNING);
+        if (!using_main_dsm_region)
+            dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
+                        &seg->mapped_address, &seg->mapped_size, WARNING);
         if (seg->resowner != NULL)
             ResourceOwnerForgetDSM(seg->resowner, seg);
         dlist_delete(&seg->node);
@@ -495,6 +593,12 @@
     }
 
     /* Enter the handle into a new array slot. */
+    if (using_main_dsm_region)
+    {
+        seg->handle = make_main_region_dsm_handle(nitems);
+        dsm_control->item[i].first_page = first_page;
+        dsm_control->item[i].npages = npages;
+    }
     dsm_control->item[nitems].handle = seg->handle;
     /* refcnt of 1 triggers destruction, so start at 2 */
     dsm_control->item[nitems].refcnt = 2;
@@ -580,6 +684,12 @@ dsm_attach(dsm_handle h)
         /* Otherwise we've found a match. */
         dsm_control->item[i].refcnt++;
         seg->control_slot = i;
+        if (is_main_region_dsm_handle(seg->handle))
+        {
+            seg->mapped_address = (char *) dsm_main_space_begin +
+                dsm_control->item[i].first_page * FPM_PAGE_SIZE;
+            seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE;
+        }
         break;
     }
     LWLockRelease(DynamicSharedMemoryControlLock);
@@ -597,8 +707,9 @@
     }
 
     /* Here's where we actually try to map the segment. */
-    dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
-                &seg->mapped_address, &seg->mapped_size, ERROR);
+    if (!is_main_region_dsm_handle(seg->handle))
+        dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
+                    &seg->mapped_address, &seg->mapped_size, ERROR);
 
     return seg;
 }
@@ -688,8 +799,9 @@ dsm_detach(dsm_segment *seg)
      */
     if (seg->mapped_address != NULL)
     {
-        dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
-                    &seg->mapped_address, &seg->mapped_size, WARNING);
+        if (!is_main_region_dsm_handle(seg->handle))
+            dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
+                        &seg->mapped_address, &seg->mapped_size, WARNING);
         seg->impl_private = NULL;
         seg->mapped_address = NULL;
         seg->mapped_size = 0;
@@ -729,10 +841,15 @@ dsm_detach(dsm_segment *seg)
          * other reason, the postmaster may not have any better luck than
         * we did.  There's not much we can do about that, though.
          */
-        if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
+        if (is_main_region_dsm_handle(seg->handle) ||
+            dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
                         &seg->mapped_address, &seg->mapped_size, WARNING))
         {
            LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+            if (is_main_region_dsm_handle(seg->handle))
+                FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
+                                   dsm_control->item[control_slot].first_page,
+                                   dsm_control->item[control_slot].npages);
            Assert(dsm_control->item[control_slot].handle == seg->handle);
            Assert(dsm_control->item[control_slot].refcnt == 1);
            dsm_control->item[control_slot].refcnt = 0;
@@ -894,10 +1011,15 @@ dsm_unpin_segment(dsm_handle handle)
          * pass the mapped size, mapped address, and private data as NULL
          * here.
          */
-        if (dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
+        if (is_main_region_dsm_handle(handle) ||
+            dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
                         &junk_mapped_address, &junk_mapped_size, WARNING))
         {
            LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+            if (is_main_region_dsm_handle(handle))
+                FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
+                                   dsm_control->item[control_slot].first_page,
+                                   dsm_control->item[control_slot].npages);
            Assert(dsm_control->item[control_slot].handle == handle);
            Assert(dsm_control->item[control_slot].refcnt == 1);
            dsm_control->item[control_slot].refcnt = 0;
@@ -1094,3 +1216,28 @@ dsm_control_bytes_needed(uint32 nitems)
     return offsetof(dsm_control_header, item)
         + sizeof(dsm_control_item) * (uint64) nitems;
 }
+
+static inline dsm_handle
+make_main_region_dsm_handle(int slot)
+{
+    dsm_handle  handle;
+
+    /*
+     * We need to create a handle that doesn't collide with any existing extra
+     * segment created by dsm_impl_op(), so we'll make it odd.  It also
+     * mustn't collide with any other main area pseudo-segment, so we'll
+     * include the slot number in some of the bits.  We also want to make an
+     * effort to avoid newly created and recently destroyed handles from being
+     * confused, so we'll make the rest of the bits random.
+     */
+    handle = 1;
+    handle |= slot << 1;
+    handle |= random() << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1);
+    return handle;
+}
+
+static inline bool
+is_main_region_dsm_handle(dsm_handle handle)
+{
+    return handle & 1;
+}
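To make the handle scheme above concrete: handles for dsm_impl-backed segments are now generated as random() << 1, so they are always even, while main-region pseudo-segments always set bit 0 and pack the control-slot index into the bits just above it, with random bits further up. A small standalone illustration of that classification (the example handle values are made up):

/* Standalone illustration of the odd/even handle split; values are made up. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t example_dsm_handle;

static bool
example_is_main_region(example_dsm_handle h)
{
    return (h & 1) != 0;        /* odd handle => main-region pseudo-segment */
}

int
main(void)
{
    /* Even: the kind of handle dsm_impl_op() segments get (random() << 1). */
    example_dsm_handle impl_backed = 0x2468;

    /* Odd: bit 0 set, slot 5 in the low bits, random noise above. */
    example_dsm_handle main_region = (0x7fu << 8) | (5u << 1) | 1u;

    printf("0x%x -> %s\n", impl_backed,
           example_is_main_region(impl_backed) ? "main region" : "dsm_impl");
    printf("0x%x -> %s\n", main_region,
           example_is_main_region(main_region) ? "main region" : "dsm_impl");
    return 0;
}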

src/backend/storage/ipc/dsm_impl.c (+3)

@@ -113,6 +113,9 @@ const struct config_enum_entry dynamic_shared_memory_options[] = {
 /* Implementation selector. */
 int         dynamic_shared_memory_type;
 
+/* Amount of space reserved for DSM segments in the main area. */
+int         min_dynamic_shared_memory;
+
 /* Size of buffer to be used for zero-filling. */
 #define ZBUFFER_SIZE 8192
 
src/backend/storage/ipc/ipci.c (+3)

@@ -120,6 +120,7 @@ CreateSharedMemoryAndSemaphores(void)
         size = add_size(size, SpinlockSemaSize());
         size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
                                                  sizeof(ShmemIndexEnt)));
+        size = add_size(size, dsm_estimate_size());
         size = add_size(size, BufferShmemSize());
         size = add_size(size, LockShmemSize());
         size = add_size(size, PredicateLockShmemSize());
@@ -209,6 +210,8 @@
      */
     InitShmemIndex();
 
+    dsm_shmem_init();
+
     /*
      * Set up xlog, clog, and buffers
      */
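The ipci.c hook-up follows the usual two-phase pattern for consumers of the main shared memory segment: contribute to the size estimate before the segment is created, then initialize after InitShmemIndex() has run, exactly as dsm_estimate_size()/dsm_shmem_init() do above. A minimal sketch of that pattern for a hypothetical module (the module, struct and function names are invented for illustration):

/* Hypothetical module using the same estimate-then-initialize pattern. */
#include "postgres.h"
#include "storage/shmem.h"

typedef struct MyModuleShared
{
    int         counter;
} MyModuleShared;

static MyModuleShared *MyModuleState = NULL;

/* Added to the total while CreateSharedMemoryAndSemaphores() sums sizes. */
Size
MyModuleShmemSize(void)
{
    return MAXALIGN(sizeof(MyModuleShared));
}

/* Called after InitShmemIndex(), like dsm_shmem_init() above. */
void
MyModuleShmemInit(void)
{
    bool        found;

    MyModuleState = ShmemInitStruct("My Module State",
                                    MyModuleShmemSize(), &found);
    if (!found)
        MyModuleState->counter = 0; /* first time through: initialize */
}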
