@@ -1155,8 +1155,12 @@ typedef struct GinBuffer
1155
1155
int16 typlen ;
1156
1156
bool typbyval ;
1157
1157
1158
+ /* Number of TIDs to collect before attempt to write some out. */
1159
+ int maxitems ;
1160
+
1158
1161
/* array of TID values */
1159
1162
int nitems ;
1163
+ int nfrozen ;
1160
1164
SortSupport ssup ; /* for sorting/comparing keys */
1161
1165
ItemPointerData * items ;
1162
1166
} GinBuffer ;
@@ -1229,6 +1233,13 @@ GinBufferInit(Relation index)
1229
1233
nKeys ;
1230
1234
TupleDesc desc = RelationGetDescr (index );
1231
1235
1236
+ /*
1237
+ * How many items can we fit into the memory limit? We don't want to end
1238
+ * with too many TIDs. and 64kB seems more than enough. But maybe this
1239
+ * should be tied to maintenance_work_mem or something like that?
1240
+ */
1241
+ buffer -> maxitems = (64 * 1024L ) / sizeof (ItemPointerData );
1242
+
1232
1243
nKeys = IndexRelationGetNumberOfKeyAttributes (index );
1233
1244
1234
1245
buffer -> ssup = palloc0 (sizeof (SortSupportData ) * nKeys );
@@ -1336,6 +1347,48 @@ GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
1336
1347
return (r == 0 );
1337
1348
}
1338
1349
1350
+ /*
1351
+ * GinBufferShouldTrim
1352
+ * Should we trim the list of item pointers?
1353
+ *
1354
+ * By trimming we understand writing out and removing the tuple IDs that
1355
+ * we know can't change by future merges. We can deduce the TID up to which
1356
+ * this is guaranteed from the "first" TID in each GIN tuple, which provides
1357
+ * a "horizon" (for a given key) thanks to the sort.
1358
+ *
1359
+ * We don't want to do this too often - compressing longer TID lists is more
1360
+ * efficient. But we also don't want to accumulate too many TIDs, for two
1361
+ * reasons. First, it consumes memory and we might exceed maintenance_work_mem
1362
+ * (or whatever limit applies), even if that's unlikely because TIDs are very
1363
+ * small so we can fit a lot of them. Second, and more importantly, long TID
1364
+ * lists are an issue if the scan wraps around, because a key may get a very
1365
+ * wide list (with min/max TID for that key), forcing "full" mergesorts for
1366
+ * every list merged into it (instead of the efficient append).
1367
+ *
1368
+ * So we look at two things when deciding if to trim - if the resulting list
1369
+ * (after adding TIDs from the new tuple) would be too long, and if there is
1370
+ * enough TIDs to trim (with values less than "first" TID from the new tuple),
1371
+ * we do the trim. By enough we mean at least 128 TIDs (mostly an arbitrary
1372
+ * number).
1373
+ */
1374
+ static bool
1375
+ GinBufferShouldTrim (GinBuffer * buffer , GinTuple * tup )
1376
+ {
1377
+ /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
1378
+ if (buffer -> nfrozen < 1024 )
1379
+ return false;
1380
+
1381
+ /* no need to trim if we have not hit the memory limit yet */
1382
+ if ((buffer -> nitems + tup -> nitems ) < buffer -> maxitems )
1383
+ return false;
1384
+
1385
+ /*
1386
+ * OK, we have enough frozen TIDs to flush, and we have hit the memory
1387
+ * limit, so it's time to write it out.
1388
+ */
1389
+ return true;
1390
+ }
1391
+
1339
1392
/*
1340
1393
* GinBufferStoreTuple
1341
1394
* Add data (especially TID list) from a GIN tuple to the buffer.
@@ -1386,21 +1439,76 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
1386
1439
buffer -> key = (Datum ) 0 ;
1387
1440
}
1388
1441
1442
+ /*
1443
+ * Try freeze TIDs at the beginning of the list, i.e. exclude them from
1444
+ * the mergesort. We can do that with TIDs before the first TID in the new
1445
+ * tuple we're about to add into the buffer.
1446
+ *
1447
+ * We do this incrementally when adding data into the in-memory buffer,
1448
+ * and not later (e.g. when hitting a memory limit), because it allows us
1449
+ * to skip the frozen data during the mergesort, making it cheaper.
1450
+ */
1451
+
1452
+ /*
1453
+ * Check if the last TID in the current list is frozen. This is the case
1454
+ * when merging non-overlapping lists, e.g. in each parallel worker.
1455
+ */
1456
+ if ((buffer -> nitems > 0 ) &&
1457
+ (ItemPointerCompare (& buffer -> items [buffer -> nitems - 1 ],
1458
+ GinTupleGetFirst (tup )) == 0 ))
1459
+ buffer -> nfrozen = buffer -> nitems ;
1460
+
1461
+ /*
1462
+ * Now find the last TID we know to be frozen, i.e. the last TID right
1463
+ * before the new GIN tuple.
1464
+ *
1465
+ * Start with the first not-yet-frozen tuple, and walk until we find the
1466
+ * first TID that's higher. If we already know the whole list is frozen
1467
+ * (i.e. nfrozen == nitems), this does nothing.
1468
+ *
1469
+ * XXX This might do a binary search for sufficiently long lists, but it
1470
+ * does not seem worth the complexity. Overlapping lists should be rare
1471
+ * common, TID comparisons are cheap, and we should quickly freeze most of
1472
+ * the list.
1473
+ */
1474
+ for (int i = buffer -> nfrozen ; i < buffer -> nitems ; i ++ )
1475
+ {
1476
+ /* Is the TID after the first TID of the new tuple? Can't freeze. */
1477
+ if (ItemPointerCompare (& buffer -> items [i ],
1478
+ GinTupleGetFirst (tup )) > 0 )
1479
+ break ;
1480
+
1481
+ buffer -> nfrozen ++ ;
1482
+ }
1483
+
1389
1484
/* add the new TIDs into the buffer, combine using merge-sort */
1390
1485
{
1391
1486
int nnew ;
1392
1487
ItemPointer new ;
1393
1488
1394
- new = ginMergeItemPointers (buffer -> items , buffer -> nitems ,
1489
+ /*
1490
+ * Resize the array - we do this first, because we'll dereference the
1491
+ * first unfrozen TID, which would fail if the array is NULL. We'll
1492
+ * still pass 0 as number of elements in that array though.
1493
+ */
1494
+ if (buffer -> items == NULL )
1495
+ buffer -> items = palloc ((buffer -> nitems + tup -> nitems ) * sizeof (ItemPointerData ));
1496
+ else
1497
+ buffer -> items = repalloc (buffer -> items ,
1498
+ (buffer -> nitems + tup -> nitems ) * sizeof (ItemPointerData ));
1499
+
1500
+ new = ginMergeItemPointers (& buffer -> items [buffer -> nfrozen ], /* first unfronzen */
1501
+ (buffer -> nitems - buffer -> nfrozen ), /* num of unfrozen */
1395
1502
items , tup -> nitems , & nnew );
1396
1503
1397
- Assert (nnew == buffer -> nitems + tup -> nitems );
1504
+ Assert (nnew == ( tup -> nitems + ( buffer -> nitems - buffer -> nfrozen )) );
1398
1505
1399
- if ( buffer -> items )
1400
- pfree ( buffer -> items );
1506
+ memcpy ( & buffer -> items [ buffer -> nfrozen ], new ,
1507
+ nnew * sizeof ( ItemPointerData ) );
1401
1508
1402
- buffer -> items = new ;
1403
- buffer -> nitems = nnew ;
1509
+ pfree (new );
1510
+
1511
+ buffer -> nitems += tup -> nitems ;
1404
1512
1405
1513
AssertCheckItemPointers (buffer );
1406
1514
}
@@ -1432,11 +1540,29 @@ GinBufferReset(GinBuffer *buffer)
1432
1540
buffer -> category = 0 ;
1433
1541
buffer -> keylen = 0 ;
1434
1542
buffer -> nitems = 0 ;
1543
+ buffer -> nfrozen = 0 ;
1435
1544
1436
1545
buffer -> typlen = 0 ;
1437
1546
buffer -> typbyval = 0 ;
1438
1547
}
1439
1548
1549
+ /*
1550
+ * GinBufferTrim
1551
+ * Discard the "frozen" part of the TID list (which should have been
1552
+ * written to disk/index before this call).
1553
+ */
1554
+ static void
1555
+ GinBufferTrim (GinBuffer * buffer )
1556
+ {
1557
+ Assert ((buffer -> nfrozen > 0 ) && (buffer -> nfrozen <= buffer -> nitems ));
1558
+
1559
+ memmove (& buffer -> items [0 ], & buffer -> items [buffer -> nfrozen ],
1560
+ sizeof (ItemPointerData ) * (buffer -> nitems - buffer -> nfrozen ));
1561
+
1562
+ buffer -> nitems -= buffer -> nfrozen ;
1563
+ buffer -> nfrozen = 0 ;
1564
+ }
1565
+
1440
1566
/*
1441
1567
* GinBufferFree
1442
1568
* Release memory associated with the GinBuffer (including TID array).
@@ -1504,7 +1630,12 @@ _gin_parallel_merge(GinBuildState *state)
1504
1630
/* do the actual sort in the leader */
1505
1631
tuplesort_performsort (state -> bs_sortstate );
1506
1632
1507
- /* initialize buffer to combine entries for the same key */
1633
+ /*
1634
+ * Initialize buffer to combine entries for the same key.
1635
+ *
1636
+ * The leader is allowed to use the whole maintenance_work_mem buffer to
1637
+ * combine data. The parallel workers already completed.
1638
+ */
1508
1639
buffer = GinBufferInit (state -> ginstate .index );
1509
1640
1510
1641
/*
@@ -1562,6 +1693,32 @@ _gin_parallel_merge(GinBuildState *state)
1562
1693
GinBufferReset (buffer );
1563
1694
}
1564
1695
1696
+ /*
1697
+ * We're about to add a GIN tuple to the buffer - check the memory
1698
+ * limit first, and maybe write out some of the data into the index
1699
+ * first, if needed (and possible). We only flush the part of the TID
1700
+ * list that we know won't change, and only if there's enough data for
1701
+ * compression to work well.
1702
+ */
1703
+ if (GinBufferShouldTrim (buffer , tup ))
1704
+ {
1705
+ Assert (buffer -> nfrozen > 0 );
1706
+
1707
+ /*
1708
+ * Buffer is not empty and it's storing a different key - flush
1709
+ * the data into the insert, and start a new entry for current
1710
+ * GinTuple.
1711
+ */
1712
+ AssertCheckItemPointers (buffer );
1713
+
1714
+ ginEntryInsert (& state -> ginstate ,
1715
+ buffer -> attnum , buffer -> key , buffer -> category ,
1716
+ buffer -> items , buffer -> nfrozen , & state -> buildStats );
1717
+
1718
+ /* truncate the data we've just discarded */
1719
+ GinBufferTrim (buffer );
1720
+ }
1721
+
1565
1722
/*
1566
1723
* Remember data for the current tuple (either remember the new key,
1567
1724
* or append if to the existing data).
@@ -1655,7 +1812,13 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
1655
1812
1656
1813
GinBuffer * buffer ;
1657
1814
1658
- /* initialize buffer to combine entries for the same key */
1815
+ /*
1816
+ * Initialize buffer to combine entries for the same key.
1817
+ *
1818
+ * The workers are limited to the same amount of memory as during the sort
1819
+ * in ginBuildCallbackParallel. But this probably should be the 32MB used
1820
+ * during planning, just like there.
1821
+ */
1659
1822
buffer = GinBufferInit (state -> ginstate .index );
1660
1823
1661
1824
/* sort the raw per-worker data */
@@ -1711,6 +1874,39 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
1711
1874
GinBufferReset (buffer );
1712
1875
}
1713
1876
1877
+ /*
1878
+ * We're about to add a GIN tuple to the buffer - check the memory
1879
+ * limit first, and maybe write out some of the data into the index
1880
+ * first, if needed (and possible). We only flush the part of the TID
1881
+ * list that we know won't change, and only if there's enough data for
1882
+ * compression to work well.
1883
+ */
1884
+ if (GinBufferShouldTrim (buffer , tup ))
1885
+ {
1886
+ GinTuple * ntup ;
1887
+ Size ntuplen ;
1888
+
1889
+ Assert (buffer -> nfrozen > 0 );
1890
+
1891
+ /*
1892
+ * Buffer is not empty and it's storing a different key - flush
1893
+ * the data into the insert, and start a new entry for current
1894
+ * GinTuple.
1895
+ */
1896
+ AssertCheckItemPointers (buffer );
1897
+
1898
+ ntup = _gin_build_tuple (buffer -> attnum , buffer -> category ,
1899
+ buffer -> key , buffer -> typlen , buffer -> typbyval ,
1900
+ buffer -> items , buffer -> nfrozen , & ntuplen );
1901
+
1902
+ tuplesort_putgintuple (state -> bs_sortstate , ntup , ntuplen );
1903
+
1904
+ pfree (ntup );
1905
+
1906
+ /* truncate the data we've just discarded */
1907
+ GinBufferTrim (buffer );
1908
+ }
1909
+
1714
1910
/*
1715
1911
* Remember data for the current tuple (either remember the new key,
1716
1912
* or append if to the existing data).
0 commit comments