Skip to content

Commit ee07e06

Browse files
committed
rewrite dsm arrays
1 parent c60d242 commit ee07e06

File tree

11 files changed

+209
-80
lines changed

11 files changed

+209
-80
lines changed

README.md

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,35 @@ CREATE TABLE hash_rel (
124124
value INTEGER);
125125
INSERT INTO hash_rel (value) SELECT g FROM generate_series(1, 10000) as g;
126126
```
127-
Then run create_hash_partitions() function with appropriate arguments:
127+
If partitions are supposed to have indexes, then they should be created for parent table before partitioning. In this case pg_pathman will automaticaly create indexes for partitions. Then run create_hash_partitions() function with appropriate arguments:
128128
```
129129
SELECT create_hash_partitions('hash_rel', 'value', 100);
130130
```
131131
This will create new partitions but data will still be in the parent table. To move data to the corresponding partitions use partition_data() function:
132132
```
133133
SELECT partition_data('hash_rel');
134134
```
135+
Here is an example of the query with filtering by partitioning key and its plan:
136+
```
137+
SELECT * FROM hash_rel WHERE value = 1234;
138+
id | value
139+
------+-------
140+
1234 | 1234
141+
142+
EXPLAIN SELECT * FROM hash_rel WHERE value = 1234;
143+
QUERY PLAN
144+
-----------------------------------------------------------------
145+
Append (cost=0.00..2.00 rows=0 width=0)
146+
-> Seq Scan on hash_rel_34 (cost=0.00..2.00 rows=0 width=0)
147+
Filter: (value = 1234)
148+
```
149+
Note that pg_pathman exludes parent table from the query plan. To access parent table use ONLY modifier:
150+
```
151+
EXPLAIN SELECT * FROM ONLY hash_rel;
152+
QUERY PLAN
153+
--------------------------------------------------------
154+
Seq Scan on hash_rel (cost=0.00..0.00 rows=1 width=8)
155+
```
135156
### RANGE
136157
Consider an example of RANGE partitioning. Create a table with numerical or date or timestamp column:
137158
```
@@ -159,12 +180,29 @@ SELECT split_range_partition('range_rel_1', '2010-02-15'::date);
159180
Now let's create new partition. You can use append_partition() or prepend_partition() functions:
160181
```
161182
SELECT append_partition('range_rel');
162-
SELECT prepend_partition('range_rel');
163183
```
184+
Here is an example of the query with filtering by partitioning key and its plan:
185+
```
186+
SELECT * FROM range_rel WHERE dt >= '2012-04-30' AND dt <= '2012-05-01';
187+
id | dt
188+
-----+---------------------
189+
851 | 2012-04-30 00:00:00
190+
852 | 2012-05-01 00:00:00
191+
192+
EXPLAIN SELECT * FROM range_rel WHERE dt >= '2012-04-30' AND dt <= '2012-05-01';
193+
QUERY PLAN
194+
----------------------------------------------------------------------------
195+
Append (cost=0.00..60.80 rows=0 width=0)
196+
-> Seq Scan on range_rel_28 (cost=0.00..30.40 rows=0 width=0)
197+
Filter: (dt >= '2012-04-30 00:00:00'::timestamp without time zone)
198+
-> Seq Scan on range_rel_29 (cost=0.00..30.40 rows=0 width=0)
199+
Filter: (dt <= '2012-05-01 00:00:00'::timestamp without time zone)
200+
```
201+
164202
### Disable pg_pathman
165-
To disable pg_pathman for some previously partitioned table use disable_pathman() function:
203+
To disable pg_pathman for some previously partitioned table use disable_partitioning() function:
166204
```
167-
SELECT disable_pathman('range_rel');
205+
SELECT disable_partitioning('range_rel');
168206
```
169207
All sections and data will stay available and will be handled by standard PostgreSQL partitioning mechanism.
170208
### Manual partitions management

README.rus.md

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,36 @@ CREATE TABLE hash_rel (
125125
value INTEGER);
126126
INSERT INTO hash_rel (value) SELECT g FROM generate_series(1, 10000) as g;
127127
```
128-
Разобьем таблицу `hash_rel` на 100 секций по полю `value`:
128+
Если дочерние секции подразумевают наличие индексов, то стоит их создать в родительской таблице до разбиения. Тогда при разбиении pg_pathman автоматически создаст соответствующие индексы в дочерних.таблицах. Разобьем таблицу `hash_rel` на 100 секций по полю `value`:
129129
```
130130
SELECT create_hash_partitions('hash_rel', 'value', 100);
131131
```
132132
Перенесем данные из родительской таблицы в дочерние секции.
133133
```
134134
SELECT partition_data('hash_rel');
135135
```
136+
Пример построения плана для запроса с фильтрацией по ключевому полю:
137+
```
138+
SELECT * FROM hash_rel WHERE value = 1234;
139+
id | value
140+
------+-------
141+
1234 | 1234
142+
143+
EXPLAIN SELECT * FROM hash_rel WHERE value = 1234;
144+
QUERY PLAN
145+
-----------------------------------------------------------------
146+
Append (cost=0.00..2.00 rows=0 width=0)
147+
-> Seq Scan on hash_rel_34 (cost=0.00..2.00 rows=0 width=0)
148+
Filter: (value = 1234)
149+
```
150+
Стоит отметить, что pg_pathman исключает из плана запроса родительскую таблицу, и чтобы получить данные из нее, следует использовать модификатор ONLY:
151+
```
152+
EXPLAIN SELECT * FROM ONLY hash_rel;
153+
QUERY PLAN
154+
--------------------------------------------------------
155+
Seq Scan on hash_rel (cost=0.00..0.00 rows=1 width=8)
156+
```
157+
136158
### RANGE
137159
Пример секционирования таблицы с использованием стратегии RANGE.
138160
```
@@ -161,12 +183,30 @@ SELECT split_range_partition('range_rel_1', '2010-02-15'::date);
161183
```
162184
Добавим новую секцию в конец списка секций:
163185
```
164-
SELECT append_partition('range_rel')
186+
SELECT append_partition('range_rel');
187+
```
188+
Пример построения плана для запроса с фильтрацией по ключевому полю:
165189
```
190+
SELECT * FROM range_rel WHERE dt >= '2012-04-30' AND dt <= '2012-05-01';
191+
id | dt
192+
-----+---------------------
193+
851 | 2012-04-30 00:00:00
194+
852 | 2012-05-01 00:00:00
195+
196+
EXPLAIN SELECT * FROM range_rel WHERE dt >= '2012-04-30' AND dt <= '2012-05-01';
197+
QUERY PLAN
198+
----------------------------------------------------------------------------
199+
Append (cost=0.00..60.80 rows=0 width=0)
200+
-> Seq Scan on range_rel_28 (cost=0.00..30.40 rows=0 width=0)
201+
Filter: (dt >= '2012-04-30 00:00:00'::timestamp without time zone)
202+
-> Seq Scan on range_rel_29 (cost=0.00..30.40 rows=0 width=0)
203+
Filter: (dt <= '2012-05-01 00:00:00'::timestamp without time zone)
204+
```
205+
166206
### Деакцивация pathman
167-
Деактивировать pathman для некоторой ранее разделенной таблицы можно следующей командой disable_pathman():
207+
Деактивировать pathman для некоторой ранее разделенной таблицы можно следующей командой disable_partitioning():
168208
```
169-
SELECT disable_pathman('range_rel');
209+
SELECT disable_partitioning('range_rel');
170210
```
171211
Все созданные секции и данные останутся по прежнему доступны и будут обрабатываться стандартным планировщиком PostgreSQL.
172212
### Ручное управление секциями

dsm_array.c

Lines changed: 85 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,36 @@
22
#include "storage/shmem.h"
33
#include "storage/dsm.h"
44
#include "storage/lwlock.h"
5+
#include <stdint.h>
56

6-
7-
static Table *table;
7+
// static Table *table;
88
static dsm_segment *segment = NULL;
9-
9+
static dsm_handle *segment_handle = 0;
10+
static size_t _first_free = 0;
11+
static size_t _block_size = 0;
12+
13+
typedef int BlockHeader;
14+
typedef BlockHeader* BlockHeaderPtr;
15+
16+
#define FREE_BIT 0x80000000
17+
#define is_free(header) \
18+
((*header) & FREE_BIT)
19+
#define set_free(header) \
20+
((*header) | FREE_BIT)
21+
#define set_used(header) \
22+
((*header) & ~FREE_BIT)
23+
#define get_length(header) \
24+
((*header) & ~FREE_BIT)
25+
#define set_length(header, length) \
26+
((length) | ((*header) & FREE_BIT))
1027

1128
void
1229
alloc_dsm_table()
1330
{
1431
bool found;
15-
table = (Table *) ShmemInitStruct("dsm table", sizeof(Table), &found);
32+
segment_handle = ShmemInitStruct("dsm table", sizeof(dsm_handle), &found);
1633
if (!found)
17-
table->segment_handle = 0;
34+
*segment_handle = 0;
1835
}
1936

2037

@@ -26,30 +43,30 @@ bool
2643
init_dsm_segment(size_t block_size)
2744
{
2845
bool ret;
29-
dsm_handle handle;
3046

3147
/* lock here */
3248
LWLockAcquire(dsm_init_lock, LW_EXCLUSIVE);
3349

3450
/* if there is already an existing segment then attach to it */
35-
if (table->segment_handle != 0)
51+
if (*segment_handle != 0)
3652
{
3753
ret = false;
38-
segment = dsm_attach(table->segment_handle);
54+
segment = dsm_attach(*segment_handle);
3955
}
4056

4157
/*
4258
* If segment hasn't been created yet or has already been destroyed
4359
* (it happens when last session detaches segment) then create new one
4460
*/
45-
if (table->segment_handle == 0 || segment == NULL)
61+
if (*segment_handle == 0 || segment == NULL)
4662
{
4763
/* create segment */
4864
segment = dsm_create(block_size * BLOCKS_COUNT, 0);
49-
handle = dsm_segment_handle(segment);
50-
init_dsm_table(table, handle, block_size);
65+
*segment_handle = dsm_segment_handle(segment);
66+
init_dsm_table(block_size);
5167
ret = true;
5268
}
69+
_block_size = block_size;
5370

5471
/*
5572
* Keep mapping till the end of the session. Otherwise it would be
@@ -63,25 +80,24 @@ init_dsm_segment(size_t block_size)
6380
return ret;
6481
}
6582

83+
/*
84+
* Initialize allocated segment with block structure
85+
*/
6686
void
67-
init_dsm_table(Table *tbl, dsm_handle h, size_t block_size)
87+
init_dsm_table(size_t block_size)
6888
{
6989
int i;
70-
Block *block;
71-
72-
memset(table, 0, sizeof(Table));
73-
table->segment_handle = h;
74-
table->block_size = block_size;
75-
table->first_free = 0;
90+
BlockHeaderPtr header;
91+
char *ptr = dsm_segment_address(segment);
7692

7793
/* create blocks */
7894
for (i=0; i<BLOCKS_COUNT; i++)
7995
{
80-
block = &table->blocks[i];
81-
block->segment = h;
82-
block->offset = i * block_size;
83-
block->is_free = true;
96+
header = (BlockHeaderPtr) &ptr[i * block_size];
97+
*header = set_free(header);
98+
*header = set_length(header, 1);
8499
}
100+
_first_free = 0;
85101

86102
return;
87103
}
@@ -93,71 +109,91 @@ void
93109
alloc_dsm_array(DsmArray *arr, size_t entry_size, size_t length)
94110
{
95111
int i = 0;
96-
Block *block = NULL;
97-
int free_count = 0;
98112
int size_requested = entry_size * length;
99113
int min_pos = 0;
100114
int max_pos = 0;
115+
size_t offset = 0;
116+
size_t total_length = 0;
117+
char *ptr = dsm_segment_address(segment);
118+
BlockHeaderPtr header;
101119

102-
for (i = table->first_free; i<BLOCKS_COUNT; i++)
120+
for (i = _first_free; i<BLOCKS_COUNT; )
103121
{
104-
if (table->blocks[i].is_free)
122+
header = (BlockHeaderPtr) &ptr[i * _block_size];
123+
if (is_free(header))
105124
{
106-
if (!block)
125+
if (!offset)
107126
{
108-
block = &table->blocks[i];
127+
offset = i * _block_size;
128+
total_length = _block_size - sizeof(BlockHeader);
109129
min_pos = i;
110130
}
111-
free_count++;
131+
else
132+
{
133+
total_length += _block_size;
134+
}
135+
i++;
112136
}
113137
else
114138
{
115-
free_count = 0;
116-
block = NULL;
139+
offset = 0;
140+
total_length = 0;
141+
i += get_length(header);
117142
}
118143

119-
if (free_count * table->block_size >= size_requested)
144+
if (total_length >= size_requested)
120145
{
121-
// return block->offset;
122-
max_pos = i;
146+
max_pos = i-1;
123147
break;
124148
}
125149
}
126150

127151
/* look up for first free block */
128-
for (i = i+1; i<BLOCKS_COUNT; i++)
129-
if (table->blocks[i].is_free == true)
152+
for (; i<BLOCKS_COUNT; )
153+
{
154+
header = (BlockHeaderPtr) &ptr[i * _block_size];
155+
if (is_free(header))
130156
{
131-
table->first_free = i;
157+
_first_free = i;
132158
break;
133159
}
160+
else
161+
{
162+
i += get_length(header);
163+
}
164+
}
134165

135166
/* if we found enough of space */
136-
if (free_count * table->block_size >= size_requested)
167+
if (total_length >= size_requested)
137168
{
138-
for(i=min_pos; i<=max_pos; i++)
139-
table->blocks[i].is_free = false;
140-
arr->offset = block->offset;
169+
header = (BlockHeaderPtr) &ptr[min_pos * _block_size];
170+
*header = set_used(header);
171+
*header = set_length(header, max_pos - min_pos + 1);
172+
173+
arr->offset = offset;
141174
arr->length = length;
142175
}
143176
}
144177

145178
void
146179
free_dsm_array(DsmArray *arr)
147180
{
148-
int start = arr->offset / table->block_size;
181+
int start = arr->offset / _block_size;
149182
int i = 0;
183+
char *ptr = dsm_segment_address(segment);
184+
BlockHeaderPtr header = (BlockHeaderPtr) &ptr[start * _block_size];
185+
size_t blocks_count = get_length(header);
150186

151187
/* set blocks free */
152-
for(;; i++)
188+
for(; i < blocks_count; i++)
153189
{
154-
table->blocks[start + i].is_free = true;
155-
if (i * table->block_size >= arr->length)
156-
break;
190+
header = (BlockHeaderPtr) &ptr[(start + i) * _block_size];
191+
*header = set_free(header);
192+
*header = set_length(header, 1);
157193
}
158194

159-
if (arr->offset < table->first_free)
160-
table->first_free = arr->offset;
195+
if (start < _first_free)
196+
_first_free = start;
161197

162198
arr->offset = 0;
163199
arr->length = 0;
@@ -166,5 +202,5 @@ free_dsm_array(DsmArray *arr)
166202
void *
167203
dsm_array_get_pointer(const DsmArray* arr)
168204
{
169-
return (uint8_t *) dsm_segment_address(segment) + arr->offset;
205+
return (char *) dsm_segment_address(segment) + arr->offset + sizeof(BlockHeader);
170206
}

0 commit comments

Comments
 (0)