Skip to content

Commit a088277

Browse files
committed
2 parents c2989af + db8fb8d commit a088277

17 files changed

+1827
-4
lines changed

contrib/mmts/multimaster.c

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,14 +1220,44 @@ void MtmOnNodeConnect(int nodeId)
12201220
*/
12211221
void* PaxosGet(char const* key, int* size, PaxosTimestamp* ts, bool nowait)
12221222
{
1223-
if (size != NULL) {
1223+
unsigned enclen, declen, len;
1224+
char *enc, *dec;
1225+
Assert(ts == NULL); // not implemented
1226+
1227+
enc = raftable_get(key);
1228+
if (enc == NULL)
1229+
{
12241230
*size = 0;
1231+
return NULL;
1232+
}
1233+
1234+
enclen = strlen(enc);
1235+
declen = hex_dec_len(enc, enclen);
1236+
dec = palloc(declen);
1237+
len = hex_decode(enc, enclen, dec);
1238+
pfree(enc);
1239+
Assert(len == declen);
1240+
1241+
if (size != NULL) {
1242+
*size = declen;
12251243
}
1226-
return NULL;
1244+
return dec;
12271245
}
12281246

12291247
void PaxosSet(char const* key, void const* value, int size, bool nowait)
1230-
{}
1248+
{
1249+
unsigned enclen, declen, len;
1250+
char *enc, *dec;
1251+
1252+
enclen = hex_enc_len(value, size);
1253+
enc = palloc(enclen) + 1;
1254+
len = hex_encode(value, size, enc);
1255+
Assert(len == enclen);
1256+
enc[len] = '\0';
1257+
1258+
raftable_set(key, enc, nowait ? 1 : INT_MAX);
1259+
pfree(enc);
1260+
}
12311261

12321262

12331263
/*

contrib/mmts/multimaster.control

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ comment = 'Multimaster'
22
default_version = '1.0'
33
module_pathname = '$libdir/multimaster'
44
schema = mtm
5-
relocatable = false
5+
relocatable = false
6+
requires = raftable

contrib/raftable/Makefile

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
MODULE_big = raftable
2+
OBJS = raftable.o worker.o state.o blockmem.o
3+
EXTENSION = raftable
4+
DATA = raftable--1.0.sql
5+
6+
EXTRA_INSTALL = contrib/raftable
7+
8+
RAFT_PREFIX = $(HOME)/raft
9+
override LDFLAGS += -L$(RAFT_PREFIX)/lib -Wl,-whole-archive -lraft -Wl,-no-whole-archive
10+
override CFLAGS += -Wfatal-errors
11+
override CPPFLAGS += -I$(RAFT_PREFIX)/include
12+
13+
ifdef USE_PGXS
14+
PG_CONFIG = pg_config
15+
PGXS := $(shell $(PG_CONFIG) --pgxs)
16+
include $(PGXS)
17+
else
18+
subdir = contrib/raftable
19+
top_builddir = ../..
20+
include $(top_builddir)/src/Makefile.global
21+
include $(top_srcdir)/contrib/contrib-global.mk
22+
endif
23+
24+
check:
25+
$(prove_check)

contrib/raftable/README

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
Raftable
2+
========
3+
4+
This extension allows you to have a key-value table replicated between several
5+
Postgres instances over Raft protocol.
6+
7+
Depends on
8+
----------
9+
10+
Raft implementation in C: https://github.com/kvap/raft
11+
Please compile the raft library with -fpic flag.
12+
13+
Internals
14+
---------
15+
16+
Each 'postgres' instance starts a background worker 'raftable' and creates a
17+
shared memory segment for storing the 'state'. All 'raftable' workers are
18+
communicating Raft protocol over UDP.
19+
20+
When 'frontend' issues a read command, the 'backend' returns the data from the
21+
local replica of the 'state' which is in the shared memory of current instance.
22+
23+
When 'frontend' issues a write command, the 'backend' connects to the current
24+
'raftable' leader directly through TCP and sends an update. Raftable leader
25+
returns 'ok' to the backend when the update gets applied on current instance.
26+
27+
The backend can also issue commands to itself through C API.
28+
29+
┓ ┏━━━━━━━━━┓┏━━━━━━━━━┓┏━━━━━━━━━┓ ┏━━━━━━━━━┓┏━━━━━━━━━┓┏━━━━━━━━━┓ ┏
30+
┃ ┃ backend ┃┃ backend ┃┃ backend ┠─┐ ┃ backend ┃┃ backend ┃┃ backend ┃ ┃
31+
┛ ┗━━━━┯━━━━┛┗━━━━┯━━━━┛┗━━━━┯━━━━┛ │ ┗━━━━┯━━━━┛┗━━━━┯━━━━┛┗━━━━┯━━━━┛ ┗
32+
╗ ╔════╧══════════╧══════════╧════╗ T ╔════╧══════════╧══════════╧════╗ ╔
33+
║ ║ state in shared memory ║ C ║ state in shared memory ║ ║
34+
╝ ╚═══════════════╤═══════════════╝ P ╚═══════════════╤═══════════════╝ ╚
35+
┓ ┏━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┓ │ ┏━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┓ ┏
36+
┃ ┃ raftable worker ┃ └─┨ raftable worker ┃ ┃
37+
┛ ┗━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┛ ┗
38+
┆ ┆
39+
╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴┴╶╶╶╶╶╶╶╶╶╶ Raft over UDP ╴╴╴╴╴╴╴╴╴╴┴╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶╶
40+
41+
C API:
42+
/* Gets value by key. Returns the value or NULL if not found. */
43+
char *raftable_get(char *key);
44+
45+
/*
46+
* Adds/updates value by key. Returns when the value gets replicated on
47+
* current machine. Storing NULL will delete the item from the table.
48+
*/
49+
void raftable_set(char *key, char *value);
50+
51+
/*
52+
* Iterates over all items in the table, calling func(key, value, arg)
53+
* for each of them.
54+
*/
55+
void raftable_every(void (*func)(char *, char *, void *), void *arg);
56+
57+
SQL API:
58+
-- set
59+
raftable(key varchar(64), value text, tries int);
60+
61+
-- get
62+
raftable(key varchar(64)) returns text;
63+
64+
-- list
65+
raftable() returns table (key varchar(64), value text);

contrib/raftable/blockmem.c

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#include "blockmem.h"
2+
3+
#include <stddef.h>
4+
#include <assert.h>
5+
#include <stdbool.h>
6+
#include <string.h>
7+
8+
#define BLOCK_LEN (256)
9+
#define BLOCK_DATA_LEN ((BLOCK_LEN) - offsetof(block_t, data))
10+
#define META(ORIGIN) ((meta_t *)(ORIGIN))
11+
#define BLOCK(ORIGIN, ID) ((block_t *)((char *)(ORIGIN) + BLOCK_LEN * ID))
12+
#define TAIL(ORIGIN, ID) (BLOCK(ORIGIN, ID)->next)
13+
#define USED(ORIGIN, ID) (BLOCK(ORIGIN, ID)->used)
14+
#define LEN(ORIGIN, ID) (BLOCK(ORIGIN, ID)->len)
15+
#define DATA(ORIGIN, ID) (BLOCK(ORIGIN, ID)->data)
16+
#define FREE(ORIGIN) (META(ORIGIN)->freeblock)
17+
18+
typedef struct meta_t
19+
{
20+
int freeblock; /* the id of first free block, or -1 if none */
21+
char data[1];
22+
} meta_t;
23+
24+
typedef struct block_t
25+
{
26+
bool used;
27+
int next;
28+
int len;
29+
char data[1];
30+
} block_t;
31+
32+
int
33+
blockmem_format(void *origin, size_t size)
34+
{
35+
block_t *block;
36+
meta_t *meta;
37+
int id;
38+
int blocks = (size - 1) / BLOCK_LEN;
39+
if (blocks <= 0) return 0;
40+
41+
FREE(origin) = 1;
42+
43+
for (id = 1; id <= blocks; id++)
44+
{
45+
USED(origin, id) = 0;
46+
TAIL(origin, id) = id + 1;
47+
}
48+
TAIL(origin, blocks) = 0; /* the last block has no tail */
49+
50+
return 1;
51+
}
52+
53+
static size_t
54+
block_fill(void *origin, int id, void *src, size_t len)
55+
{
56+
if (len > BLOCK_DATA_LEN)
57+
len = BLOCK_DATA_LEN;
58+
LEN(origin, id) = len;
59+
memcpy(DATA(origin, id), src, len);
60+
return len;
61+
}
62+
63+
void
64+
block_clear(void *origin, int id)
65+
{
66+
TAIL(origin, id) = 0;
67+
USED(origin, id) = true;
68+
LEN(origin, id) = 0;
69+
}
70+
71+
static int
72+
block_alloc(void *origin)
73+
{
74+
int newborn = FREE(origin);
75+
if (!newborn) return 0;
76+
77+
FREE(origin) = TAIL(origin, newborn);
78+
block_clear(origin, newborn);
79+
return newborn;
80+
}
81+
82+
static void
83+
block_free(void *origin, int id)
84+
{
85+
/* behead the victim, and repeat for the tail */
86+
while (id > 0)
87+
{
88+
int tail;
89+
assert(USED(origin, id));
90+
91+
USED(origin, id) = false;
92+
tail = TAIL(origin, id);
93+
TAIL(origin, id) = FREE(origin);
94+
FREE(origin) = id;
95+
id = tail;
96+
}
97+
}
98+
99+
int
100+
blockmem_put(void *origin, void *src, size_t len)
101+
{
102+
int head = 0;
103+
int id = 0;
104+
char *cursor = src;
105+
size_t bytesleft = len;
106+
107+
while (bytesleft > 0)
108+
{
109+
int copied;
110+
int newid = block_alloc(origin);
111+
if (!newid) goto failure;
112+
113+
copied = block_fill(origin, newid, cursor, bytesleft);
114+
115+
cursor += copied;
116+
bytesleft -= copied;
117+
if (id)
118+
TAIL(origin, id) = newid;
119+
else
120+
head = newid;
121+
id = newid;
122+
}
123+
124+
return head;
125+
failure:
126+
block_free(origin, head);
127+
return -1;
128+
}
129+
130+
size_t
131+
blockmem_len(void *origin, int id)
132+
{
133+
size_t len = 0;
134+
while (id > 0)
135+
{
136+
assert(USED(origin, id));
137+
len += LEN(origin, id);
138+
id = TAIL(origin, id);
139+
}
140+
assert(len > 0);
141+
return len;
142+
}
143+
144+
size_t
145+
blockmem_get(void *origin, int id, void *dst, size_t len)
146+
{
147+
size_t copied = 0;
148+
char *cursor = dst;
149+
while ((id > 0) && (copied < len))
150+
{
151+
size_t tocopy = LEN(origin, id);
152+
if (tocopy > len - copied)
153+
tocopy = len - copied;
154+
assert(tocopy > 0);
155+
assert(USED(origin, id));
156+
memcpy(cursor, DATA(origin, id), tocopy);
157+
copied += tocopy;
158+
cursor += tocopy;
159+
id = TAIL(origin, id);
160+
}
161+
return len;
162+
}
163+
164+
void
165+
blockmem_forget(void *origin, int id)
166+
{
167+
block_free(origin, id);
168+
}

contrib/raftable/blockmem.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#ifndef BLOCKMEM_H
2+
#define BLOCKMEM_H
3+
4+
#include <stdlib.h>
5+
6+
/*
7+
* Formats the memory region of 'size' bytes starting at 'origin'. Use this
8+
* before any other functions related to blockmem. Returns 1 on success,
9+
* 0 otherwise.
10+
*/
11+
int blockmem_format(void *origin, size_t size);
12+
13+
/*
14+
* Allocates blocks in the formatted region starting at 'origin' and stores
15+
* 'len' bytes from 'src' inside those blocks. Returns the id for later _get(),
16+
* _len() and _forget() operations. Returns 0 if not enough free blocks.
17+
*/
18+
int blockmem_put(void *origin, void *src, size_t len);
19+
20+
/*
21+
* Returns the length of data identified by 'id' that was previously stored
22+
* with a call to _put().
23+
*/
24+
size_t blockmem_len(void *origin, int id);
25+
26+
/*
27+
* Retrieves into 'dst' no more than 'len' bytes of data identified by 'id'
28+
* that were previously stored with a call to _put(). Returns the length of data
29+
* actually copied.
30+
*/
31+
size_t blockmem_get(void *origin, int id, void *dst, size_t len);
32+
33+
/*
34+
* Frees the blocks occupied by data identified by 'id'.
35+
*/
36+
void blockmem_forget(void *origin, int id);
37+
38+
#endif

contrib/raftable/raftable--1.0.sql

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
2+
\echo Use "CREATE EXTENSION raftable" to load this file. \quit
3+
4+
-- get
5+
CREATE FUNCTION raftable(key varchar(64))
6+
RETURNS text
7+
AS 'MODULE_PATHNAME','raftable_sql_get'
8+
LANGUAGE C;
9+
10+
-- set
11+
CREATE FUNCTION raftable(key varchar(64), value text, tries int)
12+
RETURNS void
13+
AS 'MODULE_PATHNAME','raftable_sql_set'
14+
LANGUAGE C;
15+
16+
-- list
17+
CREATE FUNCTION raftable()
18+
RETURNS table (key varchar(64), value text)
19+
AS 'MODULE_PATHNAME','raftable_sql_list'
20+
LANGUAGE C;

0 commit comments

Comments
 (0)