Skip to content

Commit 19c0cde

Browse files
committed
Step 4: basic rehydration working (with leaks)
Lots of memory is leaking here.
1 parent f8c2e01 commit 19c0cde

File tree

5 files changed

+148
-3
lines changed

5 files changed

+148
-3
lines changed

Include/cpython/code.h

+4
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ struct PyCodeObject {
109109
interpreter. */
110110
union _cache_or_instruction *co_quickened;
111111

112+
/* Hydration */
113+
struct context *co_hydra_context;
114+
Py_ssize_t co_hydra_offset;
115+
112116
};
113117

114118
/* Masks for co_flags above */

Include/internal/pycore_code.h

+16
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,10 @@ struct _PyCodeConstructor {
241241

242242
/* used by the eval loop */
243243
PyObject *exceptiontable;
244+
245+
/* Hydration */
246+
struct context *hydra_context;
247+
Py_ssize_t hydra_offset;
244248
};
245249

246250
// Using an "arguments struct" like this is helpful for maintainability
@@ -252,8 +256,10 @@ struct _PyCodeConstructor {
252256
// back to a regular function signature. Regardless, this approach
253257
// wouldn't be appropriate if this weren't a strictly internal API.
254258
// (See the comments in https://github.com/python/cpython/pull/26258.)
259+
// TODO: Why PyAPI_FUNC() if these are private?
255260
PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *);
256261
PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
262+
PyAPI_FUNC(PyCodeObject *) _PyCode_Update(struct _PyCodeConstructor *, PyCodeObject *);
257263

258264

259265
/* Private API */
@@ -348,6 +354,16 @@ void _Py_PrintSpecializationStats(void);
348354
#endif
349355

350356

357+
/* Hydration */
358+
359+
static inline int
360+
_PyCode_IsHydrated(PyCodeObject *code)
361+
{
362+
return code->co_firstinstr != NULL;
363+
}
364+
365+
PyCodeObject *_PyCode_Hydrate(PyCodeObject *code);
366+
351367
#ifdef __cplusplus
352368
}
353369
#endif

Objects/codeobject.c

+28-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
336336
}
337337
co->co_firstlineno = con->firstlineno;
338338

339-
// These may be NULL, and set by hydration
339+
// These may be NULL, and will then be set by hydration
340340
Py_XINCREF(con->linetable);
341341
co->co_linetable = con->linetable;
342342
Py_XINCREF(con->endlinetable);
@@ -363,6 +363,10 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
363363
Py_XINCREF(con->exceptiontable);
364364
co->co_exceptiontable = con->exceptiontable;
365365

366+
/* hydration */
367+
co->co_hydra_context = con->hydra_context;
368+
co->co_hydra_offset = con->hydra_offset;
369+
366370
/* derived values */
367371
co->co_varnames = NULL;
368372
co->co_cellvars = NULL;
@@ -419,6 +423,29 @@ _PyCode_New(struct _PyCodeConstructor *con)
419423
return co;
420424
}
421425

426+
/* Re-initialize the existing code object `code` from the constructor `con`.
 *
 * Returns `code` itself on success; this function takes no additional
 * reference on it.  When _PyCode_Validate() rejects `con`, SystemError is
 * set and NULL is returned.
 *
 * Note that `con` is modified when debug ranges are disabled.
 */
PyCodeObject *
_PyCode_Update(struct _PyCodeConstructor *con, PyCodeObject *code)
{
    if (_PyCode_Validate(con)) {
        PyErr_SetString(PyExc_SystemError, "_PyCode_Update(): invalid input");
        return NULL;
    }

    // Nothing is interned here: marshal took care of that already.

    // When debug ranges are opted out, the endlinetable and columntable
    // are discarded by substituting None for both.
    if (_Py_GetConfig()->no_debug_ranges) {
        con->endlinetable = Py_None;
        con->columntable = Py_None;
    }

    // TODO: This leaks!
    init_code(code, con);

    return code;
}
447+
448+
422449

423450
/******************
424451
* the legacy "constructors"

Python/ceval.c

+17
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,11 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
10581058
if (builtins == NULL) {
10591059
return NULL;
10601060
}
1061+
if (PyCode_Check(co) && !_PyCode_IsHydrated((PyCodeObject *)co)) {
1062+
if (_PyCode_Hydrate((PyCodeObject *)co) == NULL) {
1063+
return NULL;
1064+
}
1065+
}
10611066
PyFrameConstructor desc = {
10621067
.fc_globals = globals,
10631068
.fc_builtins = builtins,
@@ -1493,6 +1498,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
14931498
if (PyDTrace_FUNCTION_ENTRY_ENABLED())
14941499
dtrace_function_entry(f);
14951500

1501+
if (!_PyCode_IsHydrated(co)) {
1502+
if (_PyCode_Hydrate(co) == NULL) {
1503+
goto exit_eval_frame;
1504+
}
1505+
}
1506+
14961507
/* Increment the warmup counter and quicken if warm enough
14971508
* _Py_Quicken is idempotent so we don't worry about overflow */
14981509
if (!PyCodeObject_IsWarmedUp(co)) {
@@ -5004,6 +5015,12 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con,
50045015
{
50055016
PyObject **localsarray;
50065017
PyCodeObject *code = (PyCodeObject *)con->fc_code;
5018+
if(!_PyCode_IsHydrated(code)) {
5019+
// Needed to set co_nlocalsplus
5020+
if (_PyCode_Hydrate(code) == NULL) {
5021+
return NULL;
5022+
}
5023+
}
50075024
int is_coro = code->co_flags &
50085025
(CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR);
50095026
if (is_coro) {

Python/marshal.c

+83-2
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,16 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
651651
w_flush(&wf);
652652
}
653653

654+
655+
struct context {
656+
PyObject *obj; // Python bytes(-like) object containing the data
657+
const char *buf; // Pointer to first byte
658+
Py_ssize_t len; // Number of bytes
659+
PyObject *refs; // List of shared values
660+
PyCodeObject *code; // If not NULL, code object to be updated
661+
// TODO: the latter is neither re-entrant nor thread-safe :-(
662+
};
663+
654664
typedef struct {
655665
FILE *fp;
656666
int depth;
@@ -660,6 +670,7 @@ typedef struct {
660670
char *buf;
661671
Py_ssize_t buf_size;
662672
PyObject *refs; /* a list */
673+
struct context *ctx;
663674
} RFILE;
664675

665676
static const char *
@@ -1379,6 +1390,8 @@ r_object(RFILE *p)
13791390
if (PyErr_Occurred())
13801391
goto code_error;
13811392

1393+
// assert(nrefs == 0);
1394+
13821395
argcount = (int)r_long(p);
13831396
if (PyErr_Occurred())
13841397
goto code_error;
@@ -1419,12 +1432,21 @@ r_object(RFILE *p)
14191432
con.qualname = qualname;
14201433
con.filename = filename;
14211434

1422-
if (should_load_lazy(p)) {
1435+
PyCodeObject *to_update = NULL;
1436+
1437+
if (p->ctx != NULL && p->ctx->code == NULL) {
14231438
printf("Loading lazy\n");
14241439
assert(nrefs == 0);
14251440
p->ptr = save_ptr + datasize;
1441+
1442+
con.hydra_context = p->ctx;
1443+
con.hydra_offset = save_ptr - 1 - p->ctx->buf; // Back up over typecode
14261444
}
14271445
else {
1446+
if (p->ctx != NULL && p->ctx->code != NULL) {
1447+
to_update = p->ctx->code;
1448+
p->ctx->code = NULL;
1449+
}
14281450
code = r_object(p);
14291451
if (code == NULL)
14301452
goto code_error;
@@ -1469,7 +1491,12 @@ r_object(RFILE *p)
14691491
}
14701492
};
14711493

1472-
v = (PyObject *)_PyCode_New(&con);
1494+
if (to_update != NULL) {
1495+
v = (PyObject *)_PyCode_Update(&con, to_update);
1496+
}
1497+
else {
1498+
v = (PyObject *)_PyCode_New(&con);
1499+
}
14731500
if (v == NULL) {
14741501
printf("Failed to create\n");
14751502
goto code_error;
@@ -1555,6 +1582,7 @@ PyMarshal_ReadShortFromFile(FILE *fp)
15551582
rf.fp = fp;
15561583
rf.end = rf.ptr = NULL;
15571584
rf.buf = NULL;
1585+
rf.ctx = NULL;
15581586
res = r_short(&rf);
15591587
if (rf.buf != NULL)
15601588
PyMem_Free(rf.buf);
@@ -1570,6 +1598,7 @@ PyMarshal_ReadLongFromFile(FILE *fp)
15701598
rf.readable = NULL;
15711599
rf.ptr = rf.end = NULL;
15721600
rf.buf = NULL;
1601+
rf.ctx = NULL;
15731602
res = r_long(&rf);
15741603
if (rf.buf != NULL)
15751604
PyMem_Free(rf.buf);
@@ -1633,6 +1662,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
16331662
rf.ptr = rf.end = NULL;
16341663
rf.buf = NULL;
16351664
rf.refs = PyList_New(0);
1665+
rf.ctx = NULL;
16361666
if (rf.refs == NULL)
16371667
return NULL;
16381668
result = read_object(&rf);
@@ -1656,6 +1686,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
16561686
rf.refs = PyList_New(0);
16571687
if (rf.refs == NULL)
16581688
return NULL;
1689+
rf.ctx = NULL;
16591690
result = read_object(&rf);
16601691
Py_DECREF(rf.refs);
16611692
if (rf.buf != NULL)
@@ -1794,6 +1825,7 @@ marshal_load(PyObject *module, PyObject *file)
17941825
PyMem_Free(rf.buf);
17951826
} else
17961827
result = NULL;
1828+
rf.ctx = NULL;
17971829
}
17981830
Py_DECREF(data);
17991831
return result;
@@ -1848,11 +1880,60 @@ marshal_loads_impl(PyObject *module, Py_buffer *bytes)
18481880
rf.depth = 0;
18491881
if ((rf.refs = PyList_New(0)) == NULL)
18501882
return NULL;
1883+
rf.ctx = NULL;
1884+
if (should_load_lazy(&rf)) {
1885+
rf.ctx = PyMem_Malloc(sizeof(struct context));
1886+
if (rf.ctx == NULL) {
1887+
PyErr_NoMemory();
1888+
return NULL;
1889+
}
1890+
rf.ctx->obj = bytes->obj;
1891+
rf.ctx->buf = s;
1892+
rf.ctx->len = n;
1893+
rf.ctx->code = NULL;
1894+
Py_INCREF(rf.refs);
1895+
rf.ctx->refs = rf.refs;
1896+
}
18511897
result = read_object(&rf);
18521898
Py_DECREF(rf.refs);
18531899
return result;
18541900
}
18551901

1902+
/* Hydrate `code` in place by re-reading its marshalled data.
 *
 * The bytes to parse come from code->co_hydra_context, starting at
 * code->co_hydra_offset.  While reading, ctx->code is pointed at `code` so
 * that r_object() updates that object rather than creating a new one;
 * ctx->code is reset to NULL afterwards regardless of the outcome.
 *
 * Returns `code` unchanged when it has no hydration context, the object
 * produced by read_object() on success, or NULL when read_object() fails
 * (the exception is presumably set by the reader -- TODO confirm).
 *
 * NOTE(review): the success result appears to be the same object as `code`
 * without a new reference having been taken; callers visible in this change
 * only test the result against NULL.
 */
PyCodeObject *
_PyCode_Hydrate(PyCodeObject *code)
{
    struct context *ctx = code->co_hydra_context;
    if (ctx == NULL) {
        // Not dehydrated
        assert(_PyCode_IsHydrated(code));
        return code;
    }

    assert(!_PyCode_IsHydrated(code));
    assert(ctx->code == NULL);

    // A designated initializer zeroes every member that is not listed, so
    // `buf` and `buf_size` are no longer left indeterminate on the stack.
    RFILE rf = {
        .fp = NULL,
        .readable = NULL,
        .ptr = ctx->buf + code->co_hydra_offset,
        .end = ctx->buf + ctx->len,
        .depth = 0,
        .refs = ctx->refs,
        .ctx = ctx,
    };

    // Tell r_object() which code object to update; see the TODO on
    // struct context about this not being re-entrant or thread-safe.
    ctx->code = code;
    PyObject *result = read_object(&rf);
    ctx->code = NULL;

    if (result == NULL) {
        return NULL;
    }
    assert(PyCode_Check(result));
    return (PyCodeObject *)result;
}
1936+
18561937
static PyMethodDef marshal_methods[] = {
18571938
MARSHAL_DUMP_METHODDEF
18581939
MARSHAL_LOAD_METHODDEF

0 commit comments

Comments
 (0)