Skip to content

bpo-45527: Don't count cache hits, just misses. #29092

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,53 +253,6 @@ PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *);
PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *);
PyAPI_FUNC(PyObject *) _PyCode_GetFreevars(PyCodeObject *);


/* Cache hits and misses */

/* Advance the 8-bit hit/miss counter by one "hit" step.
 *
 * The counter is shifted left by one bit: the top bit is discarded and a
 * zero enters at the bottom.  Once every set bit has been shifted out the
 * result is 0 and further calls keep returning 0.
 */
static inline uint8_t
saturating_increment(uint8_t c)
{
    const unsigned int shifted = (unsigned int)c << 1;
    /* Keep only the low eight bits, exactly as the narrowing return would. */
    return (uint8_t)(shifted & 0xFFu);
}

/* Move the 8-bit hit/miss counter by one "miss" step.
 *
 * The counter is shifted right by one bit and the top bit is set.
 * Repeated calls converge on 255 and then stay there.
 */
static inline uint8_t
saturating_decrement(uint8_t c)
{
    /* (c >> 1) never has bit 7 set, so OR-ing in 0x80 equals adding 128. */
    const unsigned int halved = (unsigned int)c >> 1;
    return (uint8_t)(0x80u | halved);
}

/* The counter's "zero" state: all eight bits set (255).
 * This is the value that repeated saturating_decrement() calls settle on.
 */
static inline uint8_t
saturating_zero(void)
{
    return (uint8_t)0xFF;
}

/* Initial value for a saturating counter.
 *
 * Strictly speaking the counter should start one step away from zero, but
 * that tends to cause thrashing when we optimize and then immediately miss.
 * To give the counter a chance to stabilize we start three steps away:
 * saturating_zero() shifted left by three bits, truncated to eight bits.
 */
static inline uint8_t
saturating_start(void)
{
    const unsigned int widened = (unsigned int)saturating_zero() << 3;
    return (uint8_t)widened;
}

/* Note a cache hit on `entry` by stepping its counter with
 * saturating_increment(). */
static inline void
record_cache_hit(_PyAdaptiveEntry *entry)
{
    const uint8_t updated = saturating_increment(entry->counter);
    entry->counter = updated;
}

/* Note a cache miss on `entry` by stepping its counter with
 * saturating_decrement(). */
static inline void
record_cache_miss(_PyAdaptiveEntry *entry)
{
    const uint8_t updated = saturating_decrement(entry->counter);
    entry->counter = updated;
}

/* Return 1 when `entry`'s counter has reached saturating_zero(),
 * otherwise 0. */
static inline int
too_many_cache_misses(_PyAdaptiveEntry *entry)
{
    if (entry->counter != saturating_zero()) {
        return 0;
    }
    return 1;
}

/* Value written as the oparg when an instruction is switched back to its
 * _ADAPTIVE form after too many cache misses.
 * NOTE(review): presumably the number of executions to wait before trying
 * to specialize again -- confirm against cache_backoff() and the adaptive
 * opcode handlers. */
#define ADAPTIVE_CACHE_BACKOFF 64

static inline void
Expand Down
53 changes: 7 additions & 46 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_call.h" // _PyObject_FastCallDictTstate()
#include "pycore_ceval.h" // _PyEval_SignalAsyncExc()
#include "pycore_code.h" // saturating_increment()
#include "pycore_code.h"
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_long.h" // _PyLong_GetZero()
#include "pycore_object.h" // _PyObject_GC_TRACK()
Expand Down Expand Up @@ -1452,11 +1452,6 @@ eval_frame_handle_pending(PyThreadState *tstate)

/* Store `oparg` into the single byte immediately before `instr`, treating
 * `instr` as a uint8_t pointer.
 * NOTE(review): this relies on the oparg occupying the last byte of the
 * preceding code unit -- confirm against the _Py_CODEUNIT layout.
 * The expansion is a bare assignment expression; use it as a statement. */
#define UPDATE_PREV_INSTR_OPARG(instr, oparg) ((uint8_t*)(instr))[-1] = (oparg)

/* Note a hit for an instruction that keeps its counter inline in its oparg:
 * step `oparg` with saturating_increment() and write the result back over
 * the oparg byte of the instruction preceding `next_instr`. */
static inline void
record_hit_inline(_Py_CODEUNIT *next_instr, int oparg)
{
    /* The int oparg is narrowed to uint8_t, as the implicit argument
     * conversion would do. */
    const uint8_t bumped = saturating_increment((uint8_t)oparg);
    UPDATE_PREV_INSTR_OPARG(next_instr, bumped);
}

/* Shorthands for the f_globals and f_builtins members of the variable
 * named `frame` that is in scope at the point of use. */
#define GLOBALS() frame->f_globals
#define BUILTINS() frame->f_builtins
Expand All @@ -1480,7 +1475,6 @@ record_hit_inline(_Py_CODEUNIT *next_instr, int oparg)
res = ep->me_value; \
DEOPT_IF(res == NULL, LOAD_##attr_or_method); \
STAT_INC(LOAD_##attr_or_method, hit); \
record_cache_hit(cache0); \
Py_INCREF(res);

static int
Expand Down Expand Up @@ -1976,7 +1970,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(!PyLong_CheckExact(left), BINARY_MULTIPLY);
DEOPT_IF(!PyLong_CheckExact(right), BINARY_MULTIPLY);
STAT_INC(BINARY_MULTIPLY, hit);
record_hit_inline(next_instr, oparg);
PyObject *prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right);
SET_SECOND(prod);
Py_DECREF(right);
Expand All @@ -1994,7 +1987,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_MULTIPLY);
DEOPT_IF(!PyFloat_CheckExact(right), BINARY_MULTIPLY);
STAT_INC(BINARY_MULTIPLY, hit);
record_hit_inline(next_instr, oparg);
double dprod = ((PyFloatObject *)left)->ob_fval *
((PyFloatObject *)right)->ob_fval;
PyObject *prod = PyFloat_FromDouble(dprod);
Expand Down Expand Up @@ -2103,7 +2095,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD);
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD);
STAT_INC(BINARY_ADD, hit);
record_hit_inline(next_instr, oparg);
PyObject *res = PyUnicode_Concat(left, right);
STACK_SHRINK(1);
SET_TOP(res);
Expand Down Expand Up @@ -2132,7 +2123,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyObject *var = GETLOCAL(next_oparg);
DEOPT_IF(var != left, BINARY_ADD);
STAT_INC(BINARY_ADD, hit);
record_hit_inline(next_instr, oparg);
GETLOCAL(next_oparg) = NULL;
Py_DECREF(left);
STACK_SHRINK(1);
Expand All @@ -2150,7 +2140,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD);
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD);
STAT_INC(BINARY_ADD, hit);
record_hit_inline(next_instr, oparg);
double dsum = ((PyFloatObject *)left)->ob_fval +
((PyFloatObject *)right)->ob_fval;
PyObject *sum = PyFloat_FromDouble(dsum);
Expand All @@ -2170,7 +2159,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD);
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD);
STAT_INC(BINARY_ADD, hit);
record_hit_inline(next_instr, oparg);
PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right);
SET_SECOND(sum);
Py_DECREF(right);
Expand Down Expand Up @@ -2241,8 +2229,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0);
Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0];
DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR);

record_hit_inline(next_instr, oparg);
STAT_INC(BINARY_SUBSCR, hit);
PyObject *res = PyList_GET_ITEM(list, index);
assert(res != NULL);
Expand All @@ -2266,8 +2252,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0);
Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0];
DEOPT_IF(index >= PyTuple_GET_SIZE(tuple), BINARY_SUBSCR);

record_hit_inline(next_instr, oparg);
STAT_INC(BINARY_SUBSCR, hit);
PyObject *res = PyTuple_GET_ITEM(tuple, index);
assert(res != NULL);
Expand All @@ -2282,7 +2266,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
TARGET(BINARY_SUBSCR_DICT) {
PyObject *dict = SECOND();
DEOPT_IF(!PyDict_CheckExact(SECOND()), BINARY_SUBSCR);
record_hit_inline(next_instr, oparg);
STAT_INC(BINARY_SUBSCR, hit);
PyObject *sub = TOP();
PyObject *res = PyDict_GetItemWithError(dict, sub);
Expand Down Expand Up @@ -3258,7 +3241,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
PyObject *res = ep->me_value;
DEOPT_IF(res == NULL, LOAD_GLOBAL);
record_cache_hit(cache0);
STAT_INC(LOAD_GLOBAL, hit);
Py_INCREF(res);
PUSH(res);
Expand All @@ -3279,7 +3261,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index;
PyObject *res = ep->me_value;
DEOPT_IF(res == NULL, LOAD_GLOBAL);
record_cache_hit(cache0);
STAT_INC(LOAD_GLOBAL, hit);
Py_INCREF(res);
PUSH(res);
Expand Down Expand Up @@ -3702,7 +3683,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
res = values->values[cache0->index];
DEOPT_IF(res == NULL, LOAD_ATTR);
STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
Py_INCREF(res);
SET_TOP(res);
Py_DECREF(owner);
Expand Down Expand Up @@ -3742,7 +3722,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
res = ep->me_value;
DEOPT_IF(res == NULL, LOAD_ATTR);
STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
Py_INCREF(res);
SET_TOP(res);
Py_DECREF(owner);
Expand All @@ -3763,7 +3742,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
res = *(PyObject **)addr;
DEOPT_IF(res == NULL, LOAD_ATTR);
STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
Py_INCREF(res);
SET_TOP(res);
Py_DECREF(owner);
Expand Down Expand Up @@ -3805,7 +3783,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyDictValues *values = *(PyDictValues **)(((char *)owner) + tp->tp_inline_values_offset);
DEOPT_IF(values == NULL, STORE_ATTR);
STAT_INC(STORE_ATTR, hit);
record_cache_hit(cache0);
int index = cache0->index;
STACK_SHRINK(1);
PyObject *value = POP();
Expand Down Expand Up @@ -3843,7 +3820,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyObject *old_value = ep->me_value;
DEOPT_IF(old_value == NULL, STORE_ATTR);
STAT_INC(STORE_ATTR, hit);
record_cache_hit(cache0);
STACK_SHRINK(1);
PyObject *value = POP();
ep->me_value = value;
Expand All @@ -3869,7 +3845,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(tp->tp_version_tag != cache1->tp_version, STORE_ATTR);
char *addr = (char *)owner + cache0->index;
STAT_INC(STORE_ATTR, hit);
record_cache_hit(cache0);
STACK_SHRINK(1);
PyObject *value = POP();
PyObject *old_value = *(PyObject **)addr;
Expand Down Expand Up @@ -4527,7 +4502,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyObject *self = TOP();
PyTypeObject *self_cls = Py_TYPE(self);
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyAttrCache *cache1 = &caches[-1].attr;
_PyObjectCache *cache2 = &caches[-2].obj;

Expand All @@ -4538,7 +4512,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(dict != NULL, LOAD_METHOD);
DEOPT_IF(((PyHeapTypeObject *)self_cls)->ht_cached_keys->dk_version != cache1->dk_version_or_hint, LOAD_METHOD);
STAT_INC(LOAD_METHOD, hit);
record_cache_hit(cache0);
PyObject *res = cache2->obj;
assert(res != NULL);
assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR));
Expand All @@ -4552,13 +4525,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyObject *self = TOP();
PyTypeObject *self_cls = Py_TYPE(self);
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyAttrCache *cache1 = &caches[-1].attr;
_PyObjectCache *cache2 = &caches[-2].obj;
DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD);
assert(self_cls->tp_dictoffset == 0);
STAT_INC(LOAD_METHOD, hit);
record_cache_hit(cache0);
PyObject *res = cache2->obj;
assert(res != NULL);
assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR));
Expand All @@ -4584,7 +4555,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
/* LOAD_METHOD, for class methods */
assert(cframe.use_tracing == 0);
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyAttrCache *cache1 = &caches[-1].attr;
_PyObjectCache *cache2 = &caches[-2].obj;

Expand All @@ -4595,7 +4565,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
assert(cache1->tp_version != 0);

STAT_INC(LOAD_METHOD, hit);
record_cache_hit(cache0);
PyObject *res = cache2->obj;
assert(res != NULL);
Py_INCREF(res);
Expand Down Expand Up @@ -4751,7 +4720,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
/* PEP 523 */
DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
record_cache_hit(cache0);
InterpreterFrame *new_frame = _PyThreadState_PushFrame(
tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL);
if (new_frame == NULL) {
Expand Down Expand Up @@ -4783,8 +4751,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
PyObject *callable = SECOND();
DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL_FUNCTION);
_PyAdaptiveEntry *cache0 = &GET_CACHE()[0].adaptive;
record_cache_hit(cache0);
STAT_INC(CALL_FUNCTION, hit);

PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
Expand Down Expand Up @@ -4813,7 +4779,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION);
DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL,
CALL_FUNCTION);
record_cache_hit(cache0);
STAT_INC(CALL_FUNCTION, hit);

PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable);
Expand Down Expand Up @@ -4845,13 +4810,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
assert(cframe.use_tracing == 0);
/* len(o) */
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
assert(caches[0].adaptive.original_oparg == 1);
_PyObjectCache *cache1 = &caches[-1].obj;
assert(cache0->original_oparg == 1);

PyObject *callable = SECOND();
DEOPT_IF(callable != cache1->obj, CALL_FUNCTION);
record_cache_hit(cache0);
STAT_INC(CALL_FUNCTION, hit);

Py_ssize_t len_i = PyObject_Length(TOP());
Expand All @@ -4875,13 +4838,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
assert(cframe.use_tracing == 0);
/* isinstance(o, o2) */
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
assert(caches[0].adaptive.original_oparg == 2);
_PyObjectCache *cache1 = &caches[-1].obj;
assert(cache0->original_oparg == 2);

PyObject *callable = THIRD();
DEOPT_IF(callable != cache1->obj, CALL_FUNCTION);
record_cache_hit(cache0);
STAT_INC(CALL_FUNCTION, hit);

int retval = PyObject_IsInstance(SECOND(), TOP());
Expand Down Expand Up @@ -5139,8 +5100,8 @@ opname ## _miss: \
{ \
STAT_INC(opname, miss); \
_PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \
record_cache_miss(cache); \
if (too_many_cache_misses(cache)) { \
cache->counter--; \
if (cache->counter == 0) { \
next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, _Py_OPARG(next_instr[-1])); \
STAT_INC(opname, deopt); \
cache_backoff(cache); \
Expand All @@ -5154,10 +5115,10 @@ opname ## _miss: \
opname ## _miss: \
{ \
STAT_INC(opname, miss); \
uint8_t oparg = saturating_decrement(_Py_OPARG(next_instr[-1])); \
uint8_t oparg = _Py_OPARG(next_instr[-1])-1; \
UPDATE_PREV_INSTR_OPARG(next_instr, oparg); \
assert(_Py_OPARG(next_instr[-1]) == oparg); \
if (oparg == saturating_zero()) /* too many cache misses */ { \
if (oparg == 0) /* too many cache misses */ { \
oparg = ADAPTIVE_CACHE_BACKOFF; \
next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \
STAT_INC(opname, deopt); \
Expand Down
Loading