diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index e332ab94ed565..f834dac38684e 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -52,7 +52,7 @@ codepoint2name[ord("|")] = "pipe" codepoint2name[ord("~")] = "tilde" -# static qstrs, should be sorted +# static qstrs, unsorted. static_qstr_list = [ "", @@ -61,7 +61,12 @@ " ", "*", "/", + "", + "", + "", + "", "", + "", "_", "__call__", "__class__", @@ -84,6 +89,64 @@ "__repr__", "__setitem__", "__str__", + "__bool__", + "__pos__", + "__neg__", + "__invert__", + "__abs__", + "__float__", + "__complex__", + "__sizeof__", + "__lt__", + "__gt__", + "__eq__", + "__le__", + "__ge__", + "__ne__", + "__contains__", + "__iadd__", + "__isub__", + "__imul__", + "__imatmul__", + "__ifloordiv__", + "__itruediv__", + "__imod__", + "__ipow__", + "__ior__", + "__ixor__", + "__iand__", + "__ilshift__", + "__irshift__", + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__floordiv__", + "__truediv__", + "__mod__", + "__divmod__", + "__pow__", + "__or__", + "__xor__", + "__and__", + "__lshift__", + "__rshift__", + "__radd__", + "__rsub__", + "__rmul__", + "__rmatmul__", + "__rfloordiv__", + "__rtruediv__", + "__rmod__", + "__rpow__", + "__ror__", + "__rxor__", + "__rand__", + "__rlshift__", + "__rrshift__", + "__get__", + "__set__", + "__delete__", "ArithmeticError", "AssertionError", "AttributeError", @@ -257,7 +320,7 @@ def parse_input_headers(infiles): # add the qstr to the list, with order number to retain original order in file order = len(qstrs) - 300000 - qstrs[ident] = (order, ident, qstr) + qstrs[ident] = Qstr(order, ident, qstr) # read the qstrs in from the input files for infile in infiles: @@ -298,17 +361,7 @@ def parse_input_headers(infiles): # add the qstr to the list, with order number to retain original order in file order = len(qstrs) - # but put special method names like __add__ at the top of list, so - # that their id's fit into a byte - if ident == "": - # Sort empty qstr above all still - order = -200000 - elif ident == "__dir__": - # Put __dir__ after empty qstr for builtin dir() to work - order = -190000 - elif ident.startswith("__"): - order -= 100000 - qstrs[ident] = (order, ident, qstr) + qstrs[ident] = Qstr(order, ident, qstr) if not qcfgs: sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n") @@ -317,47 +370,72 @@ def parse_input_headers(infiles): return qcfgs, qstrs -def escape_bytes(qstr, qbytes): - if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr): - # qstr is all printable ASCII so render it as-is (for easier debugging) - return qstr - else: - # qstr contains non-printable codes so render entire thing as hex pairs - return "".join(("\\x%02x" % b) for b in qbytes) +class Qstr: + cfg_bytes_len = 0 + cfg_bytes_hash = 0 + def __init__(self, order, ident, qstr): + self.order = order + self.ident = ident + self.qstr = qstr -def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr): - qbytes = bytes_cons(qstr, "utf8") - qlen = len(qbytes) - qhash = compute_hash(qbytes, cfg_bytes_hash) - if qlen >= (1 << (8 * cfg_bytes_len)): - print("qstr is too long:", qstr) - assert False - qdata = escape_bytes(qstr, qbytes) - return '%d, %d, "%s"' % (qhash, qlen, qdata) + @property + def qbytes(self): + return bytes_cons(self.qstr, "utf8") + @property + def qlen(self): + if len(self.qbytes) >= (1 << (8 * Qstr.cfg_bytes_len)): + print("qstr is too long:", self.qstr) + assert False + return len(self.qbytes) -def print_qstr_data(qcfgs, qstrs): - # get config variables - cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"]) - cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"]) + @property + def qhash(self): + return compute_hash(self.qbytes, Qstr.cfg_bytes_hash) + + def _escape_bytes(self): + if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in self.qstr): + # qstr is all printable ASCII so render it as-is (for easier debugging) + return self.qstr + else: + # qstr contains non-printable codes so render entire thing as hex pairs + return "".join(("\\x%02x" % b) for b in self.qbytes) + + @property + def qdata(self): + return self._escape_bytes() + +def print_qstr_data(qstrs): # print out the starter of the generated C header file print("// This file was automatically generated by makeqstrdata.py") print("") # add NULL qstr with no hash or data - print('QDEF(MP_QSTRnull, 0, 0, "")') + print('QDEF0(MP_QSTRnull, 0, 0, "")') + + # split qstr values into two pools. static consts first. + q0_values = [q for q in qstrs.values() if q.order < 0] + q1_values = [q for q in qstrs.values() if q.order >= 0] - # go through each qstr and print it out - for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): - qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) - print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes)) + # go through each qstr in pool 0 and print it out. pool0 has special sort. + for q in sorted(q0_values, key=lambda x: x.qhash): + print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata)) + + # go through each qstr in pool 1 and print it out. pool1 is regularly sorted. + for q in sorted(q1_values, key=lambda x: x.qhash): + print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata)) def do_work(infiles): qcfgs, qstrs = parse_input_headers(infiles) - print_qstr_data(qcfgs, qstrs) + + # get config variables + Qstr.cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"]) + Qstr.cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"]) + + print_qstr_data(qstrs) if __name__ == "__main__": diff --git a/py/qstr.c b/py/qstr.c index ea700566f4f7d..5cbd6407b18a6 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -74,34 +74,82 @@ size_t qstr_compute_hash(const byte *data, size_t len) { return hash; } -const qstr_hash_t mp_qstr_const_hashes[] = { +const qstr_hash_t mp_qstr_const_hashes0[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) hash, +#define QDEF0(id, hash, len, str) hash, +#define QDEF1(id, hash, len, str) #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; -const qstr_len_t mp_qstr_const_lengths[] = { +const qstr_hash_t mp_qstr_const_hashes1[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) len, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) hash, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; +const qstr_len_t mp_qstr_const_lengths0[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) len, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_len_t mp_qstr_const_lengths1[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) len, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_pool_t mp_qstr_special_const_pool = { + NULL, // no previous pool + 0, // no previous pool + MICROPY_ALLOC_QSTR_ENTRIES_INIT, + MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes0, + (qstr_len_t *)mp_qstr_const_lengths0, + true, + { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) str, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif + (const char *)"", // spacer for MP_QSTRspecial_const_number_of + }, +}; + const qstr_pool_t mp_qstr_const_pool = { - NULL, // no previous pool - 0, // no previous pool + (qstr_pool_t *)&mp_qstr_special_const_pool, + MP_QSTRspecial_const_number_of + 1, MICROPY_ALLOC_QSTR_ENTRIES_INIT, - MP_QSTRnumber_of, // corresponds to number of strings in array just below - (qstr_hash_t *)mp_qstr_const_hashes, - (qstr_len_t *)mp_qstr_const_lengths, + MP_QSTRnumber_of - + (MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes1, + (qstr_len_t *)mp_qstr_const_lengths1, + true, // constant qstrs are sorted { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) str, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) str, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }, }; @@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len; pool->alloc = new_alloc; pool->len = 0; + pool->sorted = false; MP_STATE_VM(last_pool) = pool; DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc); } @@ -185,7 +234,30 @@ qstr qstr_find_strn(const char *str, size_t str_len) { // search pools for the data for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (mp_uint_t at = 0, top = pool->len; at < top; at++) { + size_t low = 0; + size_t high = pool->len - 1; + + // binary search inside the pool + if (pool->sorted) { + while (high - low > 1) { + size_t mid = (low + high) / 2; + int cmp = pool->hashes[mid] - str_hash; + if (cmp > 0) { + high = mid; + } else { + low = mid; + if (MP_UNLIKELY(cmp == 0)) { + while (low > 0 && pool->hashes[low - 1] == str_hash) { + low--; + } + break; + } + } + } + } + + // sequential search for the remaining strings + for (mp_uint_t at = low; at < high + 1; at++) { if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len && memcmp(pool->qstrs[at], str, str_len) == 0) { return pool->total_prev_len + at; diff --git a/py/qstr.h b/py/qstr.h index 0ef861f33e8d5..22c234e0c4932 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -38,9 +38,21 @@ // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr enum { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) id, + +#define QDEF0(id, hash, len, str) id, +#define QDEF1(id, hash, len, str) #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 + + MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above + +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) id, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif MP_QSTRnumber_of, // no underscore so it can't clash with any of the above }; @@ -71,6 +83,7 @@ typedef struct _qstr_pool_t { size_t len; qstr_hash_t *hashes; qstr_len_t *lengths; + bool sorted; const char *qstrs[]; } qstr_pool_t; diff --git a/tools/makemanifest.py b/tools/makemanifest.py index e69698d3f2340..c019f9a5058d4 100644 --- a/tools/makemanifest.py +++ b/tools/makemanifest.py @@ -415,7 +415,7 @@ def main(): b'#include "py/emitglue.h"\n' b"extern const qstr_pool_t mp_qstr_const_pool;\n" b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n" - b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n" + b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0, NULL, NULL, false, NULL\n" b"};\n" b'const char mp_frozen_names[] = { MP_FROZEN_STR_NAMES "\\0"};\n' b"const mp_raw_code_t *const mp_frozen_mpy_content[] = {NULL};\n" diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index 31212fd5bdda8..d1bfa12718b5c 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -1396,8 +1396,8 @@ def freeze_mpy(base_qstrs, compiled_modules): # don't add duplicates if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: continue - new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")) - new = sorted(new.values(), key=lambda x: x[0]) + new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str) + new = sorted(new.values(), key=lambda x: (x.qhash, x.qlen)) print('#include "py/mpconfig.h"') print('#include "py/objint.h"') @@ -1438,9 +1438,9 @@ def freeze_mpy(base_qstrs, compiled_modules): print("enum {") for i in range(len(new)): if i == 0: - print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i][1]) + print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i].ident) else: - print(" MP_QSTR_%s," % new[i][1]) + print(" MP_QSTR_%s," % new[i].ident) print("};") # As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len @@ -1460,18 +1460,17 @@ def freeze_mpy(base_qstrs, compiled_modules): print() print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {") qstr_size = {"metadata": 0, "data": 0} - for _, _, _, qbytes in new: - qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH) - print(" %d," % qhash) + for q in new: + print(" %d," % q.qhash) print("};") print() print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {") - for _, _, _, qbytes in new: - print(" %d," % len(qbytes)) + for q in new: + print(" %d," % len(q.qbytes)) qstr_size["metadata"] += ( config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH ) - qstr_size["data"] += len(qbytes) + qstr_size["data"] += len(q.qbytes) print("};") print() print("extern const qstr_pool_t mp_qstr_const_pool;") @@ -1482,11 +1481,15 @@ def freeze_mpy(base_qstrs, compiled_modules): print(" %u, // used entries" % len(new)) print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,") + print(" true, // entries are sorted") print(" {") - for _, _, qstr, qbytes in new: - print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes)) + for q in new: + print(' "%s",' % q.qdata) qstr_content += ( - config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1 + config.MICROPY_QSTR_BYTES_IN_LEN + + config.MICROPY_QSTR_BYTES_IN_HASH + + len(q.qbytes) + + 1 ) print(" },") print("};") @@ -1781,6 +1784,8 @@ def main(): # Create initial list of global qstrs. global_qstrs = GlobalQStrList() + qstrutil.Qstr.cfg_bytes_len = config.MICROPY_QSTR_BYTES_IN_LEN + qstrutil.Qstr.cfg_bytes_hash = config.MICROPY_QSTR_BYTES_IN_HASH # Load all .mpy files. try: