|
| 1 | +""" Test code to demo quickening in the Python interpreter |
| 2 | +
|
| 3 | +Specialization is typically done in the context of a JIT compiler, but research shows specialization |
| 4 | +in an interpreter can boost performance significantly, even outperforming a naive compiler. |
| 5 | +
|
| 6 | +Specialization at the level of individual bytecodes makes de-optimization trivial, as it cannot occur |
| 7 | +in the middle of a region. |
| 8 | +
|
| 9 | +This PEP proposes using a specializing, adaptive interpreter that specializes code aggressively, but |
| 10 | +over a very small region, and is able to adjust to mis-specialization rapidly and at low cost. |
| 11 | +
|
| 12 | +The interpreter selectively adapts bytecode to try to optimize it, and de-optimizes where the |
| 13 | +optimization is not stable. De-optimization should be cheap enough to run continually. |
| 14 | +
|
| 15 | +Most of the speedup comes directly from specialization. The largest contributors are speedups to attribute |
| 16 | +lookup, global variables, and calls. |
| 17 | +
|
| 18 | +Speedups seem to be in the range 10% - 60%. |
| 19 | +
|
| 20 | +NOTE: Caching of data in the bytecode is done using the opcode 'CACHE'. |
| 21 | +Ref -> https://docs.python.org/3.11/library/dis.html#opcode-CACHE |
| 22 | +
|
| 23 | +Any instruction that would benefit from specialization will be replaced by an "adaptive" form of that instruction. |
| 24 | +When executed, the adaptive instructions will specialize themselves in response to the types and values that they see. |
| 25 | +This process is known as "quickening". |
| 26 | +
|
| 27 | +# Ref: https://www.unibw.de/ucsrl/pubs/ecoop10.pdf |
| 28 | +
|
| 29 | +Each instruction that would benefit from specialization is replaced by an adaptive version during quickening. |
| 30 | +For example, the LOAD_ATTR instruction would be replaced with LOAD_ATTR_ADAPTIVE. BINARY_OP is replaced by |
| 31 | +BINARY_OP_ADAPTIVE. |
| 32 | +
|
| 33 | +LOAD_ATTR Specializations: |
| 34 | + - LOAD_ATTR_INSTANCE_VALUE - Attribute stored in object's value array |
| 35 | + - LOAD_ATTR_MODULE - Load an attribute from a module |
| 36 | + - LOAD_ATTR_SLOT - Load an attribute from an object's __slots__ |
| 37 | +
|
| 38 | +LOAD_GLOBAL |
| 39 | + - LOAD_GLOBAL_MODULE - Load an attribute from a module's global dict |
| 40 | +
|
| 41 | +BINARY_SUBSCR |
| 42 | + - BINARY_SUBSCR_<type> - Subscript (index/key) access on a container such as dict, list, etc. |
| 43 | +
|
| 44 | +Notes on Opcodes |
| 45 | +
|
| 46 | +RESUME_QUICK - A no-op. Performs internal tracing, debugging and optimization checks. |
| 47 | +This can be seen when the interpreter performs an optimization. |
| 48 | +LOAD_CONST_LOAD_FAST - No documentation |
| 49 | +
|
| 50 | +Ref: https://discuss.python.org/t/document-binary-op-opcodes/23884/10 |
| 51 | +
|
| 52 | +""" |
| 53 | + |
| 54 | +import dis |
| 55 | +import constants |
| 56 | + |
| 57 | +delta = 32 |
| 58 | +# Online: https://tinyurl.com/ycej54sv |
def celsius_to_fahrenheit(c):
    """Return the Fahrenheit equivalent of the Celsius temperature *c*.

    Serves as the BINARY_OP_ADAPTIVE example: the multiplication and the
    addition below are the instructions to watch in the disassembly.
    """
    # The expression is deliberately left in its original shape so that the
    # bytecode shown by the demo does not change.
    return 1.8 * c + 32
| 63 | + |
def celsius_to_fahrenheit_constant(c):
    """Convert the Celsius temperature *c* to Fahrenheit via module-level names.

    The scale factor is read from ``constants.FACTOR`` and the offset from the
    module global ``delta``, which makes this the LOAD_GLOBAL_MODULE example.
    """
    # Expression shape is preserved so the demo's disassembly stays the same.
    return constants.FACTOR * c + delta
| 68 | + |
def index_access(seq={}, index=0):
    """Return ``seq[index]`` for any subscriptable container.

    This is the BINARY_SUBSCR example used by the demo. With the defaults
    (an empty dict and index ``0``) the lookup raises ``KeyError``; the
    default dict is only read here, never modified.
    """
    return seq[index]
| 72 | + |
def run(
    count=7,
    second_count=52,
    func=celsius_to_fahrenheit,
    arg=37.0,
    other_arg=37.0,
):
    """Call *func* repeatedly and disassemble it at four checkpoints.

    Each ``dis.dis`` listing shows, per instruction:
      1. Line number
      2. Bytecode offset
      3. Opname
      4. Oparg

    Args:
        count: Number of warm-up calls made with *arg* before the first
            disassembly; each result is printed.
        second_count: Number of calls made with *other_arg* between the
            second and third disassembly.
        func: The callable to exercise and disassemble.
        arg: Argument passed during warm-up and for the one extra call.
        other_arg: Argument passed during the second phase.
    """

    def show(header):
        # Print a checkpoint header followed by the adaptive disassembly,
        # including the inline CACHE entries.
        print(header)
        dis.dis(func, adaptive=True, show_caches=True)

    # Warm-up phase: with the default count the code is not expected to be
    # specialized yet when the first listing is printed.
    for _ in range(count):
        print(func(arg))
    show('**dis#1**')

    # One more call - with the default count this is the eighth call, after
    # which the adaptive bytecode is expected to change.
    print(func(arg))
    show('**dis#2**')

    # Same information again, opnames only, via dis.Bytecode.
    getbytecode(func)

    # Second phase: feed the other argument; the listing is expected to stay
    # on the earlier specialization throughout these calls.
    for _ in range(second_count):
        func(other_arg)
    show('**dis#3**')

    # One further call, after which the specialization is expected to switch.
    func(other_arg)
    show('**dis#4**')
| 104 | + |
def getbytecode(func=celsius_to_fahrenheit):
    """Print the opname of each instruction in *func*'s adaptive bytecode.

    A ``**getbytecode**`` header line is printed first, then one opname per
    line in instruction order.
    """
    # Build the Bytecode object before printing the header, as the original
    # did, so a failure to disassemble surfaces before any output.
    instructions = dis.Bytecode(func, adaptive=True)
    print('**getbytecode**')
    for instruction in instructions:
        print(instruction.opname)
| 112 | + |
if __name__ == "__main__":
    # Manual switch between the two demos: set to True to show the arithmetic
    # demo (meant to be shown first), leave False for the subscript demo.
    show_arithmetic_demo = False

    if show_arithmetic_demo:
        # Show this first
        run()
    else:
        # Show this next: warm up with a dict, then switch to a list.
        mapping = {0: 10, 1: 20}
        sequence = [10, 20]
        run(func=index_access, arg=mapping, other_arg=sequence, second_count=52)
0 commit comments