Skip to content

Commit fd7ff05

Browse files
committed
BangPypers - March 2024
1 parent fa2a99a commit fd7ff05

File tree

6 files changed

+797
-0
lines changed

6 files changed

+797
-0
lines changed

bangpypers/march-2024/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Celsius-to-Fahrenheit scale factor; the demo scripts read it as `constants.FACTOR`.
FACTOR=1.8

bangpypers/march-2024/inline.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
""" Demo - inlining a certain number of Python functions """
2+
3+
import inspect
4+
import timeit
5+
6+
def process(op, *args):
    """Forward ``op`` and the first two positional arguments to ``call``.

    Any positional arguments beyond the second are ignored. Supplying fewer
    than two raises ``IndexError``.
    """
    lhs = args[0]
    rhs = args[1]
    return call(op, lhs, rhs)
8+
9+
def call(op, x, y):
    """Apply the operation named by ``op`` to ``x`` and ``y``.

    ``'add'`` dispatches to ``add`` and ``'subtract'`` to ``minus``.
    Any other value of ``op`` yields ``None``.
    """
    if op == 'add':
        return add(x, y)
    if op == 'subtract':
        return minus(x, y)
    # Unknown operation: no result.
    return None
14+
15+
def add(x, y):
    """Return ``x + y``."""
    total = x + y
    return total
17+
18+
def minus(x, y):
    """Return ``x - y``."""
    difference = x - y
    return difference
20+
21+
if __name__ == "__main__":
    # One call through the process -> call -> add chain.
    print(process('add', 10, 20))
    # Python 3.11 is 2x faster than Python 3.10
    # timeit() returns the total seconds for `number` runs; with one million
    # runs that total is numerically the microseconds per pass.
    timer = timeit.Timer("process('add', 10, 20)", globals=globals())
    elapsed = timer.timeit(number=1000000)
    print(elapsed, 'usec/pass')

bangpypers/march-2024/quicken.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
""" Test code to demo quickening in the Python interpreter
2+
3+
Specialization is typically done in the context of a JIT compiler, but research shows specialization
4+
in an interpreter can boost performance significantly, even outperforming a naive compiler.
5+
6+
Specialization at the level of individual bytecodes makes de-optimization trivial, as it cannot occur
7+
in the middle of a region.
8+
9+
This PEP proposes using a specializing, adaptive interpreter that specializes code aggressively, but
10+
over a very small region, and is able to adjust to mis-specialization rapidly and at low cost.
11+
12+
Selectively adapts bytecode to try and optimize and de-optimizes where the optimization is not
13+
stable. The de-optimization should be able to be run continually and cheaply.
14+
15+
Most of the speedup comes directly from specialization. The largest contributors are speedups to attribute
16+
lookup, global variables, and calls.
17+
18+
Speedups seem to be in the range 10% - 60%.
19+
20+
NOTE: Caching of data in the bytecode is done using the opcode 'CACHE'.
21+
Ref -> https://docs.python.org/3.11/library/dis.html#opcode-CACHE
22+
23+
Any instruction that would benefit from specialization will be replaced by an "adaptive" form of that instruction.
24+
When executed, the adaptive instructions will specialize themselves in response to the types and values that they see.
25+
This process is known as "quickening".
26+
27+
# Ref: https://www.unibw.de/ucsrl/pubs/ecoop10.pdf
28+
29+
Each instruction that would benefit from specialization is replaced by an adaptive version during quickening.
30+
For example, the LOAD_ATTR instruction would be replaced with LOAD_ATTR_ADAPTIVE. BINARY_OP is replaced by
31+
BINARY_OP_ADAPTIVE.
32+
33+
LOAD_ATTR Specializations:
34+
- LOAD_ATTR_INSTANCE_VALUE - Attribute stored in the object's value array
35+
- LOAD_ATTR_MODULE - Load an attribute from a module
36+
- LOAD_ATTR_SLOT - Load an attribute from an object's __slots__
37+
38+
LOAD_GLOBAL
39+
- LOAD_GLOBAL_MODULE - Load an attribute from a module's global dict
40+
41+
BINARY_SUBSCR
42+
- BINARY_SUBSCR_<type> - Subscript access (obj[key]) on a container such as dict, list, etc.
43+
44+
Notes on Opcodes
45+
46+
RESUME_QUICK - A no-op. Performs internal tracing, debugging and optimization checks.
47+
this can be seen when the interpreter performs an optimization
48+
LOAD_CONST_LOAD_FAST - No documentation
49+
50+
Ref: https://discuss.python.org/t/document-binary-op-opcodes/23884/10
51+
52+
"""
53+
54+
import dis
55+
import constants
56+
57+
# Fahrenheit offset; read as a module global by celsius_to_fahrenheit_constant.
delta = 32
58+
# Online: https://tinyurl.com/ycej54sv
59+
def celsius_to_fahrenheit(c):
    """ Convert Celsius to Fahrenheit.

    Returns ``1.8*c + 32``, computed from literal constants only.
    This function is the default ``func`` disassembled by ``run``, so its
    body is kept to a single expression.
    """
    ## BINARY_OP_ADAPTIVE example
    return 1.8*c + 32
63+
64+
def celsius_to_fahrenheit_constant(c):
    """ Convert Celsius to Fahrenheit using a module attribute.

    Returns ``constants.FACTOR*c + delta``: the scale factor is read as an
    attribute of the imported ``constants`` module and the offset from this
    module's global ``delta``.
    """
    ## LOAD_GLOBAL_MODULE example
    return constants.FACTOR*c + delta
68+
69+
def index_access(seq={}, index=0):
    """ Subscript access on a container.

    Returns ``seq[index]``. The script's main block passes this function to
    ``run`` with a dict first and a list afterwards.
    """
    # NOTE(review): the default ``seq`` is an empty dict, so calling this with
    # no arguments raises KeyError. The default is a mutable object, but this
    # function never mutates it.
    return seq[index]
72+
73+
def run(count=7, second_count=52, func=celsius_to_fahrenheit, arg=37.0, other_arg=37.0):
    """
    Call ``func`` repeatedly and disassemble it at four checkpoints.

    Sequence:
      1. Call ``func(arg)`` ``count`` times, printing each result, then
         disassemble (``**dis#1**``).
      2. Call ``func(arg)`` once more, print the result, disassemble
         (``**dis#2**``) and list the opnames via ``getbytecode``.
      3. Call ``func(other_arg)`` ``second_count`` times, discarding the
         results, then disassemble (``**dis#3**``).
      4. Call ``func(other_arg)`` one more time and disassemble (``**dis#4**``).

    Every disassembly uses ``dis.dis(func, adaptive=True, show_caches=True)``.

    Output of dis.dis
    1. Line number
    2. Bytecode Offset
    3. Opname
    4. Oparg
    """

    def show(label):
        # Print the checkpoint label, then the adaptive disassembly.
        print(label)
        dis.dis(func, adaptive=True, show_caches=True)

    for _ in range(count):
        print(func(arg))
    # Not yet optimized
    show('**dis#1**')

    # Run once more - adaptive bytecode changes
    # after the eighth time
    print(func(arg))
    show('**dis#2**')
    # Also print the bytecode via getbytecode
    getbytecode(func)

    # Now for 52 times it doesn't change
    for _ in range(second_count):
        func(other_arg)
    # Still hopes to multiply float
    show('**dis#3**')

    func(other_arg)
    # 53rd time it switches back
    show('**dis#4**')
104+
105+
def getbytecode(func=celsius_to_fahrenheit):
    """Print the opname of every instruction in ``func``'s adaptive bytecode.

    A ``**getbytecode**`` header line is printed first, followed by one
    opname per line.
    """
    # Use dis.Bytecode(func) instead to list the non-adaptive bytecode.
    instructions = dis.Bytecode(func, adaptive=True)
    print('**getbytecode**')
    for opname in (instruction.opname for instruction in instructions):
        print(opname)
112+
113+
if __name__ == "__main__":
    # Demo selector: set to True to run the arithmetic demo with run()'s
    # defaults; left False, the subscript demo runs instead.
    show_arithmetic_demo = False
    if show_arithmetic_demo:
        # Show this first
        run()
    else:
        # Show this next
        as_dict = {0: 10, 1: 20}
        as_list = [10, 20]
        # Warm up on a dict, then switch the same call site to a list.
        run(func=index_access, arg=as_dict, other_arg=as_list, second_count=52)
    # getbytecode()

bangpypers/march-2024/speedup.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
""" Measure speed up of adaptive instructions """
2+
3+
import timeit
4+
import dis
5+
import constants
6+
import numpy as np
7+
import math
8+
# Fahrenheit offset; read as a module global by c_to_f_constant.
delta = 32
9+
def c_to_f(c):
    """Return the Fahrenheit value for the Celsius temperature ``c``.

    Computed as ``1.8*c + 32`` from literal constants only.
    """
    ## BINARY_OP_ADAPTIVE example
    scaled = 1.8*c
    return scaled + 32
13+
14+
def c_to_f_constant(c):
    """Return the Fahrenheit value for ``c`` using module-level constants.

    The scale factor is read from ``constants.FACTOR`` and the offset from
    this module's global ``delta``.
    """
    factor = constants.FACTOR
    return factor*c + delta
16+
17+
def np_example():
    """Return the NumPy array ``[1, 2, 3, 4, 5]`` scaled by ``math.pi``."""
    return np.array([1, 2, 3, 4, 5]) * math.pi
21+
22+
if __name__ == "__main__":
    # Timer.timeit() returns the total seconds for `number` runs; with one
    # million runs that total is numerically the microseconds per pass,
    # which is why each result is labelled 'usec/pass'.

    # Results
    # Python 3.10 - 0.064 usec/pass
    # Python 3.11 and upwards - 0.035 - 0.04 usec/pass (1.5X improvement)
    print(timeit.Timer('c_to_f(37.0)', globals=globals()).timeit(number=1000000),'usec/pass')
    # dis.dis(c_to_f, adaptive=True)
    # dis.dis(c_to_f)
    # Python 3.10 - 0.095 usec/pass
    # Python 3.11 and upwards - 0.04 - 0.06 usec/pass (1.8~2X improvement)
    print(timeit.Timer('c_to_f_constant(37.0)', globals=globals()).timeit(number=1000000),'usec/pass')
    # The statement must call the function: the bare name 'np_example' would
    # only time a global name lookup and never run the NumPy code.
    print(timeit.Timer('np_example()', globals=globals()).timeit(number=1000000),'usec/pass')

0 commit comments

Comments
 (0)