BangPypers - March 2024

pythonhacker · pythonhacker · commit fd7ff05c8f83 · 2024-03-19T15:56:58.000+05:30
diff --git a/bangpypers/march-2024/constants.py b/bangpypers/march-2024/constants.py
@@ -0,0 +1 @@
+FACTOR=1.8  # Celsius-to-Fahrenheit multiplier; the demo scripts read it as constants.FACTOR
diff --git a/bangpypers/march-2024/inline.py b/bangpypers/march-2024/inline.py
@@ -0,0 +1,24 @@
+""" Demo - inlining a certain number of Python functions """
+
+import inspect
+import timeit
+
+def process(op, *args):  # outermost hop of the process -> call -> add/minus chain
+    return call(op, args[0], args[1])  # forwards only the first two positionals; fewer than two raises IndexError
+
+def call(op, x, y):  # dispatch on the operation name
+    if op == 'add':
+        return add(x,y)
+    elif op == 'subtract':  # no else branch: any other op falls through and returns None
+        return minus(x,y)
+
+def add(x,y):  # leaf of the call chain, reached through call() when op == 'add'
+    return x+y
+
+def minus(x,y):  # leaf of the call chain, reached through call() when op == 'subtract'
+    return x-y
+
+if __name__ == "__main__":
+    print(process('add',10,20))  # one plain call first; prints 30
+    # Python 3.11 is 2x faster than Python 3.10. timeit returns total seconds; over 1,000,000 runs that number equals usec per pass
+    print(timeit.Timer("process('add', 10, 20)", globals=globals()).timeit(number=1000000),'usec/pass')
diff --git a/bangpypers/march-2024/quicken.py b/bangpypers/march-2024/quicken.py
@@ -0,0 +1,122 @@
+""" Test code to demo quickening in the Python interpreter 
+
+Specialization is typically done in the context of a JIT compiler, but research shows specialization
+in an interpreter can boost performance significantly, even outperforming a naive compiler.
+
+Specialization at the level of individual bytecodes makes de-optimization trivial, as it cannot occur
+in the middle of a region.
+
+PEP 659 proposes using a specializing, adaptive interpreter that specializes code aggressively, but
+over a very small region, and is able to adjust to mis-specialization rapidly and at low cost.
+
+Selectively adapts bytecode to try and optimize and de-optimizes where the optimization is not
+stable. The de-optimization should be able to be run continually and cheaply.
+
+Most of the speedup comes directly from specialization. The largest contributors are speedups to attribute
+lookup, global variables, and calls.
+
+Speedups seem to be in the range 10% - 60%.
+
+NOTE: Caching of data in the bytecode is done using the opcode 'CACHE'.
+Ref -> https://docs.python.org/3.11/library/dis.html#opcode-CACHE
+
+Any instruction that would benefit from specialization will be replaced by an "adaptive" form of that instruction.
+When executed, the adaptive instructions will specialize themselves in response to the types and values that they see.
+This process is known as "quickening".
+
+# Ref: https://www.unibw.de/ucsrl/pubs/ecoop10.pdf
+
+Each instruction that would benefit from specialization is replaced by an adaptive version during quickening.
+For example, the LOAD_ATTR instruction would be replaced with LOAD_ATTR_ADAPTIVE. BINARY_OP is replaced by
+BINARY_OP_ADAPTIVE.
+
+LOAD_ATTR Specializations:
+  - LOAD_ATTR_INSTANCE_VALUE - Attribute stored in object's value array
+  - LOAD_ATTR_MODULE - Load an attribute from a module
+  - LOAD_ATTR_SLOT - Load an attribute from an object's __slots__
+
+LOAD_GLOBAL
+  - LOAD_GLOBAL_MODULE - Load an attribute from a module's global dict
+
+BINARY_SUBSCR
+  - BINARY_SUBSCR_<type> - Subscript access specialized for a container type such as dict or list
+
+Notes on Opcodes
+
+RESUME_QUICK - A no-op. Performs internal tracing, debugging and optimization checks.
+This can be seen when the interpreter performs an optimization.
+LOAD_CONST_LOAD_FAST - No documentation
+
+Ref: https://discuss.python.org/t/document-binary-op-opcodes/23884/10
+
+"""
+
+import dis
+import constants
+
+delta = 32  # Fahrenheit offset, read as a module global by celsius_to_fahrenheit_constant
+# Online: https://tinyurl.com/ycej54sv
+def celsius_to_fahrenheit(c):
+    """ Convert Celsius to Fahrenheit using literal constants """
+    ## BINARY_OP_ADAPTIVE example
+    return 1.8*c + 32
+
+def celsius_to_fahrenheit_constant(c):
+    """ Convert Celsius to Fahrenheit using a module attribute and a module global """
+    ## LOAD_GLOBAL_MODULE example (constants and delta are both looked up as globals)
+    return constants.FACTOR*c + delta
+
+def index_access(seq={}, index=0):  # the {} default is shared across calls; safe here only because it is never mutated
+    """ Return seq[index]; seq can be any subscriptable (the demo passes a dict, then a list) """
+    return seq[index]
+
+def run(count=7, second_count=52, func=celsius_to_fahrenheit, arg=37.0, other_arg=37.0):
+    """
+    Drive func through warm-up calls and disassemble it at four checkpoints.
+    func(arg) is called count times and then once more (results printed);
+    func(other_arg) is called second_count times and then once more (results
+    discarded). dis.dis output follows each of the four phases.
+    Columns of dis.dis output: line number, bytecode offset, opname, oparg.
+    """
+    
+    for i in range(count):
+        print(func(arg))
+    print('**dis#1**')
+    # After count calls (7 by default): not yet optimized
+    dis.dis(func, adaptive=True, show_caches=True)
+    # Run once more - adaptive bytecode changes
+    # after the eighth time (call number count + 1 with the defaults)
+    print(func(arg))
+    print('**dis#2**')    
+    dis.dis(func, adaptive=True, show_caches=True)
+    # Also print the opnames via getbytecode
+    getbytecode(func)
+    # Now for second_count calls with other_arg (52 by default) it doesn't change
+    for i in range(second_count):
+        func(other_arg)
+    # Still hopes to multiply float (wording refers to the default func)
+    print('**dis#3**')        
+    dis.dis(func, adaptive=True, show_caches=True)
+    func(other_arg)
+    # One more call (the 53rd with the defaults) and it switches back
+    print('**dis#4**')        
+    dis.dis(func, adaptive=True, show_caches=True)    
+
+def getbytecode(func=celsius_to_fahrenheit):
+    """ Print the opname of each instruction in func's adaptive bytecode """
+    # adaptive=True asks dis for the specialized (quickened) form of the bytecode
+    instructions = dis.Bytecode(func, adaptive=True)
+    print('**getbytecode**')
+    for opname in (ins.opname for ins in instructions):
+        print(opname)
+        
+if __name__ == "__main__":
+    if 1 == 0:  # manual presenter toggle: always False as written, so the else branch runs
+        # Show this first (default func: celsius_to_fahrenheit with float arguments)
+        run()
+    else:
+        # Show this next: warm up on a dict, then switch the container to a list
+        d={0:10, 1:20}
+        l=[10,20]
+        run(func=index_access, arg=d, other_arg=l, second_count=52)
+    # getbytecode()
diff --git a/bangpypers/march-2024/speedup.py b/bangpypers/march-2024/speedup.py
@@ -0,0 +1,32 @@
+""" Measure speed up of adaptive instructions """
+
+import timeit
+import dis
+import constants
+import numpy as np
+import math
+delta = 32  # Fahrenheit offset, read as a module global by c_to_f_constant
+def c_to_f(c):
+    """ Convert Celsius to Fahrenheit using literal constants """
+    ## BINARY_OP_ADAPTIVE example
+    return 1.8*c + 32
+
+def c_to_f_constant(c):  # same conversion as c_to_f, but via constants.FACTOR and the module global delta
+    return constants.FACTOR*c + delta
+
+def np_example():  # NumPy workload: scale a five-element array by pi
+    arr = np.array([1, 2, 3, 4, 5])
+    arr2 = arr*math.pi  # elementwise multiply; result is a new array
+    return arr2
+
+if __name__ == "__main__":
+    # Results (timeit returns total seconds; over 1,000,000 runs that number equals usec per pass)
+    # Python 3.10 - 0.064 usec/pass
+    # Python 3.11 and upwards - 0.035 - 0.04 usec/pass (1.5X improvement)    
+    print(timeit.Timer('c_to_f(37.0)', globals=globals()).timeit(number=1000000),'usec/pass')
+    # dis.dis(c_to_f, adaptive=True)
+    # dis.dis(c_to_f)
+    # Python 3.10 - 0.095 usec/pass
+    # Python 3.11 and upwards - 0.04 - 0.06 usec/pass (1.8~2X improvement)        
+    print(timeit.Timer('c_to_f_constant(37.0)', globals=globals()).timeit(number=1000000),'usec/pass')
+    print(timeit.Timer('np_example()', globals=globals()).timeit(number=1000000),'usec/pass')  # the statement must call the function; a bare name only times a global lookup
diff --git a/bangpypers/march-2024/talk.ipynb b/bangpypers/march-2024/talk.ipynb
diff --git a/bangpypers/march-2024/test_specialist.py b/bangpypers/march-2024/test_specialist.py