Performance

TODO: fix pyinstrument output colors

import inspect
import starstar
from starstar import unpack

# Default iteration count used by the benchmarks in this file.
N = 1_000_000

def time_signature(n=100000):
    """Benchmark ``starstar.signature`` against ``inspect.signature``.

    Both calls inspect the same three-keyword function ``funcA``. The
    ``inspect.signature`` timing is passed to ``timed`` as the ``compare``
    reference for the ``starstar.signature`` timing.

    Args:
        n: Iteration count forwarded to ``timed`` for each measurement.
    """
    heading('signature()')

    def funcA(a=1, b=1, c=1): pass

    def baseline():
        inspect.signature(funcA)

    def starstar_signature(**kw):
        starstar.signature(funcA)

    # NOTE(review): `timed` prints each function via inspect.getsource, so
    # editing the nested functions above also changes the printed report.
    dtbase = timed(baseline, n=n)
    dt = timed(starstar_signature, n=n, compare=dtbase)


def time_divide(n=N):
    """Benchmark ``starstar.divide`` against calling two functions directly.

    ``baseline`` calls ``funcA`` and ``funcB`` with explicit positional
    arguments. ``divide`` splits its keyword arguments between the two
    functions with ``starstar.divide`` and forwards each part.
    ``traced_divide`` has the same body but is additionally wrapped with
    ``starstar.traceto(funcA, funcB)``.

    Args:
        n: Iteration count forwarded to ``timed`` for each measurement.
    """
    heading('divide()')

    def funcA(a=1, b=1, c=1): pass
    def funcB(x=1, y=1, z=1): pass

    def baseline(a=1, b=1, c=1, x=1, y=1, z=1):
        funcA(a, b, c)
        funcB(x, y, z)

    def divide(**kw):
        kwa, kwb = starstar.divide(kw, funcA, funcB)
        funcA(**kwa)
        funcB(**kwb)

    @starstar.traceto(funcA, funcB)
    def traced_divide(**kw):
        kwa, kwb = starstar.divide(kw, funcA, funcB)
        funcA(**kwa)
        funcB(**kwb)

    # No extra keyword arguments are handed to `timed` here, so `baseline`
    # runs on its defaults and `kw` is empty in both divide variants.
    dtbase = timed(baseline, n=n)
    dt = timed(divide, compare=dtbase, n=n)
    dt = timed(traced_divide, compare=dtbase, n=n)


def time_unpack(n=N):
    """Benchmark ``starstar.unpack`` against hand-written dict unpacking.

    ``baseline`` reads ``'a'`` and ``'b'`` from ``data`` with ``dict.get`` and
    collects every other key into a new dict. ``unpack_test`` calls
    ``unpack(data, b=0, c=10)`` and destructures the result. The baseline
    timing is passed to ``timed`` as the ``compare`` reference.

    Args:
        n: Iteration count forwarded to ``timed`` for each measurement.
    """
    heading('unpack()')

    data = {'a': 5, 'b': 6, 'x': 0, 'y': 1, 'z': 2}

    def baseline():
        a, b, c = (
            data.get('a'), data.get('b'), 
            {k: data[k] for k in set(data) - {'b','a'}})

    def unpack_test():
        a, b, *(c,) = unpack(data, b=0, c=10)

    # `dt` first holds the baseline timing, then is reused for the unpack run.
    dt = timed(baseline, n=n)
    dt = timed(unpack_test, compare=dt, n=n)
    


def heading(txt):
    """Print ``txt`` as a banner framed by two rows of twenty asterisks."""
    rule = '*' * 20
    # A blank line separates the text from the rule above and below it.
    for row in (rule, '', txt, '', rule):
        print(row)


def timed(__func, compare=None, n=N, source=None, **kw):
    """Time ``n`` calls of ``__func(**kw)`` and print a short report.

    Args:
        __func: Callable to benchmark.
        compare: Optional per-iteration time (in seconds) of a reference run.
            When truthy, the report states how many times slower or faster
            this run was.
        n: Number of iterations to run. Must be positive.
        source: Text printed as the benchmarked code. Defaults to
            ``inspect.getsource(__func)``.
        **kw: Keyword arguments forwarded to every ``__func`` call.

    Returns:
        Average seconds per call, with the cost of the empty loop subtracted.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    import time

    if n <= 0:
        raise ValueError(f'n must be a positive iteration count, got {n!r}')

    # perf_counter is monotonic and has the best available resolution for
    # short intervals; time.time() can jump if the wall clock is adjusted.
    clock = time.perf_counter

    # measure the total cost of the bare loop so it can be subtracted below
    t0 = clock()
    for _ in range(n): pass
    overhead = clock() - t0

    # measure the loop with the call; iterate `n` times (the count that is
    # reported), not the module-wide default
    t0 = clock()
    for _ in range(n):
        __func(**kw)
    # both values are totals over n iterations, so subtract before dividing
    dt = (clock() - t0 - overhead) / n

    # timing noise can push dt to zero or below for trivial functions; skip
    # the ratio then rather than dividing by zero or printing a negative one
    if not compare or dt <= 0:
        ratio = ''
    elif dt > compare:
        ratio = f'({dt/compare:.3g}x slower)'
    else:
        ratio = f'({compare/dt:.3g}x faster)'

    print('---')
    print(f'time for {__func.__name__} ({n} iters):')
    print(source or inspect.getsource(__func))
    print('===')
    print(f'{dt:.3g}s / iter', ratio)
    print('---')
    print()
    return dt



import pyinstrument

# Profile each benchmark in its own pyinstrument session and print the
# profile right after that benchmark finishes.
for benchmark in (time_signature, time_divide, time_unpack):
    with pyinstrument.Profiler() as p:
        benchmark()
    p.print()

Output:

********************

signature()

********************
---
time for baseline (100000 iters):
    def baseline():
        inspect.signature(funcA)

===
2.14e-05s / iter 
---

---
time for starstar_signature (100000 iters):
    def starstar_signature(**kw):
        starstar.signature(funcA)

===
5.79e-07s / iter (36.9x faster)
---


  _     ._   __/__   _ _  _  _ _/_   Recorded: 23:10:37  Samples:  21983
 /_//_/// /_\ / //_// / //_'/ //     Duration: 22.029    CPU time: 22.027
/   _/                      v4.5.3

Program: performance.py

22.028 <module>  performance.py:1
└─ 22.028 time_signature  performance.py:7
   └─ 22.028 timed  performance.py:74
      ├─ 21.194 baseline  performance.py:12
      │  ├─ 20.574 signature  inspect.py:3081
      │  │     [29 frames hidden]  inspect, enum, <built-in>
      │  │        15.330 Signature._signature_from_function  inspect.py:2117
      │  │        ├─ 5.269 [self]  inspect.py
      │  └─ 0.621 [self]  performance.py
      ├─ 0.440 [self]  performance.py
      └─ 0.388 starstar_signature  performance.py:15
         └─ 0.261 [self]  performance.py


********************

divide()

********************
---
time for baseline (1000000 iters):
    def baseline(a=1, b=1, c=1, x=1, y=1, z=1):
        funcA(a, b, c)
        funcB(x, y, z)

===
6.72e-07s / iter 
---

---
time for divide (1000000 iters):
    def divide(**kw):
        kwa, kwb = starstar.divide(kw, funcA, funcB)
        funcA(**kwa)
        funcB(**kwb)

===
7.49e-06s / iter (11.2x slower)
---

---
time for traced_divide (1000000 iters):
    @starstar.traceto(funcA, funcB)
    def traced_divide(**kw):
        kwa, kwb = starstar.divide(kw, funcA, funcB)
        funcA(**kwa)
        funcB(**kwb)

===
7.85e-06s / iter (11.7x slower)
---


  _     ._   __/__   _ _  _  _ _/_   Recorded: 23:11:00  Samples:  16020
 /_//_/// /_\ / //_// / //_'/ //     Duration: 16.083    CPU time: 16.081
/   _/                      v4.5.3

Program: performance.py

16.082 <module>  performance.py:1
└─ 16.082 time_divide  performance.py:22
   └─ 16.082 timed  performance.py:74
      ├─ 7.611 f  starstar/core.py:227
      │     [2 frames hidden]  starstar
      │        7.302 traced_divide  performance.py:37
      │        ├─ 6.407 divide  starstar/core.py:25
      │        │     [11 frames hidden]  starstar, <built-in>, inspect
      │        └─ 0.761 [self]  performance.py
      ├─ 7.248 divide  performance.py:32
      │  ├─ 6.338 divide  starstar/core.py:25
      │  │     [11 frames hidden]  starstar, <built-in>, inspect
      │  └─ 0.740 [self]  performance.py
      ├─ 0.737 [self]  performance.py
      └─ 0.485 baseline  performance.py:28
         └─ 0.356 [self]  performance.py


********************

unpack()

********************
---
time for baseline (1000000 iters):
    def baseline():
        a, b, c = (
            data.get('a'), data.get('b'), 
            {k: data[k] for k in set(data) - {'b','a'}})

===
1.63e-06s / iter 
---

---
time for unpack_test (1000000 iters):
    def unpack_test():
        a, b, *(c,) = unpack(data, b=0, c=10)

===
7.23e-06s / iter (4.43x slower)
---


  _     ._   __/__   _ _  _  _ _/_   Recorded: 23:11:16  Samples:  8864
 /_//_/// /_\ / //_// / //_'/ //     Duration: 8.907     CPU time: 8.906
/   _/                      v4.5.3

Program: performance.py

8.906 <module>  performance.py:1
└─ 8.906 time_unpack  performance.py:48
   └─ 8.906 timed  performance.py:74
      ├─ 7.036 unpack_test  performance.py:58
      │  ├─ 4.391 _unpack  starstar/unpack.py:232
      │  │     [6 frames hidden]  starstar, <built-in>
      │  │        3.089 [self]  starstar/unpack.py
      │  ├─ 1.585 unpack  starstar/unpack.py:142
      │  │     [6 frames hidden]  starstar, <built-in>
      │  └─ 1.059 [self]  performance.py
      ├─ 1.449 baseline  performance.py:53
      │  ├─ 0.981 [self]  performance.py
      │  ├─ 0.332 <dictcomp>  performance.py:56
      │  └─ 0.136 dict.get  <built-in>
      └─ 0.421 [self]  performance.py