Commit 277eca3b authored by Martin Bauer's avatar Martin Bauer
Browse files

Improvements in tutorials/demos

- typos, more content
- added numba to benchmark comparison
parent 1c3e9880
This source diff could not be displayed because it is too large. You can view the blob instead.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from pystencils.session import * from pystencils.session import *
import timeit import timeit
%load_ext Cython %load_ext Cython
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Demo: Benchmark numpy, cython, pystencils # Demo: Benchmark numpy, Cython, pystencils
In this benchmark we compare different ways of implementing a simple stencil kernel in Python. In this notebook we compare and benchmark different ways of implementing a simple stencil kernel in Python.
The benchmark kernel computes the average of the four neighbors in 2D and stores in a second array. To prevent out-of-bounds accesses, we skip the cells at the border and compute values only in the range `[1:-1, 1:-1]` Our simple example computes the average of the four neighbors in 2D and stores it in a second array. To prevent out-of-bounds accesses, we skip the cells at the border and compute values only in the range `[1:-1, 1:-1]`
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Implementations ## Implementations
The first implementation is a pure Python implementation: The first implementation is a pure Python implementation:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def avg_pure_python(input_arr, output_arr): def avg_pure_python(src, dst):
for x in range(1, input_arr.shape[0] - 1): for x in range(1, src.shape[0] - 1):
for y in range(1, input_arr.shape[1] - 1): for y in range(1, src.shape[1] - 1):
output_arr[x, y] = (input_arr[x + 1, y] + input_arr[x - 1, y] + dst[x, y] = (src[x + 1, y] + src[x - 1, y] +
input_arr[x, y + 1] + input_arr[x, y - 1]) / 4 src[x, y + 1] + src[x, y - 1]) / 4
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Obviously, this will be a rather slow version, since the loops are written directly in Python. Obviously, this will be a rather slow version, since the loops are written directly in Python.
Next, we use *numpy* functions to delegate the looping to numpy. The first version uses the `roll` function to shift the array by one element in each direction. This version has to allocate a new array for each accessed neighbor. Next, we use *numpy* functions to delegate the looping to numpy. The first version uses the `roll` function to shift the array by one element in each direction. This version has to allocate a new array for each accessed neighbor.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def avg_numpy_roll(input_arr, output_arr): def avg_numpy_roll(src, dst):
neighbors = [np.roll(input_arr, axis=a, shift=s) for a in (0, 1) for s in (-1, 1)] neighbors = [np.roll(src, axis=a, shift=s) for a in (0, 1) for s in (-1, 1)]
np.divide(sum(neighbors), 4, out=output_arr) np.divide(sum(neighbors), 4, out=dst)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Using views, we can get rid of the additional copies: Using views, we can get rid of the additional copies:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def avg_numpy_slice(input_arr, output_arr): def avg_numpy_slice(src, dst):
output_arr[1:-1, 1:-1] = input_arr[2:, 1:-1] + input_arr[:-2, 1:-1] + \ dst[1:-1, 1:-1] = src[2:, 1:-1] + src[:-2, 1:-1] + \
input_arr[1:-1, 2:] + input_arr[1:-1, :-2] src[1:-1, 2:] + src[1:-1, :-2]
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
To further optimize the kernel we switch to Cython, to get a compiled C version. To further optimize the kernel we switch to Cython, to get a compiled C version.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
%%cython %%cython
import cython import cython
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
def avg_cython(object[double, ndim=2] input_arr, object[double, ndim=2] output_arr): def avg_cython(object[double, ndim=2] src, object[double, ndim=2] dst):
cdef int xs, ys, x, y cdef int xs, ys, x, y
xs, ys = input_arr.shape xs, ys = src.shape
for x in range(1, xs - 1): for x in range(1, xs - 1):
for y in range(1, ys - 1): for y in range(1, ys - 1):
output_arr[x, y] = (input_arr[x + 1, y] + input_arr[x - 1, y] + dst[x, y] = (src[x + 1, y] + src[x - 1, y] +
input_arr[x, y + 1] + input_arr[x, y - 1]) / 4 src[x, y + 1] + src[x, y - 1]) / 4
```
%% Cell type:markdown id: tags:
If available, we also try the numba just-in-time compiler
%% Cell type:code id: tags:
``` python
try:
from numba import jit
@jit(nopython=True)
def avg_numba(src, dst):
dst[1:-1, 1:-1] = src[2:, 1:-1] + src[:-2, 1:-1] + \
src[1:-1, 2:] + src[1:-1, :-2]
except ImportError:
pass
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
And finally we also create a *pystencils* version of the same stencil code: And finally we also create a *pystencils* version of the same stencil code:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
src, dst = ps.fields("src, dst: [2D]") src, dst = ps.fields("src, dst: [2D]")
update = ps.Assignment(dst[0,0], update = ps.Assignment(dst[0,0],
(src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4) (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)
kernel = ps.create_kernel(update).compile() avg_pystencils = ps.create_kernel(update).compile()
def avg_pystencils(input_arr, output_arr):
kernel(src=input_arr, dst=output_arr)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
all_implementations = { all_implementations = {
'pure Python': avg_pure_python, 'pure Python': avg_pure_python,
'numpy roll': avg_numpy_roll, 'numpy roll': avg_numpy_roll,
'numpy slice': avg_numpy_slice, 'numpy slice': avg_numpy_slice,
'Cython': None,
'pystencils': avg_pystencils, 'pystencils': avg_pystencils,
} }
if 'avg_cython' in globals(): if 'avg_cython' in globals():
all_implementations['Cython'] = avg_cython all_implementations['Cython'] = avg_cython
else: if 'avg_numba' in globals():
del all_implementations['Cython'] all_implementations['numba'] = avg_numba
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Benchmark functions ## Benchmark functions
We implement a short function to get in- and output arrays of a given shape and to measure the runtime. We implement a short function to get in- and output arrays of a given shape and to measure the runtime.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def get_arrays(shape): def get_arrays(shape):
in_arr = np.random.rand(*shape) in_arr = np.random.rand(*shape)
out_arr = np.empty_like(in_arr) out_arr = np.empty_like(in_arr)
return in_arr, out_arr return in_arr, out_arr
def do_benchmark(func, shape): def do_benchmark(func, shape):
in_arr, out_arr = get_arrays(shape) in_arr, out_arr = get_arrays(shape)
timer = timeit.Timer('f(a, b)', globals={'f': func, 'a': in_arr, 'b': out_arr}) func(src=in_arr, dst=out_arr) # warmup
timer = timeit.Timer('f(src=src, dst=dst)', globals={'f': func, 'src': in_arr, 'dst': out_arr})
calls, time_taken = timer.autorange() calls, time_taken = timer.autorange()
return time_taken / calls return time_taken / calls
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Comparison ## Comparison
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
plot_order = ['pystencils', 'Cython', 'numba', 'numpy slice', 'numpy roll', 'pure Python']
plot_order = [p for p in plot_order if p in all_implementations]
def bar_plot(*shape): def bar_plot(*shape):
names = tuple(all_implementations.keys()) names = plot_order
runtimes = tuple(do_benchmark(all_implementations[name], shape) for name in names) runtimes = tuple(do_benchmark(all_implementations[name], shape) for name in names)
for runtime, name in zip(runtimes, names): for runtime, name in zip(runtimes, names):
assert runtime >= runtimes[names.index('pystencils')], runtimes # assert that pystencils is the fastest
# if some change degrades performance of pystencils, we see this automatically in CI system
assert runtime >= runtimes[names.index('pystencils')], "pystencils is slower than " + name
speedups = tuple(runtime / min(runtimes) for runtime in runtimes) speedups = tuple(runtime / min(runtimes) for runtime in runtimes)
y_pos = np.arange(len(names)) y_pos = np.arange(len(names))
labels = tuple(f"{name} ({round(speedup, 1)} x)" for name, speedup in zip(names, speedups)) labels = tuple(f"{name} ({round(speedup, 1)} x)" for name, speedup in zip(names, speedups))
plt.text(0.5, 0.5, f"Size {shape}", horizontalalignment='center', fontsize=16, plt.text(0.5, 0.5, f"Size {shape}", horizontalalignment='center', fontsize=16,
verticalalignment='center', transform=plt.gca().transAxes) verticalalignment='center', transform=plt.gca().transAxes)
plt.barh(y_pos, runtimes, log=True) plt.barh(y_pos, runtimes, log=True)
plt.yticks(y_pos, labels); plt.yticks(y_pos, labels);
plt.xlabel('Runtime of single iteration') plt.xlabel('Runtime of single iteration')
plt.figure(figsize=(8, 8)) plt.figure(figsize=(8, 8))
plt.subplot(3, 1, 1) plt.subplot(3, 1, 1)
bar_plot(16, 16) bar_plot(32, 32)
plt.subplot(3, 1, 2) plt.subplot(3, 1, 2)
bar_plot(128, 128) bar_plot(128, 128)
plt.subplot(3, 1, 3) plt.subplot(3, 1, 3)
bar_plot(1024, 1024) bar_plot(2048, 2048)
``` ```
%%%% Output: display_data %%%% Output: display_data
![]() ![]()
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
All runtimes are plotted logarithmically. Next number next to the labels shows how much slower the version is than the fastest one. For small arrays Cython produces faster code than *pystencils*. The larger the arrays, the better pystencils gets. All runtimes are plotted logarithmically. Numbers next to the labels show how much slower the version is than the fastest one.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment