Commit 52775e94 authored by Markus Holzer's avatar Markus Holzer
Browse files

Merge branch 'create_kernel_api' into 'master'

`create_kernel` API Update

See merge request pycodegen/pystencils!261
parents 7cd099db 2a6d08ec
__pycache__
.ipynb_checkpoints
.coverage
.coverage*
*.pyc
*.vti
/build
......@@ -18,4 +18,4 @@ pystencils/boundaries/createindexlistcython.c
pystencils/boundaries/createindexlistcython.*.so
pystencils_tests/tmp
pystencils_tests/kerncraft_inputs/.2d-5pt.c_kerncraft/
pystencils_tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
\ No newline at end of file
pystencils_tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
This source diff could not be displayed because it is too large. You can view the blob instead.
%% Cell type:code id: tags:
 
``` python
from pystencils.session import *
import pystencils as ps
sp.init_printing()
frac = sp.Rational
```
 
%% Cell type:markdown id: tags:
 
# Tutorial 06: Phase-field simulation of dentritic solidification
 
This is the second tutorial on phase field methods with pystencils. Make sure to read the previous tutorial first.
 
In this tutorial we again implement a model described in **Programming Phase-Field Modelling** by S. Bulent Biner.
This time we implement the model from chapter 4.7 that describes dentritic growth. So get ready for some beautiful snowflake pictures.
 
We start again by adding all required arrays fields. This time we explicitly store the change of the phase variable φ in time, since the dynamics is calculated using an Allen-Cahn formulation where a term $\partial_t \phi$ occurs.
 
%% Cell type:code id: tags:
 
``` python
dh = ps.create_data_handling(domain_size=(300, 300), periodicity=True,
default_target='cpu')
default_target=ps.Target.CPU)
φ_field = dh.add_array('phi', latex_name='φ')
φ_field_tmp = dh.add_array('phi_temp', latex_name='φ_temp')
φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
t_field = dh.add_array('T')
```
 
%% Cell type:markdown id: tags:
 
This model has a lot of parameters that are created here in a symbolic fashion.
 
%% Cell type:code id: tags:
 
``` python
ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
εb = sp.Symbol("\\bar{\\epsilon}")
 
φ = φ_field.center
φ_tmp = φ_field_tmp.center
T = t_field.center
 
def f(φ, m):
return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
 
free_energy_density = ε**2 / 2 * (ps.fd.Diff(φ,0)**2 + ps.fd.Diff(φ,1)**2 ) + f(φ, m)
free_energy_density
```
 
%% Output
 
$\displaystyle \frac{{{φ}_{(0,0)}}^{4}}{4} - {{φ}_{(0,0)}}^{3} \left(\frac{1}{2} - \frac{m}{3}\right) + {{φ}_{(0,0)}}^{2} \left(\frac{1}{4} - \frac{m}{2}\right) + \frac{ε^{2} \left({\partial_{0} {{φ}_{(0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0)}}}^{2}\right)}{2}$
4 2 ⎛ 2 2⎞
φ_C 3 ⎛1 m⎞ 2 ⎛1 m⎞ ε ⋅⎝D(φ[0,0]) + D(φ[0,0]) ⎠
──── - φ_C ⋅⎜─ - ─⎟ + φ_C ⋅⎜─ - ─⎟ + ────────────────────────────
4 ⎝2 3⎠ ⎝4 2⎠ 2
 
%% Cell type:markdown id: tags:
 
The free energy is again composed of a bulk and interface part.
 
%% Cell type:code id: tags:
 
``` python
plt.figure(figsize=(7,4))
plt.sympy_function(f(φ, m=1), x_values=(-1.05, 1.5) )
plt.xlabel("φ")
plt.title("Bulk free energy");
```
 
%% Output
 
 
%% Cell type:markdown id: tags:
 
Compared to last tutorial, this bulk free energy has also two minima, but at different values.
 
Another complexity of the model is its anisotropy. The gradient parameter $\epsilon$ depends on the interface normal.
 
%% Cell type:code id: tags:
 
``` python
def σ(θ):
return 1 + δ * sp.cos(j * (θ - θzero))
 
θ = sp.atan2(ps.fd.Diff(φ, 1), ps.fd.Diff(φ, 0))
 
ε_val = εb * σ(θ)
ε_val
```
 
%% Output
 
$\displaystyle \bar{\epsilon} \left(δ \cos{\left(j \left(- θ_{0} + \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)}\right) \right)} + 1\right)$
\bar{\epsilon}⋅(δ⋅cos(j⋅(-θ₀ + atan2(D(φ[0,0]), D(φ[0,0])))) + 1)
 
%% Cell type:code id: tags:
 
``` python
def m_func(T):
return (α / sp.pi) * sp.atan(γ * (Teq - T))
```
 
%% Cell type:markdown id: tags:
 
However, we just insert these parameters into the free energy formulation before doing the functional derivative, to make the dependence of $\epsilon$ on $\nabla \phi$ explicit.
 
%% Cell type:code id: tags:
 
``` python
fe = free_energy_density.subs({
m: m_func(T),
ε: εb * σ(θ),
})
 
dF_dφ = ps.fd.functional_derivative(fe, φ)
dF_dφ = ps.fd.expand_diff_full(dF_dφ, functions=[φ])
dF_dφ
```
 
%% Output
 
$\displaystyle {{φ}_{(0,0)}}^{3} - \frac{{{φ}_{(0,0)}}^{2} α \operatorname{atan}{\left({{T}_{(0,0)}} γ - T_{eq} γ \right)}}{\pi} - \frac{3 {{φ}_{(0,0)}}^{2}}{2} + \frac{{{φ}_{(0,0)}} α \operatorname{atan}{\left({{T}_{(0,0)}} γ - T_{eq} γ \right)}}{\pi} + \frac{{{φ}_{(0,0)}}}{2} - \bar{\epsilon}^{2} δ^{2} \cos^{2}{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{0} {\partial_{0} {{φ}_{(0,0)}}}} - \bar{\epsilon}^{2} δ^{2} \cos^{2}{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{1} {\partial_{1} {{φ}_{(0,0)}}}} - 2 \bar{\epsilon}^{2} δ \cos{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{0} {\partial_{0} {{φ}_{(0,0)}}}} - 2 \bar{\epsilon}^{2} δ \cos{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{1} {\partial_{1} {{φ}_{(0,0)}}}} - \bar{\epsilon}^{2} {\partial_{0} {\partial_{0} {{φ}_{(0,0)}}}} - \bar{\epsilon}^{2} {\partial_{1} {\partial_{1} {{φ}_{(0,0)}}}}$
2 2
3 φ_C ⋅α⋅atan(T_C⋅γ - T_eq⋅γ) 3⋅φ_C φ_C⋅α⋅atan(T_C⋅γ - T_eq⋅γ) φ_C
φ_C - ─────────────────────────── - ────── + ────────────────────────── + ───
π 2 π 2
2 2 2
- \bar{\epsilon} ⋅δ ⋅cos (j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0]))
2 2 2
- \bar{\epsilon} ⋅δ ⋅cos (j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0])) -
2
2⋅\bar{\epsilon} ⋅δ⋅cos(j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0])) -
2
2⋅\bar{\epsilon} ⋅δ⋅cos(j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0])) - \
2 2
bar{\epsilon} ⋅D(D(φ[0,0])) - \bar{\epsilon} ⋅D(D(φ[0,0]))
 
%% Cell type:markdown id: tags:
 
Then we insert all the numeric parameters and discretize:
 
%% Cell type:code id: tags:
 
``` python
discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
parameters = {
τ: 0.0003,
κ: 1.8,
εb: 0.01,
δ: 0.02,
γ: 10,
j: 6,
α: 0.9,
Teq: 1.0,
θzero: 0.2,
sp.pi: sp.pi.evalf()
}
parameters
```
 
%% Output
 
$\displaystyle \left\{ \pi : 3.14159265358979, \ T_{eq} : 1.0, \ \bar{\epsilon} : 0.01, \ j : 6, \ α : 0.9, \ γ : 10, \ δ : 0.02, \ θ_{0} : 0.2, \ κ : 1.8, \ τ : 0.0003\right\}$
{π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
0.02, θ₀: 0.2, κ: 1.8, τ: 0.0003}
 
%% Cell type:code id: tags:
 
``` python
dφ_dt = - dF_dφ / τ
φ_eqs = ps.simp.sympy_cse_on_assignment_list([ps.Assignment(φ_delta_field.center,
discretize(dφ_dt.subs(parameters)))])
φ_eqs.append(ps.Assignment(φ_tmp, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
 
temperature_evolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
temperature_eqs = [
ps.Assignment(T, discretize(temperature_evolution.subs(parameters)))
]
```
 
%% Cell type:markdown id: tags:
 
When creating the kernels we pass as target (which may be 'cpu' or 'gpu') the default target of the target handling. This enables to switch to a GPU simulation just by changing the parameter of the data handling.
When creating the kernels we pass as target (which may be 'Target.CPU' or 'Target.GPU') the default target of the target handling. This enables to switch to a GPU simulation just by changing the parameter of the data handling.
 
The rest is similar to the previous tutorial.
 
%% Cell type:code id: tags:
 
``` python
φ_kernel = ps.create_kernel(φ_eqs, cpu_openmp=4, target=dh.default_target).compile()
temperature_kernel = ps.create_kernel(temperature_eqs, target=dh.default_target).compile()
```
 
%% Cell type:code id: tags:
 
``` python
def timeloop(steps=200):
φ_sync = dh.synchronization_function(['phi'])
temperature_sync = dh.synchronization_function(['T'])
dh.all_to_gpu() # this does nothing when running on CPU
for t in range(steps):
φ_sync()
dh.run_kernel(φ_kernel)
dh.swap('phi', 'phi_temp')
temperature_sync()
dh.run_kernel(temperature_kernel)
dh.all_to_cpu()
return dh.gather_array('phi')
 
def init(nucleus_size=np.sqrt(5)):
for b in dh.iterate():
x, y = b.cell_index_arrays
x, y = x-b.shape[0]//2, y-b.shape[0]//2
bArr = (x**2 + y**2) < nucleus_size**2
b['phi'].fill(0)
b['phi'][(x**2 + y**2) < nucleus_size**2] = 1.0
b['T'].fill(0.0)
 
def plot():
plt.subplot(1,3,1)
plt.scalar_field(dh.gather_array('phi'))
plt.title("φ")
plt.colorbar()
plt.subplot(1,3,2)
plt.title("T")
plt.scalar_field(dh.gather_array('T'))
plt.colorbar()
plt.subplot(1,3,3)
plt.title("∂φ")
plt.scalar_field(dh.gather_array('phidelta'))
plt.colorbar()
```
 
%% Cell type:code id: tags:
 
``` python
timeloop(10)
init()
plot()
print(dh)
```
 
%% Output
 
Name| Inner (min/max)| WithGl (min/max)
----------------------------------------------------
T| ( 0, 0)| ( 0, 0)
phi| ( 0, 1)| ( 0, 1)
phi_temp| ( 0, 0)| ( 0, 0)
phidelta| ( 0, 0)| ( 0, 0)
 
 
%% Cell type:code id: tags:
 
``` python
result = None
if 'is_test_run' not in globals():
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=600)
result = ps.jupyter.display_as_html_video(ani)
result
```
 
%% Output
 
<IPython.core.display.HTML object>
 
%% Cell type:code id: tags:
 
``` python
assert np.isfinite(dh.max('phi'))
assert np.isfinite(dh.max('T'))
```
......
%% Cell type:code id: tags:
``` python
import psutil
from pystencils.session import *
import shutil
```
%% Cell type:markdown id: tags:
# Demo: Finite differences - 2D wave equation
In this tutorial we show how to use the finite difference module of *pystencils* to solve a 2D wave equations. The time derivative is discretized by a simple forward Euler method.
%% Cell type:markdown id: tags:
$$ \frac{\partial^2 u}{\partial t^2} = \mbox{div} \left( q(x,y) \nabla u \right)$$
%% Cell type:markdown id: tags:
We begin by creating three *numpy* arrays for the current, the previous and the next timestep. Actually we will see later that two fields are enough, but let's keep it simple. From these *numpy* arrays we create *pystencils* fields to formulate our update rule.
%% Cell type:code id: tags:
``` python
size = (60, 70) # domain size
u_arrays = [np.zeros(size), np.zeros(size), np.zeros(size)]
u_fields = [ps.Field.create_from_numpy_array("u%s" % (name,), arr)
for name, arr in zip(["0", "1", "2"], u_arrays)]
# Nicer display for fields
for i, field in enumerate(u_fields):
field.latex_name = "u^{(%d)}" % (i,)
```
%% Cell type:markdown id: tags:
*pystencils* contains already simple rules to discretize the a diffusion term. The time discretization is done manually.
%% Cell type:code id: tags:
``` python
discretize = ps.fd.Discretization2ndOrder()
def central2nd_time_derivative(fields):
f_next, f_current, f_last = fields
return (f_next[0, 0] - 2 * f_current[0, 0] + f_last[0, 0]) / discretize.dt**2
rhs = ps.fd.diffusion(u_fields[1], 1)
wave_eq = sp.Eq(central2nd_time_derivative(u_fields), discretize(rhs))
wave_eq = sp.simplify(wave_eq)
wave_eq
```
%% Output
$$\frac{{{u^{(0)}}_{(0,0)}} - 2 {{u^{(1)}}_{(0,0)}} + {{u^{(2)}}_{(0,0)}}}{dt^{2}} = \frac{{{u^{(1)}}_{(-1,0)}} + {{u^{(1)}}_{(0,-1)}} - 4 {{u^{(1)}}_{(0,0)}} + {{u^{(1)}}_{(0,1)}} + {{u^{(1)}}_{(1,0)}}}{dx^{2}}$$
u_0_C - 2⋅u_1_C + u_2_C u_1_W + u_1_S - 4⋅u_1_C + u_1_N + u_1_E
─────────────────────── = ───────────────────────────────────────
2 2
dt dx
%% Cell type:markdown id: tags:
The explicit Euler scheme is now obtained by solving above equation with respect to $u_C^{next}$.
%% Cell type:code id: tags:
``` python
u_next_C = u_fields[-1][0, 0]
update_rule = ps.Assignment(u_next_C, sp.solve(wave_eq, u_next_C)[0])
update_rule
```
%% Output
$${{u^{(2)}}_{(0,0)}} \leftarrow \frac{{{u^{(1)}}_{(-1,0)}} dt^{2} + {{u^{(1)}}_{(0,-1)}} dt^{2} - 4 {{u^{(1)}}_{(0,0)}} dt^{2} + {{u^{(1)}}_{(0,1)}} dt^{2} + {{u^{(1)}}_{(1,0)}} dt^{2} + dx^{2} \left(- {{u^{(0)}}_{(0,0)}} + 2 {{u^{(1)}}_{(0,0)}}\right)}{dx^{2}}$$
2 2 2 2 2 2
u_1_W⋅dt + u_1_S⋅dt - 4⋅u_1_C⋅dt + u_1_N⋅dt + u_1_E⋅dt + dx ⋅(-u
u_2_C := ─────────────────────────────────────────────────────────────────────
2
dx
_0_C + 2⋅u_1_C)
───────────────
%% Cell type:markdown id: tags:
Before creating the kernel, we substitute numeric values for $dx$ and $dt$.
Then a kernel is created just like in the last tutorial.
%% Cell type:code id: tags:
``` python
update_rule = update_rule.subs({discretize.dx: 0.1, discretize.dt: 0.05})
ast = ps.create_kernel(update_rule)
kernel = ast.compile()
ps.show_code(ast)
```
%% Output
FUNC_PREFIX void kernel(double * RESTRICT const _data_u0, double * RESTRICT const _data_u1, double * RESTRICT _data_u2)
{
for (int ctr_0 = 1; ctr_0 < 59; ctr_0 += 1)
{
double * RESTRICT _data_u2_00 = _data_u2 + 70*ctr_0;
double * RESTRICT const _data_u1_00 = _data_u1 + 70*ctr_0;
double * RESTRICT const _data_u0_00 = _data_u0 + 70*ctr_0;
double * RESTRICT const _data_u1_01 = _data_u1 + 70*ctr_0 + 70;
double * RESTRICT const _data_u1_0m1 = _data_u1 + 70*ctr_0 - 70;
for (int ctr_1 = 1; ctr_1 < 69; ctr_1 += 1)
{
_data_u2_00[ctr_1] = 0.25*_data_u1_00[ctr_1 + 1] + 0.25*_data_u1_00[ctr_1 - 1] + 0.25*_data_u1_01[ctr_1] + 0.25*_data_u1_0m1[ctr_1] - 1.0*_data_u0_00[ctr_1] + 1.0*_data_u1_00[ctr_1];
}
}
}
%% Cell type:code id: tags:
``` python
ast.get_parameters()
```
%% Output
[_data_u0, _data_u1, _data_u2]
%% Cell type:code id: tags:
``` python
ast.fields_accessed
```
%% Output
{u0, u1, u2}
%% Cell type:markdown id: tags:
To run simulation a suitable initial condition and boundary treatment is required. We chose an initial condition which is zero at the borders of the domain. The outermost layer is not changed by the update kernel, so we have an implicit homogenous Dirichlet boundary condition.
%% Cell type:code id: tags:
``` python
X,Y = np.meshgrid( np.linspace(0, 1, size[1]), np.linspace(0,1, size[0]))
Z = np.sin(2*X*np.pi) * np.sin(2*Y*np.pi)
# Initialize the previous and current values with the initial function
np.copyto(u_arrays[0], Z)
np.copyto(u_arrays[1], Z)
# The values for the next timesteps do not matter, since they are overwritten
u_arrays[2][:, :] = 0
```
%% Cell type:markdown id: tags:
One timestep now consists of applying the kernel once, then shifting the arrays.
%% Cell type:code id: tags:
``` python
def run(timesteps=1):
for t in range(timesteps):
kernel(u0=u_arrays[0], u1=u_arrays[1], u2=u_arrays[2])
u_arrays[0], u_arrays[1], u_arrays[2] = u_arrays[1], u_arrays[2], u_arrays[0]
return u_arrays[2]
```
%% Cell type:markdown id: tags:
Lets create an animation of the solution:
%% Cell type:code id: tags:
``` python
if shutil.which("ffmpeg") is not None:
ani = plt.surface_plot_animation(run, zlim=(-1, 1))
ps.jupyter.display_as_html_video(ani)
else:
print("No ffmpeg installed")
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:code id: tags:
``` python
assert np.isfinite(np.max(u_arrays[2]))
```
%% Cell type:markdown id: tags:
__LLVM backend__
The next cell demonstrates how to run the same simulation with the LLVM backend. The only difference is that in the `create_kernel` function the `target` is set to llvm.
%% Cell type:code id: tags:
``` python
try:
import llvmlite
except ImportError:
llvmlite=None
print('No llvmlite installed')
if llvmlite:
kernel = ps.create_kernel(update_rule, target='llvm').compile()
kernel = ps.create_kernel(update_rule, backend=ps.Backend.LLVM).compile()
X,Y = np.meshgrid( np.linspace(0, 1, size[1]), np.linspace(0,1, size[0]))
Z = np.sin(2*X*np.pi) * np.sin(2*Y*np.pi)
# Initialize the previous and current values with the initial function
np.copyto(u_arrays[0], Z)
np.copyto(u_arrays[1], Z)
# The values for the next timesteps do not matter, since they are overwritten
u_arrays[2][:, :] = 0
def run_LLVM(timesteps=1):
for t in range(timesteps):
kernel(u0=u_arrays[0], u1=u_arrays[1], u2=u_arrays[2])
u_arrays[0], u_arrays[1], u_arrays[2] = u_arrays[1], u_arrays[2], u_arrays[0]
return u_arrays[2]
assert np.isfinite(np.max(u_arrays[2]))
if shutil.which("ffmpeg") is not None:
ani = plt.surface_plot_animation(run_LLVM, zlim=(-1, 1))
ps.jupyter.display_as_html_video(ani)
else:
print("No ffmpeg installed")
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:markdown id: tags:
__Runing on GPU__
We can also run the same kernel on the GPU, by using the ``pycuda`` package.
%% Cell type:code id: tags:
``` python
try:
import pycuda
except ImportError:
pycuda=None
print('No pycuda installed')
res = None
if pycuda:
gpu_ast = ps.create_kernel(update_rule, target='gpu')
gpu_ast = ps.create_kernel(update_rule, target=ps.Target.GPU)
gpu_kernel = gpu_ast.compile()
res = ps.show_code(gpu_ast)
res
```
%% Output
FUNC_PREFIX __launch_bounds__(256) void kernel(double * RESTRICT const _data_u0, double * RESTRICT const _data_u1, double * RESTRICT _data_u2)
{
if (blockDim.x*blockIdx.x + threadIdx.x + 1 < 59 && blockDim.y*blockIdx.y + threadIdx.y + 1 < 69)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x + 1;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y + 1;
double * RESTRICT _data_u2_10 = _data_u2 + ctr_1;
double * RESTRICT const _data_u1_10 = _data_u1 + ctr_1;
double * RESTRICT const _data_u0_10 = _data_u0 + ctr_1;
double * RESTRICT const _data_u1_11 = _data_u1 + ctr_1 + 1;
double * RESTRICT const _data_u1_1m1 = _data_u1 + ctr_1 - 1;
_data_u2_10[70*ctr_0] = 0.25*_data_u1_10[70*ctr_0 + 70] + 0.25*_data_u1_10[70*ctr_0 - 70] + 0.25*_data_u1_11[70*ctr_0] + 0.25*_data_u1_1m1[70*ctr_0] - 1.0*_data_u0_10[70*ctr_0] + 1.0*_data_u1_10[70*ctr_0];
}
}
%% Cell type:markdown id: tags:
The run function has to be changed now slightly, since the data has to be transfered to the GPU first, then the kernel can be executed, and in the end the data has to be transfered back
%% Cell type:code id: tags:
``` python
if pycuda:
import pycuda.gpuarray as gpuarray
def run_on_gpu(timesteps=1):
# Transfer arrays to GPU
gpuArrs = [gpuarray.to_gpu(a) for a in u_arrays]
for t in range(timesteps):
gpu_kernel(u0=gpuArrs[0], u1=gpuArrs[1], u2=gpuArrs[2])
gpuArrs[0], gpuArrs[1], gpuArrs[2] = gpuArrs[1], gpuArrs[2], gpuArrs[0]
# Transfer arrays to CPU
for gpuArr, cpuArr in zip(gpuArrs, u_arrays):
gpuArr.get(cpuArr)
assert np.isfinite(np.max(u_arrays[2]))
```
%% Cell type:code id: tags:
``` python
if pycuda:
run_on_gpu(400)
```
......
......@@ -8,6 +8,10 @@ Creating kernels
.. autofunction:: pystencils.create_kernel
.. autofunction:: pystencils.CreateKernelConfig
.. autofunction:: pystencils.create_domain_kernel
.. autofunction:: pystencils.create_indexed_kernel
.. autofunction:: pystencils.create_staggered_kernel
......
"""Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
from .enums import Backend, Target
from . import fd
from . import stencil as stencil
from .assignment import Assignment, assignment_from_stencil
from .data_types import TypedSymbol
from .datahandling import create_data_handling
from .display_utils import show_code, get_code_obj, get_code_str, to_dot
from .display_utils import get_code_obj, get_code_str, show_code, to_dot
from .field import Field, FieldType, fields
from .kernel_decorator import kernel
from .kernelcreation import create_indexed_kernel, create_kernel, create_staggered_kernel
from .kernelcreation import (
CreateKernelConfig, create_domain_kernel, create_indexed_kernel, create_kernel, create_staggered_kernel)
from .simp import AssignmentCollection
from .slicing import make_slice
from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
from .sympyextensions import SymbolCreator
from .spatial_coordinates import (x_, x_staggered, x_staggered_vector, x_vector,
y_, y_staggered, z_, z_staggered)
try:
import pystencils_autodiff
autodiff = pystencils_autodiff
except ImportError:
pass
__all__ = ['Field', 'FieldType', 'fields',
'TypedSymbol',
'make_slice',
'create_kernel', 'create_indexed_kernel', 'create_staggered_kernel',
'create_kernel', 'create_domain_kernel', 'create_indexed_kernel', 'create_staggered_kernel',
'CreateKernelConfig',
'Target', 'Backend',
'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
'AssignmentCollection',
'Assignment',
......@@ -39,5 +42,6 @@ __all__ = ['Field', 'FieldType', 'fields',
'stencil']
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
......@@ -7,6 +7,7 @@ import sympy as sp
import pystencils
from pystencils.data_types import TypedImaginaryUnit, TypedSymbol, cast_func, create_type
from pystencils.enums import Target, Backend
from pystencils.field import Field
from pystencils.kernelparameters import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol
from pystencils.sympyextensions import fast_subs
......@@ -136,7 +137,6 @@ class Conditional(Node):
class KernelFunction(Node):
class Parameter:
"""Function parameter.
......@@ -176,7 +176,9 @@ class KernelFunction(Node):
def field_name(self):
return self.fields[0].name
def __init__(self, body, target, backend, compile_function, ghost_layers, function_name="kernel", assignments=None):
def __init__(self, body, target: Target, backend: Backend, compile_function, ghost_layers,
function_name: str = "kernel",
assignments=None):
super(KernelFunction, self).__init__()
self._body = body
body.parent = self
......@@ -194,12 +196,12 @@ class KernelFunction(Node):
@property
def target(self):
"""Currently either 'cpu' or 'gpu' """
"""See pystencils.Target"""
return self._target
@property
def backend(self):
"""Backend for generating the code e.g. 'llvm', 'c', 'cuda' """
"""Backend for generating the code: `Backend`"""
return self._backend
@property
......
......@@ -14,6 +14,7 @@ from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize
from pystencils.data_types import (
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
reinterpret_cast_func, vector_memory_access, BasicType, TypedSymbol)
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.integer_functions import (
bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor,
......@@ -34,7 +35,7 @@ KERNCRAFT_NO_TERNARY_MODE = False
def generate_c(ast_node: Node,
signature_only: bool = False,
dialect='c',
dialect: Backend = Backend.C,
custom_backend=None,
with_globals=True) -> str:
"""Prints an abstract syntax tree node as C or CUDA code.
......@@ -46,7 +47,7 @@ def generate_c(ast_node: Node,
Args:
ast_node: ast representation of kernel
signature_only: generate signature without function body
dialect: 'c', 'cuda' or opencl
dialect: `Backend`: 'C', 'CUDA' or 'OPENCL'
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
......@@ -60,21 +61,21 @@ def generate_c(ast_node: Node,
ast_node.global_variables = d.symbols_defined
if custom_backend:
printer = custom_backend
elif dialect == 'c':
elif dialect == Backend.C:
try:
instruction_set = ast_node.instruction_set
except Exception:
instruction_set = None
printer = CBackend(signature_only=signature_only,
vector_instruction_set=instruction_set)
elif dialect == 'cuda':
elif dialect == Backend.CUDA:
from pystencils.backends.cuda_backend import CudaBackend
printer = CudaBackend(signature_only=signature_only)
elif dialect == 'opencl':
elif dialect == Backend.OPENCL:
from pystencils.backends.opencl_backend import OpenClBackend
printer = OpenClBackend(signature_only=signature_only)
else:
raise ValueError("Unknown dialect: " + str(dialect))
raise ValueError(f'Unknown {dialect=}')
code = printer(ast_node)
if not signature_only and isinstance(ast_node, KernelFunction):
if with_globals and global_declarations:
......@@ -189,7 +190,7 @@ class CFunction(TypedSymbol):
# noinspection PyPep8Naming
class CBackend:
def __init__(self, sympy_printer=None, signature_only=False, vector_instruction_set=None, dialect='c'):
def __init__(self, sympy_printer=None, signature_only=False, vector_instruction_set=None, dialect=Backend.C):
if sympy_printer is None:
if vector_instruction_set is not None:
self.sympy_printer = VectorizedCustomSympyPrinter(vector_instruction_set)
......@@ -228,7 +229,7 @@ class CBackend:
function_arguments = [f"{self._print(s.symbol.dtype)} {s.symbol.name}" for s in node.get_parameters()
if not type(s.symbol) is CFunction]
launch_bounds = ""
if self._dialect == 'cuda':
if self._dialect == Backend.CUDA:
max_threads = node.indexing.max_threads_per_block()
if max_threads:
launch_bounds = f"__launch_bounds__({max_threads}) "
......
......@@ -2,6 +2,7 @@ from os.path import dirname, join
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.interpolation_astnodes import DiffInterpolatorAccess, InterpolationMode
......@@ -22,7 +23,7 @@ def generate_cuda(ast_node: Node, signature_only: bool = False, custom_backend=N
Returns:
CUDA code for the ast node and its descendants
"""
return generate_c(ast_node, signature_only, dialect='cuda',
return generate_c(ast_node, signature_only, dialect=Backend.CUDA,
custom_backend=custom_backend, with_globals=with_globals)
......@@ -33,7 +34,7 @@ class CudaBackend(CBackend):
if not sympy_printer:
sympy_printer = CudaSympyPrinter()
super().__init__(sympy_printer, signature_only, dialect='cuda')
super().__init__(sympy_printer, signature_only, dialect=Backend.CUDA)
def _print_SharedMemoryAllocation(self, node):
dtype = node.symbol.dtype
......
......@@ -4,6 +4,7 @@ import pystencils.data_types
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
......@@ -12,7 +13,7 @@ with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
"""Prints an abstract syntax tree node (made for target 'gpu') as OpenCL code.
"""Prints an abstract syntax tree node (made for `Target` 'GPU') as OpenCL code. # TODO Backend instead of Target?
Args:
ast_node: ast representation of kernel
......@@ -23,7 +24,7 @@ def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend
Returns:
OpenCL code for the ast node and its descendants
"""
return generate_c(ast_node, signature_only, dialect='opencl',
return generate_c(ast_node, signature_only, dialect=Backend.OPENCL,
custom_backend=custom_backend, with_globals=with_globals)
......@@ -36,7 +37,7 @@ class OpenClBackend(CudaBackend):
sympy_printer = OpenClSympyPrinter()
super().__init__(sympy_printer, signature_only)
self._dialect = 'opencl'
self._dialect = Backend.OPENCL
def _print_Type(self, node):
code = super()._print_Type(node)
......
import numpy as np
import sympy as sp
from pystencils import create_indexed_kernel
from pystencils import create_kernel, CreateKernelConfig, Target
from pystencils.assignment import Assignment
from pystencils.backends.cbackend import CustomCodeNode
from pystencils.boundaries.createindexlist import (
......@@ -84,7 +84,7 @@ class FlagInterface:
class BoundaryHandling:
def __init__(self, data_handling, field_name, stencil, name="boundary_handling", flag_interface=None,
target='cpu', openmp=True):
target: Target = Target.CPU, openmp=True):
assert data_handling.has_data(field_name)
assert data_handling.dim == len(stencil[0]), "Dimension of stencil and data handling do not match"
self._data_handling = data_handling
......@@ -442,9 +442,10 @@ class BoundaryOffsetInfo(CustomCodeNode):
INV_DIR_SYMBOL = TypedSymbol("invdir", np.int64)
def create_boundary_kernel(field, index_field, stencil, boundary_functor, target='cpu', **kernel_creation_args):
def create_boundary_kernel(field, index_field, stencil, boundary_functor, target=Target.CPU, **kernel_creation_args):
elements = [BoundaryOffsetInfo(stencil)]
dir_symbol = TypedSymbol("dir", np.int64)
elements += [Assignment(dir_symbol, index_field[0]('dir'))]
elements += boundary_functor(field, direction_symbol=dir_symbol, index_field=index_field)
return create_indexed_kernel(elements, [index_field], target=target, **kernel_creation_args)
config = CreateKernelConfig(index_fields=[index_field], target=target, **kernel_creation_args)
return create_kernel(elements, config=config)
......@@ -5,6 +5,7 @@ import numpy as np
import pystencils.astnodes as ast
from pystencils.assignment import Assignment
from pystencils.enums import Target, Backend
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
from pystencils.cpu.cpujit import make_python_function
from pystencils.data_types import StructType, TypedSymbol, create_type
......@@ -70,7 +71,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
loop_order = get_optimal_loop_ordering(fields_without_buffers)
loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
ghost_layers=ghost_layers, loop_order=loop_order)
ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
ast_node = KernelFunction(loop_node, Target.CPU, Backend.C, compile_function=make_python_function,
ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments)
implement_interpolations(body)
......@@ -151,7 +152,7 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
loop_body.append(assignment)
function_body = Block([loop_node])
ast_node = KernelFunction(function_body, "cpu", "c", make_python_function,
ast_node = KernelFunction(function_body, Target.CPU, Backend.C, make_python_function,
ghost_layers=None, function_name=function_name, assignments=assignments)
fixed_coordinate_mapping = {f.name: coordinate_typed_symbols for f in non_index_fields}
......
import warnings
from typing import Tuple, Union
from .datahandling_interface import DataHandling
from ..enums import Target
from .serial_datahandling import SerialDataHandling
try:
......@@ -18,7 +21,7 @@ except ImportError:
def create_data_handling(domain_size: Tuple[int, ...],
periodicity: Union[bool, Tuple[bool, ...]] = False,
default_layout: str = 'SoA',
default_target: str = 'cpu',
default_target: Target = Target.CPU,