import warnings
from typing import Tuple, Union
from .datahandling_interface import DataHandling
from ..enums import Target
from .serial_datahandling import SerialDataHandling
try:
# noinspection PyPep8Naming
import waLBerla as wlb
if wlb.cpp_available:
from pystencils.datahandling.parallel_datahandling import ParallelDataHandling
else:
wlb = None
except ImportError:
wlb = None
ParallelDataHandling = None
def create_data_handling(domain_size: Tuple[int, ...],
periodicity: Union[bool, Tuple[bool, ...]] = False,
default_layout: str = 'SoA',
default_target: Target = Target.CPU,
parallel: bool = False,
default_ghost_layers: int = 1,
device_number: Union[int, None] = None) -> DataHandling:
"""Creates a data handling instance.
Args:
domain_size: size of the rectangular domain
        periodicity: either True or False for full or no periodicity, or a tuple of booleans indicating
                     periodicity for each coordinate
default_layout: default array layout, that is used if not explicitly specified in 'add_array'
        default_target: `Target`, the default target for created arrays and kernels, either Target.CPU or Target.GPU
parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
device_number: If `default_target` is set to 'GPU' and `parallel` is False, a device number should be
specified. If none is given, the device with the largest amount of memory is used. If multiple
devices have the same amount of memory, the one with the lower number is used
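    Examples:
        A minimal serial setup (a sketch; the array name 'f' is illustrative):

        >>> dh = create_data_handling(domain_size=(10, 10), periodicity=True)
        >>> f = dh.add_array('f', values_per_cell=1)
        >>> dh.fill('f', 0.0)
        >>> dh.shape
        (10, 10)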
"""
if isinstance(default_target, str):
new_target = Target[default_target.upper()]
warnings.warn(f'Target "{default_target}" as str is deprecated. Use {new_target} instead',
category=DeprecationWarning)
default_target = new_target
if parallel:
if wlb is None:
raise ValueError("Cannot create parallel data handling because walberla module is not available")
if periodicity is False or periodicity is None:
periodicity = (0, 0, 0)
elif periodicity is True:
periodicity = (1, 1, 1)
else:
periodicity = tuple(int(bool(x)) for x in periodicity)
if len(periodicity) == 2:
periodicity += (1,)
if len(domain_size) == 2:
dim = 2
domain_size = (domain_size[0], domain_size[1], 1)
else:
dim = 3
# noinspection PyArgumentList
block_storage = wlb.createUniformBlockGrid(cells=domain_size, periodic=periodicity)
return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
default_layout=default_layout, default_ghost_layers=default_ghost_layers)
else:
return SerialDataHandling(domain_size,
periodicity=periodicity,
default_target=default_target,
default_layout=default_layout,
default_ghost_layers=default_ghost_layers,
device_number=device_number)
__all__ = ['create_data_handling']
"""
This module contains functions that simplify the iteration over walberla's distributed data structure.
These functions simplify the iteration over rectangular slices, managing the mapping between block-local
coordinates and global coordinates.
"""
import numpy as np
from pystencils.datahandling.datahandling_interface import Block
from pystencils.slicing import normalize_slice
try:
# noinspection PyPep8Naming
import waLBerla as wlb
except ImportError:
wlb = None
def block_iteration(blocks, ghost_layers, dim=3, access_prefix=''):
"""Simple iteration over parallel walberla domain.
Iterator that simplifies the access to field data by automatically converting from walberla fields to
numpy arrays
Args:
blocks: walberla block data structure
ghost_layers: how many ghost layers to include (outer and inner)
        dim: walberla's block data structure is 3D - 2D domains can be done by setting z_size=1.
             If dim=2 is set here, the third coordinate of the returned fields is accessed at z=0 automatically
access_prefix: see documentation of sliced_block_iteration
"""
for block in blocks:
cell_interval = blocks.getBlockCellBB(block)
cell_interval.expand(ghost_layers)
local_slice = [slice(0, w, None) for w in cell_interval.size]
if dim == 2:
local_slice[2] = ghost_layers
yield ParallelBlock(block, cell_interval.min[:dim], tuple(local_slice), ghost_layers, access_prefix)
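# A minimal usage sketch for block_iteration, assuming the waLBerla Python module is available
# ('blocks' and the field name 'phi' are illustrative):
#
#     blocks = wlb.createUniformBlockGrid(cells=(16, 16, 16), periodic=(1, 1, 1))
#     wlb.field.addToStorage(blocks, 'phi', float, ghostLayers=1)
#     for block in block_iteration(blocks, ghost_layers=1):
#         block['phi'].fill(0.0)  # numpy view of the local field, including ghost layers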
def sliced_block_iteration(blocks, slice_obj=None, inner_ghost_layers=1, outer_ghost_layers=1, dim=3, access_prefix=''):
"""Iterates of all blocks that have an intersection with the given slice object.
For intersection blocks a Block object is yielded
Args:
blocks: walberla block data structure
slice_obj: a slice (i.e. rectangular sub-region), can be created with make_slice[]
inner_ghost_layers: how many ghost layers are included in the local slice and the optional index arrays
outer_ghost_layers: slices can have relative coordinates e.g. make_slice[0.2, :, :]
when computing absolute values, the domain size is needed. This parameter
specifies how many ghost layers are taken into account for this operation.
        dim: set to 2 for pseudo 2D simulations (i.e. where the z coordinate of blocks has extent 1);
             the arrays returned when indexing the block are then two-dimensional
access_prefix: when accessing block data, this prefix is prepended to the access name
mostly used to switch between CPU and GPU field access (gpu fields are added with a
certain prefix 'gpu_')
Example:
        Assume no slice is given. Then outer_ghost_layers effectively sets how many ghost layers at the
        border of the domain are included. The inner_ghost_layers parameter specifies how many inner
        ghost layers are included
"""
if slice_obj is None:
slice_obj = tuple([slice(None, None, None)] * dim)
if dim == 2:
slice_obj += (inner_ghost_layers,)
domain_cell_bb = blocks.getDomainCellBB()
domain_extent = [s + 2 * outer_ghost_layers for s in domain_cell_bb.size]
slice_obj = normalize_slice(slice_obj, domain_extent)
target_cell_bb = wlb.CellInterval.fromSlice(slice_obj)
target_cell_bb.shift(*[a - outer_ghost_layers for a in domain_cell_bb.min])
for block in blocks:
intersection = blocks.getBlockCellBB(block).getExpanded(inner_ghost_layers)
intersection.intersect(target_cell_bb)
if intersection.empty():
continue
local_target_bb = blocks.transformGlobalToLocal(block, intersection)
local_target_bb.shift(inner_ghost_layers, inner_ghost_layers, inner_ghost_layers)
local_slice = local_target_bb.toSlice(False)
if dim == 2:
local_slice = (local_slice[0], local_slice[1], inner_ghost_layers)
yield ParallelBlock(block, intersection.min[:dim], local_slice, inner_ghost_layers, access_prefix)
# ----------------------------- Implementation details -----------------------------------------------------------------
class SerialBlock(Block):
"""Simple mock-up block that is used for SerialDataHandling."""
def __init__(self, field_dict, offset, local_slice):
super(SerialBlock, self).__init__(offset, local_slice)
self._fieldDict = field_dict
def __getitem__(self, data_name):
result = self._fieldDict[data_name]
if isinstance(result, np.ndarray):
result = result[self._localSlice]
return result
class ParallelBlock(Block):
def __init__(self, block, offset, local_slice, inner_ghost_layers, name_prefix):
super(ParallelBlock, self).__init__(offset, local_slice)
self._block = block
self._gls = inner_ghost_layers
self._name_prefix = name_prefix
def __getitem__(self, data_name):
result = self._block[self._name_prefix + data_name]
type_name = type(result).__name__
if 'GhostLayerField' in type_name:
result = wlb.field.toArray(result, with_ghost_layers=self._gls)
result = self._normalize_array_shape(result)
elif 'GpuField' in type_name:
result = wlb.gpu.toGpuArray(result, with_ghost_layers=self._gls)
result = self._normalize_array_shape(result)
return result
def _normalize_array_shape(self, arr):
if arr.shape[-1] == 1 and len(arr.shape) == 4:
arr = arr[..., 0]
return arr[self._localSlice]
from abc import ABC, abstractmethod
from typing import Callable, Dict, Iterable, Optional, Sequence, Tuple, Union
import numpy as np
from pystencils.enums import Target, Backend
from pystencils.field import Field, FieldType
class DataHandling(ABC):
"""
Manages the storage of arrays and maps them to a symbolic field.
Two versions are available: a simple, pure Python implementation for single node
    simulations :py:class:`SerialDataHandling` and a distributed version using walberla in
    :py:class:`ParallelDataHandling`.
    Keep in mind that the data can be distributed, so use the 'access' method whenever possible and avoid the
    'gather' function, which collects (parts of) the distributed data on a single process.
"""
_GPU_LIKE_TARGETS = [Target.GPU]
_GPU_LIKE_BACKENDS = [Backend.CUDA]
# ---------------------------- Adding and accessing data -----------------------------------------------------------
@property
@abstractmethod
def default_target(self) -> Target:
"""Target Enum indicating the target of the computation"""
@property
@abstractmethod
def dim(self) -> int:
"""Dimension of the domain, either 2 or 3"""
@property
@abstractmethod
def shape(self) -> Tuple[int, ...]:
"""Shape of outer bounding box."""
@property
@abstractmethod
def periodicity(self) -> Tuple[bool, ...]:
"""Returns tuple of booleans for x,y,(z) directions with True if domain is periodic in that direction."""
@abstractmethod
def add_array(self, name: str, values_per_cell, dtype=np.float64,
latex_name: Optional[str] = None, ghost_layers: Optional[int] = None, layout: Optional[str] = None,
cpu: bool = True, gpu: Optional[bool] = None, alignment=False, field_type=FieldType.GENERIC) -> Field:
"""Adds a (possibly distributed) array to the handling that can be accessed using the given name.
For each array a symbolic field is available via the 'fields' dictionary
Args:
name: unique name that is used to access the field later
values_per_cell: shape of the dim+1 coordinate. DataHandling supports zero or one index dimensions,
i.e. scalar fields and vector fields. This parameter gives the shape of the index
                             dimensions. The default value of 1 means that no index dimensions are created.
dtype: data type of the array as numpy data type
latex_name: optional, name of the symbolic field, if not given 'name' is used
ghost_layers: number of ghost layers - if not specified a default value specified in the constructor
is used
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
            gpu: allocate field on the GPU; if None, a GPU field is allocated only if default_target is 'GPU'
alignment: either False for no alignment, or the number of bytes to align to
Returns:
            pystencils field that can be used to formulate symbolic kernels
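        Examples:
            A serial sketch (the names 'dh' and 'velocity' are illustrative):

            >>> from pystencils.datahandling import create_data_handling
            >>> dh = create_data_handling((4, 4))
            >>> velocity = dh.add_array('velocity', values_per_cell=2)
            >>> dh.fields['velocity'].index_shape
            (2,)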
"""
def add_arrays(self,
description: str,
dtype=np.float64,
ghost_layers: Optional[int] = None,
layout: Optional[str] = None,
cpu: bool = True,
gpu: Optional[bool] = None,
alignment=False,
field_type=FieldType.GENERIC) -> Tuple[Field]:
"""Adds multiple arrays using a string description similar to :func:`pystencils.fields`
>>> from pystencils.datahandling import create_data_handling
>>> dh = create_data_handling((20, 30))
        >>> x, y = dh.add_arrays('x, y(9)')
>>> print(dh.fields)
{'x': x: double[22,32], 'y': y(9): double[22,32]}
>>> assert x == dh.fields['x']
>>> assert dh.fields['x'].shape == (22, 32)
>>> assert dh.fields['y'].index_shape == (9,)
Args:
description (str): String description of the fields to add
dtype: data type of the array as numpy data type
ghost_layers: number of ghost layers - if not specified a default value specified in the constructor
is used
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
            gpu: allocate field on the GPU; if None, a GPU field is allocated only if default_target is 'GPU'
alignment: either False for no alignment, or the number of bytes to align to
Returns:
Fields representing the just created arrays
"""
from pystencils.field import _parse_part1
names = []
for name, indices in _parse_part1(description):
names.append(name)
self.add_array(name,
values_per_cell=indices,
dtype=dtype,
ghost_layers=ghost_layers,
layout=layout,
cpu=cpu,
gpu=gpu,
alignment=alignment,
field_type=field_type)
        return tuple(self.fields[n] for n in names)
@abstractmethod
def has_data(self, name):
"""Returns true if a field or custom data element with this name was added."""
@abstractmethod
def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
"""
Adds an array with the same parameters (number of ghost layers, values_per_cell, dtype) as existing array.
Args:
name: name of new array
name_of_template_field: name of array that is used as template
latex_name: see 'add' method
cpu: see 'add' method
gpu: see 'add' method
"""
@abstractmethod
def add_custom_data(self, name: str, cpu_creation_function,
gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
"""Adds custom (non-array) data to domain.
Args:
name: name to access data
cpu_creation_function: function returning a new instance of the data that should be stored
gpu_creation_function: optional, function returning a new instance, stored on GPU
cpu_to_gpu_transfer_func: function that transfers cpu to gpu version,
getting two parameters (gpu_instance, cpu_instance)
gpu_to_cpu_transfer_func: function that transfers gpu to cpu version, getting two parameters
(gpu_instance, cpu_instance)
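        Example:
            A serial sketch with CPU-only custom data (the dict-based 'counter' is illustrative):

            >>> from pystencils.datahandling import create_data_handling
            >>> dh = create_data_handling((4, 4))
            >>> dh.add_custom_data('counter', cpu_creation_function=lambda: {'steps': 0})
            >>> next(dh.iterate())['counter']['steps']
            0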
"""
def add_custom_class(self, name: str, class_obj, cpu: bool = True, gpu: bool = False):
"""Adds non-array data by passing a class object with optional 'to_gpu' and 'to_cpu' member functions."""
cpu_to_gpu_transfer_func = class_obj.to_gpu if cpu and gpu and hasattr(class_obj, 'to_gpu') else None
gpu_to_cpu_transfer_func = class_obj.to_cpu if cpu and gpu and hasattr(class_obj, 'to_cpu') else None
self.add_custom_data(name,
cpu_creation_function=class_obj if cpu else None,
gpu_creation_function=class_obj if gpu else None,
cpu_to_gpu_transfer_func=cpu_to_gpu_transfer_func,
gpu_to_cpu_transfer_func=gpu_to_cpu_transfer_func)
@property
@abstractmethod
def fields(self) -> Dict[str, Field]:
"""Dictionary mapping data name to symbolic pystencils field - use this to create pystencils kernels."""
@property
@abstractmethod
def array_names(self) -> Sequence[str]:
"""Sequence of all array names."""
@property
@abstractmethod
def custom_data_names(self) -> Sequence[str]:
"""Sequence of all custom data names."""
@abstractmethod
def ghost_layers_of_field(self, name: str) -> int:
"""Returns the number of ghost layers for a specific field/array."""
@abstractmethod
def values_per_cell(self, name: str) -> Tuple[int, ...]:
"""Returns values_per_cell of array."""
@abstractmethod
def iterate(self, slice_obj=None, gpu=False, ghost_layers=None,
inner_ghost_layers=True) -> Iterable['Block']:
"""Iterate over local part of potentially distributed data structure."""
@abstractmethod
def gather_array(self, name, slice_obj=None, all_gather=False, ghost_layers=False) -> Optional[np.ndarray]:
"""
Gathers part of the domain on a local process. Whenever possible use 'access' instead, since this method copies
the distributed data to a single process which is inefficient and may exhaust the available memory
Args:
name: name of the array to gather
slice_obj: slice expression of the rectangular sub-part that should be gathered
            all_gather: if False only the root process receives the result, if True all processes receive it
ghost_layers: number of outer ghost layers to include (only available for serial version of data handling)
Returns:
gathered field that does not include any ghost layers, or None if gathered on another process
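        Example:
            A serial sketch (the array name 'c' is illustrative):

            >>> from pystencils.datahandling import create_data_handling
            >>> dh = create_data_handling((4, 4))
            >>> _ = dh.add_array('c')
            >>> dh.fill('c', 2.0)
            >>> dh.gather_array('c').shape
            (4, 4)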
"""
@abstractmethod
def run_kernel(self, kernel_function, *args, **kwargs) -> None:
"""Runs a compiled pystencils kernel.
Uses the arrays stored in the DataHandling class for all array parameters. Additional passed arguments are
directly passed to the kernel function and override possible parameters from the DataHandling
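        Example:
            A sketch of the typical workflow (assumes a working compiler setup; names are illustrative):

            >>> import pystencils as ps
            >>> from pystencils.datahandling import create_data_handling
            >>> dh = create_data_handling((4, 4))
            >>> f = dh.add_array('f')
            >>> kernel = ps.create_kernel([ps.Assignment(f.center, 2 * f.center)]).compile()
            >>> dh.run_kernel(kernel)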
"""
@abstractmethod
def get_kernel_kwargs(self, kernel_function, **kwargs):
"""Returns the input arguments of a kernel"""
@abstractmethod
def swap(self, name1, name2, gpu=False):
"""Swaps data of two arrays"""
# ------------------------------- CPU/GPU transfer -----------------------------------------------------------------
@abstractmethod
def to_cpu(self, name):
"""Copies GPU data of array with specified name to CPU.
Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method."""
@abstractmethod
def to_gpu(self, name):
"""Copies GPU data of array with specified name to GPU.
Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method."""
@abstractmethod
def all_to_cpu(self):
"""Copies data from GPU to CPU for all arrays that have a CPU and a GPU representation."""
@abstractmethod
def all_to_gpu(self):
"""Copies data from CPU to GPU for all arrays that have a CPU and a GPU representation."""
@abstractmethod
def is_on_gpu(self, name):
"""Checks if this data was also allocated on the GPU - does not check if this data item is in synced."""
@abstractmethod
def create_vtk_writer(self, file_name, data_names, ghost_layers=False) -> Callable[[int], None]:
"""VTK output for one or multiple arrays.
        Args:
file_name: base file name without extension for the VTK output
data_names: list of array names that should be included in the vtk output
ghost_layers: true if ghost layer information should be written out as well
Returns:
a function that can be called with an integer time step to write the current state
            e.g. create_vtk_writer('some_file', ['velocity', 'density'])(1)
"""
@abstractmethod
def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name,
ghost_layers=False) -> Callable[[int], None]:
"""VTK output for an unsigned integer field, where bits are interpreted as flags.
Args:
file_name: see create_vtk_writer
data_name: name of an array with uint type
masks_to_name: dictionary mapping integer masks to a name in the output
ghost_layers: see create_vtk_writer
Returns:
functor that can be called with time step
"""
# ------------------------------- Communication --------------------------------------------------------------------
@abstractmethod
def synchronization_function(self, names, stencil=None, target=None, **kwargs) -> Callable[[], None]:
"""Synchronizes ghost layers for distributed arrays.
For serial scenario this has to be called for correct periodicity handling
Args:
names: what data to synchronize: name of array or sequence of names
stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19'
if None, a full synchronization (i.e. D2Q9 or D3Q27) is done
target: `Target` either 'CPU' or 'GPU'
kwargs: implementation specific, optional optimization parameters for communication
Returns:
function object to run the communication
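        Example:
            A serial sketch where the returned function applies periodic boundary handling:

            >>> from pystencils.datahandling import create_data_handling
            >>> dh = create_data_handling((4, 4), periodicity=True)
            >>> _ = dh.add_array('f')
            >>> sync = dh.synchronization_function('f')
            >>> sync()  # copies the periodic ghost layers of 'f'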
"""
def reduce_float_sequence(self, sequence, operation, all_reduce=False) -> np.array:
"""Takes a sequence of floating point values on each process and reduces it element-wise.
If all_reduce, all processes get the result, otherwise only the root process.
Possible operations are 'sum', 'min', 'max'
"""
def reduce_int_sequence(self, sequence, operation, all_reduce=False) -> np.array:
"""See function reduce_float_sequence - this is the same for integers"""
# ------------------------------- Data access and modification -----------------------------------------------------
def fill(self, array_name: str, val, value_idx: Optional[Union[int, Tuple[int, ...]]] = None,
slice_obj=None, ghost_layers=False, inner_ghost_layers=False) -> None:
"""Sets all cells to the same value.
Args:
array_name: name of the array that should be modified
val: value to set the array to
            value_idx: If an array stores multiple values per cell, this index chooses which of these values to fill.
If None, all values are set
slice_obj: if passed, only the defined slice is filled
ghost_layers: True if the outer ghost layers should also be filled
inner_ghost_layers: True if the inner ghost layers should be filled. Inner ghost layers occur only in
parallel setups for distributed memory communication.
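        Example:
            Filling only the first component of a vector field (a sketch; names are illustrative):

            >>> from pystencils.datahandling import create_data_handling
            >>> dh = create_data_handling((3, 3))
            >>> _ = dh.add_array('v', values_per_cell=2)
            >>> dh.fill('v', 1.0, value_idx=0)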
"""
if ghost_layers is True:
ghost_layers = self.ghost_layers_of_field(array_name)
        if inner_ghost_layers is True:
            inner_ghost_layers = self.ghost_layers_of_field(array_name)
for b in self.iterate(slice_obj, ghost_layers=ghost_layers, inner_ghost_layers=inner_ghost_layers):
if value_idx is not None:
if isinstance(value_idx, int):
value_idx = (value_idx,)
assert len(value_idx) == len(self.values_per_cell(array_name))
b[array_name][(Ellipsis, *value_idx)].fill(val)
else:
b[array_name].fill(val)
self.to_gpu(array_name)
def min(self, array_name, slice_obj=None, ghost_layers=False, inner_ghost_layers=False, reduce=True):
"""Returns the minimum value inside the domain or slice of the domain.
For meaning of arguments see documentation of :func:`DataHandling.fill`.
Returns:
the minimum of the locally stored domain part is returned if reduce is False, otherwise the global minimum
on the root process, on other processes None
"""
result = None
if ghost_layers is True:
ghost_layers = self.ghost_layers_of_field(array_name)
        if inner_ghost_layers is True:
            inner_ghost_layers = self.ghost_layers_of_field(array_name)
for b in self.iterate(slice_obj, ghost_layers=ghost_layers, inner_ghost_layers=inner_ghost_layers):
m = np.min(b[array_name])
            result = m if result is None else min(result, m)
return self.reduce_float_sequence([result], 'min', all_reduce=True)[0] if reduce else result
def max(self, array_name, slice_obj=None, ghost_layers=False, inner_ghost_layers=False, reduce=True):
"""Returns the maximum value inside the domain or slice of the domain.
For argument description see :func:`DataHandling.min`
"""
result = None
if ghost_layers is True:
ghost_layers = self.ghost_layers_of_field(array_name)
        if inner_ghost_layers is True:
            inner_ghost_layers = self.ghost_layers_of_field(array_name)
for b in self.iterate(slice_obj, ghost_layers=ghost_layers, inner_ghost_layers=inner_ghost_layers):
m = np.max(b[array_name])
            result = m if result is None else max(result, m)
return self.reduce_float_sequence([result], 'max', all_reduce=True)[0] if reduce else result
def save_all(self, file):
"""Saves all field data to disk into a file"""
def load_all(self, file):
"""Loads all field data from disk into a file
Works only if save_all was called with exactly the same field sizes, layouts etc.
When run in parallel save and load has to be called with the same number of processes.
Use for check pointing only - to store results use VTK output
"""
def __str__(self):
result = ""
first_column_width = max(len("Name"), max(len(a) for a in self.array_names))
row_format = "{:>%d}|{:>21}|{:>21}\n" % (first_column_width,)
separator_line = "-" * (first_column_width + 21 + 21 + 2) + "\n"
result += row_format.format("Name", "Inner (min/max)", "WithGl (min/max)")
result += separator_line
for arr_name in sorted(self.array_names):
inner_min_max = (self.min(arr_name, ghost_layers=False), self.max(arr_name, ghost_layers=False))
with_gl_min_max = (self.min(arr_name, ghost_layers=True), self.max(arr_name, ghost_layers=True))
inner_min_max = "({0[0]:3.3g},{0[1]:3.3g})".format(inner_min_max)
with_gl_min_max = "({0[0]:3.3g},{0[1]:3.3g})".format(with_gl_min_max)
result += row_format.format(arr_name, inner_min_max, with_gl_min_max)
return result
def log(self, *args, level='INFO'):
"""Similar to print with additional information (time, rank)."""
def log_on_root(self, *args, level='INFO'):
"""Logs only on root process. For serial setups this is equivalent to log"""
@property
def is_root(self):
"""Returns True for exactly one process in the simulation"""
@property
def world_rank(self):
"""Number of current process"""
class Block:
"""Represents locally stored part of domain.
    Instances of this class are returned by DataHandling.iterate; do not create them manually!
"""
def __init__(self, offset, local_slice):
self._offset = offset
self._localSlice = local_slice
@property
def offset(self):
"""Offset of the current block in global coordinates (where lower ghost layers have negative indices)."""
return self._offset
@property
def cell_index_arrays(self):
"""Global coordinate mesh-grid of cell coordinates.
Cell indices start at 0 at the first inner cell, lower ghost layers have negative indices
"""
mesh_grid_params = [offset + np.arange(width, dtype=np.int32)
for offset, width in zip(self.offset, self.shape)]
return np.meshgrid(*mesh_grid_params, indexing='ij', copy=False)
@property
def midpoint_arrays(self):
"""Global coordinate mesh-grid of cell midpoints which are shifted by 0.5 compared to cell indices."""
mesh_grid_params = [offset + 0.5 + np.arange(width, dtype=float)
for offset, width in zip(self.offset, self.shape)]
return np.meshgrid(*mesh_grid_params, indexing='ij', copy=False)
@property
def shape(self):
"""Shape of the fields (potentially including ghost layers)."""
return tuple(s.stop - s.start for s in self._localSlice[:len(self._offset)])
@property
def global_slice(self):
"""Slice in global coordinates."""
return tuple(slice(off, off + size) for off, size in zip(self._offset, self.shape))
def __getitem__(self, data_name: str) -> np.ndarray:
raise NotImplementedError()
import os
import warnings
import numpy as np
# noinspection PyPep8Naming
import waLBerla as wlb
from pystencils.datahandling.blockiteration import block_iteration, sliced_block_iteration
from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.enums import Backend
from pystencils.field import Field, FieldType
from pystencils.typing.typed_sympy import FieldPointerSymbol
from pystencils.utils import DotDict
from pystencils import Target
class ParallelDataHandling(DataHandling):
GPU_DATA_PREFIX = "gpu_"
VTK_COUNTER = 0
def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, default_target=Target.CPU):
"""
Creates data handling based on walberla block storage
Args:
blocks: walberla block storage
            default_ghost_layers: number of ghost layers used if not specified in the add() method
            default_layout: layout used if no layout is given to the add() method
            dim: dimension of the scenario. walberla always uses three dimensions, so if dim=2 the extent of the
                 z coordinate of blocks has to be 1
            default_target: `Target`, either 'CPU' or 'GPU'. If set to 'GPU', for each array a GPU version is also
                            allocated if not overwritten in add_array, and synchronization functions target the GPU
                            by default
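        Example:
            A construction sketch via walberla (assumes the waLBerla Python module and an MPI setup;
            the block grid parameters are illustrative)::

                blocks = wlb.createUniformBlockGrid(cells=(64, 64, 4), periodic=(1, 1, 1))
                dh = ParallelDataHandling(blocks)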
"""
super(ParallelDataHandling, self).__init__()
assert dim in (2, 3)
self._blocks = blocks
self._default_ghost_layers = default_ghost_layers
self._default_layout = default_layout
self._fields = DotDict() # maps name to symbolic pystencils field
self._field_name_to_cpu_data_name = {}
self._field_name_to_gpu_data_name = {}
self._data_names = set()
self._dim = dim
self._fieldInformation = {}
self._cpu_gpu_pairs = []
self._custom_data_transfer_functions = {}
self._custom_data_names = []
self._reduce_map = {
'sum': wlb.mpi.SUM,
'min': wlb.mpi.MIN,
'max': wlb.mpi.MAX,
}
if self._dim == 2:
assert self.blocks.getDomainCellBB().size[2] == 1
self._default_target = default_target
@property
def default_target(self):
return self._default_target
@property
def dim(self):
return self._dim
@property
def shape(self):
return self.blocks.getDomainCellBB().size[:self.dim]
@property
def periodicity(self):
return self.blocks.periodic[:self._dim]
@property
def fields(self):
return self._fields
@property
def blocks(self):
return self._blocks
@property
def default_ghost_layers(self):
return self._default_ghost_layers
@property
def default_layout(self):
return self._default_layout
@property
def data_names(self):
        return self._data_names
def ghost_layers_of_field(self, name):
return self._fieldInformation[name]['ghost_layers']
def values_per_cell(self, name):
return self._fieldInformation[name]['values_per_cell']
def add_custom_data(self, name, cpu_creation_function,
gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
if cpu_creation_function and gpu_creation_function:
if cpu_to_gpu_transfer_func is None or gpu_to_cpu_transfer_func is None:
raise ValueError("For GPU data, both transfer functions have to be specified")
self._custom_data_transfer_functions[name] = (cpu_to_gpu_transfer_func, gpu_to_cpu_transfer_func)
if cpu_creation_function:
self.blocks.addBlockData(name, cpu_creation_function)
if gpu_creation_function:
self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpu_creation_function)
self._custom_data_names.append(name)
def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None,
layout=None, cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
if ghost_layers is None:
ghost_layers = self.default_ghost_layers
if gpu is None:
gpu = self.default_target == Target.GPU
if layout is None:
layout = self.default_layout
if len(self.blocks) == 0:
raise ValueError("Data handling expects that each process has at least one block")
if hasattr(dtype, 'type'):
dtype = dtype.type
if name in self.blocks[0].fieldNames or self.GPU_DATA_PREFIX + name in self.blocks[0].fieldNames:
raise ValueError("Data with this name has already been added")
if alignment is False or alignment is None:
alignment = 0
if hasattr(values_per_cell, '__len__'):
raise NotImplementedError("Parallel data handling does not support multiple index dimensions")
self._fieldInformation[name] = {
'ghost_layers': ghost_layers,
'values_per_cell': values_per_cell,
'layout': layout,
'dtype': dtype,
'alignment': alignment,
'field_type': field_type,
}
layout_map = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
'f': wlb.field.Layout.fzyx,
'SoA': wlb.field.Layout.fzyx, 'AoS': wlb.field.Layout.zyxf}
if cpu:
wlb.field.addToStorage(self.blocks, name, dtype, fSize=values_per_cell, layout=layout_map[layout],
ghostLayers=ghost_layers, alignment=alignment)
if gpu:
if alignment != 0:
raise ValueError("Alignment for walberla GPU fields not yet supported")
wlb.gpu.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX + name, dtype, fSize=values_per_cell,
usePitchedMem=False, ghostLayers=ghost_layers, layout=layout_map[layout])
if cpu and gpu:
self._cpu_gpu_pairs.append((name, self.GPU_DATA_PREFIX + name))
block_bb = self.blocks.getBlockCellBB(self.blocks[0])
shape = tuple(s + 2 * ghost_layers for s in block_bb.size[:self.dim])
index_dimensions = 1 if values_per_cell > 1 else 0
if index_dimensions == 1:
shape += (values_per_cell,)
assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
self.fields[name] = Field.create_generic(name, self.dim, dtype, index_dimensions, layout,
index_shape=(values_per_cell,) if index_dimensions > 0 else None,
field_type=field_type)
self.fields[name].latex_name = latex_name
self._field_name_to_cpu_data_name[name] = name
if gpu:
self._field_name_to_gpu_data_name[name] = self.GPU_DATA_PREFIX + name
return self.fields[name]
def has_data(self, name):
return name in self._fields
@property
def array_names(self):
return tuple(self.fields.keys())
@property
def custom_data_names(self):
return tuple(self._custom_data_names)
def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
return self.add_array(name, latex_name=latex_name, cpu=cpu, gpu=gpu,
**self._fieldInformation[name_of_template_field])
def swap(self, name1, name2, gpu=False):
if gpu:
name1 = self.GPU_DATA_PREFIX + name1
name2 = self.GPU_DATA_PREFIX + name2
for block in self.blocks:
block[name1].swapDataPointers(block[name2])
def iterate(self, slice_obj=None, gpu=False, ghost_layers=True, inner_ghost_layers=True):
if ghost_layers is True:
ghost_layers = self.default_ghost_layers
elif ghost_layers is False:
ghost_layers = 0
elif isinstance(ghost_layers, str):
ghost_layers = self.ghost_layers_of_field(ghost_layers)
if inner_ghost_layers is True:
inner_ghost_layers = self.default_ghost_layers
elif inner_ghost_layers is False:
inner_ghost_layers = 0
        elif isinstance(inner_ghost_layers, str):
            inner_ghost_layers = self.ghost_layers_of_field(inner_ghost_layers)
prefix = self.GPU_DATA_PREFIX if gpu else ""
if slice_obj is not None:
yield from sliced_block_iteration(self.blocks, slice_obj, inner_ghost_layers, ghost_layers,
self.dim, prefix)
else:
yield from block_iteration(self.blocks, ghost_layers, self.dim, prefix)
def gather_array(self, name, slice_obj=None, all_gather=False, ghost_layers=False):
if ghost_layers is not False:
warnings.warn("gather_array with ghost layers is only supported in serial data handling. "
"Array without ghost layers is returned")
if slice_obj is None:
slice_obj = tuple([slice(None, None, None)] * self.dim)
if self.dim == 2:
slice_obj = slice_obj[:2] + (0.5,) + slice_obj[2:]
last_element = slice_obj[3:]
array = wlb.field.gatherField(self.blocks, name, slice_obj[:3], all_gather)
if array is None:
return None
if self.dim == 2:
array = array[:, :, 0]
if last_element and self.fields[name].index_dimensions > 0:
array = array[..., last_element[0]]
return array
def _normalize_arr_shape(self, arr, index_dimensions):
if index_dimensions == 0 and len(arr.shape) > 3:
arr = arr[..., 0]
if self.dim == 2 and len(arr.shape) > 2:
arr = arr[:, :, 0]
return arr
def run_kernel(self, kernel_function, **kwargs):
for arg_dict in self.get_kernel_kwargs(kernel_function, **kwargs):
kernel_function(**arg_dict)
def get_kernel_kwargs(self, kernel_function, **kwargs):
if kernel_function.ast.backend == Backend.CUDA:
name_map = self._field_name_to_gpu_data_name
to_array = wlb.gpu.toGpuArray
else:
name_map = self._field_name_to_cpu_data_name
to_array = wlb.field.toArray
data_used_in_kernel = [(name_map[p.symbol.field_name], self.fields[p.symbol.field_name])
for p in kernel_function.parameters if
isinstance(p.symbol, FieldPointerSymbol) and p.symbol.field_name not in kwargs]
result = []
for block in self.blocks:
field_args = {}
for data_name, f in data_used_in_kernel:
arr = to_array(block[data_name], with_ghost_layers=[True, True, self.dim == 3])
arr = self._normalize_arr_shape(arr, f.index_dimensions)
field_args[f.name] = arr
field_args.update(kwargs)
result.append(field_args)
return result
def to_cpu(self, name):
if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][1]
for block in self.blocks:
transfer_func(block[self.GPU_DATA_PREFIX + name], block[name])
else:
if self.is_on_gpu(name):
wlb.gpu.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def to_gpu(self, name):
if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][0]
for block in self.blocks:
transfer_func(block[self.GPU_DATA_PREFIX + name], block[name])
else:
if self.is_on_gpu(name):
wlb.gpu.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def is_on_gpu(self, name):
return (name, self.GPU_DATA_PREFIX + name) in self._cpu_gpu_pairs
def all_to_cpu(self):
for cpu_name, gpu_name in self._cpu_gpu_pairs:
wlb.gpu.copyFieldToCpu(self.blocks, gpu_name, cpu_name)
for name in self._custom_data_transfer_functions.keys():
self.to_cpu(name)
def all_to_gpu(self):
for cpu_name, gpu_name in self._cpu_gpu_pairs:
wlb.gpu.copyFieldToGpu(self.blocks, gpu_name, cpu_name)
for name in self._custom_data_transfer_functions.keys():
self.to_gpu(name)
def synchronization_function_cpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, Target.CPU, buffered, stencil_restricted)
def synchronization_function_gpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, Target.GPU, buffered, stencil_restricted)
def synchronization_function(self, names, stencil=None, target=None, buffered=True, stencil_restricted=False):
if target is None:
target = self.default_target
if stencil is None:
stencil = 'D3Q27' if self.dim == 3 else 'D2Q9'
if not hasattr(names, '__len__') or type(names) is str:
names = [names]
create_scheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
if target == Target.CPU:
create_packing = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
if buffered and stencil_restricted:
create_packing = wlb.field.createStencilRestrictedPackInfo
else:
assert target == Target.GPU
create_packing = wlb.gpu.createPackInfo if buffered else wlb.gpu.createMPIDatatypeInfo
names = [self.GPU_DATA_PREFIX + name for name in names]
sync_function = create_scheme(self.blocks, stencil)
for name in names:
sync_function.addDataToCommunicate(create_packing(self.blocks, name))
return sync_function
def reduce_float_sequence(self, sequence, operation, all_reduce=False):
if all_reduce:
return np.array(wlb.mpi.allreduceReal(sequence, self._reduce_map[operation.lower()]))
else:
result = np.array(wlb.mpi.reduceReal(sequence, self._reduce_map[operation.lower()], 0))
return result if wlb.mpi.worldRank() == 0 else None
def reduce_int_sequence(self, sequence, operation, all_reduce=False):
if all_reduce:
return np.array(wlb.mpi.allreduceInt(sequence, self._reduce_map[operation.lower()]))
else:
result = np.array(wlb.mpi.reduceInt(sequence, self._reduce_map[operation.lower()], 0))
return result if wlb.mpi.worldRank() == 0 else None
def create_vtk_writer(self, file_name, data_names, ghost_layers=False):
if ghost_layers is False:
ghost_layers = 0
if ghost_layers is True:
ghost_layers = min(self.ghost_layers_of_field(n) for n in data_names)
file_name = "%s_%02d" % (file_name, ParallelDataHandling.VTK_COUNTER)
ParallelDataHandling.VTK_COUNTER += 1
output = wlb.vtk.makeOutput(self.blocks, file_name, ghostLayers=ghost_layers)
for n in data_names:
output.addCellDataWriter(wlb.field.createVTKWriter(self.blocks, n))
return output
def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name, ghost_layers=False):
if ghost_layers is False:
ghost_layers = 0
if ghost_layers is True:
ghost_layers = self.ghost_layers_of_field(data_name)
output = wlb.vtk.makeOutput(self.blocks, file_name, ghostLayers=ghost_layers)
for mask, name in masks_to_name.items():
w = wlb.field.createBinarizationVTKWriter(self.blocks, data_name, mask, name)
output.addCellDataWriter(w)
return output
@staticmethod
def log(*args, level='INFO'):
_log_map = {
'DEVEL': wlb.log_devel,
'RESULT': wlb.log_result,
'INFO': wlb.log_info,
'WARNING': wlb.log_warning,
'PROGRESS': wlb.log_progress,
}
level = level.upper()
message = " ".join(str(e) for e in args)
_log_map[level](message)
def log_on_root(self, *args, level='INFO'):
if self.is_root:
ParallelDataHandling.log(*args, level=level)
@property
def is_root(self):
return wlb.mpi.worldRank() == 0
@property
def world_rank(self):
return wlb.mpi.worldRank()
def save_all(self, directory):
if not os.path.exists(directory):
os.mkdir(directory)
if os.path.isfile(directory):
raise RuntimeError(f"Trying to save to {directory}, but file exists already")
for field_name, data_name in self._field_name_to_cpu_data_name.items():
self.blocks.writeBlockData(data_name, os.path.join(directory, field_name + ".dat"))
def load_all(self, directory):
for field_name, data_name in self._field_name_to_cpu_data_name.items():
self.blocks.readBlockData(data_name, os.path.join(directory, field_name + ".dat"))
import itertools
import time
from typing import Sequence, Union
import numpy as np
from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.enums import Target
from pystencils.field import (Field, FieldType, create_numpy_array_with_layout,
layout_string_to_tuple, spatial_layout_string_to_tuple)
from pystencils.gpu.gpu_array_handler import GPUArrayHandler, GPUNotAvailableHandler
from pystencils.slicing import normalize_slice, remove_ghost_layers
from pystencils.utils import DotDict
class SerialDataHandling(DataHandling):
def __init__(self,
domain_size: Sequence[int],
default_ghost_layers: int = 1,
default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False,
default_target: Target = Target.CPU,
array_handler=None,
device_number=None) -> None:
"""
Creates a data handling for single node simulations.
Args:
domain_size: size of the spatial domain as tuple
default_ghost_layers: default number of ghost layers used, if not overridden in add_array() method
default_layout: default layout used, if not overridden in add_array() method
periodicity: List of booleans that indicate which dimensions have periodic boundary conditions.
Alternatively, a single boolean can be given, which is used for all dimensions. Defaults to
False (non-periodic)
default_target: `Target` either 'CPU' or 'GPU'. If set to 'GPU' for each array also a GPU version is
allocated if not overwritten in add_array, and synchronization functions are for the GPU by
default
array_handler: An object that provides the same interface as `GPUArrayHandler`, which is used for creation
and transferring of GPU arrays. Default is to construct a fresh `GPUArrayHandler`
device_number: If `default_target` is set to 'GPU', a device number should be specified. If none is given,
the device with the largest amount of memory is used. If multiple devices have the same
amount of memory, the one with the lower number is used
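        Example:
            A small 2D setup (a sketch; the values are illustrative):

            >>> dh = SerialDataHandling((10, 10), periodicity=(True, False))
            >>> dh.dim
            2
            >>> dh.periodicity
            (True, False)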
"""
super(SerialDataHandling, self).__init__()
self._domainSize = tuple(domain_size)
self.default_ghost_layers = default_ghost_layers
self.default_layout = default_layout
self._fields = DotDict()
self.cpu_arrays = DotDict()
self.gpu_arrays = DotDict()
self.custom_data_cpu = DotDict()
self.custom_data_gpu = DotDict()
self._custom_data_transfer_functions = {}
if not array_handler:
try:
if device_number is None:
import cupy.cuda.runtime
if cupy.cuda.runtime.getDeviceCount() > 0:
device_number = sorted(range(cupy.cuda.runtime.getDeviceCount()),
key=lambda i: cupy.cuda.Device(i).mem_info[1], reverse=True)[0]
self.array_handler = GPUArrayHandler(device_number)
except ImportError:
self.array_handler = GPUNotAvailableHandler()
else:
self.array_handler = array_handler
if periodicity is None or periodicity is False:
periodicity = [False] * self.dim
if periodicity is True:
periodicity = [True] * self.dim
self._periodicity = periodicity
self._field_information = {}
self._default_target = default_target
self._start_time = time.perf_counter()
@property
def default_target(self):
return self._default_target
@property
def dim(self):
return len(self._domainSize)
@property
def shape(self):
return self._domainSize
@property
def periodicity(self):
return self._periodicity
@property
def fields(self):
return self._fields
def ghost_layers_of_field(self, name):
return self._field_information[name]['ghost_layers']
def values_per_cell(self, name):
return self._field_information[name]['values_per_cell']
def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, layout=None,
cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
if ghost_layers is None:
ghost_layers = self.default_ghost_layers
if layout is None:
layout = self.default_layout
if gpu is None:
gpu = self.default_target in self._GPU_LIKE_TARGETS
kwargs = {
'shape': tuple(s + 2 * ghost_layers for s in self._domainSize),
'dtype': dtype,
}
if not hasattr(values_per_cell, '__len__'):
values_per_cell = (values_per_cell,)
if len(values_per_cell) == 1 and values_per_cell[0] == 1:
values_per_cell = ()
self._field_information[name] = {
'ghost_layers': ghost_layers,
'values_per_cell': values_per_cell,
'layout': layout,
'dtype': dtype,
'alignment': alignment,
'field_type': field_type,
}
index_dimensions = len(values_per_cell)
kwargs['shape'] = kwargs['shape'] + values_per_cell
if index_dimensions > 0:
layout_tuple = layout_string_to_tuple(layout, self.dim + index_dimensions)
else:
layout_tuple = spatial_layout_string_to_tuple(layout, self.dim)
# cpu_arr is always created - since there is no create_gpu_array_with_layout()
byte_offset = ghost_layers * np.dtype(dtype).itemsize
if gpu:
cpu_arr = self.array_handler.pinned_numpy_array(shape=kwargs['shape'], layout=layout_tuple, dtype=dtype)
else:
cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
byte_offset=byte_offset, **kwargs)
if alignment and gpu:
raise NotImplementedError("Alignment for GPU fields not supported")
if cpu:
if name in self.cpu_arrays:
raise ValueError("CPU Field with this name already exists")
self.cpu_arrays[name] = cpu_arr
if gpu:
if name in self.gpu_arrays:
raise ValueError("GPU Field with this name already exists")
self.gpu_arrays[name] = self.array_handler.to_gpu(cpu_arr)
assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions,
field_type=field_type)
self.fields[name].latex_name = latex_name
return self.fields[name]
def add_custom_data(self, name, cpu_creation_function,
gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
if cpu_creation_function and gpu_creation_function:
if cpu_to_gpu_transfer_func is None or gpu_to_cpu_transfer_func is None:
raise ValueError("For GPU data, both transfer functions have to be specified")
self._custom_data_transfer_functions[name] = (cpu_to_gpu_transfer_func, gpu_to_cpu_transfer_func)
assert name not in self.custom_data_cpu
if cpu_creation_function:
assert name not in self.cpu_arrays
self.custom_data_cpu[name] = cpu_creation_function()
if gpu_creation_function:
assert name not in self.gpu_arrays
self.custom_data_gpu[name] = gpu_creation_function()
def has_data(self, name):
return name in self.fields
def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
return self.add_array(name, latex_name=latex_name, cpu=cpu, gpu=gpu,
**self._field_information[name_of_template_field])
def iterate(self, slice_obj=None, gpu=False, ghost_layers=True, inner_ghost_layers=True):
if ghost_layers is True:
ghost_layers = self.default_ghost_layers
elif ghost_layers is False:
ghost_layers = 0
elif isinstance(ghost_layers, str):
ghost_layers = self.ghost_layers_of_field(ghost_layers)
if slice_obj is None:
slice_obj = (slice(None, None, None),) * self.dim
slice_obj = normalize_slice(slice_obj, tuple(s + 2 * ghost_layers for s in self._domainSize))
slice_obj = tuple(s if type(s) is slice else slice(s, s + 1, None) for s in slice_obj)
arrays = self.gpu_arrays if gpu else self.cpu_arrays
custom_data_dict = self.custom_data_gpu if gpu else self.custom_data_cpu
iter_dict = custom_data_dict.copy()
for name, arr in arrays.items():
field_gls = self._field_information[name]['ghost_layers']
if field_gls < ghost_layers:
continue
arr = remove_ghost_layers(arr, index_dimensions=len(arr.shape) - self.dim,
ghost_layers=field_gls - ghost_layers)
iter_dict[name] = arr
offset = tuple(s.start - ghost_layers for s in slice_obj)
yield SerialBlock(iter_dict, offset, slice_obj)
def gather_array(self, name, slice_obj=None, ghost_layers=False, **kwargs):
gl_to_remove = self._field_information[name]['ghost_layers']
if isinstance(ghost_layers, int):
gl_to_remove -= ghost_layers
if ghost_layers is True:
gl_to_remove = 0
arr = self.cpu_arrays[name]
ind_dimensions = self.fields[name].index_dimensions
spatial_dimensions = self.fields[name].spatial_dimensions
arr = remove_ghost_layers(arr, index_dimensions=ind_dimensions, ghost_layers=gl_to_remove)
if slice_obj is not None:
normalized_slice = normalize_slice(slice_obj[:spatial_dimensions], arr.shape[:spatial_dimensions])
normalized_slice = tuple(s if type(s) is slice else slice(s, s + 1, None) for s in normalized_slice)
normalized_slice += slice_obj[spatial_dimensions:]
arr = arr[normalized_slice]
else:
arr = arr.view()
arr.flags.writeable = False
return arr
def swap(self, name1, name2, gpu=None):
if gpu is None:
gpu = self.default_target in self._GPU_LIKE_TARGETS
arr = self.gpu_arrays if gpu else self.cpu_arrays
arr[name1], arr[name2] = arr[name2], arr[name1]
def all_to_cpu(self):
for name in (self.cpu_arrays.keys() & self.gpu_arrays.keys()) | self._custom_data_transfer_functions.keys():
self.to_cpu(name)
def all_to_gpu(self):
for name in (self.cpu_arrays.keys() & self.gpu_arrays.keys()) | self._custom_data_transfer_functions.keys():
self.to_gpu(name)
def run_kernel(self, kernel_function, **kwargs):
arrays = self.gpu_arrays if kernel_function.ast.backend in self._GPU_LIKE_BACKENDS else self.cpu_arrays
kernel_function(**{**arrays, **kwargs})
def get_kernel_kwargs(self, kernel_function, **kwargs):
result = {}
result.update(self.gpu_arrays if kernel_function.ast.backend in self._GPU_LIKE_BACKENDS else self.cpu_arrays)
result.update(kwargs)
return [result]
def to_cpu(self, name):
if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][1]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else:
if name in self.cpu_arrays.keys() & self.gpu_arrays.keys():
self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])
def to_gpu(self, name):
if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][0]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else:
if name in self.cpu_arrays.keys() & self.gpu_arrays.keys():
self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])
def is_on_gpu(self, name):
return name in self.gpu_arrays
def synchronization_function_cpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, target=Target.CPU)
def synchronization_function_gpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, target=Target.GPU)
def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
if target is None:
target = self.default_target
assert target in (Target.CPU, Target.GPU)
if not hasattr(names, '__len__') or type(names) is str:
names = [names]
filtered_stencil = []
neighbors = [-1, 0, 1]
if (stencil is None and self.dim == 2) or (stencil is not None and stencil.startswith('D2')):
directions = itertools.product(*[neighbors] * 2)
elif (stencil is None and self.dim == 3) or (stencil is not None and stencil.startswith('D3')):
directions = itertools.product(*[neighbors] * 3)
else:
raise ValueError("Invalid stencil")
for direction in directions:
use_direction = True
if direction == (0, 0) or direction == (0, 0, 0):
use_direction = False
for component, periodicity in zip(direction, self._periodicity):
if not periodicity and component != 0:
use_direction = False
if use_direction:
filtered_stencil.append(direction)
result = []
for name in names:
gls = self._field_information[name]['ghost_layers']
values_per_cell = self._field_information[name]['values_per_cell']
if values_per_cell == ():
values_per_cell = (1,)
if len(values_per_cell) == 1:
values_per_cell = values_per_cell[0]
if len(filtered_stencil) > 0:
if target == Target.CPU:
if functor is None:
from pystencils.slicing import get_periodic_boundary_functor
functor = get_periodic_boundary_functor
result.append(functor(filtered_stencil, ghost_layers=gls))
else:
if functor is None:
from pystencils.gpu.periodicity import get_periodic_boundary_functor as functor
target = Target.GPU
result.append(functor(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions,
index_dim_shape=values_per_cell,
dtype=self.fields[name].dtype.numpy_dtype,
ghost_layers=gls,
target=target))
if target == Target.CPU:
def result_functor():
for arr_name, func in zip(names, result):
func(pdfs=self.cpu_arrays[arr_name])
else:
def result_functor():
for arr_name, func in zip(names, result):
func(pdfs=self.gpu_arrays[arr_name])
return result_functor
@property
def array_names(self):
return tuple(self.fields.keys())
@property
def custom_data_names(self):
return tuple(self.custom_data_cpu.keys())
def reduce_float_sequence(self, sequence, operation, all_reduce=False) -> np.array:
return np.array(sequence)
def reduce_int_sequence(self, sequence, operation, all_reduce=False) -> np.array:
return np.array(sequence)
def create_vtk_writer(self, file_name, data_names, ghost_layers=False):
from pystencils.datahandling.vtk import image_to_vtk
def writer(step):
full_file_name = "%s_%08d" % (file_name, step,)
cell_data = {}
for name in data_names:
field = self._get_field_with_given_number_of_ghost_layers(name, ghost_layers)
if self.dim == 2:
field = field[:, :, np.newaxis]
if len(field.shape) == 3:
cell_data[name] = np.ascontiguousarray(field)
elif len(field.shape) == 4:
values_per_cell = field.shape[-1]
if values_per_cell == self.dim:
field = [np.ascontiguousarray(field[..., i]) for i in range(values_per_cell)]
if len(field) == 2:
field.append(np.zeros_like(field[0]))
cell_data[name] = tuple(field)
else:
for i in range(values_per_cell):
cell_data["%s[%d]" % (name, i)] = np.ascontiguousarray(field[..., i])
else:
raise NotImplementedError("VTK export for fields with more than one index "
"coordinate not implemented")
image_to_vtk(full_file_name, cell_data=cell_data)
return writer
def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name, ghost_layers=False):
from pystencils.datahandling.vtk import image_to_vtk
def writer(step):
full_file_name = "%s_%08d" % (file_name, step,)
field = self._get_field_with_given_number_of_ghost_layers(data_name, ghost_layers)
if self.dim == 2:
field = field[:, :, np.newaxis]
cell_data = {name: np.ascontiguousarray(np.bitwise_and(field, field.dtype.type(mask)) > 0, dtype=np.uint8)
for mask, name in masks_to_name.items()}
image_to_vtk(full_file_name, cell_data=cell_data)
return writer
def _get_field_with_given_number_of_ghost_layers(self, name, ghost_layers):
actual_ghost_layers = self.ghost_layers_of_field(name)
if ghost_layers is True:
ghost_layers = actual_ghost_layers
gl_to_remove = actual_ghost_layers - ghost_layers
ind_dims = len(self._field_information[name]['values_per_cell'])
return remove_ghost_layers(self.cpu_arrays[name], ind_dims, gl_to_remove)
def log(self, *args, level='INFO'):
level = level.upper()
message = " ".join(str(e) for e in args)
time_running = time.perf_counter() - self._start_time
spacing = 7 - len(str(int(time_running)))
message = f"[{level: <8}]{spacing * '-'}({time_running:.3f} sec) {message} "
print(message, flush=True)
def log_on_root(self, *args, level='INFO'):
self.log(*args, level=level)
@property
def is_root(self):
return True
@property
def world_rank(self):
return 0
def save_all(self, filename, compressed=True, synchronise_data=True):
if synchronise_data:
for name in (self.cpu_arrays.keys() & self.gpu_arrays.keys()):
self.to_cpu(name)
if compressed:
np.savez_compressed(filename, **self.cpu_arrays)
else:
np.savez(filename, **self.cpu_arrays)
def load_all(self, filename, synchronise_data=True):
if '.npz' not in filename:
filename += '.npz'
file_contents = np.load(filename)
for arr_name, arr_contents in self.cpu_arrays.items():
if arr_name not in file_contents:
print(f"Skipping read data {arr_name} because there is no data with this name in data handling")
continue
if file_contents[arr_name].shape != arr_contents.shape:
print(f"Skipping read data {arr_name} because shapes don't match. "
f"Read array shape {file_contents[arr_name].shape}, existing array shape {arr_contents.shape}")
continue
np.copyto(arr_contents, file_contents[arr_name])
if synchronise_data:
if arr_name in self.gpu_arrays.keys():
self.to_gpu(arr_name)
from pyevtk.hl import _addDataToFile, _appendDataToFile
from pyevtk.vtk import VtkFile, VtkImageData
def image_to_vtk(path, cell_data, origin=(0.0, 0.0, 0.0), spacing=(1.0, 1.0, 1.0)):
"""
    Writes numpy data to VTK.
    Numpy arrays have to be contiguous in memory - if this is a problem, call :func:`numpy.ascontiguousarray` first.
    Patched version of the same pyevtk function that also supports vector-valued data.
Args:
path: path with file name, without file ending (.vtk) where data should be stored
cell_data: dictionary, mapping name of the data to a 3D numpy array, or to a 3-tuple of 3D numpy arrays
in case of vector-valued data
origin: 3-tuple describing the origin of the field in 3D
        spacing: 3-tuple describing the grid spacing in x, y, z direction
Returns:
path to file that was written
Examples:
>>> from tempfile import TemporaryDirectory
>>> import os
>>> import numpy as np
>>> with TemporaryDirectory() as tmp_dir:
... path = os.path.join(tmp_dir, 'out')
... size = (20, 20, 20)
... res_file = image_to_vtk(path, cell_data={'vector': (np.ones(size), np.ones(size), np.ones(size)),
... 'scalar': np.zeros(size)
... })
"""
# Extract dimensions
start = (0, 0, 0)
end = None
keys = list(cell_data.keys())
data = cell_data[keys[0]]
if hasattr(data, 'shape'):
end = data.shape
elif isinstance(data, tuple):
shapes = set(d.shape for d in data)
if len(shapes) > 1:
raise ValueError("All components have to have the same shape")
end = shapes.pop()
# Write data to file
w = VtkFile(path, VtkImageData)
w.openGrid(start=start, end=end, origin=origin, spacing=spacing)
w.openPiece(start=start, end=end)
_addDataToFile(w, cell_data, pointData=None)
w.closePiece()
w.closeGrid()
_appendDataToFile(w, cell_data, pointData=None)
w.save()
return w.getFileName()
from typing import Any, Dict, Optional, Union
import sympy as sp
from pystencils.astnodes import KernelFunction
from pystencils.enums import Backend
from pystencils.kernel_wrapper import KernelWrapper
def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True):
"""Show a sympy or pystencils AST as dot graph"""
from pystencils.astnodes import Node
try:
import graphviz
except ImportError:
print("graphviz is not installed. Visualizing the AST is not available")
return
graph_style = {} if graph_style is None else graph_style
if isinstance(expr, Node):
from pystencils.backends.dot import print_dot
return graphviz.Source(print_dot(expr, short=short, graph_attr=graph_style))
else:
from sympy.printing.dot import dotprint
return graphviz.Source(dotprint(expr, graph_attr=graph_style))
def highlight_cpp(code: str):
"""Highlight the given C/C++ source code with pygments."""
from IPython.display import HTML, display
from pygments import highlight
# noinspection PyUnresolvedReferences
from pygments.formatters import HtmlFormatter
# noinspection PyUnresolvedReferences
from pygments.lexers import CppLexer
css = HtmlFormatter().get_style_defs('.highlight')
css_tag = f"<style>{css}</style>"
display(HTML(css_tag))
return HTML(highlight(code, CppLexer(), HtmlFormatter()))
def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None):
"""Returns an object to display generated code (C/C++ or CUDA)
It can either be displayed as HTML in Jupyter notebooks or printed as a normal string.
"""
from pystencils.backends.cbackend import generate_c
if isinstance(ast, KernelWrapper):
ast = ast.ast
if ast.backend not in {Backend.C, Backend.CUDA}:
raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}')
dialect = ast.backend
class CodeDisplay:
def __init__(self, ast_input):
self.ast = ast_input
def _repr_html_(self):
return highlight_cpp(generate_c(self.ast, dialect=dialect, custom_backend=custom_backend)).__html__()
def __str__(self):
return generate_c(self.ast, dialect=dialect, custom_backend=custom_backend)
def __repr__(self):
return generate_c(self.ast, dialect=dialect, custom_backend=custom_backend)
return CodeDisplay(ast)
def get_code_str(ast, custom_backend=None):
return str(get_code_obj(ast, custom_backend))
def _isnotebook():
try:
shell = get_ipython().__class__.__name__
if shell == 'ZMQInteractiveShell':
return True # Jupyter notebook or qtconsole
elif shell == 'TerminalInteractiveShell':
return False # Terminal running IPython
else:
return False # Other type (?)
except NameError:
return False
def show_code(ast: Union[KernelFunction, KernelWrapper], custom_backend=None):
code = get_code_obj(ast, custom_backend)
if _isnotebook():
from IPython.display import display
display(code)
else:
try:
import rich.syntax
import rich.console
syntax = rich.syntax.Syntax(str(code), "c++", theme="monokai", line_numbers=True)
console = rich.console.Console()
console.print(syntax)
except ImportError:
print(code)
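# A hedged usage sketch of show_code, assuming the usual pystencils workflow where create_kernel
# returns a KernelFunction AST; 'f' and 'g' are illustrative field names, not part of this module:
# >>> import pystencils as ps
# >>> f, g = ps.fields("f, g: [2D]")
# >>> ast = ps.create_kernel([ps.Assignment(g[0, 0], f[0, 0] + f[1, 0])])
# >>> show_code(ast)  # doctest: +SKIP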
from enum import Enum, auto
class Target(Enum):
"""
The Target enumeration represents all possible targets that can be used for the code generation.
"""
CPU = auto()
"""
Target CPU architecture.
"""
GPU = auto()
"""
Target GPU architecture.
"""
class Backend(Enum):
"""
The Backend enumeration represents all possible backends that can be used for the code generation.
Backends and targets must be combined with care. For example, combining the CPU target with the CUDA backend makes no sense.
"""
C = auto()
"""
Use the C Backend of pystencils.
"""
CUDA = auto()
"""
Use the CUDA backend to generate code for NVIDIA GPUs.
"""
from typing import List, Union
import sympy as sp
from pystencils.astnodes import Node
from pystencils.simp import AssignmentCollection
from pystencils.assignment import Assignment
# noinspection PyPep8Naming
class fast_division(sp.Function):
"""
Produces special float instructions for CUDA kernels
"""
nargs = (2,)
# noinspection PyPep8Naming
class fast_sqrt(sp.Function):
"""
Produces special float instructions for CUDA kernels
"""
nargs = (1, )
# noinspection PyPep8Naming
class fast_inv_sqrt(sp.Function):
"""
Produces special float instructions for CUDA kernels
"""
nargs = (1, )
def _run(term, visitor):
if isinstance(term, AssignmentCollection):
new_main_assignments = _run(term.main_assignments, visitor)
new_subexpressions = _run(term.subexpressions, visitor)
return term.copy(new_main_assignments, new_subexpressions)
elif isinstance(term, list):
return [_run(e, visitor) for e in term]
else:
return visitor(term)
def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection, Assignment]):
def visit(expr):
if isinstance(expr, Node):
return expr
if expr.func == sp.Pow and isinstance(expr.exp, sp.Rational) and expr.exp.q == 2:
power = expr.exp.p
if power < 0:
return fast_inv_sqrt(expr.args[0]) ** (-power)
else:
return fast_sqrt(expr.args[0]) ** power
else:
new_args = [visit(a) for a in expr.args]
return expr.func(*new_args) if new_args else expr
return _run(term, visit)
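# A hedged sketch of the rewrite performed by insert_fast_sqrts ('x' is an illustrative symbol):
# >>> x = sp.Symbol("x")
# >>> insert_fast_sqrts(sp.sqrt(x))
# fast_sqrt(x)
# >>> insert_fast_sqrts(1 / sp.sqrt(x))
# fast_inv_sqrt(x)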
def insert_fast_divisions(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection, Assignment]):
def visit(expr):
if isinstance(expr, Node):
return expr
if expr.func == sp.Mul:
div_args = []
other_args = []
for a in expr.args:
if a.func == sp.Pow and a.exp.is_integer and a.exp < 0:
div_args.append(visit(a.base) ** (-a.exp))
else:
other_args.append(visit(a))
if div_args:
return fast_division(sp.Mul(*other_args), sp.Mul(*div_args))
else:
return sp.Mul(*other_args)
elif expr.func == sp.Pow and expr.exp.is_integer and expr.exp < 0:
return fast_division(1, visit(expr.base) ** (-expr.exp))
else:
new_args = [visit(a) for a in expr.args]
return expr.func(*new_args) if new_args else expr
return _run(term, visit)
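# A hedged sketch of the rewrite performed by insert_fast_divisions ('x', 'y' are illustrative symbols):
# >>> x, y = sp.symbols("x y")
# >>> insert_fast_divisions(x / y)
# fast_division(x, y)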
from .derivative import (
Diff, DiffOperator, collect_diffs, combine_diff_products, diff, diff_terms, evaluate_diffs,
expand_diff_full, expand_diff_linear, expand_diff_products, functional_derivative,
normalize_diff_order, zero_diffs)
from .finitedifferences import Discretization2ndOrder, advection, diffusion, transient
from .finitevolumes import FVM1stOrder, VOF
from .spatial import discretize_spatial, discretize_spatial_staggered
__all__ = ['Diff', 'diff', 'DiffOperator', 'diff_terms', 'collect_diffs',
'zero_diffs', 'evaluate_diffs', 'normalize_diff_order', 'expand_diff_full', 'expand_diff_linear',
'expand_diff_products', 'combine_diff_products', 'functional_derivative',
'advection', 'diffusion', 'transient', 'Discretization2ndOrder', 'discretize_spatial',
'discretize_spatial_staggered', 'FVM1stOrder', 'VOF']
import itertools
from collections import defaultdict
import numpy as np
import sympy as sp
from pystencils.field import Field
from pystencils.stencil import direction_string_to_offset
from pystencils.sympyextensions import multidimensional_sum, prod
from pystencils.utils import LinearEquationSystem, fully_contains
class FiniteDifferenceStencilDerivation:
"""Derives finite difference stencils.
Can derive standard finite difference stencils, as well as isotropic versions
(see Isotropic Finite Differences by A. Kumar)
Args:
derivative_coordinates: tuple indicating which derivative should be approximated,
(1, ) stands for first derivative in second direction (y),
(0, 1) would be a mixed second derivative in x and y
(0, 0, 0) would be a third derivative in x direction
stencil: list of offset tuples, defining the stencil
dx: spacing between grid points, one for all directions, i.e. dx=dy=dz
Examples:
Central differences
>>> fd_1d = FiniteDifferenceStencilDerivation((0,), stencil=[(-1,), (0,), (1,)])
>>> result = fd_1d.get_stencil()
>>> result
Finite difference stencil of accuracy 2, isotropic error: False
>>> result.weights
[-1/2, 0, 1/2]
Forward differences
>>> fd_1d = FiniteDifferenceStencilDerivation((0,), stencil=[(0,), (1,)])
>>> result = fd_1d.get_stencil()
>>> result
Finite difference stencil of accuracy 1, isotropic error: False
>>> result.weights
[-1, 1]
"""
def __init__(self, derivative_coordinates, stencil, dx=1):
self.dim = len(stencil[0])
self.field = Field.create_generic('f', spatial_dimensions=self.dim)
self._derivative = tuple(sorted(derivative_coordinates))
self._stencil = stencil
self._dx = dx
self.weights = {tuple(d): self.symbolic_weight(*d) for d in self._stencil}
def assume_symmetric(self, dim, anti_symmetric=False):
"""Adds restriction that weight in opposite directions of a dimension are equal (symmetric) or
the negative of each other (anti symmetric)
For example: dim=1, assumes that w(1, 1) == w(1, -1), if anti_symmetric=False or
w(1, 1) == -w(1, -1) if anti_symmetric=True
"""
update = {}
for direction, value in self.weights.items():
inv_direction = tuple(-offset if i == dim else offset for i, offset in enumerate(direction))
if direction[dim] < 0:
inv_weight = self.weights[inv_direction]
update[direction] = -inv_weight if anti_symmetric else inv_weight
self.weights.update(update)
def set_weight(self, offset, value):
assert offset in self.weights
self.weights[offset] = value
def get_stencil(self, isotropic=False) -> 'FiniteDifferenceStencilDerivation.Result':
weights = [self.weights[d] for d in self._stencil]
system = LinearEquationSystem(sp.Matrix(weights).atoms(sp.Symbol))
order = 0
while True:
new_system = system.copy()
eq = self.error_term_equations(order)
new_system.add_equations(eq)
sol_structure = new_system.solution_structure()
if sol_structure == 'single':
system = new_system
elif sol_structure == 'multiple':
system = new_system
elif sol_structure == 'none':
break
else:
assert False
order += 1
accuracy = order - len(self._derivative)
error_is_isotropic = False
if isotropic:
new_system = system.copy()
new_system.add_equations(self.isotropy_equations(order))
sol_structure = new_system.solution_structure()
error_is_isotropic = sol_structure != 'none'
if error_is_isotropic:
system = new_system
solve_res = system.solution()
weight_list = [self.weights[d].subs(solve_res) for d in self._stencil]
return self.Result(self._stencil, weight_list, accuracy, error_is_isotropic)
@staticmethod
def symbolic_weight(*args):
str_args = [str(e) for e in args]
return sp.Symbol(f"w_({','.join(str_args)})")
def error_term_dict(self, order):
error_terms = defaultdict(lambda: 0)
for direction in self._stencil:
weight = self.weights[tuple(direction)]
x = tuple(self._dx * d_i for d_i in direction)
for offset in multidimensional_sum(order, dim=self.field.spatial_dimensions):
fac = sp.factorial(order)
error_terms[tuple(sorted(offset))] += weight / fac * prod(x[off] for off in offset)
if self._derivative in error_terms:
error_terms[self._derivative] -= 1
return error_terms
def error_term_equations(self, order):
return list(self.error_term_dict(order).values())
def isotropy_equations(self, order):
def cycle_int_sequence(sequence, modulus):
result = []
arr = np.array(sequence, dtype=int)
while True:
if tuple(arr) in result:
break
result.append(tuple(arr))
arr = (arr + 1) % modulus
return tuple(set(tuple(sorted(t)) for t in result))
error_dict = self.error_term_dict(order)
eqs = []
for derivative_tuple in list(error_dict.keys()):
if fully_contains(self._derivative, derivative_tuple):
remaining = list(derivative_tuple)
for e in self._derivative:
del remaining[remaining.index(e)]
permutations = cycle_int_sequence(remaining, self.dim)
if len(permutations) == 1:
eqs.append(error_dict[derivative_tuple])
else:
for i in range(1, len(permutations)):
new_eq = (error_dict[tuple(sorted(permutations[i] + self._derivative))]
- error_dict[tuple(sorted(permutations[i - 1] + self._derivative))])
if new_eq:
eqs.append(new_eq)
else:
eqs.append(error_dict[derivative_tuple])
return eqs
class Result:
def __init__(self, stencil, weights, accuracy, is_isotropic):
self.stencil = stencil
self.weights = weights
self.accuracy = accuracy
self.is_isotropic = is_isotropic
def visualize(self):
from pystencils.stencil import plot
plot(self.stencil, data=self.weights)
def apply(self, field_access: Field.Access):
f = field_access
return sum(f.get_shifted(*offset) * weight for offset, weight in zip(self.stencil, self.weights))
def __array__(self):
return np.array(self.as_array().tolist())
def as_array(self):
dim = len(self.stencil[0])
assert (dim == 2 or dim == 3), "Only 2D or 3D matrix representations are available"
max_offset = max(max(abs(e) for e in direction) for direction in self.stencil)
shape_list = []
for i in range(dim):
shape_list.append(2 * max_offset + 1)
number_of_elements = np.prod(shape_list)
shape = tuple(shape_list)
result = sp.MutableDenseNDimArray([0] * number_of_elements, shape)
if dim == 2:
for direction, weight in zip(self.stencil, self.weights):
result[max_offset - direction[1], max_offset + direction[0]] = weight
if dim == 3:
for direction, weight in zip(self.stencil, self.weights):
result[max_offset - direction[1], max_offset + direction[0], max_offset + direction[2]] = weight
return result
def rotate_weights_and_apply(self, field_access: Field.Access, axes):
"""derive gradient weights of other direction with already calculated weights of one direction
via rotation and apply them to a field."""
dim = len(self.stencil[0])
assert (dim == 2 or dim == 3), "This function is only for 2D or 3D stencils available"
rotated_weights = np.rot90(np.array(self.__array__()), 1, axes)
result = []
max_offset = max(max(abs(e) for e in direction) for direction in self.stencil)
if dim == 2:
for direction in self.stencil:
result.append(rotated_weights[max_offset - direction[1],
max_offset + direction[0]])
if dim == 3:
for direction in self.stencil:
result.append(rotated_weights[max_offset - direction[1],
max_offset + direction[0],
max_offset + direction[2]])
f = field_access
return sum(f.get_shifted(*offset) * weight for offset, weight in zip(self.stencil, result))
def __repr__(self):
return "Finite difference stencil of accuracy {}, isotropic error: {}".format(self.accuracy,
self.is_isotropic)
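# A hedged usage sketch mirroring the doctests above: deriving a second x-derivative on the
# full 3x3 neighborhood; with isotropic=True the leading error term is expected to become isotropic.
# >>> fd_2d = FiniteDifferenceStencilDerivation((0, 0), stencil=[(i, j) for j in (-1, 0, 1) for i in (-1, 0, 1)])
# >>> result = fd_2d.get_stencil(isotropic=True)
# >>> result.is_isotropic
# True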
class FiniteDifferenceStaggeredStencilDerivation:
"""Derives a finite difference stencil for application at a staggered position
Args:
neighbor: the neighbor direction string or vector at whose staggered position to calculate the derivative
dim: how many dimensions (2 or 3)
derivative: a tuple of directions over which to perform derivatives
free_weights_prefix: a string to prefix to free weight symbols. If None, do not return free weights
"""
def __init__(self, neighbor, dim, derivative=tuple(), free_weights_prefix=None):
if type(neighbor) is str:
neighbor = direction_string_to_offset(neighbor)
if dim == 2:
assert neighbor[dim:] == 0
assert len(derivative) == 0 or max(derivative) < dim
neighbor = sp.Matrix(neighbor[:dim])
pos = neighbor / 2
def unitvec(i):
"""return the `i`-th unit vector in three dimensions"""
a = np.zeros(dim, dtype=int)
a[i] = 1
return a
def flipped(a, i):
"""return `a` with its `i`-th element's sign flipped"""
a = a.copy()
a[i] *= -1
return a
# determine the points to use, coordinates are relative to position
points = []
if np.linalg.norm(neighbor, 1) == 1:
main_points = [neighbor / 2, neighbor / -2]
elif np.linalg.norm(neighbor, 1) == 2:
nonzero_indices = [i for i, v in enumerate(neighbor) if v != 0 and i < dim]
main_points = [neighbor / 2, neighbor / -2, flipped(neighbor / 2, nonzero_indices[0]),
flipped(neighbor / -2, nonzero_indices[0])]
else:
main_points = [sp.Matrix(np.multiply(neighbor, sp.Matrix(c) / 2))
for c in itertools.product([-1, 1], repeat=3)]
points += main_points
zero_indices = [i for i, v in enumerate(neighbor) if v == 0 and i < dim]
for i in zero_indices:
points += [point + sp.Matrix(unitvec(i)) for point in main_points]
points += [point - sp.Matrix(unitvec(i)) for point in main_points]
points_tuple = tuple([tuple(p) for p in points])
self._stencil = points_tuple
# determine the stencil weights
if len(derivative) == 0:
weights = None
else:
derivation = FiniteDifferenceStencilDerivation(derivative, points_tuple).get_stencil()
if not derivation.accuracy:
raise Exception('the requested derivative cannot be performed with the available neighbors')
weights = derivation.weights
# if the weights are underdefined, we can choose the free symbols to find the sparsest stencil
free_weights = set(itertools.chain(*[w.free_symbols for w in weights]))
if free_weights_prefix is not None:
weights = [w.subs({fw: sp.Symbol(f"{free_weights_prefix}_{i}") for i, fw in enumerate(free_weights)})
for w in weights]
elif len(free_weights) > 0:
zero_counts = defaultdict(list)
for values in itertools.product([-1, -sp.Rational(1, 2), 0, 1, sp.Rational(1, 2)],
repeat=len(free_weights)):
subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
weights = [w.subs(subs) for w in derivation.weights]
if not all(a == 0 for a in weights):
zero_count = sum([1 for w in weights if w == 0])
zero_counts[zero_count].append(weights)
best = zero_counts[max(zero_counts.keys())]
if len(best) > 1: # if there are multiple, pick the one that contains a nonzero center weight
center = [tuple(p + pos) for p in points].index((0, 0, 0)[:dim])
best = [b for b in best if b[center] != 0]
if len(best) > 1: # if there are still multiple, they are equivalent, so we average
weights = [sum([b[i] for b in best]) / len(best) for i in range(len(weights))]
else:
weights = best[0]
assert weights
points_tuple = tuple([tuple(p + pos) for p in points])
self._points = points_tuple
self._weights = weights
@property
def points(self):
"""return the points of the stencil"""
return self._points
@property
def stencil(self):
"""return the points of the stencil relative to the staggered position specified by neighbor"""
return self._stencil
@property
def weights(self):
"""return the weights of the stencil"""
assert self._weights is not None
return self._weights
def visualize(self):
if self._weights is None:
ws = None
else:
ws = np.array([w for w in self.weights if w != 0], dtype=float)
pts = np.array([p for i, p in enumerate(self.points) if self.weights[i] != 0], dtype=int)
from pystencils.stencil import plot
plot(pts, data=ws)
def apply(self, access: Field.Access):
return sum([access.get_shifted(*point) * weight for point, weight in zip(self.points, self.weights)])
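# A hedged usage sketch: first x-derivative at the staggered position between a cell and its
# east neighbor; the free weights are expected to resolve to the sparsest two-point stencil.
# >>> f = Field.create_generic('f', spatial_dimensions=2)
# >>> fds = FiniteDifferenceStaggeredStencilDerivation('E', dim=2, derivative=(0,))
# >>> fds.apply(f.center) == f[1, 0] - f[0, 0]
# True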
from collections import defaultdict, namedtuple
import sympy as sp
from pystencils.field import Field
from pystencils.sympyextensions import normalize_product, prod
def _default_diff_sort_key(d):
return str(d.superscript), str(d.target)
class Diff(sp.Expr):
"""Sympy Node representing a derivative.
The difference to sympy's built in differential is:
- shortened latex representation
- all simplifications have to be done manually
- optional marker displayed as superscript
"""
is_number = False
is_Rational = False
_diff_wrt = True
def __new__(cls, argument, target=-1, superscript=-1):
if argument == 0:
return sp.Rational(0, 1)
if isinstance(argument, Field):
argument = argument.center
return sp.Expr.__new__(cls, argument.expand(), sp.sympify(target), sp.sympify(superscript))
@property
def is_commutative(self):
any_non_commutative = any(not s.is_commutative for s in self.atoms(sp.Symbol))
if any_non_commutative:
return False
else:
return True
def get_arg_recursive(self):
"""Returns the argument the derivative acts on, for nested derivatives the inner argument is returned"""
if not isinstance(self.arg, Diff):
return self.arg
else:
return self.arg.get_arg_recursive()
def change_arg_recursive(self, new_arg):
"""Returns a Diff node with the given 'new_arg' instead of the current argument. For nested derivatives
a new nested derivative is returned where the inner Diff has the 'new_arg'"""
if not isinstance(self.arg, Diff):
return Diff(new_arg, self.target, self.superscript)
else:
return Diff(self.arg.change_arg_recursive(new_arg), self.target, self.superscript)
def split_linear(self, functions):
"""
Applies linearity property of Diff: i.e. 'Diff(c*a+b)' is transformed to 'c * Diff(a) + Diff(b)'
The parameter functions is a list of all symbols that are considered functions, not constants.
For the example above: functions=[a, b]
"""
constant, variable = 1, 1
if self.arg.func != sp.Mul:
constant, variable = 1, self.arg
else:
for factor in normalize_product(self.arg):
if factor in functions or isinstance(factor, Diff):
variable *= factor
else:
constant *= factor
if isinstance(variable, sp.Symbol) and variable not in functions:
return 0
if isinstance(variable, int) or variable.is_number:
return 0
else:
return constant * Diff(variable, target=self.target, superscript=self.superscript)
@property
def arg(self):
"""Expression the derivative acts on"""
return self.args[0]
@property
def target(self):
"""Subscript, usually the variable the Diff is w.r.t. """
return self.args[1]
@property
def superscript(self):
"""Superscript, for example used as the Chapman-Enskog order index"""
return self.args[2]
def _latex(self, printer, *_):
result = r"{\partial"
if self.superscript >= 0:
result += "^{(%s)}" % (self.superscript,)
if self.target != -1:
result += "_{%s}" % (printer.doprint(self.target),)
contents = printer.doprint(self.arg)
if isinstance(self.arg, int) or isinstance(self.arg, sp.Symbol) or self.arg.is_number or self.arg.func == Diff:
result += " " + contents
else:
result += " (" + contents + ") "
result += "}"
return result
def __str__(self):
return f"D({self.arg})"
def interpolated_access(self, offset, **kwargs):
"""Represents an interpolated access on a spatially differentiated field
Args:
offset (Tuple[sympy.Expr]): Absolute position to determine the value of the spatial derivative
"""
from pystencils.interpolation_astnodes import DiffInterpolatorAccess
assert isinstance(self.arg.field, Field), "Must be field to enable interpolated accesses"
return DiffInterpolatorAccess(self.arg.field.interpolated_access(offset, **kwargs).symbol, self.target, *offset)
class DiffOperator(sp.Expr):
"""Un-applied differential, i.e. differential operator
Args:
target: the variable the differential is taken w.r.t.
This target is mainly for display purposes (it is the subscript) and to distinguish DiffOperators
If the target is '-1' no subscript is displayed
superscript: optional marker displayed as superscript
is not displayed if set to '-1'
The DiffOperator behaves much like a variable with a special name. Its main use is to be applied later, using
DiffOperator.apply(expr, arg), which transforms 'DiffOperator's into applied 'Diff's
"""
is_commutative = True
is_number = False
is_Rational = False
def __new__(cls, target=-1, superscript=-1):
return sp.Expr.__new__(cls, sp.sympify(target), sp.sympify(superscript))
@property
def target(self):
return self.args[0]
@property
def superscript(self):
return self.args[1]
def _latex(self, *_):
result = r"{\partial"
if self.superscript >= 0:
result += "^{(%s)}" % (self.superscript,)
if self.target != -1:
result += "_{%s}" % (self.target,)
result += "}"
return result
@staticmethod
def apply(expr, argument, apply_to_constants=True):
"""
Returns a new expression where each 'DiffOperator' is replaced by a 'Diff' node.
Multiplications of 'DiffOperator's are interpreted as nested application of differentiation:
i.e. DiffOperator('x')*DiffOperator('x') is a second derivative, replaced by Diff(Diff(arg, x), x)
"""
def handle_mul(mul):
args = normalize_product(mul)
diffs = [a for a in args if isinstance(a, DiffOperator)]
if len(diffs) == 0:
return mul * argument if apply_to_constants else mul
rest = [a for a in args if not isinstance(a, DiffOperator)]
diffs.sort(key=_default_diff_sort_key)
result = argument
for d in reversed(diffs):
result = Diff(result, target=d.target, superscript=d.superscript)
return prod(rest) * result
expr = expr.expand()
if expr.func == sp.Mul or expr.func == sp.Pow:
return handle_mul(expr)
elif expr.func == sp.Add:
return expr.func(*[handle_mul(a) for a in expr.args])
else:
return expr * argument if apply_to_constants else expr
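# A hedged sketch of DiffOperator.apply: a product (here a power) of operators nests into applied Diffs.
# >>> f = sp.Symbol("f")
# >>> op = DiffOperator(target=0)
# >>> DiffOperator.apply(op * op, f) == Diff(Diff(f, 0), 0)
# True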
# ----------------------------------------------------------------------------------------------------------------------
def diff(expr, *args):
"""Shortcut function to create nested derivatives
>>> f = sp.Symbol("f")
>>> diff(f, 0, 0, 1) == Diff(Diff( Diff(f, 1), 0), 0)
True
"""
if len(args) == 0:
return expr
result = expr
for index in reversed(args):
result = Diff(result, index)
return result
def diff_args(expr):
"""Extracts the indices and argument of possibly nested derivative - inverse of diff function
>>> args = (sp.Symbol("x"), 0, 1, 2, 5, 1)
>>> e = diff(*args)
>>> assert diff_args(e) == args
"""
if not isinstance(expr, Diff):
return expr,
else:
inner_res = diff_args(expr.args[0])
return (inner_res[0], expr.args[1], *inner_res[1:])
def diff_terms(expr):
"""Returns set of all derivatives in an expression.
This function yields different results than 'expr.atoms(Diff)' when nested derivatives are in the expression,
since this function only returns the outer derivatives
Example:
>>> x, y = sp.symbols("x, y")
>>> diff_terms( diff(x, 0, 0) )
{Diff(Diff(x, 0, -1), 0, -1)}
>>> diff_terms( diff(x, 0, 0) + y )
{Diff(Diff(x, 0, -1), 0, -1)}
"""
result = set()
def visit(e):
if isinstance(e, Diff):
result.add(e)
else:
for a in e.args:
visit(a)
visit(expr)
return result
def collect_diffs(expr):
"""Rewrites expression into a sum of distinct derivatives with pre-factors"""
return expr.collect(diff_terms(expr))
def zero_diffs(expr, label):
"""Replaces all differentials with the given target by 0
Example:
>>> x, y, f = sp.symbols("x y f")
>>> expression = Diff(f, x) + Diff(f, y) + Diff(Diff(f, y), x) + 7
>>> zero_diffs(expression, x)
Diff(f, y, -1) + 7
"""
def visit(e):
if isinstance(e, Diff):
if e.target == label:
return 0
new_args = [visit(arg) for arg in e.args]
return e.func(*new_args) if new_args else e
return visit(expr)
def evaluate_diffs(expr, var=None):
"""Replaces pystencils diff objects by sympy diff objects and evaluates them.
Replaces Diff nodes by sp.diff; the differentiation variable is the Diff target (if var=None),
otherwise the specified var
"""
if isinstance(expr, Diff):
if var is None:
var = expr.target
return sp.diff(evaluate_diffs(expr.arg, var), var)
else:
new_args = [evaluate_diffs(arg, var) for arg in expr.args]
return expr.func(*new_args) if new_args else expr
def normalize_diff_order(expression, functions=None, constants=None, sort_key=_default_diff_sort_key):
"""Assumes order of differentiation can be exchanged. Changes the order of nested Diffs to a standard order defined
by the sorting key 'sort_key' such that the derivative terms can be further simplified """
def visit(expr):
if isinstance(expr, Diff):
nodes = [expr]
while isinstance(nodes[-1].arg, Diff):
nodes.append(nodes[-1].arg)
processed_arg = visit(nodes[-1].arg)
nodes.sort(key=sort_key)
result = processed_arg
for d in reversed(nodes):
result = Diff(result, target=d.target, superscript=d.superscript)
return result
else:
new_args = [visit(e) for e in expr.args]
return expr.func(*new_args) if new_args else expr
expression = expand_diff_linear(expression.expand(), functions, constants).expand()
return visit(expression)
def expand_diff_full(expr, functions=None, constants=None):
if functions is None:
functions = expr.atoms(sp.Symbol)
if constants is not None:
functions.difference_update(constants)
def visit(e):
if not isinstance(e, sp.Tuple):
e = e.expand()
if e.func == Diff:
result = 0
diff_args = {'target': e.target, 'superscript': e.superscript}
diff_inner = e.args[0]
diff_inner = visit(diff_inner)
if diff_inner.func not in (sp.Add, sp.Mul):
return e
for term in diff_inner.args if diff_inner.func == sp.Add else [diff_inner]:
independent_terms = 1
dependent_terms = []
for factor in normalize_product(term):
if factor in functions or isinstance(factor, Diff):
dependent_terms.append(factor)
else:
independent_terms *= factor
for i in range(len(dependent_terms)):
dependent_term = dependent_terms[i]
other_dependent_terms = dependent_terms[:i] + dependent_terms[i + 1:]
processed_diff = normalize_diff_order(Diff(dependent_term, **diff_args))
result += independent_terms * prod(other_dependent_terms) * processed_diff
return result
elif isinstance(e, sp.Piecewise):
return sp.Piecewise(*((expand_diff_full(a, functions, constants), b) for a, b in e.args))
elif isinstance(e, sp.Tuple):
new_args = [visit(arg) for arg in e.args]
return sp.Tuple(*new_args)
else:
new_args = [visit(arg) for arg in e.args]
return e.func(*new_args) if new_args else e
if isinstance(expr, sp.Matrix):
return expr.applyfunc(visit)
else:
return visit(expr)
def expand_diff_linear(expr, functions=None, constants=None):
"""Expands all derivative nodes by applying Diff.split_linear
Args:
expr: expression containing derivatives
functions: sequence of symbols that are considered functions and cannot be pulled before the derivative.
If None, all symbols are viewed as functions
constants: sequence of symbols which are considered constants and can be pulled before the derivative
"""
if functions is None:
functions = expr.atoms(sp.Symbol)
if constants is not None:
functions.difference_update(constants)
if isinstance(expr, Diff):
arg = expand_diff_linear(expr.arg, functions)
if hasattr(arg, 'func') and arg.func == sp.Add:
result = 0
for a in arg.args:
result += Diff(a, target=expr.target, superscript=expr.superscript).split_linear(functions)
return result
else:
diff = Diff(arg, target=expr.target, superscript=expr.superscript)
if diff == 0:
return 0
else:
return diff.split_linear(functions)
elif isinstance(expr, sp.Piecewise):
return sp.Piecewise(*((expand_diff_linear(a, functions, constants), b) for a, b in expr.args))
elif isinstance(expr, sp.Tuple):
new_args = [expand_diff_linear(e, functions) for e in expr.args]
return sp.Tuple(*new_args)
else:
new_args = [expand_diff_linear(e, functions) for e in expr.args]
result = sp.expand(expr.func(*new_args) if new_args else expr)
return result
def expand_diff_products(expr):
"""Fully expands all derivatives by applying product rule"""
if isinstance(expr, Diff):
arg = expand_diff_products(expr.args[0])
if arg.func == sp.Add:
new_args = [Diff(e, target=expr.target, superscript=expr.superscript)
for e in arg.args]
return sp.Add(*new_args)
if arg.func not in (sp.Mul, sp.Pow):
return Diff(arg, target=expr.target, superscript=expr.superscript)
else:
prod_list = normalize_product(arg)
result = 0
for i in range(len(prod_list)):
pre_factor = prod(prod_list[j] for j in range(len(prod_list)) if i != j)
result += pre_factor * Diff(prod_list[i], target=expr.target, superscript=expr.superscript)
return result
else:
new_args = [expand_diff_products(e) for e in expr.args]
return expr.func(*new_args) if new_args else expr
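# A hedged sketch of the product rule applied by expand_diff_products ('a', 'b' are illustrative symbols):
# >>> a, b = sp.symbols("a b")
# >>> expand_diff_products(Diff(a * b, target=0)) == a * Diff(b, 0) + b * Diff(a, 0)
# True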
def combine_diff_products(expr):
"""Inverse product rule"""
def expr_to_diff_decomposition(expression):
"""Decomposes a sp.Add node containing CeDiffs into:
diff_dict: maps (target, superscript) -> [ (pre_factor, argument), ... ]
i.e. a partial(b) ( a is pre-factor, b is argument)
in case of partial(a) partial(b) two entries are created (0.5 partial(a), b), (0.5 partial(b), a)
"""
DiffInfo = namedtuple("DiffInfo", ["target", "superscript"])
class DiffSplit:
def __init__(self, fac, argument):
self.pre_factor = fac
self.argument = argument
def __repr__(self):
return str((self.pre_factor, self.argument))
assert isinstance(expression, sp.Add)
diff_dict = defaultdict(list)
rest = 0
for term in expression.args:
if isinstance(term, Diff):
diff_dict[DiffInfo(term.target, term.superscript)].append(DiffSplit(1, term.arg))
else:
mul_args = normalize_product(term)
diffs = [d for d in mul_args if isinstance(d, Diff)]
factor = prod(d for d in mul_args if not isinstance(d, Diff))
if len(diffs) == 0:
rest += factor
else:
for i, diff in enumerate(diffs):
all_but_current = [d for j, d in enumerate(diffs) if i != j]
pre_factor = factor * prod(all_but_current) * sp.Rational(1, len(diffs))
diff_dict[DiffInfo(diff.target, diff.superscript)].append(DiffSplit(pre_factor, diff.arg))
return diff_dict, rest
def match_diff_splits(own, other):
own_fac = own.pre_factor / other.argument
other_fac = other.pre_factor / own.argument
count = sp.count_ops
if count(own_fac) > count(own.pre_factor) or count(other_fac) > count(other.pre_factor):
return None
new_other_factor = own_fac - other_fac
return new_other_factor
def process_diff_list(diff_list, label, superscript):
if len(diff_list) == 0:
return 0
elif len(diff_list) == 1:
return diff_list[0].pre_factor * Diff(diff_list[0].argument, label, superscript)
result = 0
matches = []
for i in range(1, len(diff_list)):
match_result = match_diff_splits(diff_list[i], diff_list[0])
if match_result is not None:
matches.append((i, match_result))
if len(matches) == 0:
result += diff_list[0].pre_factor * Diff(diff_list[0].argument, label, superscript)
else:
other_idx, match_result = sorted(matches, key=lambda e: sp.count_ops(e[1]))[0]
new_argument = diff_list[0].argument * diff_list[other_idx].argument
result += (diff_list[0].pre_factor / diff_list[other_idx].argument) * Diff(new_argument, label, superscript)
if match_result == 0:
del diff_list[other_idx]
else:
diff_list[other_idx].pre_factor = match_result * diff_list[0].argument
result += process_diff_list(diff_list[1:], label, superscript)
return result
def combine(expression):
expression = expression.expand()
if isinstance(expression, sp.Add):
diff_dict, rest = expr_to_diff_decomposition(expression)
for (label, superscript), diff_list in diff_dict.items():
rest += process_diff_list(diff_list, label, superscript)
return rest
else:
new_args = [combine_diff_products(e) for e in expression.args]
return expression.func(*new_args) if new_args else expression
return combine(expr)
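# A hedged sketch showing that combine_diff_products inverts the product rule above:
# >>> a, b = sp.symbols("a b")
# >>> combine_diff_products(a * Diff(b, 0) + b * Diff(a, 0)) == Diff(a * b, 0)
# True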
def replace_generic_laplacian(expr, dim=None):
"""Laplacian can be written as Diff(Diff(term)) without explicitly giving the dimensions.
This function replaces these constructs by diff(term, 0, 0) + diff(term, 1, 1) + ...
For this to work, the arguments of the derivative have to be fields or field accesses such that the spatial
dimension can be determined.
>>> l = Diff(Diff(sp.symbols('x')))
>>> replace_generic_laplacian(l, 3)
Diff(Diff(x, 0, -1), 0, -1) + Diff(Diff(x, 1, -1), 1, -1) + Diff(Diff(x, 2, -1), 2, -1)
"""
if isinstance(expr, Diff):
arg, *indices = diff_args(expr)
if isinstance(arg, Field.Access):
dim = arg.field.spatial_dimensions
assert dim is not None
if len(indices) == 2 and all(i == -1 for i in indices):
return sum(diff(arg, i, i) for i in range(dim))
else:
return expr
else:
new_args = [replace_generic_laplacian(a, dim) for a in expr.args]
return expr.func(*new_args) if new_args else expr
def functional_derivative(functional, v):
r"""Computes functional derivative of functional with respect to v using Euler-Lagrange equation
.. math ::
\frac{\delta F}{\delta v} =
\frac{\partial F}{\partial v} - \nabla \cdot \frac{\partial F}{\partial \nabla v}
- assumes that gradients are represented by Diff() nodes
- Diff(Diff(r)) represents the divergence of r
"""
diffs = functional.atoms(Diff)
bulk_substitutions = {d: sp.Dummy() for d in diffs}
bulk_substitutions_inverse = {v: k for k, v in bulk_substitutions.items()}
non_diff_part = functional.subs(bulk_substitutions)
partial_f_partial_v = sp.diff(non_diff_part, v).subs(bulk_substitutions_inverse)
gradient_part = 0
for diff_obj in diffs:
if diff_obj.args[0] != v:
continue
dummy = sp.Dummy()
partial_f_partial_grad_v = functional.subs(diff_obj, dummy).diff(dummy).subs(dummy, diff_obj)
gradient_part += Diff(partial_f_partial_grad_v, target=diff_obj.target, superscript=diff_obj.superscript)
result = partial_f_partial_v - gradient_part
return result
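# A hedged sketch ('phi' and 'kappa' are illustrative symbols): for F = phi**2 + kappa * Diff(phi, 0)**2
# the Euler-Lagrange form yields dF/dphi minus the divergence of dF/d(grad phi).
# >>> phi, kappa = sp.symbols("phi kappa")
# >>> functional_derivative(phi**2 + kappa * Diff(phi, 0)**2, phi) == 2*phi - Diff(2*kappa*Diff(phi, 0), 0)
# True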
from typing import Optional, Union
import numpy as np
import sympy as sp
from pystencils.fd import Diff
from pystencils.fd.derivative import diff_args
from pystencils.fd.spatial import fd_stencils_standard
from pystencils.field import Field
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.sympyextensions import fast_subs
FieldOrFieldAccess = Union[Field, Field.Access]
# --------------------------------------- Advection Diffusion ----------------------------------------------------------
def diffusion(scalar, diffusion_coeff, idx=None):
"""Diffusion term ∇·( diffusion_coeff · ∇(scalar))
Examples:
>>> f = Field.create_generic('f', spatial_dimensions=2)
>>> d = sp.Symbol("d")
>>> dx = sp.Symbol("dx")
>>> diffusion_term = diffusion(scalar=f, diffusion_coeff=d)
>>> discretization = Discretization2ndOrder()
>>> expected_output = ((f[-1, 0] + f[0, -1] - 4 * f[0, 0] + f[0, 1] + f[1, 0]) * d) / dx**2
>>> sp.simplify(discretization(diffusion_term) - expected_output)
0
"""
if isinstance(scalar, Field):
first_arg = scalar.center
elif isinstance(scalar, Field.Access):
first_arg = scalar
else:
raise ValueError("Diffused scalar has to be a pystencils Field or Field.Access")
args = [first_arg, diffusion_coeff if not isinstance(diffusion_coeff, Field) else diffusion_coeff.center]
if idx is not None:
args.append(idx)
return Diffusion(*args)
def advection(advected_scalar: FieldOrFieldAccess, velocity_field: FieldOrFieldAccess, idx: Optional[int] = None):
"""Advection term ∇·(velocity_field · advected_scalar)
Term that describes the advection of a scalar quantity in a velocity field.
"""
if isinstance(advected_scalar, Field):
first_arg = advected_scalar.center
elif isinstance(advected_scalar, Field.Access):
first_arg = advected_scalar
else:
raise ValueError("Advected scalar has to be a pystencils Field or Field.Access")
args = [first_arg, velocity_field if not isinstance(velocity_field, Field) else velocity_field.center]
if idx is not None:
args.append(idx)
return Advection(*args)
def transient(scalar, idx=None):
"""Transient term ∂_t(scalar)"""
if isinstance(scalar, Field):
args = [scalar.center]
elif isinstance(scalar, Field.Access):
args = [scalar]
else:
raise ValueError("Scalar has to be a pystencils Field or Field.Access")
if idx is not None:
args.append(idx)
return Transient(*args)
class Discretization2ndOrder:
def __init__(self, dx=sp.Symbol("dx"), dt=sp.Symbol("dt"), discretization_stencil_func=fd_stencils_standard):
self.dx = dx
self.dt = dt
self.spatial_stencil = discretization_stencil_func
def _discretize_diffusion(self, e):
result = 0
for c in range(e.dim):
first_diffs = [offset
* (e.diffusion_scalar_at_offset(c, offset) * e.diffusion_coefficient_at_offset(c, offset)
- e.diffusion_scalar_at_offset(0, 0) * e.diffusion_coefficient_at_offset(0, 0))
for offset in [-1, 1]]
result += first_diffs[1] - first_diffs[0]
return result / (self.dx ** 2)
def _discretize_advection(self, expr):
result = 0
for c in range(expr.dim):
interpolated = [(expr.advected_scalar_at_offset(c, offset) * expr.velocity_field_at_offset(c, offset, c)
+ expr.advected_scalar_at_offset(c, 0) * expr.velocity_field_at_offset(c, 0, c)) / 2
for offset in [-1, 1]]
result += interpolated[1] - interpolated[0]
return result / self.dx
def _discretize_spatial(self, e):
if isinstance(e, Diffusion):
return self._discretize_diffusion(e)
elif isinstance(e, Advection):
return self._discretize_advection(e)
elif isinstance(e, Diff):
arg, *indices = diff_args(e)
if not isinstance(arg, Field.Access):
raise ValueError("Only derivatives with field or field accesses as arguments can be discretized")
return self.spatial_stencil(indices, self.dx, arg)
else:
new_args = [self._discretize_spatial(a) for a in e.args]
return e.func(*new_args) if new_args else e
def __call__(self, expr):
if isinstance(expr, list):
return [self(e) for e in expr]
elif isinstance(expr, sp.Matrix) or isinstance(expr, sp.ImmutableDenseMatrix):
return expr.applyfunc(self.__call__)
elif isinstance(expr, AssignmentCollection):
return expr.copy(main_assignments=[self(e) for e in expr.main_assignments],
subexpressions=[self(e) for e in expr.subexpressions])
transient_terms = expr.atoms(Transient)
if len(transient_terms) == 0:
return self._discretize_spatial(expr)
elif len(transient_terms) == 1:
transient_term = transient_terms.pop()
solve_result = sp.solve(expr, transient_term)
if len(solve_result) != 1:
raise ValueError("Could not solve for transient term" + str(solve_result))
rhs = solve_result.pop()
# explicit euler
return transient_term.scalar + self.dt * self._discretize_spatial(rhs)
else:
print(transient_terms)
raise NotImplementedError("Cannot discretize expression with more than one transient term")
# -------------------------------------- Helper Classes ----------------------------------------------------------------
class Advection(sp.Function):
@property
def scalar(self):
return self.args[0].field
@property
def vector(self):
if isinstance(self.args[1], Field.Access):
return self.args[1].field
else:
return self.args[1]
@property
def scalar_index(self):
return None if len(self.args) <= 2 else int(self.args[2])
@property
def dim(self):
return self.scalar.spatial_dimensions
def _latex(self, printer):
name_suffix = f"_{self.scalar_index}" if self.scalar_index is not None else ""
if isinstance(self.vector, Field):
return r"\nabla \cdot(%s %s)" % (printer.doprint(sp.Symbol(self.vector.name)),
printer.doprint(sp.Symbol(self.scalar.name + name_suffix)))
else:
args = [r"\partial_%d(%s %s)" % (i, printer.doprint(sp.Symbol(self.scalar.name + name_suffix)),
printer.doprint(self.vector[i]))
for i in range(self.dim)]
return " + ".join(args)
# --- Interface for discretization strategy
def velocity_field_at_offset(self, offset_dim, offset_value, index):
v = self.vector
if isinstance(v, Field):
assert v.index_dimensions == 1
return v.neighbor(offset_dim, offset_value)(index)
else:
return v[index]
def advected_scalar_at_offset(self, offset_dim, offset_value):
idx = 0 if self.scalar_index is None else int(self.scalar_index)
return self.scalar.neighbor(offset_dim, offset_value)(idx)
class Diffusion(sp.Function):
@property
def scalar(self):
return self.args[0].field
@property
def diffusion_coeff(self):
if isinstance(self.args[1], Field.Access):
return self.args[1].field
else:
return self.args[1]
@property
def scalar_index(self):
return None if len(self.args) <= 2 else int(self.args[2])
@property
def dim(self):
return self.scalar.spatial_dimensions
def _latex(self, printer):
name_suffix = f"_{self.scalar_index}" if self.scalar_index is not None else ""
coeff = self.diffusion_coeff
diff_coeff = sp.Symbol(coeff.name) if isinstance(coeff, Field) else coeff
return r"div(%s \nabla %s)" % (printer.doprint(diff_coeff),
printer.doprint(sp.Symbol(self.scalar.name + name_suffix)))
# --- Interface for discretization strategy
def diffusion_scalar_at_offset(self, offset_dim, offset_value):
idx = 0 if self.scalar_index is None else self.scalar_index
return self.scalar.neighbor(offset_dim, offset_value)(idx)
def diffusion_coefficient_at_offset(self, offset_dim, offset_value):
d = self.diffusion_coeff
if isinstance(d, Field):
return d.neighbor(offset_dim, offset_value)
else:
return d
class Transient(sp.Function):
@property
def scalar(self):
if self.scalar_index is None:
return self.args[0].field.center
else:
return self.args[0].field(self.scalar_index)
@property
def scalar_index(self):
return None if len(self.args) <= 1 else int(self.args[1])
def _latex(self, printer):
name_suffix = f"_{self.scalar_index}" if self.scalar_index is not None else ""
return r"\partial_t %s" % (printer.doprint(sp.Symbol(self.scalar.name + name_suffix)),)
# -------------------------------------------- Deprecated Functions ----------------------------------------------------
def grad(var, dim=3):
r"""
Gradients are represented as a special symbol:
e.g. :math:`\nabla x = (x^{\Delta 0}, x^{\Delta 1}, x^{\Delta 2})`
This function takes a symbol and creates the gradient symbols according to convention above
Args:
var: symbol to take the gradient of
dim: dimension (length) of the gradient vector
"""
if hasattr(var, "__getitem__"):
return [[sp.Symbol("%s^Delta^%d" % (v.name, i)) for v in var] for i in range(dim)]
else:
return [sp.Symbol("%s^Delta^%d" % (var.name, i)) for i in range(dim)]
def discretize_center(term, symbols_to_field_dict, dx, dim=3):
"""
Expects term that contains given symbols and gradient components of these symbols and replaces them
by field accesses. Gradients are replaced by centralized approximations:
``(upper neighbor - lower neighbor) / (2*dx)``
Args:
term: term where symbols and gradient(symbol) should be replaced
symbols_to_field_dict: mapping of symbols to Field
dx: width and height of one cell
dim: dimension
Example:
>>> x = sp.Symbol("x")
>>> grad_x = grad(x, dim=3)
>>> term = x * grad_x[0]
>>> term
x*x^Delta^0
>>> f = Field.create_generic('f', spatial_dimensions=3)
>>> expected_output = f[0, 0, 0] * (-f[-1, 0, 0]/2 + f[1, 0, 0]/2)
>>> sp.simplify(discretize_center(term, { x: f }, dx=1, dim=3) - expected_output)
0
"""
substitutions = {}
for symbols, field in symbols_to_field_dict.items():
if not hasattr(symbols, "__getitem__"):
symbols = [symbols]
g = grad(symbols, dim)
substitutions.update({symbol: field(i) for i, symbol in enumerate(symbols)})
for d in range(dim):
up, down = __up_down_offsets(d, dim)
substitutions.update({g[d][i]: (field[up](i) - field[down](i)) / dx / 2 for i in range(len(symbols))})
return fast_subs(term, substitutions)
def discretize_staggered(term, symbols_to_field_dict, coordinate, coordinate_offset, dx, dim=3):
"""
Expects term that contains given symbols and gradient components of these symbols and replaces them
by field accesses. Gradients in coordinate direction are replaced by staggered version at cell boundary.
Symbols themselves and gradients in other directions are replaced by interpolated version at cell face.
Args:
term: input term where symbols and gradients are replaced
symbols_to_field_dict: mapping of symbols to Field
coordinate: id for coordinate (0 for x, 1 for y, ... ) defining cell boundary.
Only gradients in this direction are replaced, i.e. those of the form symbol^Delta^coordinate
coordinate_offset: either +1 or -1 for upper or lower face in coordinate direction
dx: width and height of one cell
dim: dimension
Examples:
Discretizing at the right/east face of the cell (i.e. coordinate=0, coordinate_offset=1)
>>> x, dx = sp.symbols("x dx")
>>> grad_x = grad(x, dim=3)
>>> term = x * grad_x[0]
>>> term
x*x^Delta^0
>>> f = Field.create_generic('f', spatial_dimensions=3)
>>> discretize_staggered(term, symbols_to_field_dict={ x: f}, dx=dx, coordinate=0, coordinate_offset=1, dim=3)
(-f_C + f_E)*(f_C/2 + f_E/2)/dx
"""
assert coordinate_offset == 1 or coordinate_offset == -1
assert 0 <= coordinate < dim
substitutions = {}
for symbols, field in symbols_to_field_dict.items():
if not hasattr(symbols, "__getitem__"):
symbols = [symbols]
offset = [0] * dim
offset[coordinate] = coordinate_offset
offset = np.array(offset, dtype=int)
gradient = grad(symbols)[coordinate]
substitutions.update({s: (field[offset](i) + field(i)) / 2 for i, s in enumerate(symbols)})
substitutions.update({g: (field[offset](i) - field(i)) / dx * coordinate_offset
for i, g in enumerate(gradient)})
for d in range(dim):
if d == coordinate:
continue
up, down = __up_down_offsets(d, dim)
for i, s in enumerate(symbols):
center_grad = (field[up](i) - field[down](i)) / (2 * dx)
neighbor_grad = (field[up + offset](i) - field[down + offset](i)) / (2 * dx)
substitutions[grad(s)[d]] = (center_grad + neighbor_grad) / 2
return fast_subs(term, substitutions)
def discretize_divergence(vector_term, symbols_to_field_dict, dx):
"""
Computes discrete divergence of symbolic vector
Args:
vector_term: sequence of terms, interpreted as vector
symbols_to_field_dict: mapping of symbols to Field
dx: length of a cell
Examples:
Laplace stencil
>>> x, dx = sp.symbols("x dx")
>>> grad_x = grad(x, dim=3)
>>> f = Field.create_generic('f', spatial_dimensions=3)
>>> expected_output = (f[-1, 0, 0] + f[0, -1, 0] + f[0, 0, -1] -
... 6*f[0, 0, 0] + f[0, 0, 1] + f[0, 1, 0] + f[1, 0, 0])/dx**2
>>> sp.simplify(discretize_divergence(grad_x, {x : f}, dx) - expected_output)
0
"""
dim = len(vector_term)
result = 0
for d in range(dim):
for offset in [-1, 1]:
result += offset * discretize_staggered(vector_term[d], symbols_to_field_dict, d, offset, dx, dim)
return result / dx
def __up_down_offsets(d, dim):
coord = [0] * dim
coord[d] = 1
up = np.array(coord, dtype=int)
coord[d] = -1
down = np.array(coord, dtype=int)
return up, down
import pystencils as ps
import sympy as sp
from pystencils.fd.derivation import FiniteDifferenceStaggeredStencilDerivation as FDS, \
FiniteDifferenceStencilDerivation as FD
import itertools
from collections import defaultdict
from collections.abc import Iterable
def get_access_and_direction(term):
direction1 = term.args[1]
if isinstance(term.args[0], ps.Field.Access): # first derivative
access = term.args[0]
direction = (direction1,)
elif isinstance(term.args[0], ps.fd.Diff): # nested derivative
if isinstance(term.args[0].args[0], ps.fd.Diff): # third or higher derivative
raise ValueError("can only handle first and second derivatives")
elif not isinstance(term.args[0].args[0], ps.Field.Access):
raise ValueError("can only handle derivatives of field accesses")
access, direction2 = term.args[0].args[:2]
direction = (direction1, direction2)
else:
raise NotImplementedError(f"can only deal with derivatives of field accesses, "
f"but not {type(term.args[0])}; expansion of derivatives probably failed")
return access, direction
class FVM1stOrder:
"""Finite-volume discretization
Args:
field: the field with the quantity to calculate, e.g. a concentration
flux: a list of sympy expressions that specify the flux, one for each Cartesian direction
source: a list of sympy expressions that specify the source
"""
def __init__(self, field: ps.field.Field, flux=0, source=0):
def normalize(f, shape):
shape = tuple(s for s in shape if s != 1)
if not shape:
shape = None
if isinstance(f, sp.Array) or isinstance(f, Iterable) or isinstance(f, sp.Matrix):
return sp.Array(f, shape)
else:
return sp.Array([f] * (sp.Mul(*shape) if shape else 1))
self.c = field
self.dim = self.c.spatial_dimensions
self.j = normalize(flux, (self.dim, ) + self.c.index_shape)
self.q = normalize(source, self.c.index_shape)
def discrete_flux(self, flux_field: ps.field.Field):
"""Return a list of assignments for the discrete fluxes
Args:
flux_field: a staggered field to which the fluxes should be assigned
"""
assert ps.FieldType.is_staggered(flux_field)
num = 0
def discretize(term, neighbor):
nonlocal num
if isinstance(term, sp.Matrix):
nw = term.applyfunc(lambda t: discretize(t, neighbor))
return nw
elif isinstance(term, ps.field.Field.Access):
avg = (term.get_shifted(*neighbor) + term) * sp.Rational(1, 2)
return avg
elif isinstance(term, ps.fd.Diff):
access, direction = get_access_and_direction(term)
fds = FDS(neighbor, access.field.spatial_dimensions, direction,
free_weights_prefix=f'fvm_free_{num}' if sp.Matrix(neighbor).dot(neighbor) > 2 else None)
num += 1
return fds.apply(access)
if term.args:
new_args = [discretize(a, neighbor) for a in term.args]
return term.func(*new_args)
else:
return term
fluxes = self.j.applyfunc(ps.fd.derivative.expand_diff_full)
fluxes = [sp.Matrix(fluxes.tolist()[i]) if flux_field.index_dimensions > 1 else fluxes.tolist()[i]
for i in range(self.dim)]
A0 = sum([sp.Matrix(ps.stencil.direction_string_to_offset(d)).norm()
for d in flux_field.staggered_stencil]) / self.dim
discrete_fluxes = []
for neighbor in flux_field.staggered_stencil:
neighbor = ps.stencil.direction_string_to_offset(neighbor)
directional_flux = fluxes[0] * int(neighbor[0])
for i in range(1, self.dim):
directional_flux += fluxes[i] * int(neighbor[i])
discrete_flux = sp.simplify(discretize(directional_flux, neighbor))
free_weights = [s for s in discrete_flux.atoms(sp.Symbol) if s.name.startswith('fvm_free_')]
if len(free_weights) > 0:
discrete_flux = discrete_flux.collect(discrete_flux.atoms(ps.field.Field.Access))
access_counts = defaultdict(list)
for values in itertools.product([-1, 0, 1],
repeat=len(free_weights)):
subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
simp = discrete_flux.subs(subs)
access_count = len(simp.atoms(ps.field.Field.Access))
access_counts[access_count].append(simp)
best_count = min(access_counts.keys())
discrete_flux = sum(access_counts[best_count]) / len(access_counts[best_count])
discrete_fluxes.append(discrete_flux / sp.Matrix(neighbor).norm())
if flux_field.index_dimensions > 1:
return [ps.Assignment(lhs, rhs / A0)
for i, d in enumerate(flux_field.staggered_stencil) if discrete_fluxes[i]
for lhs, rhs in zip(flux_field.staggered_vector_access(d), sp.simplify(discrete_fluxes[i]))]
else:
return [ps.Assignment(flux_field.staggered_access(d), sp.simplify(discrete_fluxes[i]) / A0)
for i, d in enumerate(flux_field.staggered_stencil)]
def discrete_source(self):
"""Return a list of assignments for the discrete source term"""
def discretize(term):
if isinstance(term, ps.fd.Diff):
access, direction = get_access_and_direction(term)
if self.dim == 2:
stencil = ["".join(a).replace(" ", "") for a in itertools.product("NS ", "EW ")
if "".join(a).strip()]
else:
stencil = ["".join(a).replace(" ", "") for a in itertools.product("NS ", "EW ", "TB ")
if "".join(a).strip()]
weights = None
for stencil in [["N", "S", "E", "W", "T", "B"][:2 * self.dim], stencil]:
stencil = [tuple(ps.stencil.direction_string_to_offset(d, self.dim)) for d in stencil]
derivation = FD(direction, stencil).get_stencil()
if not derivation.accuracy:
continue
weights = derivation.weights
# if the weights are underdefined, we can choose the free symbols to find the sparsest stencil
free_weights = set(itertools.chain(*[w.free_symbols for w in weights]))
if len(free_weights) > 0:
zero_counts = defaultdict(list)
for values in itertools.product([-1, -sp.Rational(1, 2), 0, 1, sp.Rational(1, 2)],
repeat=len(free_weights)):
subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
weights = [w.subs(subs) for w in derivation.weights]
if not all(a == 0 for a in weights):
zero_count = sum([1 for w in weights if w == 0])
zero_counts[zero_count].append(weights)
best = zero_counts[max(zero_counts.keys())]
if len(best) > 1:
raise NotImplementedError("more than one suitable set of weights found, "
"don't know how to proceed")
weights = best[0]
break
if not weights:
raise Exception('the requested derivative cannot be performed with the available neighbors')
assert weights
if access._field.index_dimensions == 0:
return sum([access._field.__getitem__(point) * weight for point, weight in zip(stencil, weights)])
else:
total = access.get_shifted(*stencil[0]).at_index(*access.index) * weights[0]
for point, weight in zip(stencil[1:], weights[1:]):
addl = access.get_shifted(*point).at_index(*access.index) * weight
total += addl
return total
if term.args:
new_args = [discretize(a) for a in term.args]
return term.func(*new_args)
else:
return term
source = self.q.applyfunc(ps.fd.derivative.expand_diff_full)
source = source.applyfunc(discretize)
return [ps.Assignment(lhs, rhs) for lhs, rhs in zip(self.c.center_vector, sp.flatten(source)) if rhs]
def discrete_continuity(self, flux_field: ps.field.Field):
"""Return a list of assignments for the continuity equation, which includes the source term
Args:
flux_field: a staggered field from which the fluxes are taken
"""
assert ps.FieldType.is_staggered(flux_field)
neighbors = flux_field.staggered_stencil + [ps.stencil.inverse_direction_string(d)
for d in flux_field.staggered_stencil]
divergence = flux_field.staggered_vector_access(neighbors[0])
for d in neighbors[1:]:
divergence += flux_field.staggered_vector_access(d)
source = self.discrete_source()
source = {s.lhs: s.rhs for s in source}
return [ps.Assignment(lhs, (lhs - rhs + source[lhs]) if lhs in source else (lhs - rhs))
for lhs, rhs in zip(self.c.center_vector, divergence)]
def VOF(j: ps.field.Field, v: ps.field.Field, ρ: ps.field.Field):
"""Volume-of-fluid discretization of advection
Args:
j: the staggered field to write the fluxes to. Should have a D2Q9/D3Q27 stencil. Other stencils work too, but
incur a small error (of order v^2 for D2Q5/D3Q7 and v^3 for D3Q19).
v: the flow velocity field
ρ: the quantity to advect
"""
assert ps.FieldType.is_staggered(j)
fluxes = [[] for i in range(j.index_shape[0])]
v0 = v.center_vector
for d, neighbor in enumerate(j.staggered_stencil):
c = ps.stencil.direction_string_to_offset(neighbor)
v1 = v.neighbor_vector(c)
# going out
cond = sp.And(*[sp.Or(c[i] * v0[i] > 0, c[i] == 0) for i in range(len(v0))])
overlap1 = [1 - sp.Abs(v0[i]) for i in range(len(v0))]
overlap2 = [c[i] * v0[i] for i in range(len(v0))]
overlap = sp.Mul(*[(overlap1[i] if c[i] == 0 else overlap2[i]) for i in range(len(v0))])
fluxes[d].append(ρ.center_vector * overlap * sp.Piecewise((1, cond), (0, True)))
# coming in
cond = sp.And(*[sp.Or(c[i] * v1[i] < 0, c[i] == 0) for i in range(len(v1))])
overlap1 = [1 - sp.Abs(v1[i]) for i in range(len(v1))]
overlap2 = [v1[i] for i in range(len(v1))]
overlap = sp.Mul(*[(overlap1[i] if c[i] == 0 else overlap2[i]) for i in range(len(v1))])
sign = (c == 1).sum() % 2 * 2 - 1
fluxes[d].append(sign * ρ.neighbor_vector(c) * overlap * sp.Piecewise((1, cond), (0, True)))
for i, ff in enumerate(fluxes):
fluxes[i] = ff[0]
for f in ff[1:]:
fluxes[i] += f
assignments = []
for i, d in enumerate(j.staggered_stencil):
for lhs, rhs in zip(j.staggered_vector_access(d).values(), fluxes[i].values()):
assignments.append(ps.Assignment(lhs, rhs))
return assignments
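# A minimal usage sketch for VOF (hedged: not part of the original module, field names are
# illustrative). Advecting a 2D scalar needs a staggered flux field with 4 staggered
# directions, which selects the D2Q9 neighborhood.
def _vof_usage_sketch():
    import pystencils as ps
    j = ps.fields("j(4): double[2D]", field_type=ps.FieldType.STAGGERED_FLUX)
    v, rho = ps.fields("v(2), rho: double[2D]")
    # one assignment per staggered direction, each holding the net flux across that face
    return VOF(j, v, rho)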
from functools import lru_cache
from typing import Tuple
import sympy as sp
from pystencils.astnodes import LoopOverCoordinate
from pystencils.fd import Diff
from pystencils.field import Field
from pystencils.transformations import generic_visit
from .derivation import FiniteDifferenceStencilDerivation
from .derivative import diff_args
def fd_stencils_standard(indices, dx, fa):
order = len(indices)
assert all(i >= 0 for i in indices), "Can only discretize objects with (integer) subscripts"
if order == 1:
idx = indices[0]
return (fa.neighbor(idx, 1) - fa.neighbor(idx, -1)) / (2 * dx)
elif order == 2:
if indices[0] == indices[1]:
return (-2 * fa + fa.neighbor(indices[0], -1) + fa.neighbor(indices[0], +1)) / (dx ** 2)
else:
offsets = [(1, 1), (-1, 1), (1, -1), (-1, -1)]
return sum(o1 * o2 * fa.neighbor(indices[0], o1).neighbor(indices[1], o2)
for o1, o2 in offsets) / (4 * dx ** 2)
raise NotImplementedError("Supports only derivatives up to order 2")
def fd_stencils_isotropic(indices, dx, fa):
dim = fa.field.spatial_dimensions
if dim == 1:
return fd_stencils_standard(indices, dx, fa)
elif dim == 2:
order = len(indices)
if order == 1:
idx = indices[0]
assert 0 <= idx < 2
other_idx = 1 if indices[0] == 0 else 0
weights = {-1: sp.Rational(1, 12) / dx,
0: sp.Rational(1, 3) / dx,
1: sp.Rational(1, 12) / dx}
upper_terms = sum(fa.neighbor(idx, +1).neighbor(other_idx, off) * w for off, w in weights.items())
lower_terms = sum(fa.neighbor(idx, -1).neighbor(other_idx, off) * w for off, w in weights.items())
return upper_terms - lower_terms
elif order == 2:
if indices[0] == indices[1]:
idx = indices[0]
other_idx = 1 if idx == 0 else 0
diagonals = sp.Rational(1, 12) * sum(fa.neighbor(0, i).neighbor(1, j) for i in (-1, 1) for j in (-1, 1))
div_direction = sp.Rational(5, 6) * sum(fa.neighbor(idx, i) for i in (-1, 1))
other_direction = - sp.Rational(1, 6) * sum(fa.neighbor(other_idx, i) for i in (-1, 1))
center = - sp.Rational(5, 3) * fa
return (diagonals + div_direction + other_direction + center) / (dx ** 2)
else:
return fd_stencils_standard(indices, dx, fa)
raise NotImplementedError("Supports only derivatives up to order 2 for 1D and 2D setups")
def fd_stencils_forth_order_isotropic(indices, dx, fa):
order = len(indices)
if order != 1:
raise NotImplementedError("Forth order finite difference discretization is "
"currently only supported for first derivatives")
dim = indices[0]
if dim not in (0, 1):
raise NotImplementedError("Forth order finite difference discretization is only implemented for 2D")
stencils = forth_order_2d_derivation()
return stencils[dim].apply(fa) / dx
def discretize_spatial(expr, dx, stencil=fd_stencils_standard):
if isinstance(stencil, str):
if stencil == 'standard':
stencil = fd_stencils_standard
elif stencil == 'isotropic':
stencil = fd_stencils_isotropic
else:
raise ValueError("Unknown stencil. Supported 'standard' and 'isotropic'")
def visitor(e):
if isinstance(e, Diff):
arg, *indices = diff_args(e)
if not isinstance(arg, Field.Access):
raise ValueError("Only derivatives with field or field accesses as arguments can be discretized")
return stencil(indices, dx, arg)
else:
new_args = [discretize_spatial(a, dx, stencil) for a in e.args]
return e.func(*new_args) if new_args else e
return generic_visit(expr, visitor)
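# Hedged usage sketch (not part of the original module): discretize a 2D Laplacian with the
# standard second-order stencil; the names `f` and `dx` are illustrative.
def _discretize_spatial_sketch():
    import sympy as sp
    import pystencils as ps
    f = ps.fields("f: double[2D]")
    dx = sp.Symbol("dx")
    laplacian = Diff(Diff(f.center, 0), 0) + Diff(Diff(f.center, 1), 1)
    # yields the familiar 5-point stencil expression built from Field.Access neighbors
    return discretize_spatial(laplacian, dx, stencil='standard')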
def discretize_spatial_staggered(expr, dx, stencil=fd_stencils_standard):
def staggered_visitor(e, coordinate, sign):
if isinstance(e, Diff):
arg, *indices = diff_args(e)
if len(indices) != 1:
raise ValueError("Function supports only up to second derivatives")
if not isinstance(arg, Field.Access):
raise ValueError("Argument of inner derivative has to be field access")
target = indices[0]
if target == coordinate:
assert sign in (-1, 1)
return (arg.neighbor(coordinate, sign) - arg) / dx * sign
else:
return (stencil(indices, dx, arg.neighbor(coordinate, sign))
+ stencil(indices, dx, arg)) / 2
elif isinstance(e, Field.Access):
return (e.neighbor(coordinate, sign) + e) / 2
elif isinstance(e, sp.Symbol):
loop_idx = LoopOverCoordinate.is_loop_counter_symbol(e)
return e + sign / 2 if loop_idx == coordinate else e
else:
new_args = [staggered_visitor(a, coordinate, sign) for a in e.args]
return e.func(*new_args) if new_args else e
def visitor(e):
if isinstance(e, Diff):
arg, *indices = diff_args(e)
if isinstance(arg, Field.Access):
return stencil(indices, dx, arg)
else:
if not len(indices) == 1:
raise ValueError("This term is not support by the staggered discretization strategy")
target = indices[0]
return (staggered_visitor(arg, target, 1) - staggered_visitor(arg, target, -1)) / dx
else:
new_args = [visitor(a) for a in e.args]
return e.func(*new_args) if new_args else e
return generic_visit(expr, visitor)
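# Hedged sketch (not part of the original module): the staggered variant is needed when the
# innermost argument of the derivative is not a plain field access, e.g. a product. The
# factors are first averaged onto the staggered points half-way between cells.
def _staggered_discretization_sketch():
    import sympy as sp
    import pystencils as ps
    f, g = ps.fields("f, g: double[2D]")
    dx = sp.Symbol("dx")
    return discretize_spatial_staggered(Diff(f.center * g.center, 0), dx)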
# -------------------------------------- special stencils --------------------------------------------------------------
@lru_cache(maxsize=1)
def forth_order_2d_derivation() -> Tuple[FiniteDifferenceStencilDerivation.Result, ...]:
# Symmetry, isotropy and 4th-order conditions are not enough to fully specify the stencil;
# one weight has to be set explicitly to a somewhat arbitrary value
second_neighbor_weight = sp.Rational(1, 10)
second_neighbor_stencil = [(i, j)
for i in (-2, -1, 0, 1, 2)
for j in (-2, -1, 0, 1, 2)
]
x_diff = FiniteDifferenceStencilDerivation((0,), second_neighbor_stencil)
x_diff.set_weight((2, 0), second_neighbor_weight)
x_diff.assume_symmetric(0, anti_symmetric=True)
x_diff.assume_symmetric(1)
x_diff_stencil = x_diff.get_stencil(isotropic=True)
y_diff = FiniteDifferenceStencilDerivation((1,), second_neighbor_stencil)
y_diff.set_weight((0, 2), second_neighbor_weight)
y_diff.assume_symmetric(1, anti_symmetric=True)
y_diff.assume_symmetric(0)
y_diff_stencil = y_diff.get_stencil(isotropic=True)
return x_diff_stencil, y_diff_stencil
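# Hedged sketch (not part of the original module): the fourth-order scheme plugs into
# discretize_spatial by passing the discretization function itself instead of a string.
def _fourth_order_sketch():
    import sympy as sp
    import pystencils as ps
    f = ps.fields("f: double[2D]")
    return discretize_spatial(Diff(f.center, 0), sp.Symbol("dx"),
                              stencil=fd_stencils_forth_order_isotropic)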
import functools
import hashlib
import operator
import pickle
import re
from enum import Enum
from itertools import chain
from typing import List, Optional, Sequence, Set, Tuple, Union
import numpy as np
import sympy as sp
from sympy.core.cache import cacheit
import pystencils
from pystencils.alignedarray import aligned_empty
from pystencils.typing import StructType, TypedSymbol, BasicType, create_type
from pystencils.typing.typed_sympy import FieldShapeSymbol, FieldStrideSymbol
from pystencils.stencil import (
direction_string_to_offset, inverse_direction, offset_to_direction_string)
from pystencils.sympyextensions import is_integer_sequence
__all__ = ['Field', 'fields', 'FieldType']
class FieldType(Enum):
# generic fields
GENERIC = 0
# index fields are currently only used for boundary handling
# the coordinates are not the loop counters in that case, but are read from this index field
INDEXED = 1
# communication buffer, used for (un)packing data in communication.
BUFFER = 2
# unsafe fields may be accessed in an absolute fashion - the index depends on the data
# and thus may lead to out-of-bounds accesses
CUSTOM = 3
# staggered field
STAGGERED = 4
# staggered field that reverses sign when accessed via opposite direction
STAGGERED_FLUX = 5
@staticmethod
def is_generic(field):
assert isinstance(field, Field)
return field.field_type == FieldType.GENERIC
@staticmethod
def is_indexed(field):
assert isinstance(field, Field)
return field.field_type == FieldType.INDEXED
@staticmethod
def is_buffer(field):
assert isinstance(field, Field)
return field.field_type == FieldType.BUFFER
@staticmethod
def is_custom(field):
assert isinstance(field, Field)
return field.field_type == FieldType.CUSTOM
@staticmethod
def is_staggered(field):
assert isinstance(field, Field)
return field.field_type == FieldType.STAGGERED or field.field_type == FieldType.STAGGERED_FLUX
@staticmethod
def is_staggered_flux(field):
assert isinstance(field, Field)
return field.field_type == FieldType.STAGGERED_FLUX
class Field:
"""
With fields one can formulate stencil-like update rules on structured grids.
This Field class knows about the dimension, memory layout (strides) and optionally about the size of an array.
Creating Fields:
The preferred method to create fields is the `fields` function.
Alternatively one can use one of the static functions `Field.create_generic`, `Field.create_from_numpy_array`
and `Field.create_fixed_size`. Don't instantiate the Field directly!
Fields can be created with known or unknown shapes:
1. If you want to create a kernel with fixed loop sizes i.e. the shape of the array is already known.
This is usually the case if just-in-time compilation directly from Python is done.
(see `Field.create_from_numpy_array`)
2. create a more general kernel that works for variable array sizes. This can be used to create kernels
beforehand for a library. (see `Field.create_generic`)
Dimensions and Indexing:
A field has spatial and index dimensions, where the spatial dimensions come first.
The interpretation is that the field has multiple cells in (usually) two or three dimensional space which are
looped over. Additionally N values are stored per cell. In this case spatial_dimensions is two or three,
and index_dimensions equals N. If you want to store a matrix on each point in a two dimensional grid, there
are four dimensions, two spatial and two index dimensions: ``len(arr.shape) == spatial_dims + index_dims``
The shape of the index dimension does not have to be specified. Just use the 'index_dimensions' parameter.
However, it is good practice to define the shape, since out of bounds accesses can be directly detected in this
case. The shape can be passed with the 'index_shape' parameter of the field creation functions.
When accessing (indexing) a field the result is a `Field.Access` which is derived from sympy Symbol.
First specify the spatial offsets in [], then in case index_dimension>0 the indices in ()
e.g. ``f[-1,0,0](7)``
Staggered Fields:
Staggered fields are used to store a value on a second grid shifted by half a cell with respect to the usual
grid.
The first index dimension is used to specify the position on the staggered grid (e.g. 0 means half-way to the
eastern neighbor, 1 is half-way to the northern neighbor, etc.), while additional indices can be used to store
multiple values at each position.
Example using no index dimensions:
>>> a = np.zeros([10, 10])
>>> f = Field.create_from_numpy_array("f", a, index_dimensions=0)
>>> jacobi = (f[-1,0] + f[1,0] + f[0,-1] + f[0,1]) / 4
Examples for index dimensions to create LB field and implement stream pull:
>>> from pystencils import Assignment
>>> stencil = np.array([[0,0], [0,1], [0,-1]])
>>> src, dst = fields("src(3), dst(3) : double[2D]")
>>> assignments = [Assignment(dst[0,0](i), src[-offset](i)) for i, offset in enumerate(stencil)]
"""
@staticmethod
def create_generic(field_name, spatial_dimensions, dtype=np.float64, index_dimensions=0, layout='numpy',
index_shape=None, field_type=FieldType.GENERIC) -> 'Field':
"""
Creates a generic field where the field size is not fixed i.e. can be called with arrays of different sizes
Args:
field_name: symbolic name for the field
dtype: numpy data type of the array the kernel is called with later
spatial_dimensions: see documentation of Field
index_dimensions: see documentation of Field
layout: tuple specifying the loop ordering of the spatial dimensions e.g. (2, 1, 0) means that
the outer loop loops over dimension 2, the second outer over dimension 1, and the inner loop
over dimension 0. Also allowed: the strings 'numpy' (0,1,..d) or 'reverse_numpy' (d, ..., 1, 0)
index_shape: optional shape of the index dimensions i.e. maximum values allowed for each index dimension,
has to be a list or tuple
field_type: besides the normal GENERIC fields, there are INDEXED fields that store indices of the domain
that should be iterated over, BUFFER fields that are used to generate communication
packing/unpacking kernels, and STAGGERED fields, which store values half-way to the next
cell
"""
if index_shape is not None:
assert index_dimensions == 0 or index_dimensions == len(index_shape)
index_dimensions = len(index_shape)
if isinstance(layout, str):
layout = spatial_layout_string_to_tuple(layout, dim=spatial_dimensions)
total_dimensions = spatial_dimensions + index_dimensions
if index_shape is None or len(index_shape) == 0:
shape = tuple([FieldShapeSymbol([field_name], i) for i in range(total_dimensions)])
else:
shape = tuple([FieldShapeSymbol([field_name], i) for i in range(spatial_dimensions)] + list(index_shape))
strides = tuple([FieldStrideSymbol(field_name, i) for i in range(total_dimensions)])
np_data_type = np.dtype(dtype)
if np_data_type.fields is not None:
if index_dimensions != 0:
raise ValueError("Structured arrays/fields are not allowed to have an index dimension")
shape += (1,)
strides += (1,)
if field_type == FieldType.STAGGERED and index_dimensions == 0:
raise ValueError("A staggered field needs at least one index dimension")
return Field(field_name, field_type, dtype, layout, shape, strides)
@staticmethod
def create_from_numpy_array(field_name: str, array: np.ndarray, index_dimensions: int = 0,
field_type=FieldType.GENERIC) -> 'Field':
"""Creates a field based on the layout, data type, and shape of a given numpy array.
Kernels created for this kind of field can only be called with arrays of the same layout, shape and type.
Args:
field_name: symbolic name for the field
array: numpy array
index_dimensions: see documentation of Field
field_type: kind of field
"""
spatial_dimensions = len(array.shape) - index_dimensions
if spatial_dimensions < 1:
raise ValueError("Too many index dimensions. At least one spatial dimension required")
full_layout = get_layout_of_array(array)
spatial_layout = tuple([i for i in full_layout if i < spatial_dimensions])
assert len(spatial_layout) == spatial_dimensions
strides = tuple([s // np.dtype(array.dtype).itemsize for s in array.strides])
shape = tuple(int(s) for s in array.shape)
numpy_dtype = np.dtype(array.dtype)
if numpy_dtype.fields is not None:
if index_dimensions != 0:
raise ValueError("Structured arrays/fields are not allowed to have an index dimension")
shape += (1,)
strides += (1,)
if field_type == FieldType.STAGGERED and index_dimensions == 0:
raise ValueError("A staggered field needs at least one index dimension")
return Field(field_name, field_type, array.dtype, spatial_layout, shape, strides)
@staticmethod
def create_fixed_size(field_name: str, shape: Tuple[int, ...], index_dimensions: int = 0,
dtype=np.float64, layout: str = 'numpy', strides: Optional[Sequence[int]] = None,
field_type=FieldType.GENERIC) -> 'Field':
"""
Creates a field with fixed sizes i.e. can be called only with arrays of the same size and layout
Args:
field_name: symbolic name for the field
shape: overall shape of the array
index_dimensions: how many of the trailing dimensions are interpreted as index (as opposed to spatial)
dtype: numpy data type of the array the kernel is called with later
layout: full layout of array, not only spatial dimensions
strides: strides in bytes or None to automatically compute them from shape (assuming no padding)
field_type: kind of field
"""
spatial_dimensions = len(shape) - index_dimensions
assert spatial_dimensions >= 1
if isinstance(layout, str):
layout = layout_string_to_tuple(layout, spatial_dimensions + index_dimensions)
shape = tuple(int(s) for s in shape)
if strides is None:
strides = compute_strides(shape, layout)
else:
assert len(strides) == len(shape)
strides = tuple([s // np.dtype(dtype).itemsize for s in strides])
numpy_dtype = np.dtype(dtype)
if numpy_dtype.fields is not None:
if index_dimensions != 0:
raise ValueError("Structured arrays/fields are not allowed to have an index dimension")
shape += (1,)
strides += (1,)
if field_type == FieldType.STAGGERED and index_dimensions == 0:
raise ValueError("A staggered field needs at least one index dimension")
spatial_layout = list(layout)
for i in range(spatial_dimensions, len(layout)):
spatial_layout.remove(i)
return Field(field_name, field_type, dtype, tuple(spatial_layout), shape, strides)
def __init__(self, field_name, field_type, dtype, layout, shape, strides):
"""Do not use directly. Use static create* methods"""
self._field_name = field_name
assert isinstance(field_type, FieldType)
assert len(shape) == len(strides)
self.field_type = field_type
self._dtype = create_type(dtype)
self._layout = normalize_layout(layout)
self.shape = shape
self.strides = strides
self.latex_name: Optional[str] = None
self.coordinate_origin = sp.Matrix([0] * self.spatial_dimensions)
self.coordinate_transform = sp.eye(self.spatial_dimensions)
if field_type == FieldType.STAGGERED:
assert self.staggered_stencil
def new_field_with_different_name(self, new_name):
if self.has_fixed_shape:
return Field(new_name, self.field_type, self._dtype, self._layout, self.shape, self.strides)
else:
return Field(new_name, self.field_type, self.dtype, self.layout, self.shape, self.strides)
@property
def spatial_dimensions(self) -> int:
return len(self._layout)
@property
def index_dimensions(self) -> int:
return len(self.shape) - len(self._layout)
@property
def ndim(self) -> int:
return len(self.shape)
def values_per_cell(self) -> int:
return functools.reduce(operator.mul, self.index_shape, 1)
@property
def layout(self):
return self._layout
@property
def name(self) -> str:
return self._field_name
@property
def spatial_shape(self) -> Tuple[int, ...]:
return self.shape[:self.spatial_dimensions]
@property
def has_fixed_shape(self):
return is_integer_sequence(self.shape)
@property
def index_shape(self):
return self.shape[self.spatial_dimensions:]
@property
def has_fixed_index_shape(self):
return is_integer_sequence(self.index_shape)
@property
def spatial_strides(self):
return self.strides[:self.spatial_dimensions]
@property
def index_strides(self):
return self.strides[self.spatial_dimensions:]
@property
def dtype(self):
return self._dtype
@property
def itemsize(self):
return self.dtype.numpy_dtype.itemsize
def __repr__(self):
if any(isinstance(s, sp.Symbol) for s in self.spatial_shape):
spatial_shape_str = f'{self.spatial_dimensions}d'
else:
spatial_shape_str = ','.join(str(i) for i in self.spatial_shape)
index_shape_str = ','.join(str(i) for i in self.index_shape)
if self.index_shape:
return f'{self._field_name}({index_shape_str}): {self.dtype}[{spatial_shape_str}]'
else:
return f'{self._field_name}: {self.dtype}[{spatial_shape_str}]'
def __str__(self):
return self.name
def neighbor(self, coord_id, offset):
offset_list = [0] * self.spatial_dimensions
offset_list[coord_id] = offset
return Field.Access(self, tuple(offset_list))
def neighbors(self, stencil):
return [self.__getitem__(s) for s in stencil]
@property
def center_vector(self):
index_shape = self.index_shape
if len(index_shape) == 0:
return sp.Matrix([self.center])
elif len(index_shape) == 1:
return sp.Matrix([self(i) for i in range(index_shape[0])])
elif len(index_shape) == 2:
return sp.Matrix([[self(i, j) for j in range(index_shape[1])] for i in range(index_shape[0])])
elif len(index_shape) == 3:
return sp.Array([[[self(i, j, k) for k in range(index_shape[2])]
for j in range(index_shape[1])] for i in range(index_shape[0])])
else:
raise NotImplementedError("center_vector is not implemented for more than 3 index dimensions")
@property
def center(self):
center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center)
def neighbor_vector(self, offset):
"""Like neighbor, but returns the entire vector/tensor stored at offset."""
if self.spatial_dimensions == 2 and len(offset) == 3:
assert offset[2] == 0
offset = offset[:2]
if self.index_dimensions == 0:
return sp.Matrix([self.__getitem__(offset)])
elif self.index_dimensions == 1:
return sp.Matrix([self.__getitem__(offset)(i) for i in range(self.index_shape[0])])
elif self.index_dimensions == 2:
return sp.Matrix([[self.__getitem__(offset)(i, k) for k in range(self.index_shape[1])]
for i in range(self.index_shape[0])])
else:
raise NotImplementedError("neighbor_vector is not implemented for more than 2 index dimensions")
def __getitem__(self, offset):
if type(offset) is np.ndarray:
offset = tuple(offset)
if type(offset) is str:
offset = tuple(direction_string_to_offset(offset, self.spatial_dimensions))
if type(offset) is not tuple:
offset = (offset,)
if len(offset) != self.spatial_dimensions:
raise ValueError(f"Wrong number of spatial indices: Got {len(offset)}, expected {self.spatial_dimensions}")
return Field.Access(self, offset)
def absolute_access(self, offset, index):
assert FieldType.is_custom(self)
return Field.Access(self, offset, index, is_absolute_access=True)
def staggered_access(self, offset, index=None):
"""If this field is a staggered field, it can be accessed using half-integer offsets.
For example, an offset of ``(0, sp.Rational(1,2))`` or ``"E"`` corresponds to the staggered point to the east
of the cell center, i.e. half-way to the eastern-next cell.
If the field stores more than one value per staggered point (e.g. a vector or a tensor), the index (integer or
tuple of integers) refers to which of these values to access.
"""
assert FieldType.is_staggered(self)
offset_orig = offset
if type(offset) is np.ndarray:
offset = tuple(offset)
if type(offset) is str:
offset = tuple(direction_string_to_offset(offset, self.spatial_dimensions))
offset = tuple([o * sp.Rational(1, 2) for o in offset])
if len(offset) != self.spatial_dimensions:
raise ValueError(f"Wrong number of spatial indices: Got {len(offset)}, expected {self.spatial_dimensions}")
prefactor = 1
neighbor_vec = [0] * len(offset)
for i in range(self.spatial_dimensions):
if (offset[i] + sp.Rational(1, 2)).is_Integer:
neighbor_vec[i] = sp.sign(offset[i])
neighbor = offset_to_direction_string(neighbor_vec)
if neighbor not in self.staggered_stencil:
neighbor_vec = inverse_direction(neighbor_vec)
neighbor = offset_to_direction_string(neighbor_vec)
if FieldType.is_staggered_flux(self):
prefactor = -1
if neighbor not in self.staggered_stencil:
raise ValueError(f"{offset_orig} is not a valid neighbor for the {self.staggered_stencil_name} stencil")
offset = tuple(sp.Matrix(offset) - sp.Rational(1, 2) * sp.Matrix(neighbor_vec))
idx = self.staggered_stencil.index(neighbor)
if self.index_dimensions == 1: # this field stores a scalar value at each staggered position
if index is not None:
raise ValueError("Cannot specify an index for a scalar staggered field")
return prefactor * Field.Access(self, offset, (idx,))
else: # this field stores a vector or tensor at each staggered position
if index is None:
raise ValueError(f"Wrong number of indices: Got 0, expected {self.index_dimensions - 1}")
if type(index) is np.ndarray:
index = tuple(index)
if type(index) is not tuple:
index = (index,)
if self.index_dimensions != len(index) + 1:
raise ValueError(f"Wrong number of indices: Got {len(index)}, expected {self.index_dimensions - 1}")
return prefactor * Field.Access(self, offset, (idx, *index))
def staggered_vector_access(self, offset):
"""Like staggered_access, but returns the entire vector/tensor stored at offset."""
assert FieldType.is_staggered(self)
if self.index_dimensions == 1:
return sp.Matrix([self.staggered_access(offset)])
elif self.index_dimensions == 2:
return sp.Matrix([self.staggered_access(offset, i) for i in range(self.index_shape[1])])
elif self.index_dimensions == 3:
return sp.Matrix([[self.staggered_access(offset, (i, k)) for k in range(self.index_shape[2])]
for i in range(self.index_shape[1])])
else:
raise NotImplementedError("staggered_vector_access is not implemented for more than 3 index dimensions")
@property
def staggered_stencil(self):
assert FieldType.is_staggered(self)
stencils = {
2: {
2: ["W", "S"], # D2Q5
4: ["W", "S", "SW", "NW"] # D2Q9
},
3: {
3: ["W", "S", "B"], # D3Q7
7: ["W", "S", "B", "BSW", "TSW", "BNW", "TNW"], # D3Q15
9: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS"], # D3Q19
13: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS", "BSW", "TSW", "BNW", "TNW"] # D3Q27
}
}
if self.index_shape[0] not in stencils[self.spatial_dimensions]:
raise ValueError(f"No known stencil has {self.index_shape[0]} staggered points")
return stencils[self.spatial_dimensions][self.index_shape[0]]
@property
def staggered_stencil_name(self):
assert FieldType.is_staggered(self)
return f"D{self.spatial_dimensions}Q{self.index_shape[0] * 2 + 1}"
def __call__(self, *args, **kwargs):
center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center)(*args, **kwargs)
def hashable_contents(self):
return (self._layout,
self.shape,
self.strides,
self.field_type,
self._field_name,
self.latex_name,
self._dtype)
def __hash__(self):
return hash(self.hashable_contents())
def __eq__(self, other):
if not isinstance(other, Field):
return False
return self.hashable_contents() == other.hashable_contents()
@property
def physical_coordinates(self):
if hasattr(self.coordinate_transform, '__call__'):
return self.coordinate_transform(self.coordinate_origin + pystencils.x_vector(self.spatial_dimensions))
else:
return self.coordinate_transform @ (self.coordinate_origin + pystencils.x_vector(self.spatial_dimensions))
@property
def physical_coordinates_staggered(self):
return self.coordinate_transform @ \
(self.coordinate_origin + pystencils.x_staggered_vector(self.spatial_dimensions))
def index_to_physical(self, index_coordinates: sp.Matrix, staggered=False):
if staggered:
index_coordinates = sp.Matrix([0.5] * len(self.coordinate_origin)) + index_coordinates
if hasattr(self.coordinate_transform, '__call__'):
return self.coordinate_transform(self.coordinate_origin + index_coordinates)
else:
return self.coordinate_transform @ (self.coordinate_origin + index_coordinates)
def physical_to_index(self, physical_coordinates: sp.Matrix, staggered=False):
if hasattr(self.coordinate_transform, '__call__'):
if hasattr(self.coordinate_transform, 'inv'):
return self.coordinate_transform.inv()(physical_coordinates) - self.coordinate_origin
else:
idx = sp.Matrix(sp.symbols(f'index_coordinates:{self.ndim}', real=True))
rtn = sp.solve(self.index_to_physical(idx) - physical_coordinates, idx)
assert rtn, f'Could not find inverse of coordinate_transform: {self.index_to_physical(idx)}'
return rtn
else:
rtn = self.coordinate_transform.inv() @ physical_coordinates - self.coordinate_origin
if staggered:
rtn = sp.Matrix([i - 0.5 for i in rtn])
return rtn
def set_coordinate_origin_to_field_center(self):
self.coordinate_origin = -sp.Matrix([i / 2 for i in self.spatial_shape])
# noinspection PyAttributeOutsideInit,PyUnresolvedReferences
class Access(TypedSymbol):
"""Class representing a relative access into a `Field`.
This class behaves like a normal sympy Symbol; it is actually derived from it. One can build up
sympy expressions using field accesses, solve for them, etc.
Examples:
>>> vector_field_2d = fields("v(2): double[2D]") # create a 2D vector field
>>> northern_neighbor_y_component = vector_field_2d[0, 1](1)
>>> northern_neighbor_y_component
v_N^1
>>> central_y_component = vector_field_2d(1)
>>> central_y_component
v_C^1
>>> central_y_component.get_shifted(1, 0) # move the existing access
v_E^1
>>> central_y_component.at_index(0) # change component
v_C^0
"""
_iterable = False # see https://i10git.cs.fau.de/pycodegen/pystencils/-/merge_requests/166#note_10680
def __new__(cls, name, *args, **kwargs):
obj = Field.Access.__xnew_cached_(cls, name, *args, **kwargs)
return obj
def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False, dtype=None):
field_name = field.name
offsets_and_index = (*offsets, *idx) if idx is not None else offsets
constant_offsets = not any([isinstance(o, sp.Basic) and not o.is_Integer for o in offsets_and_index])
if not idx:
idx = tuple([0] * field.index_dimensions)
if constant_offsets:
offset_name = offset_to_direction_string(offsets)
if field.index_dimensions == 0:
superscript = None
elif field.index_dimensions == 1:
superscript = str(idx[0])
else:
idx_str = ",".join([str(e) for e in idx])
superscript = idx_str
if field.has_fixed_index_shape and not isinstance(field.dtype, StructType):
for i, bound in zip(idx, field.index_shape):
if i >= bound:
raise ValueError("Field index out of bounds")
else:
offset_name = hashlib.md5(pickle.dumps(offsets_and_index)).hexdigest()[:12]
superscript = None
symbol_name = f"{field_name}_{offset_name}"
if superscript is not None:
symbol_name += "^" + superscript
if dtype:
obj = super(Field.Access, self).__xnew__(self, symbol_name, dtype)
else:
obj = super(Field.Access, self).__xnew__(self, symbol_name, field.dtype)
obj._field = field
obj._offsets = []
for o in offsets:
if isinstance(o, sp.Basic):
obj._offsets.append(o)
else:
obj._offsets.append(int(o))
obj._offsets = tuple(sp.sympify(obj._offsets))
obj._offsetName = offset_name
obj._superscript = superscript
obj._index = idx
obj._indirect_addressing_fields = set()
for e in chain(obj._offsets, obj._index):
if isinstance(e, sp.Basic):
obj._indirect_addressing_fields.update(a.field for a in e.atoms(Field.Access))
obj._is_absolute_access = is_absolute_access
return obj
def __getnewargs__(self):
return self.field, self.offsets, self.index, self.is_absolute_access, self.dtype
def __getnewargs_ex__(self):
return (self.field, self.offsets, self.index, self.is_absolute_access, self.dtype), {}
# noinspection SpellCheckingInspection
__xnew__ = staticmethod(__new_stage2__)
# noinspection SpellCheckingInspection
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
def __call__(self, *idx):
if self._index != tuple([0] * self.field.index_dimensions):
raise ValueError("Indexing an already indexed Field.Access")
idx = tuple(idx)
if self.field.index_dimensions == 0 and idx == (0,):
idx = ()
if len(idx) != self.field.index_dimensions:
raise ValueError(f"Wrong number of indices: Got {len(idx)}, expected {self.field.index_dimensions}")
if len(idx) == 1 and isinstance(idx[0], str):
dtype = BasicType(self.field.dtype.numpy_dtype[idx[0]])
return Field.Access(self.field, self._offsets, idx,
is_absolute_access=self.is_absolute_access, dtype=dtype)
else:
return Field.Access(self.field, self._offsets, idx,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def __getitem__(self, *idx):
return self.__call__(*idx)
@property
def field(self) -> 'Field':
"""Field that the Access points to"""
return self._field
@property
def offsets(self) -> Tuple:
"""Spatial offset as tuple"""
return self._offsets
@property
def required_ghost_layers(self) -> int:
"""Largest spatial distance that is accessed."""
return int(np.max(np.abs(self._offsets)))
@property
def nr_of_coordinates(self):
return len(self._offsets)
@property
def offset_name(self) -> str:
"""Spatial offset as string, East-West for x, North-South for y and Top-Bottom for z coordinate.
Example:
>>> f = fields("f: double[2D]")
>>> f[1, 1].offset_name # north-east
'NE'
"""
return self._offsetName
@property
def index(self):
"""Value of index coordinates as tuple."""
return self._index
def neighbor(self, coord_id: int, offset: int) -> 'Field.Access':
"""Returns a new Access with changed spatial coordinates.
Args:
coord_id: index of the coordinate to change (0 for x, 1 for y,...)
offset: incremental change of this coordinate
Example:
>>> f = fields('f: [2D]')
>>> f[0,0].neighbor(coord_id=1, offset=-1)
f_S
"""
offset_list = list(self.offsets)
offset_list[coord_id] += offset
return Field.Access(self.field, tuple(offset_list), self.index,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def get_shifted(self, *shift) -> 'Field.Access':
"""Returns a new Access with changed spatial coordinates
Example:
>>> f = fields("f: [2D]")
>>> f[0,0].get_shifted(1, 1)
f_NE
"""
return Field.Access(self.field,
tuple(a + b for a, b in zip(shift, self.offsets)),
self.index,
is_absolute_access=self.is_absolute_access,
dtype=self.dtype)
def at_index(self, *idx_tuple) -> 'Field.Access':
"""Returns new Access with changed index.
Example:
>>> f = fields("f(9): [2D]")
>>> f(0).at_index(8)
f_C^8
"""
return Field.Access(self.field, self.offsets, idx_tuple,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def _eval_subs(self, old, new):
return Field.Access(self.field,
tuple(sp.sympify(a).subs(old, new) for a in self.offsets),
tuple(sp.sympify(a).subs(old, new) for a in self.index),
is_absolute_access=self.is_absolute_access,
dtype=self.dtype)
@property
def is_absolute_access(self) -> bool:
"""Indicates if a field access is relative to the loop counters (this is the default) or absolute"""
return self._is_absolute_access
@property
def indirect_addressing_fields(self) -> Set['Field']:
"""Returns a set of fields that the access depends on.
e.g. f[index_field[1, 0]], the outer access to f depends on index_field
"""
return self._indirect_addressing_fields
def _hashable_content(self):
super_class_contents = super(Field.Access, self)._hashable_content()
return (super_class_contents, self._field.hashable_contents(), *self._index,
*self._offsets, self._is_absolute_access)
def _staggered_offset(self, offsets, index):
assert FieldType.is_staggered(self._field)
neighbor = self._field.staggered_stencil[index]
neighbor = direction_string_to_offset(neighbor, self._field.spatial_dimensions)
return [(o + sp.Rational(int(neighbor[i]), 2)) for i, o in enumerate(offsets)]
def _latex(self, _):
n = self._field.latex_name if self._field.latex_name else self._field.name
offset_str = ",".join([sp.latex(o) for o in self.offsets])
if FieldType.is_staggered(self._field):
offset_str = ",".join([sp.latex(self._staggered_offset(self.offsets, self.index[0])[i])
for i in range(len(self.offsets))])
if self.is_absolute_access:
offset_str = f"\\mathbf{offset_str}"
elif self.field.spatial_dimensions > 1:
offset_str = f"({offset_str})"
if FieldType.is_staggered(self._field):
if self.index and self.field.index_dimensions > 1:
return f"{{{n}}}_{{{offset_str}}}^{{{self.index[1:] if len(self.index) > 2 else self.index[1]}}}"
else:
return f"{{{n}}}_{{{offset_str}}}"
else:
if self.index and self.field.index_dimensions > 0:
return f"{{{n}}}_{{{offset_str}}}^{{{self.index if len(self.index) > 1 else self.index[0]}}}"
else:
return f"{{{n}}}_{{{offset_str}}}"
def __str__(self):
n = self._field.latex_name if self._field.latex_name else self._field.name
offset_str = ",".join([sp.latex(o) for o in self.offsets])
if FieldType.is_staggered(self._field):
offset_str = ",".join([sp.latex(self._staggered_offset(self.offsets, self.index[0])[i])
for i in range(len(self.offsets))])
if self.is_absolute_access:
offset_str = f"[abs]{offset_str}"
if FieldType.is_staggered(self._field):
if self.index and self.field.index_dimensions > 1:
return f"{n}[{offset_str}]({self.index[1:] if len(self.index) > 2 else self.index[1]})"
else:
return f"{n}[{offset_str}]"
else:
if self.index and self.field.index_dimensions > 0:
return f"{n}[{offset_str}]({self.index if len(self.index) > 1 else self.index[0]})"
else:
return f"{n}[{offset_str}]"
def fields(description=None, index_dimensions=0, layout=None,
field_type=FieldType.GENERIC, **kwargs) -> Union[Field, List[Field]]:
"""Creates pystencils fields from a string description.
Examples:
Create a 2D scalar and vector field:
>>> s, v = fields("s, v(2): double[2D]")
>>> assert s.spatial_dimensions == 2 and s.index_dimensions == 0
>>> assert (v.spatial_dimensions, v.index_dimensions, v.index_shape) == (2, 1, (2,))
Create an integer field of shape (10, 20):
>>> f = fields("f : int32[10, 20]")
>>> f.has_fixed_shape, f.shape
(True, (10, 20))
Numpy arrays can be used as template for shape and data type of field:
>>> arr_s, arr_v = np.zeros([20, 20]), np.zeros([20, 20, 2])
>>> s, v = fields("s, v(2)", s=arr_s, v=arr_v)
>>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
>>> assert v.index_shape == (2,)
Format string can be left out, field names are taken from keyword arguments.
>>> fields(f1=arr_s, f2=arr_s)
[f1: double[20,20], f2: double[20,20]]
The keyword names ``index_dimensions`` and ``layout`` have special meaning; don't use them for field names
>>> f = fields(f=arr_v, index_dimensions=1)
>>> assert f.index_dimensions == 1
>>> f = fields("pdfs(19) : float32[3D]", layout='fzyx')
>>> f.layout
(2, 1, 0)
"""
result = []
if description:
field_descriptions, dtype, shape = _parse_description(description)
layout = 'numpy' if layout is None else layout
for field_name, idx_shape in field_descriptions:
if field_name in kwargs:
arr = kwargs[field_name]
idx_shape_of_arr = () if not len(idx_shape) else arr.shape[-len(idx_shape):]
assert idx_shape_of_arr == idx_shape
f = Field.create_from_numpy_array(field_name, kwargs[field_name], index_dimensions=len(idx_shape),
field_type=field_type)
elif isinstance(shape, tuple):
f = Field.create_fixed_size(field_name, shape + idx_shape, dtype=dtype,
index_dimensions=len(idx_shape), layout=layout, field_type=field_type)
elif isinstance(shape, int):
f = Field.create_generic(field_name, spatial_dimensions=shape, dtype=dtype,
index_shape=idx_shape, layout=layout, field_type=field_type)
elif shape is None:
f = Field.create_generic(field_name, spatial_dimensions=2, dtype=dtype,
index_shape=idx_shape, layout=layout, field_type=field_type)
else:
assert False
result.append(f)
else:
assert layout is None, "Layout cannot be specified when creating a Field from a numpy array"
for field_name, arr in kwargs.items():
result.append(Field.create_from_numpy_array(field_name, arr, index_dimensions=index_dimensions,
field_type=field_type))
if len(result) == 0:
raise ValueError("Could not parse field description")
elif len(result) == 1:
return result[0]
else:
return result
def get_layout_from_strides(strides: Sequence[int], index_dimension_ids: Optional[List[int]] = None):
index_dimension_ids = [] if index_dimension_ids is None else index_dimension_ids
coordinates = list(range(len(strides)))
relevant_strides = [stride for i, stride in enumerate(strides) if i not in index_dimension_ids]
result = [x for (y, x) in sorted(zip(relevant_strides, coordinates), key=lambda pair: pair[0], reverse=True)]
return normalize_layout(result)
def get_layout_of_array(arr: np.ndarray, index_dimension_ids: Optional[List[int]] = None):
""" Returns a list indicating the memory layout (linearization order) of the numpy array.
Examples:
>>> get_layout_of_array(np.zeros([3,3,3]))
(0, 1, 2)
In this example the loop over the zeroth coordinate should be the outermost loop,
followed by the first and second. Elements arr[x,y,0] and arr[x,y,1] are adjacent in memory.
Normally constructed numpy arrays have this order, however by stride tricks or other frameworks, arrays
with different memory layout can be created.
The index_dimension_ids parameter specifies which coordinates are index dimensions; these are
ignored when determining the spatial layout.
"""
index_dimension_ids = [] if index_dimension_ids is None else index_dimension_ids
return get_layout_from_strides(arr.strides, index_dimension_ids)
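# Hedged example (not part of the original module): transposing an array reverses its
# layout, because the stride ordering flips.
def _layout_sketch():
    arr = np.zeros([4, 8])
    assert get_layout_of_array(arr) == (0, 1)    # C order: first coordinate is outermost
    assert get_layout_of_array(arr.T) == (1, 0)  # transposed view: order is reversed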
def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0, **kwargs):
"""Creates numpy array with given memory layout.
Args:
shape: shape of the resulting array
layout: layout as tuple, where the coordinates are ordered from slow to fast
alignment: number of bytes to align the beginning and the innermost coordinate to, or False for no alignment
byte_offset: only used when alignment is specified, align not beginning but address at this offset
mostly used to align first inner cell, not ghost cells
Example:
>>> res = create_numpy_array_with_layout(shape=(2, 3, 4, 5), layout=(3, 2, 0, 1))
>>> res.shape
(2, 3, 4, 5)
>>> get_layout_of_array(res)
(3, 2, 0, 1)
"""
assert set(layout) == set(range(len(shape))), "Wrong layout descriptor"
cur_layout = list(range(len(shape)))
swaps = []
for i in range(len(layout)):
if cur_layout[i] != layout[i]:
index_to_swap_with = cur_layout.index(layout[i])
swaps.append((i, index_to_swap_with))
cur_layout[i], cur_layout[index_to_swap_with] = cur_layout[index_to_swap_with], cur_layout[i]
assert tuple(cur_layout) == tuple(layout)
shape = list(shape)
for a, b in swaps:
shape[a], shape[b] = shape[b], shape[a]
if not alignment:
res = np.empty(shape, order='c', **kwargs)
else:
res = aligned_empty(shape, alignment, byte_offset=byte_offset, **kwargs)
for a, b in reversed(swaps):
res = res.swapaxes(a, b)
return res
def spatial_layout_string_to_tuple(layout_str: str, dim: int) -> Tuple[int, ...]:
if dim <= 0:
raise ValueError("Dimensionality must be positive")
layout_str = layout_str.lower()
if layout_str in ('fzyx', 'zyxf', 'soa', 'aos'):
if dim > 3:
raise ValueError(f"Invalid spatial dimensionality for layout descriptor {layout_str}: May be at most 3.")
return tuple(reversed(range(dim)))
if layout_str in ('f', 'reverse_numpy'):
return tuple(reversed(range(dim)))
elif layout_str in ('c', 'numpy'):
return tuple(range(dim))
raise ValueError("Unknown layout descriptor " + layout_str)
def layout_string_to_tuple(layout_str, dim):
if dim <= 0:
raise ValueError("Dimensionality must be positive")
layout_str = layout_str.lower()
if layout_str == 'fzyx' or layout_str == 'soa':
if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim)))
elif layout_str == 'zyxf' or layout_str == 'aos':
if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim - 1))) + (dim - 1,)
elif layout_str == 'f' or layout_str == 'reverse_numpy':
return tuple(reversed(range(dim)))
elif layout_str == 'c' or layout_str == 'numpy':
return tuple(range(dim))
raise ValueError("Unknown layout descriptor " + layout_str)
def normalize_layout(layout):
"""Takes a layout tuple and subtracts the minimum from all entries"""
min_entry = min(layout)
return tuple(i - min_entry for i in layout)
def compute_strides(shape, layout):
"""
Computes strides assuming no padding exists
Args:
shape: shape (size) of array
layout: layout specification as tuple
Returns:
strides in elements, not in bytes
"""
dim = len(shape)
assert len(layout) == dim
assert len(set(layout)) == dim
strides = [0] * dim
product = 1
for j in reversed(layout):
strides[j] = product
product *= shape[j]
return tuple(strides)
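# Hedged example (not part of the original module): the innermost layout coordinate gets
# stride 1 and each outer stride is the running product of the inner extents.
def _compute_strides_sketch():
    assert compute_strides((2, 3, 4), (0, 1, 2)) == (12, 4, 1)  # C layout
    assert compute_strides((2, 3, 4), (2, 1, 0)) == (1, 2, 6)   # Fortran-like layout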
# ---------------------------------------- Parsing of string in fields() function --------------------------------------
field_description_regex = re.compile(r"""
\s* # ignore leading white spaces
(\w+) # identifier is a sequence of alphanumeric characters, is stored in first group
(?: # optional index specification e.g. (1, 4, 2)
\s*
\(
([^\)]+) # read everything up to closing bracket
\)
\s*
)?
\s*,?\s* # ignore trailing white spaces and comma
""", re.VERBOSE)
type_description_regex = re.compile(r"""
\s*
(\w+)? # optional dtype
\s*
\[
([^\]]+)
\]
\s*
""", re.VERBOSE | re.IGNORECASE)
def _parse_part1(d):
result = field_description_regex.match(d)
while result:
name, index_str = result.group(1), result.group(2)
index = tuple(int(e) for e in index_str.split(",")) if index_str else ()
yield name, index
d = d[result.end():]
result = field_description_regex.match(d)
def _parse_description(description):
def parse_part2(d):
result = type_description_regex.match(d)
if result:
data_type_str, size_info = result.group(1), result.group(2).strip().lower()
if data_type_str is None:
data_type_str = 'float64'
data_type_str = data_type_str.lower().strip()
if not data_type_str:
data_type_str = 'float64'
if size_info.endswith('d'):
size_info = int(size_info[:-1])
else:
size_info = tuple(int(e) for e in size_info.split(","))
return data_type_str, size_info
else:
raise ValueError("Could not parse field description")
if ':' in description:
field_description, field_info = description.split(':')
else:
field_description, field_info = description, 'float64[2D]'
fields_info = [e for e in _parse_part1(field_description)]
if not field_info:
raise ValueError("Could not parse field description")
data_type, size = parse_part2(field_info)
return fields_info, data_type, size
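# Hedged example (not part of the original module) of the grammar handled above: a
# comma-separated list of names with optional index shapes, then dtype and size.
def _parse_description_sketch():
    infos, dtype, size = _parse_description("s, v(2): float32[10, 20]")
    assert infos == [('s', ()), ('v', (2,))]
    assert dtype == 'float32'
    assert size == (10, 20)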
import sympy as sp
from pystencils.typing import PointerType
class DivFunc(sp.Function):
"""
DivFunc represents an explicit division operation, since sympy normally represents division as
multiplication by a power with exponent -1. Note: args[0] (``divisor``) is the numerator and
args[1] (``dividend``) the denominator.
"""
is_Atom = True
is_real = True
def __new__(cls, *args, **kwargs):
if len(args) != 2:
raise ValueError(f'{cls} takes only 2 arguments, instead {len(args)} received!')
divisor, dividend, *other_args = args
return sp.Function.__new__(cls, divisor, dividend, *other_args, **kwargs)
def _eval_evalf(self, *args, **kwargs):
return self.divisor.evalf() / self.dividend.evalf()
@property
def divisor(self):
return self.args[0]
@property
def dividend(self):
return self.args[1]
class AddressOf(sp.Function):
"""
AddressOf is the '&' operation in C. It gets the address of an lvalue.
"""
is_Atom = True
def __new__(cls, arg):
obj = sp.Function.__new__(cls, arg)
return obj
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
@property
def is_commutative(self):
return self.args[0].is_commutative
@property
def dtype(self):
if hasattr(self.args[0], 'dtype'):
return PointerType(self.args[0].dtype, restrict=True)
else:
raise ValueError(f'pystencils supports only non-void pointers. Current address_of type: {self.args[0]}')
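# Hedged sketch (not part of the original module): both helpers wrap sympy expressions;
# AddressOf reports a restrict pointer to the dtype of its argument.
def _functions_sketch():
    from pystencils.typing import TypedSymbol
    x = TypedSymbol("x", "float64")
    y = TypedSymbol("y", "float64")
    assert isinstance(AddressOf(x).dtype, PointerType)
    return DivFunc(x, y)   # evaluates to x / y under evalf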
from pystencils.gpu.gpu_array_handler import GPUArrayHandler, GPUNotAvailableHandler
from pystencils.gpu.gpujit import make_python_function
from pystencils.gpu.kernelcreation import create_cuda_kernel, created_indexed_cuda_kernel
from .indexing import AbstractIndexing, BlockIndexing, LineIndexing
__all__ = ['GPUArrayHandler', 'GPUNotAvailableHandler',
'create_cuda_kernel', 'created_indexed_cuda_kernel', 'make_python_function',
'AbstractIndexing', 'BlockIndexing', 'LineIndexing']
try:
import cupy as cp
import cupyx as cpx
except ImportError:
cp = None
cpx = None
import numpy as np
class GPUArrayHandler:
def __init__(self, device_number):
self._device_number = device_number
def zeros(self, shape, dtype=np.float64, order='C'):
with cp.cuda.Device(self._device_number):
return cp.zeros(shape=shape, dtype=dtype, order=order)
def ones(self, shape, dtype=np.float64, order='C'):
with cp.cuda.Device(self._device_number):
return cp.ones(shape=shape, dtype=dtype, order=order)
def empty(self, shape, dtype=np.float64, order='C'):
with cp.cuda.Device(self._device_number):
return cp.empty(shape=shape, dtype=dtype, order=order)
def to_gpu(self, numpy_array):
swaps = _get_index_swaps(numpy_array)
if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
with cp.cuda.Device(self._device_number):
gpu_array = cp.asarray(numpy_array.base)
for a, b in reversed(swaps):
gpu_array = gpu_array.swapaxes(a, b)
return gpu_array
else:
return cp.asarray(numpy_array)
def upload(self, array, numpy_array):
assert self._device_number == array.device.id
if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
with cp.cuda.Device(self._device_number):
array.base.set(numpy_array.base)
else:
with cp.cuda.Device(self._device_number):
array.set(numpy_array)
def download(self, array, numpy_array):
assert self._device_number == array.device.id
if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
with cp.cuda.Device(self._device_number):
numpy_array.base[:] = array.base.get()
else:
with cp.cuda.Device(self._device_number):
numpy_array[:] = array.get()
def randn(self, shape, dtype=np.float64):
with cp.cuda.Device(self._device_number):
return cp.random.randn(*shape, dtype=dtype)
@staticmethod
def pinned_numpy_array(layout, shape, dtype):
assert set(layout) == set(range(len(shape))), "Wrong layout descriptor"
cur_layout = list(range(len(shape)))
swaps = []
for i in range(len(layout)):
if cur_layout[i] != layout[i]:
index_to_swap_with = cur_layout.index(layout[i])
swaps.append((i, index_to_swap_with))
cur_layout[i], cur_layout[index_to_swap_with] = cur_layout[index_to_swap_with], cur_layout[i]
assert tuple(cur_layout) == tuple(layout)
shape = list(shape)
for a, b in swaps:
shape[a], shape[b] = shape[b], shape[a]
res = cpx.empty_pinned(tuple(shape), order='c', dtype=dtype)
for a, b in reversed(swaps):
res = res.swapaxes(a, b)
return res
from_numpy = to_gpu
class GPUNotAvailableHandler:
def __getattribute__(self, name):
raise NotImplementedError("Unable to utilise cupy! Please make sure cupy works correctly in your setup!")
def _get_index_swaps(array):
swaps = []
if array.base is not None and isinstance(array.base, np.ndarray):
for stride in array.base.strides:
index_base = array.base.strides.index(stride)
index_view = array.strides.index(stride)
if index_base != index_view and (index_view, index_base) not in swaps:
swaps.append((index_base, index_view))
return swaps
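# Hedged round-trip sketch (not part of the original module; requires a working cupy
# install and assumes device 0 is the current device):
def _gpu_array_handler_sketch():
    handler = GPUArrayHandler(device_number=0)
    host = np.random.rand(16, 16)
    device_arr = handler.to_gpu(host)   # cupy array on device 0
    result = np.empty_like(host)
    handler.download(device_arr, result)
    assert np.allclose(host, result)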
import numpy as np
from pystencils.backends.cbackend import get_headers
from pystencils.backends.cuda_backend import generate_cuda
from pystencils.typing import StructType
from pystencils.field import FieldType
from pystencils.include import get_pystencils_include_path
from pystencils.kernel_wrapper import KernelWrapper
from pystencils.typing import BasicType, FieldPointerSymbol
USE_FAST_MATH = True
def get_cubic_interpolation_include_paths():
from os.path import join, dirname
return [join(dirname(__file__), "CubicInterpolationCUDA", "code"),
join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
def make_python_function(kernel_function_node, argument_dict=None, custom_backend=None):
"""
Creates a kernel function from an abstract syntax tree which
was created e.g. by :func:`pystencils.gpu.create_cuda_kernel`
or :func:`pystencils.gpu.created_indexed_cuda_kernel`
Args:
kernel_function_node: the abstract syntax tree
argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
returned kernel functor.
custom_backend: use own custom printer for code generation
Returns:
compiled kernel as Python function
"""
import cupy as cp
if argument_dict is None:
argument_dict = {}
headers = get_headers(kernel_function_node)
if cp.cuda.runtime.is_hip:
headers.add('"gpu_defines.h"')
for field in kernel_function_node.fields_accessed:
if isinstance(field.dtype, BasicType) and field.dtype.is_half():
headers.add('<hip/hip_fp16.h>')
else:
headers.update({'"gpu_defines.h"', '<cstdint>'})
for field in kernel_function_node.fields_accessed:
if isinstance(field.dtype, BasicType) and field.dtype.is_half():
headers.add('<cuda_fp16.h>')
header_list = sorted(headers)
includes = "\n".join([f"#include {include_file}" for include_file in header_list])
code = includes + "\n"
code += "#define FUNC_PREFIX __global__\n"
code += "#define RESTRICT __restrict__\n\n"
code += 'extern "C" {\n%s\n}\n' % str(generate_cuda(kernel_function_node, custom_backend=custom_backend))
options = ["-w", "-std=c++11"]
if USE_FAST_MATH:
options.append("-use_fast_math")
options.append("-I" + get_pystencils_include_path())
func = cp.RawKernel(code, kernel_function_node.function_name, options=tuple(options), backend="nvrtc", jitify=True)
parameters = kernel_function_node.get_parameters()
cache = {}
cache_values = []
def wrapper(**kwargs):
key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
for k, v in kwargs.items()))
try:
args, block_and_thread_numbers = cache[key]
device = set(a.device.id for a in args if type(a) is cp.ndarray)
assert len(device) == 1, "All arrays used by a kernel need to be allocated on the same device"
with cp.cuda.Device(device.pop()):
func(block_and_thread_numbers['grid'], block_and_thread_numbers['block'], args)
except KeyError:
full_arguments = argument_dict.copy()
full_arguments.update(kwargs)
shape = _check_arguments(parameters, full_arguments)
indexing = kernel_function_node.indexing
block_and_thread_numbers = indexing.call_parameters(shape)
block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
block_and_thread_numbers['grid'] = tuple(int(i) for i in block_and_thread_numbers['grid'])
args = tuple(_build_numpy_argument_list(parameters, full_arguments))
cache[key] = (args, block_and_thread_numbers)
cache_values.append(kwargs) # keep objects alive such that ids remain unique
device = set(a.device.id for a in args if type(a) is cp.ndarray)
assert len(device) == 1, "All arrays used by a kernel need to be allocated on the same device"
with cp.cuda.Device(device.pop()):
func(block_and_thread_numbers['grid'], block_and_thread_numbers['block'], args)
# useful for debugging:
# cp.cuda.runtime.deviceSynchronize()
# cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
ast = kernel_function_node
parameters = kernel_function_node.get_parameters()
wrapper = KernelWrapper(wrapper, parameters, ast)
wrapper.num_regs = func.num_regs
return wrapper
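# Hedged end-to-end sketch (not part of the original module; requires cupy and a CUDA
# device): the high-level pystencils API reaches this wrapper via ast.compile().
def _gpu_kernel_sketch():
    import cupy as cp
    import pystencils as ps
    src, dst = ps.fields("src, dst: double[2D]")
    jacobi = ps.Assignment(dst.center,
                           0.25 * (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]))
    kernel = ps.create_kernel(jacobi, target=ps.Target.GPU).compile()
    a = cp.random.rand(32, 32)
    b = cp.empty_like(a)
    kernel(src=a, dst=b)   # launches the compiled __global__ function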
def _build_numpy_argument_list(parameters, argument_dict):
argument_dict = {k: v for k, v in argument_dict.items()}
result = []
for param in parameters:
if param.is_field_pointer:
array = argument_dict[param.field_name]
actual_type = array.dtype
expected_type = param.fields[0].dtype.numpy_dtype
if expected_type != actual_type:
raise ValueError(f"Data type mismatch for field {param.field_name}. "
f"Expected {expected_type} got {actual_type}.")
result.append(array)
elif param.is_field_stride:
cast_to_dtype = param.symbol.dtype.numpy_dtype.type
array = argument_dict[param.field_name]
stride = cast_to_dtype(array.strides[param.symbol.coordinate] // array.dtype.itemsize)
result.append(stride)
elif param.is_field_shape:
cast_to_dtype = param.symbol.dtype.numpy_dtype.type
array = argument_dict[param.field_name]
result.append(cast_to_dtype(array.shape[param.symbol.coordinate]))
else:
expected_type = param.symbol.dtype.numpy_dtype
result.append(expected_type.type(argument_dict[param.symbol.name]))
assert len(result) == len(parameters)
return result
def _check_arguments(parameter_specification, argument_dict):
"""
Checks if parameters passed to kernel match the description in the AST function node.
If they do not match, a ValueError is raised; on success, the array shape that determines the CUDA blocks and threads is returned
"""
argument_dict = {k: v for k, v in argument_dict.items()}
array_shapes = set()
index_arr_shapes = set()
for param in parameter_specification:
if isinstance(param.symbol, FieldPointerSymbol):
symbolic_field = param.fields[0]
try:
field_arr = argument_dict[symbolic_field.name]
except KeyError:
raise KeyError(f"Missing field parameter for kernel call {str(symbolic_field)}")
if symbolic_field.has_fixed_shape:
symbolic_field_shape = tuple(int(i) for i in symbolic_field.shape)
if isinstance(symbolic_field.dtype, StructType):
symbolic_field_shape = symbolic_field_shape[:-1]
if symbolic_field_shape != field_arr.shape:
raise ValueError(f"Passed array {symbolic_field.name} has shape {str(field_arr.shape)} "
f"which does not match expected shape {str(symbolic_field.shape)}")
if symbolic_field.has_fixed_shape:
symbolic_field_strides = tuple(int(i) * field_arr.dtype.itemsize for i in symbolic_field.strides)
if isinstance(symbolic_field.dtype, StructType):
symbolic_field_strides = symbolic_field_strides[:-1]
if symbolic_field_strides != field_arr.strides:
raise ValueError(f"Passed array {symbolic_field.name} has strides {str(field_arr.strides)} "
f"which does not match expected strides {str(symbolic_field_strides)}")
if FieldType.is_indexed(symbolic_field):
index_arr_shapes.add(field_arr.shape[:symbolic_field.spatial_dimensions])
elif FieldType.is_generic(symbolic_field):
array_shapes.add(field_arr.shape[:symbolic_field.spatial_dimensions])
if len(array_shapes) > 1:
raise ValueError(f"All passed arrays have to have the same size {str(array_shapes)}")
if len(index_arr_shapes) > 1:
raise ValueError(f"All passed index arrays have to have the same size {str(array_shapes)}")
if len(index_arr_shapes) > 0:
return list(index_arr_shapes)[0]
else:
return list(array_shapes)[0]