Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 296 additions and 155 deletions
from typing import Union
import sympy as sp import sympy as sp
import pystencils.astnodes as ast import pystencils.astnodes as ast
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.config import CreateKernelConfig from pystencils.config import CreateKernelConfig
from pystencils.enums import Target, Backend from pystencils.enums import Target, Backend
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
...@@ -13,12 +10,13 @@ from pystencils.typing.transformations import add_types ...@@ -13,12 +10,13 @@ from pystencils.typing.transformations import add_types
from pystencils.field import Field, FieldType from pystencils.field import Field, FieldType
from pystencils.node_collection import NodeCollection from pystencils.node_collection import NodeCollection
from pystencils.transformations import ( from pystencils.transformations import (
filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering, make_loop_over_domain, filtered_tree_iteration, iterate_loops_by_depth, get_base_buffer_index, get_optimal_loop_ordering,
make_loop_over_domain, add_outer_loop_over_indexed_elements,
move_constants_before_loop, parse_base_pointer_info, resolve_buffer_accesses, move_constants_before_loop, parse_base_pointer_info, resolve_buffer_accesses,
resolve_field_accesses, split_inner_loop) resolve_field_accesses, split_inner_loop)
def create_kernel(assignments: Union[AssignmentCollection, NodeCollection], def create_kernel(assignments: NodeCollection,
config: CreateKernelConfig) -> KernelFunction: config: CreateKernelConfig) -> KernelFunction:
"""Creates an abstract syntax tree for a kernel function, by taking a list of update rules. """Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
...@@ -56,6 +54,8 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection], ...@@ -56,6 +54,8 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection],
loop_order = get_optimal_loop_ordering(fields_without_buffers) loop_order = get_optimal_loop_ordering(fields_without_buffers)
loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice, loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
ghost_layers=ghost_layers, loop_order=loop_order) ghost_layers=ghost_layers, loop_order=loop_order)
loop_node = add_outer_loop_over_indexed_elements(loop_node)
ast_node = KernelFunction(loop_node, Target.CPU, Backend.C, compile_function=make_python_function, ast_node = KernelFunction(loop_node, Target.CPU, Backend.C, compile_function=make_python_function,
ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments) ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments)
...@@ -76,7 +76,9 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection], ...@@ -76,7 +76,9 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection],
typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups] typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups]
split_inner_loop(ast_node, typed_split_groups) split_inner_loop(ast_node, typed_split_groups)
base_pointer_spec = [['spatialInner0'], ['spatialInner1']] if len(loop_order) >= 2 else [['spatialInner0']] base_pointer_spec = config.base_pointer_specification
if base_pointer_spec is None:
base_pointer_spec = []
base_pointer_info = {field.name: parse_base_pointer_info(base_pointer_spec, loop_order, base_pointer_info = {field.name: parse_base_pointer_info(base_pointer_spec, loop_order,
field.spatial_dimensions, field.index_dimensions) field.spatial_dimensions, field.index_dimensions)
for field in fields_without_buffers} for field in fields_without_buffers}
...@@ -94,7 +96,7 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection], ...@@ -94,7 +96,7 @@ def create_kernel(assignments: Union[AssignmentCollection, NodeCollection],
return ast_node return ast_node
def create_indexed_kernel(assignments: Union[AssignmentCollection, NodeCollection], def create_indexed_kernel(assignments: NodeCollection,
config: CreateKernelConfig) -> KernelFunction: config: CreateKernelConfig) -> KernelFunction:
""" """
Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
...@@ -115,21 +117,24 @@ def create_indexed_kernel(assignments: Union[AssignmentCollection, NodeCollectio ...@@ -115,21 +117,24 @@ def create_indexed_kernel(assignments: Union[AssignmentCollection, NodeCollectio
fields_written = assignments.bound_fields fields_written = assignments.bound_fields
fields_read = assignments.rhs_fields fields_read = assignments.rhs_fields
all_fields = fields_read.union(fields_written)
# extract the index fields based on the name. The original index field might have been modified
index_fields = [idx_field for idx_field in index_fields if idx_field.name in [f.name for f in all_fields]]
non_index_fields = [f for f in all_fields if f not in index_fields]
spatial_coordinates = {f.spatial_dimensions for f in non_index_fields}
assert len(spatial_coordinates) == 1, f"Non-index fields do not have the same number of spatial coordinates " \
f"Non index fields are {non_index_fields}, spatial coordinates are " \
f"{spatial_coordinates}"
spatial_coordinates = list(spatial_coordinates)[0]
assignments = assignments.all_assignments assignments = assignments.all_assignments
assignments = add_types(assignments, config) assignments = add_types(assignments, config)
all_fields = fields_read.union(fields_written)
for index_field in index_fields: for index_field in index_fields:
index_field.field_type = FieldType.INDEXED index_field.field_type = FieldType.INDEXED
assert FieldType.is_indexed(index_field) assert FieldType.is_indexed(index_field)
assert index_field.spatial_dimensions == 1, "Index fields have to be 1D" assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"
non_index_fields = [f for f in all_fields if f not in index_fields]
spatial_coordinates = {f.spatial_dimensions for f in non_index_fields}
assert len(spatial_coordinates) == 1, "Non-index fields do not have the same number of spatial coordinates"
spatial_coordinates = list(spatial_coordinates)[0]
def get_coordinate_symbol_assignment(name): def get_coordinate_symbol_assignment(name):
for idx_field in index_fields: for idx_field in index_fields:
assert isinstance(idx_field.dtype, StructType), "Index fields have to have a struct data type" assert isinstance(idx_field.dtype, StructType), "Index fields have to have a struct data type"
...@@ -211,3 +216,18 @@ def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None, ass ...@@ -211,3 +216,18 @@ def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None, ass
if collapse: if collapse:
prefix += f" collapse({collapse})" prefix += f" collapse({collapse})"
loop_to_parallelize.prefix_lines.append(prefix) loop_to_parallelize.prefix_lines.append(prefix)
def add_pragmas(ast_node, pragma_lines, nesting_depth=-1):
"""Prepends given pragma lines to all loops of specified nesting depth.
Args:
ast_node: pystencils abstract syntax tree
pragma_lines: Iterable of strings containing the pragma lines
nesting_depth: Nesting depth of the loops the pragmas should be applied to.
Outermost loop has depth 0.
A depth of -1 indicates the innermost loops.
"""
loop_nodes = iterate_loops_by_depth(ast_node, nesting_depth)
for n in loop_nodes:
n.prefix_lines += list(pragma_lines)
...@@ -123,12 +123,14 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best', ...@@ -123,12 +123,14 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
"to differently typed floating point fields") "to differently typed floating point fields")
float_size = field_float_dtypes.pop().numpy_dtype.itemsize float_size = field_float_dtypes.pop().numpy_dtype.itemsize
assert float_size in (8, 4) assert float_size in (8, 4)
default_float_type = 'double' if float_size == 8 else 'float' default_float_type = 'float64' if float_size == 8 else 'float32'
vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set) vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
kernel_ast.instruction_set = vector_is kernel_ast.instruction_set = vector_is
if nontemporal and 'cachelineZero' in vector_is:
kernel_ast.use_all_written_field_sizes = True
strided = 'storeS' in vector_is and 'loadS' in vector_is strided = 'storeS' in vector_is and 'loadS' in vector_is
keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU'] keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned and 'storeA' in vector_is else 'storeU']
vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal, vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,
strided, keep_loop_stop, assume_sufficient_line_padding, strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type) default_float_type)
...@@ -138,12 +140,15 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -138,12 +140,15 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
strided, keep_loop_stop, assume_sufficient_line_padding, strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type): default_float_type):
"""Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type.""" """Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type."""
vector_width = ast_node.instruction_set['width'] all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment))
all_loops = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment)
inner_loops = [loop for loop in all_loops if loop.is_innermost_loop] inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops} zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}
vector_is = ast_node.instruction_set
assert vector_is, "The ast needs to hold information about the instruction_set for the vectorisation"
vector_width = vector_is['width']
vector_int_width = vector_is['intwidth']
for loop_node in inner_loops: for loop_node in inner_loops:
loop_range = loop_node.stop - loop_node.start loop_range = loop_node.stop - loop_node.start
...@@ -156,6 +161,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -156,6 +161,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
loop_node.stop = new_stop loop_node.stop = new_stop
else: else:
cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
# TODO cut_loop calls deepcopy on the loop_node. This is bad as documented in cut_loop
loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
if isinstance(loop, ast.LoopOverCoordinate)] if isinstance(loop, ast.LoopOverCoordinate)]
assert len(loop_nodes) in (0, 1, 2) # 2 for main and tail loop, 1 if loop range divisible by vector width assert len(loop_nodes) in (0, 1, 2) # 2 for main and tail loop, 1 if loop range divisible by vector width
...@@ -171,8 +177,15 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -171,8 +177,15 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
for indexed in loop_node.atoms(sp.Indexed): for indexed in loop_node.atoms(sp.Indexed):
base, index = indexed.args base, index = indexed.args
if loop_counter_symbol in index.atoms(sp.Symbol): if loop_counter_symbol in index.atoms(sp.Symbol):
if 'loadA' not in vector_is and 'storeA' not in vector_is and 'maskStoreA' not in vector_is:
# don't need to generate the alignment check when there are no aligned load/store instructions
aligned_access = False
else:
if not isinstance(vector_width, int):
raise NotImplementedError('Access alignment cannot be statically determined for sizeless '
'vector ISAs')
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0
loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms() loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms()
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) == 0
stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index) stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index)
if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()): if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()):
successful = False successful = False
...@@ -201,7 +214,6 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -201,7 +214,6 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
loop_node.step = vector_width loop_node.step = vector_width
loop_node.subs(substitutions) loop_node.subs(substitutions)
vector_int_width = ast_node.instruction_set['intwidth']
arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width)) arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width))
arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)), arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
VectorType(loop_counter_symbol.dtype, vector_int_width)) VectorType(loop_counter_symbol.dtype, vector_int_width))
...@@ -220,7 +232,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -220,7 +232,7 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
substitutions.update({s[0]: s[1] for s in zip(rng.result_symbols, new_result_symbols)}) substitutions.update({s[0]: s[1] for s in zip(rng.result_symbols, new_result_symbols)})
rng._symbols_defined = set(new_result_symbols) rng._symbols_defined = set(new_result_symbols)
fast_subs(loop_node, substitutions, skip=lambda e: isinstance(e, RNGBase)) fast_subs(loop_node, substitutions, skip=lambda e: isinstance(e, RNGBase))
insert_vector_casts(loop_node, ast_node.instruction_set, default_float_type) insert_vector_casts(loop_node, vector_is, default_float_type)
def mask_conditionals(loop_body): def mask_conditionals(loop_body):
...@@ -252,24 +264,47 @@ def mask_conditionals(loop_body): ...@@ -252,24 +264,47 @@ def mask_conditionals(loop_body):
def insert_vector_casts(ast_node, instruction_set, default_float_type='double'): def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
"""Inserts necessary casts from scalar values to vector values.""" """Inserts necessary casts from scalar values to vector values."""
handled_functions = (sp.Add, sp.Mul, vec_any, vec_all, DivFunc, sp.UnevaluatedExpr, sp.Abs) handled_functions = (sp.Add, sp.Mul, vec_any, vec_all, DivFunc, sp.Abs)
def is_scalar(expr) -> bool:
if hasattr(expr, "dtype"):
if type(expr.dtype) is VectorType:
return False
# Else branch: If expr is a CastFunc, then whether the expression
# is scalar is determined by the argument (remember: vector casts
# are not inserted yet). Therefore, we must recurse into the args of
# expr below. Otherwise, this expression is atomic and in that case
# it is assumed to be scalar below.
if isinstance(expr, ast.ResolvedFieldAccess):
# expr.field is not in expr.args
return is_scalar(expr.field)
elif isinstance(expr, (vec_any, vec_all)):
return True
def visit_expr(expr, default_type='double'): # TODO Vectorization Revamp: get rid of default_type if not hasattr(expr, "args"):
return True
return all(is_scalar(arg) for arg in expr.args)
# TODO Vectorization Revamp: get rid of default_type
def visit_expr(expr, default_type='double', force_vectorize=False):
if isinstance(expr, VectorMemoryAccess): if isinstance(expr, VectorMemoryAccess):
return VectorMemoryAccess(*expr.args[0:4], visit_expr(expr.args[4], default_type), *expr.args[5:]) return VectorMemoryAccess(*expr.args[0:4], visit_expr(expr.args[4], default_type, force_vectorize),
*expr.args[5:])
elif isinstance(expr, CastFunc): elif isinstance(expr, CastFunc):
cast_type = expr.args[1] cast_type = expr.args[1]
arg = visit_expr(expr.args[0]) arg = visit_expr(expr.args[0], default_type, force_vectorize)
assert cast_type in [BasicType('float32'), BasicType('float64')],\ assert cast_type in [BasicType('float32'), BasicType('float64')], \
f'Vectorization cannot vectorize type {cast_type}' f'Vectorization cannot vectorize type {cast_type}'
return expr.func(arg, VectorType(cast_type, instruction_set['width'])) return expr.func(arg, VectorType(cast_type, instruction_set['width']))
elif expr.func is sp.Abs and 'abs' not in instruction_set: elif expr.func is sp.Abs and 'abs' not in instruction_set:
new_arg = visit_expr(expr.args[0], default_type) new_arg = visit_expr(expr.args[0], default_type, force_vectorize)
base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is VectorMemoryAccess \ base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is VectorMemoryAccess \
else get_type_of_expression(expr.args[0]) else get_type_of_expression(expr.args[0])
pw = sp.Piecewise((-new_arg, new_arg < CastFunc(0, base_type.numpy_dtype)), pw = sp.Piecewise((-new_arg, new_arg < CastFunc(0, base_type.numpy_dtype)),
(new_arg, True)) (new_arg, True))
return visit_expr(pw, default_type) return visit_expr(pw, default_type, force_vectorize)
elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction): elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
if expr.func is sp.Mul and expr.args[0] == -1: if expr.func is sp.Mul and expr.args[0] == -1:
# special treatment for the unary minus: make sure that the -1 has the same type as the argument # special treatment for the unary minus: make sure that the -1 has the same type as the argument
...@@ -284,7 +319,7 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'): ...@@ -284,7 +319,7 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
if dtype is np.float32: if dtype is np.float32:
default_type = 'float' default_type = 'float'
expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:]) expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
new_args = [visit_expr(a, default_type) for a in expr.args] new_args = [visit_expr(a, default_type, force_vectorize) for a in expr.args]
arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args] arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
if not any(type(t) is VectorType for t in arg_types): if not any(type(t) is VectorType for t in arg_types):
return expr return expr
...@@ -294,12 +329,32 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'): ...@@ -294,12 +329,32 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
CastFunc(a, target_type) if t != target_type and not isinstance(a, VectorMemoryAccess) else a CastFunc(a, target_type) if t != target_type and not isinstance(a, VectorMemoryAccess) else a
for a, t in zip(new_args, arg_types)] for a, t in zip(new_args, arg_types)]
return expr.func(*casted_args) return expr.func(*casted_args)
elif expr.func is sp.UnevaluatedExpr:
assert expr.args[0].is_Pow or expr.args[0].is_Mul, "UnevaluatedExpr only implemented holding Mul or Pow"
# TODO this is only because cut_loop evaluates the multiplications again due to deepcopy. All this should
# TODO be fixed for real at some point.
if expr.args[0].is_Pow:
base = expr.args[0].base
exp = expr.args[0].exp
expr = sp.UnevaluatedExpr(sp.Mul(*([base] * +exp), evaluate=False))
new_args = [visit_expr(a, default_type, force_vectorize) for a in expr.args[0].args]
arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
target_type = collate_types(arg_types)
if not any(type(t) is VectorType for t in arg_types):
target_type = VectorType(target_type, instruction_set['width'])
casted_args = [
CastFunc(a, target_type) if t != target_type and not isinstance(a, VectorMemoryAccess) else a
for a, t in zip(new_args, arg_types)]
return expr.func(expr.args[0].func(*casted_args, evaluate=False))
elif expr.func is sp.Pow: elif expr.func is sp.Pow:
new_arg = visit_expr(expr.args[0], default_type) new_arg = visit_expr(expr.args[0], default_type, force_vectorize)
return expr.func(new_arg, expr.args[1]) return expr.func(new_arg, expr.args[1])
elif expr.func == sp.Piecewise: elif expr.func == sp.Piecewise:
new_results = [visit_expr(a[0], default_type) for a in expr.args] new_results = [visit_expr(a[0], default_type, force_vectorize) for a in expr.args]
new_conditions = [visit_expr(a[1], default_type) for a in expr.args] new_conditions = [visit_expr(a[1], default_type, force_vectorize) for a in expr.args]
types_of_results = [get_type_of_expression(a) for a in new_results] types_of_results = [get_type_of_expression(a) for a in new_results]
types_of_conditions = [get_type_of_expression(a) for a in new_conditions] types_of_conditions = [get_type_of_expression(a) for a in new_conditions]
...@@ -318,7 +373,14 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'): ...@@ -318,7 +373,14 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
for a, t in zip(new_conditions, types_of_conditions)] for a, t in zip(new_conditions, types_of_conditions)]
return sp.Piecewise(*[(r, c) for r, c in zip(casted_results, casted_conditions)]) return sp.Piecewise(*[(r, c) for r, c in zip(casted_results, casted_conditions)])
elif isinstance(expr, (sp.Number, TypedSymbol, BooleanAtom)): elif isinstance(expr, TypedSymbol):
if force_vectorize:
expr_type = get_type_of_expression(expr)
if type(expr_type) is not VectorType:
vector_type = VectorType(expr_type, instruction_set['width'])
return CastFunc(expr, vector_type)
return expr
elif isinstance(expr, (sp.Number, BooleanAtom)):
return expr return expr
else: else:
raise NotImplementedError(f'Due to defensive programming we handle only specific expressions.\n' raise NotImplementedError(f'Due to defensive programming we handle only specific expressions.\n'
...@@ -334,11 +396,18 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'): ...@@ -334,11 +396,18 @@ def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
# continue # continue
subs_expr = fast_subs(assignment.rhs, substitution_dict, subs_expr = fast_subs(assignment.rhs, substitution_dict,
skip=lambda e: isinstance(e, ast.ResolvedFieldAccess)) skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
assignment.rhs = visit_expr(subs_expr, default_type)
rhs_type = get_type_of_expression(assignment.rhs) # If either side contains a vectorized subexpression, both sides
# must be fully vectorized.
lhs_scalar = is_scalar(assignment.lhs)
rhs_scalar = is_scalar(subs_expr)
assignment.rhs = visit_expr(subs_expr, default_type, force_vectorize=not (lhs_scalar and rhs_scalar))
if isinstance(assignment.lhs, TypedSymbol): if isinstance(assignment.lhs, TypedSymbol):
lhs_type = assignment.lhs.dtype if lhs_scalar and not rhs_scalar:
if type(rhs_type) is VectorType and type(lhs_type) is not VectorType: lhs_type = get_type_of_expression(assignment.lhs)
rhs_type = get_type_of_expression(assignment.rhs)
new_lhs_type = VectorType(lhs_type, rhs_type.width) new_lhs_type = VectorType(lhs_type, rhs_type.width)
new_lhs = TypedSymbol(assignment.lhs.name, new_lhs_type) new_lhs = TypedSymbol(assignment.lhs.name, new_lhs_type)
substitution_dict[assignment.lhs] = new_lhs substitution_dict[assignment.lhs] = new_lhs
......
...@@ -23,7 +23,8 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -23,7 +23,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_layout: str = 'SoA', default_layout: str = 'SoA',
default_target: Target = Target.CPU, default_target: Target = Target.CPU,
parallel: bool = False, parallel: bool = False,
default_ghost_layers: int = 1) -> DataHandling: default_ghost_layers: int = 1,
device_number: Union[int, None] = None) -> DataHandling:
"""Creates a data handling instance. """Creates a data handling instance.
Args: Args:
...@@ -34,6 +35,9 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -34,6 +35,9 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_target: `Target` default_target: `Target`
parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
default_ghost_layers: default number of ghost layers if not overwritten in 'add_array' default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
device_number: If `default_target` is set to 'GPU' and `parallel` is False, a device number should be
specified. If none is given, the device with the largest amount of memory is used. If multiple
devices have the same amount of memory, the one with the lower number is used
""" """
if isinstance(default_target, str): if isinstance(default_target, str):
new_target = Target[default_target.upper()] new_target = Target[default_target.upper()]
...@@ -69,7 +73,8 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -69,7 +73,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity=periodicity, periodicity=periodicity,
default_target=default_target, default_target=default_target,
default_layout=default_layout, default_layout=default_layout,
default_ghost_layers=default_ghost_layers) default_ghost_layers=default_ghost_layers,
device_number=device_number)
__all__ = ['create_data_handling'] __all__ = ['create_data_handling']
...@@ -115,7 +115,7 @@ class ParallelBlock(Block): ...@@ -115,7 +115,7 @@ class ParallelBlock(Block):
result = wlb.field.toArray(result, with_ghost_layers=self._gls) result = wlb.field.toArray(result, with_ghost_layers=self._gls)
result = self._normalize_array_shape(result) result = self._normalize_array_shape(result)
elif 'GpuField' in type_name: elif 'GpuField' in type_name:
result = wlb.cuda.toGpuArray(result, with_ghost_layers=self._gls) result = wlb.gpu.toGpuArray(result, with_ghost_layers=self._gls)
result = self._normalize_array_shape(result) result = self._normalize_array_shape(result)
return result return result
......
...@@ -331,6 +331,7 @@ class DataHandling(ABC): ...@@ -331,6 +331,7 @@ class DataHandling(ABC):
b[array_name][(Ellipsis, *value_idx)].fill(val) b[array_name][(Ellipsis, *value_idx)].fill(val)
else: else:
b[array_name].fill(val) b[array_name].fill(val)
self.to_gpu(array_name)
def min(self, array_name, slice_obj=None, ghost_layers=False, inner_ghost_layers=False, reduce=True): def min(self, array_name, slice_obj=None, ghost_layers=False, inner_ghost_layers=False, reduce=True):
"""Returns the minimum value inside the domain or slice of the domain. """Returns the minimum value inside the domain or slice of the domain.
......
...@@ -151,8 +151,8 @@ class ParallelDataHandling(DataHandling): ...@@ -151,8 +151,8 @@ class ParallelDataHandling(DataHandling):
if gpu: if gpu:
if alignment != 0: if alignment != 0:
raise ValueError("Alignment for walberla GPU fields not yet supported") raise ValueError("Alignment for walberla GPU fields not yet supported")
wlb.cuda.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX + name, dtype, fSize=values_per_cell, wlb.gpu.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX + name, dtype, fSize=values_per_cell,
usePitchedMem=False, ghostLayers=ghost_layers, layout=layout_map[layout]) usePitchedMem=False, ghostLayers=ghost_layers, layout=layout_map[layout])
if cpu and gpu: if cpu and gpu:
self._cpu_gpu_pairs.append((name, self.GPU_DATA_PREFIX + name)) self._cpu_gpu_pairs.append((name, self.GPU_DATA_PREFIX + name))
...@@ -255,7 +255,7 @@ class ParallelDataHandling(DataHandling): ...@@ -255,7 +255,7 @@ class ParallelDataHandling(DataHandling):
def get_kernel_kwargs(self, kernel_function, **kwargs): def get_kernel_kwargs(self, kernel_function, **kwargs):
if kernel_function.ast.backend == Backend.CUDA: if kernel_function.ast.backend == Backend.CUDA:
name_map = self._field_name_to_gpu_data_name name_map = self._field_name_to_gpu_data_name
to_array = wlb.cuda.toGpuArray to_array = wlb.gpu.toGpuArray
else: else:
name_map = self._field_name_to_cpu_data_name name_map = self._field_name_to_cpu_data_name
to_array = wlb.field.toArray to_array = wlb.field.toArray
...@@ -280,7 +280,8 @@ class ParallelDataHandling(DataHandling): ...@@ -280,7 +280,8 @@ class ParallelDataHandling(DataHandling):
for block in self.blocks: for block in self.blocks:
transfer_func(block[self.GPU_DATA_PREFIX + name], block[name]) transfer_func(block[self.GPU_DATA_PREFIX + name], block[name])
else: else:
wlb.cuda.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name) if self.is_on_gpu(name):
wlb.gpu.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def to_gpu(self, name): def to_gpu(self, name):
if name in self._custom_data_transfer_functions: if name in self._custom_data_transfer_functions:
...@@ -288,20 +289,21 @@ class ParallelDataHandling(DataHandling): ...@@ -288,20 +289,21 @@ class ParallelDataHandling(DataHandling):
for block in self.blocks: for block in self.blocks:
transfer_func(block[self.GPU_DATA_PREFIX + name], block[name]) transfer_func(block[self.GPU_DATA_PREFIX + name], block[name])
else: else:
wlb.cuda.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name) if self.is_on_gpu(name):
wlb.gpu.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def is_on_gpu(self, name): def is_on_gpu(self, name):
return (name, self.GPU_DATA_PREFIX + name) in self._cpu_gpu_pairs return (name, self.GPU_DATA_PREFIX + name) in self._cpu_gpu_pairs
def all_to_cpu(self): def all_to_cpu(self):
for cpu_name, gpu_name in self._cpu_gpu_pairs: for cpu_name, gpu_name in self._cpu_gpu_pairs:
wlb.cuda.copyFieldToCpu(self.blocks, gpu_name, cpu_name) wlb.gpu.copyFieldToCpu(self.blocks, gpu_name, cpu_name)
for name in self._custom_data_transfer_functions.keys(): for name in self._custom_data_transfer_functions.keys():
self.to_cpu(name) self.to_cpu(name)
def all_to_gpu(self): def all_to_gpu(self):
for cpu_name, gpu_name in self._cpu_gpu_pairs: for cpu_name, gpu_name in self._cpu_gpu_pairs:
wlb.cuda.copyFieldToGpu(self.blocks, gpu_name, cpu_name) wlb.gpu.copyFieldToGpu(self.blocks, gpu_name, cpu_name)
for name in self._custom_data_transfer_functions.keys(): for name in self._custom_data_transfer_functions.keys():
self.to_gpu(name) self.to_gpu(name)
...@@ -328,7 +330,7 @@ class ParallelDataHandling(DataHandling): ...@@ -328,7 +330,7 @@ class ParallelDataHandling(DataHandling):
create_packing = wlb.field.createStencilRestrictedPackInfo create_packing = wlb.field.createStencilRestrictedPackInfo
else: else:
assert target == Target.GPU assert target == Target.GPU
create_packing = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo create_packing = wlb.gpu.createPackInfo if buffered else wlb.gpu.createMPIDatatypeInfo
names = [self.GPU_DATA_PREFIX + name for name in names] names = [self.GPU_DATA_PREFIX + name for name in names]
sync_function = create_scheme(self.blocks, stencil) sync_function = create_scheme(self.blocks, stencil)
......
...@@ -6,11 +6,10 @@ import numpy as np ...@@ -6,11 +6,10 @@ import numpy as np
from pystencils.datahandling.blockiteration import SerialBlock from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler
from pystencils.enums import Target from pystencils.enums import Target
from pystencils.field import ( from pystencils.field import (Field, FieldType, create_numpy_array_with_layout,
Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple, layout_string_to_tuple, spatial_layout_string_to_tuple)
spatial_layout_string_to_tuple) from pystencils.gpu.gpu_array_handler import GPUArrayHandler, GPUNotAvailableHandler
from pystencils.slicing import normalize_slice, remove_ghost_layers from pystencils.slicing import normalize_slice, remove_ghost_layers
from pystencils.utils import DotDict from pystencils.utils import DotDict
...@@ -23,7 +22,8 @@ class SerialDataHandling(DataHandling): ...@@ -23,7 +22,8 @@ class SerialDataHandling(DataHandling):
default_layout: str = 'SoA', default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False, periodicity: Union[bool, Sequence[bool]] = False,
default_target: Target = Target.CPU, default_target: Target = Target.CPU,
array_handler=None) -> None: array_handler=None,
device_number=None) -> None:
""" """
Creates a data handling for single node simulations. Creates a data handling for single node simulations.
...@@ -31,9 +31,17 @@ class SerialDataHandling(DataHandling): ...@@ -31,9 +31,17 @@ class SerialDataHandling(DataHandling):
domain_size: size of the spatial domain as tuple domain_size: size of the spatial domain as tuple
default_ghost_layers: default number of ghost layers used, if not overridden in add_array() method default_ghost_layers: default number of ghost layers used, if not overridden in add_array() method
default_layout: default layout used, if not overridden in add_array() method default_layout: default layout used, if not overridden in add_array() method
periodicity: List of booleans that indicate which dimensions have periodic boundary conditions.
Alternatively, a single boolean can be given, which is used for all dimensions. Defaults to
False (non-periodic)
default_target: `Target` either 'CPU' or 'GPU'. If set to 'GPU' for each array also a GPU version is default_target: `Target` either 'CPU' or 'GPU'. If set to 'GPU' for each array also a GPU version is
allocated if not overwritten in add_array, and synchronization functions are for the GPU by allocated if not overwritten in add_array, and synchronization functions are for the GPU by
default default
array_handler: An object that provides the same interface as `GPUArrayHandler`, which is used for creation
and transferring of GPU arrays. Default is to construct a fresh `GPUArrayHandler`
device_number: If `default_target` is set to 'GPU', a device number should be specified. If none is given,
the device with the largest amount of memory is used. If multiple devices have the same
amount of memory, the one with the lower number is used
""" """
super(SerialDataHandling, self).__init__() super(SerialDataHandling, self).__init__()
self._domainSize = tuple(domain_size) self._domainSize = tuple(domain_size)
...@@ -48,9 +56,14 @@ class SerialDataHandling(DataHandling): ...@@ -48,9 +56,14 @@ class SerialDataHandling(DataHandling):
if not array_handler: if not array_handler:
try: try:
self.array_handler = PyCudaArrayHandler() if device_number is None:
except Exception: import cupy.cuda.runtime
self.array_handler = PyCudaNotAvailableHandler() if cupy.cuda.runtime.getDeviceCount() > 0:
device_number = sorted(range(cupy.cuda.runtime.getDeviceCount()),
key=lambda i: cupy.cuda.Device(i).mem_info[1], reverse=True)[0]
self.array_handler = GPUArrayHandler(device_number)
except ImportError:
self.array_handler = GPUNotAvailableHandler()
else: else:
self.array_handler = array_handler self.array_handler = array_handler
...@@ -126,10 +139,14 @@ class SerialDataHandling(DataHandling): ...@@ -126,10 +139,14 @@ class SerialDataHandling(DataHandling):
else: else:
layout_tuple = spatial_layout_string_to_tuple(layout, self.dim) layout_tuple = spatial_layout_string_to_tuple(layout, self.dim)
# cpu_arr is always created - since there is no create_pycuda_array_with_layout() # cpu_arr is always created - since there is no create_gpu_array_with_layout()
byte_offset = ghost_layers * np.dtype(dtype).itemsize byte_offset = ghost_layers * np.dtype(dtype).itemsize
cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
byte_offset=byte_offset, **kwargs) if gpu:
cpu_arr = self.array_handler.pinned_numpy_array(shape=kwargs['shape'], layout=layout_tuple, dtype=dtype)
else:
cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
byte_offset=byte_offset, **kwargs)
if alignment and gpu: if alignment and gpu:
raise NotImplementedError("Alignment for GPU fields not supported") raise NotImplementedError("Alignment for GPU fields not supported")
...@@ -251,14 +268,16 @@ class SerialDataHandling(DataHandling): ...@@ -251,14 +268,16 @@ class SerialDataHandling(DataHandling):
transfer_func = self._custom_data_transfer_functions[name][1] transfer_func = self._custom_data_transfer_functions[name][1]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name]) transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else: else:
self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name]) if name in self.cpu_arrays.keys() & self.gpu_arrays.keys():
self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])
def to_gpu(self, name): def to_gpu(self, name):
if name in self._custom_data_transfer_functions: if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][0] transfer_func = self._custom_data_transfer_functions[name][0]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name]) transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else: else:
self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name]) if name in self.cpu_arrays.keys() & self.gpu_arrays.keys():
self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])
def is_on_gpu(self, name): def is_on_gpu(self, name):
return name in self.gpu_arrays return name in self.gpu_arrays
...@@ -313,7 +332,7 @@ class SerialDataHandling(DataHandling): ...@@ -313,7 +332,7 @@ class SerialDataHandling(DataHandling):
result.append(functor(filtered_stencil, ghost_layers=gls)) result.append(functor(filtered_stencil, ghost_layers=gls))
else: else:
if functor is None: if functor is None:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor from pystencils.gpu.periodicity import get_periodic_boundary_functor as functor
target = Target.GPU target = Target.GPU
result.append(functor(filtered_stencil, self._domainSize, result.append(functor(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions, index_dimensions=self.fields[name].index_dimensions,
...@@ -419,13 +438,19 @@ class SerialDataHandling(DataHandling): ...@@ -419,13 +438,19 @@ class SerialDataHandling(DataHandling):
def world_rank(self): def world_rank(self):
return 0 return 0
def save_all(self, file): def save_all(self, filename, compressed=True, synchronise_data=True):
np.savez_compressed(file, **self.cpu_arrays) if synchronise_data:
for name in (self.cpu_arrays.keys() & self.gpu_arrays.keys()):
self.to_cpu(name)
if compressed:
np.savez_compressed(filename, **self.cpu_arrays)
else:
np.savez(filename, **self.cpu_arrays)
def load_all(self, file): def load_all(self, filename, synchronise_data=True):
if '.npz' not in file: if '.npz' not in filename:
file += '.npz' filename += '.npz'
file_contents = np.load(file) file_contents = np.load(filename)
for arr_name, arr_contents in self.cpu_arrays.items(): for arr_name, arr_contents in self.cpu_arrays.items():
if arr_name not in file_contents: if arr_name not in file_contents:
print(f"Skipping read data {arr_name} because there is no data with this name in data handling") print(f"Skipping read data {arr_name} because there is no data with this name in data handling")
...@@ -435,3 +460,6 @@ class SerialDataHandling(DataHandling): ...@@ -435,3 +460,6 @@ class SerialDataHandling(DataHandling):
f"Read array shape {file_contents[arr_name].shape}, existing array shape {arr_contents.shape}") f"Read array shape {file_contents[arr_name].shape}, existing array shape {arr_contents.shape}")
continue continue
np.copyto(arr_contents, file_contents[arr_name]) np.copyto(arr_contents, file_contents[arr_name])
if synchronise_data:
if arr_name in self.gpu_arrays.keys():
self.to_gpu(arr_name)
File moved
File moved
...@@ -5,7 +5,7 @@ import pickle ...@@ -5,7 +5,7 @@ import pickle
import re import re
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
from typing import List, Optional, Sequence, Set, Tuple from typing import List, Optional, Sequence, Set, Tuple, Union
import numpy as np import numpy as np
import sympy as sp import sympy as sp
...@@ -69,74 +69,6 @@ class FieldType(Enum): ...@@ -69,74 +69,6 @@ class FieldType(Enum):
return field.field_type == FieldType.STAGGERED_FLUX return field.field_type == FieldType.STAGGERED_FLUX
def fields(description=None, index_dimensions=0, layout=None, field_type=FieldType.GENERIC, **kwargs):
"""Creates pystencils fields from a string description.
Examples:
Create a 2D scalar and vector field:
>>> s, v = fields("s, v(2): double[2D]")
>>> assert s.spatial_dimensions == 2 and s.index_dimensions == 0
>>> assert (v.spatial_dimensions, v.index_dimensions, v.index_shape) == (2, 1, (2,))
Create an integer field of shape (10, 20):
>>> f = fields("f : int32[10, 20]")
>>> f.has_fixed_shape, f.shape
(True, (10, 20))
Numpy arrays can be used as template for shape and data type of field:
>>> arr_s, arr_v = np.zeros([20, 20]), np.zeros([20, 20, 2])
>>> s, v = fields("s, v(2)", s=arr_s, v=arr_v)
>>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
>>> assert v.index_shape == (2,)
Format string can be left out, field names are taken from keyword arguments.
>>> fields(f1=arr_s, f2=arr_s)
[f1: double[20,20], f2: double[20,20]]
The keyword names ``index_dimension`` and ``layout`` have special meaning, don't use them for field names
>>> f = fields(f=arr_v, index_dimensions=1)
>>> assert f.index_dimensions == 1
>>> f = fields("pdfs(19) : float32[3D]", layout='fzyx')
>>> f.layout
(2, 1, 0)
"""
result = []
if description:
field_descriptions, dtype, shape = _parse_description(description)
layout = 'numpy' if layout is None else layout
for field_name, idx_shape in field_descriptions:
if field_name in kwargs:
arr = kwargs[field_name]
idx_shape_of_arr = () if not len(idx_shape) else arr.shape[-len(idx_shape):]
assert idx_shape_of_arr == idx_shape
f = Field.create_from_numpy_array(field_name, kwargs[field_name], index_dimensions=len(idx_shape),
field_type=field_type)
elif isinstance(shape, tuple):
f = Field.create_fixed_size(field_name, shape + idx_shape, dtype=dtype,
index_dimensions=len(idx_shape), layout=layout, field_type=field_type)
elif isinstance(shape, int):
f = Field.create_generic(field_name, spatial_dimensions=shape, dtype=dtype,
index_shape=idx_shape, layout=layout, field_type=field_type)
elif shape is None:
f = Field.create_generic(field_name, spatial_dimensions=2, dtype=dtype,
index_shape=idx_shape, layout=layout, field_type=field_type)
else:
assert False
result.append(f)
else:
assert layout is None, "Layout can not be specified when creating Field from numpy array"
for field_name, arr in kwargs.items():
result.append(Field.create_from_numpy_array(field_name, arr, index_dimensions=index_dimensions,
field_type=field_type))
if len(result) == 0:
return None
elif len(result) == 1:
return result[0]
else:
return result
class Field: class Field:
""" """
With fields one can formulate stencil-like update rules on structured grids. With fields one can formulate stencil-like update rules on structured grids.
...@@ -324,9 +256,7 @@ class Field: ...@@ -324,9 +256,7 @@ class Field:
self.shape = shape self.shape = shape
self.strides = strides self.strides = strides
self.latex_name: Optional[str] = None self.latex_name: Optional[str] = None
self.coordinate_origin: tuple[float, sp.Symbol] = sp.Matrix(tuple( self.coordinate_origin = sp.Matrix([0] * self.spatial_dimensions)
0 for _ in range(self.spatial_dimensions)
))
self.coordinate_transform = sp.eye(self.spatial_dimensions) self.coordinate_transform = sp.eye(self.spatial_dimensions)
if field_type == FieldType.STAGGERED: if field_type == FieldType.STAGGERED:
assert self.staggered_stencil assert self.staggered_stencil
...@@ -335,8 +265,7 @@ class Field: ...@@ -335,8 +265,7 @@ class Field:
if self.has_fixed_shape: if self.has_fixed_shape:
return Field(new_name, self.field_type, self._dtype, self._layout, self.shape, self.strides) return Field(new_name, self.field_type, self._dtype, self._layout, self.shape, self.strides)
else: else:
return Field.create_generic(new_name, self.spatial_dimensions, self.dtype.numpy_dtype, return Field(new_name, self.field_type, self.dtype, self.layout, self.shape, self.strides)
self.index_dimensions, self._layout, self.index_shape, self.field_type)
@property @property
def spatial_dimensions(self) -> int: def spatial_dimensions(self) -> int:
...@@ -722,9 +651,11 @@ class Field: ...@@ -722,9 +651,11 @@ class Field:
raise ValueError(f"Wrong number of indices: Got {len(idx)}, expected {self.field.index_dimensions}") raise ValueError(f"Wrong number of indices: Got {len(idx)}, expected {self.field.index_dimensions}")
if len(idx) == 1 and isinstance(idx[0], str): if len(idx) == 1 and isinstance(idx[0], str):
dtype = BasicType(self.field.dtype.numpy_dtype[idx[0]]) dtype = BasicType(self.field.dtype.numpy_dtype[idx[0]])
return Field.Access(self.field, self._offsets, idx, dtype=dtype) return Field.Access(self.field, self._offsets, idx,
is_absolute_access=self.is_absolute_access, dtype=dtype)
else: else:
return Field.Access(self.field, self._offsets, idx, dtype=self.dtype) return Field.Access(self.field, self._offsets, idx,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def __getitem__(self, *idx): def __getitem__(self, *idx):
return self.__call__(*idx) return self.__call__(*idx)
...@@ -778,7 +709,8 @@ class Field: ...@@ -778,7 +709,8 @@ class Field:
""" """
offset_list = list(self.offsets) offset_list = list(self.offsets)
offset_list[coord_id] += offset offset_list[coord_id] += offset
return Field.Access(self.field, tuple(offset_list), self.index, dtype=self.dtype) return Field.Access(self.field, tuple(offset_list), self.index,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def get_shifted(self, *shift) -> 'Field.Access': def get_shifted(self, *shift) -> 'Field.Access':
"""Returns a new Access with changed spatial coordinates """Returns a new Access with changed spatial coordinates
...@@ -791,6 +723,7 @@ class Field: ...@@ -791,6 +723,7 @@ class Field:
return Field.Access(self.field, return Field.Access(self.field,
tuple(a + b for a, b in zip(shift, self.offsets)), tuple(a + b for a, b in zip(shift, self.offsets)),
self.index, self.index,
is_absolute_access=self.is_absolute_access,
dtype=self.dtype) dtype=self.dtype)
def at_index(self, *idx_tuple) -> 'Field.Access': def at_index(self, *idx_tuple) -> 'Field.Access':
...@@ -801,12 +734,14 @@ class Field: ...@@ -801,12 +734,14 @@ class Field:
>>> f(0).at_index(8) >>> f(0).at_index(8)
f_C^8 f_C^8
""" """
return Field.Access(self.field, self.offsets, idx_tuple, dtype=self.dtype) return Field.Access(self.field, self.offsets, idx_tuple,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def _eval_subs(self, old, new): def _eval_subs(self, old, new):
return Field.Access(self.field, return Field.Access(self.field,
tuple(sp.sympify(a).subs(old, new) for a in self.offsets), tuple(sp.sympify(a).subs(old, new) for a in self.offsets),
tuple(sp.sympify(a).subs(old, new) for a in self.index), tuple(sp.sympify(a).subs(old, new) for a in self.index),
is_absolute_access=self.is_absolute_access,
dtype=self.dtype) dtype=self.dtype)
@property @property
...@@ -824,7 +759,8 @@ class Field: ...@@ -824,7 +759,8 @@ class Field:
def _hashable_content(self): def _hashable_content(self):
super_class_contents = super(Field.Access, self)._hashable_content() super_class_contents = super(Field.Access, self)._hashable_content()
return (super_class_contents, self._field.hashable_contents(), *self._index, *self._offsets) return (super_class_contents, self._field.hashable_contents(), *self._index,
*self._offsets, self._is_absolute_access)
def _staggered_offset(self, offsets, index): def _staggered_offset(self, offsets, index):
assert FieldType.is_staggered(self._field) assert FieldType.is_staggered(self._field)
...@@ -875,6 +811,75 @@ class Field: ...@@ -875,6 +811,75 @@ class Field:
return f"{n}[{offset_str}]" return f"{n}[{offset_str}]"
def fields(description=None, index_dimensions=0, layout=None,
field_type=FieldType.GENERIC, **kwargs) -> Union[Field, List[Field]]:
"""Creates pystencils fields from a string description.
Examples:
Create a 2D scalar and vector field:
>>> s, v = fields("s, v(2): double[2D]")
>>> assert s.spatial_dimensions == 2 and s.index_dimensions == 0
>>> assert (v.spatial_dimensions, v.index_dimensions, v.index_shape) == (2, 1, (2,))
Create an integer field of shape (10, 20):
>>> f = fields("f : int32[10, 20]")
>>> f.has_fixed_shape, f.shape
(True, (10, 20))
Numpy arrays can be used as template for shape and data type of field:
>>> arr_s, arr_v = np.zeros([20, 20]), np.zeros([20, 20, 2])
>>> s, v = fields("s, v(2)", s=arr_s, v=arr_v)
>>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
>>> assert v.index_shape == (2,)
Format string can be left out, field names are taken from keyword arguments.
>>> fields(f1=arr_s, f2=arr_s)
[f1: double[20,20], f2: double[20,20]]
The keyword names ``index_dimension`` and ``layout`` have special meaning, don't use them for field names
>>> f = fields(f=arr_v, index_dimensions=1)
>>> assert f.index_dimensions == 1
>>> f = fields("pdfs(19) : float32[3D]", layout='fzyx')
>>> f.layout
(2, 1, 0)
"""
result = []
if description:
field_descriptions, dtype, shape = _parse_description(description)
layout = 'numpy' if layout is None else layout
for field_name, idx_shape in field_descriptions:
if field_name in kwargs:
arr = kwargs[field_name]
idx_shape_of_arr = () if not len(idx_shape) else arr.shape[-len(idx_shape):]
assert idx_shape_of_arr == idx_shape
f = Field.create_from_numpy_array(field_name, kwargs[field_name], index_dimensions=len(idx_shape),
field_type=field_type)
elif isinstance(shape, tuple):
f = Field.create_fixed_size(field_name, shape + idx_shape, dtype=dtype,
index_dimensions=len(idx_shape), layout=layout, field_type=field_type)
elif isinstance(shape, int):
f = Field.create_generic(field_name, spatial_dimensions=shape, dtype=dtype,
index_shape=idx_shape, layout=layout, field_type=field_type)
elif shape is None:
f = Field.create_generic(field_name, spatial_dimensions=2, dtype=dtype,
index_shape=idx_shape, layout=layout, field_type=field_type)
else:
assert False
result.append(f)
else:
assert layout is None, "Layout can not be specified when creating Field from numpy array"
for field_name, arr in kwargs.items():
result.append(Field.create_from_numpy_array(field_name, arr, index_dimensions=index_dimensions,
field_type=field_type))
if len(result) == 0:
raise ValueError("Could not parse field description")
elif len(result) == 1:
return result[0]
else:
return result
def get_layout_from_strides(strides: Sequence[int], index_dimension_ids: Optional[List[int]] = None): def get_layout_from_strides(strides: Sequence[int], index_dimension_ids: Optional[List[int]] = None):
index_dimension_ids = [] if index_dimension_ids is None else index_dimension_ids index_dimension_ids = [] if index_dimension_ids is None else index_dimension_ids
coordinates = list(range(len(strides))) coordinates = list(range(len(strides)))
...@@ -943,24 +948,35 @@ def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0 ...@@ -943,24 +948,35 @@ def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0
def spatial_layout_string_to_tuple(layout_str: str, dim: int) -> Tuple[int, ...]: def spatial_layout_string_to_tuple(layout_str: str, dim: int) -> Tuple[int, ...]:
if layout_str in ('fzyx', 'zyxf'): if dim <= 0:
assert dim <= 3 raise ValueError("Dimensionality must be positive")
return tuple(reversed(range(dim)))
layout_str = layout_str.lower()
if layout_str in ('fzyx', 'f', 'reverse_numpy', 'SoA'): if layout_str in ('fzyx', 'zyxf', 'soa', 'aos'):
if dim > 3:
raise ValueError(f"Invalid spatial dimensionality for layout descriptor {layout_str}: May be at most 3.")
return tuple(reversed(range(dim)))
if layout_str in ('f', 'reverse_numpy'):
return tuple(reversed(range(dim))) return tuple(reversed(range(dim)))
elif layout_str in ('c', 'numpy', 'AoS'): elif layout_str in ('c', 'numpy'):
return tuple(range(dim)) return tuple(range(dim))
raise ValueError("Unknown layout descriptor " + layout_str) raise ValueError("Unknown layout descriptor " + layout_str)
def layout_string_to_tuple(layout_str, dim): def layout_string_to_tuple(layout_str, dim):
if dim <= 0:
raise ValueError("Dimensionality must be positive")
layout_str = layout_str.lower() layout_str = layout_str.lower()
if layout_str == 'fzyx' or layout_str == 'soa': if layout_str == 'fzyx' or layout_str == 'soa':
assert dim <= 4 if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim))) return tuple(reversed(range(dim)))
elif layout_str == 'zyxf' or layout_str == 'aos': elif layout_str == 'zyxf' or layout_str == 'aos':
assert dim <= 4 if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim - 1))) + (dim - 1,) return tuple(reversed(range(dim - 1))) + (dim - 1,)
elif layout_str == 'f' or layout_str == 'reverse_numpy': elif layout_str == 'f' or layout_str == 'reverse_numpy':
return tuple(reversed(range(dim))) return tuple(reversed(range(dim)))
......
File moved