diff --git a/backends/cbackend.py b/backends/cbackend.py
index b80170168bb76189cdb1d2008280e61b99eb8cb3..9becd107d649c5a01b215d52a5f6dc53ecd3a22a 100644
--- a/backends/cbackend.py
+++ b/backends/cbackend.py
@@ -176,7 +176,7 @@ class CBackend:
         return "%s = %s;" % (self.sympy_printer.doprint(node.lhs), self.sympy_printer.doprint(node.rhs))
 
     def _print_TemporaryMemoryAllocation(self, node):
-        align = 128
+        align = 64
         np_dtype = node.symbol.dtype.base_type.numpy_dtype
         required_size = np_dtype.itemsize * node.size + align
         size = modulo_ceil(required_size, align)
@@ -188,7 +188,7 @@ class CBackend:
                            align=align)
 
     def _print_TemporaryMemoryFree(self, node):
-        align = 128
+        align = 64
         return "free(%s - %d);" % (self.sympy_printer.doprint(node.symbol.name), node.offset(align))
 
     @staticmethod
diff --git a/datahandling/parallel_datahandling.py b/datahandling/parallel_datahandling.py
index 70caacb3c55ec16368756f2281df1a95bfdfb63f..6bdce83a61bf5476936ee504627d3d075777f44b 100644
--- a/datahandling/parallel_datahandling.py
+++ b/datahandling/parallel_datahandling.py
@@ -274,13 +274,13 @@ class ParallelDataHandling(DataHandling):
         for name in self._custom_data_transfer_functions.keys():
             self.to_gpu(name)
 
-    def synchronization_function_cpu(self, names, stencil=None, buffered=True, **_):
-        return self.synchronization_function(names, stencil, 'cpu', buffered, )
+    def synchronization_function_cpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
+        return self.synchronization_function(names, stencil, 'cpu', buffered, stencil_restricted)
 
-    def synchronization_function_gpu(self, names, stencil=None, buffered=True, **_):
-        return self.synchronization_function(names, stencil, 'gpu', buffered)
+    def synchronization_function_gpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
+        return self.synchronization_function(names, stencil, 'gpu', buffered, stencil_restricted)
 
-    def synchronization_function(self, names, stencil=None, target='cpu', buffered=True):
+    def synchronization_function(self, names, stencil=None, target='cpu', buffered=True, stencil_restricted=False):
         if target is None:
             target = self.default_target
 
@@ -293,6 +293,8 @@ class ParallelDataHandling(DataHandling):
         create_scheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
         if target == 'cpu':
             create_packing = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
+            if not buffered and stencil_restricted:
+                create_packing = wlb.field.createStencilRestrictedPackInfo
         else:
             assert target == 'gpu'
             create_packing = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo
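
Usage note (added for review context, not part of the patch): the cbackend.py hunks reduce the alignment of temporary buffers from 128 to 64 bytes (one x86-64 cache line); the allocation and free printers must agree on the value, which is why both change together. For the data-handling hunks, below is a minimal sketch of how the new stencil_restricted flag could be exercised, assuming a waLBerla build with Python bindings; the block-grid sizes, the field name 'f', and the import path are illustrative assumptions, not taken from the patch.

    import waLBerla as wlb
    from pystencils.datahandling.parallel_datahandling import ParallelDataHandling

    # Hypothetical setup: a small uniform block grid and one D3Q19-style field.
    blocks = wlb.createUniformBlockGrid(blocks=(2, 2, 1), cellsPerBlock=(16, 16, 16),
                                        periodic=(1, 1, 1))
    dh = ParallelDataHandling(blocks)
    dh.add_array('f', values_per_cell=19, ghost_layers=1)

    # buffered=False selects the direct scheme; together with
    # stencil_restricted=True the pack info becomes
    # wlb.field.createStencilRestrictedPackInfo, so only the ghost-layer
    # entries the stencil actually reads are communicated.
    sync = dh.synchronization_function_cpu(['f'], stencil='D3Q19',
                                           buffered=False, stencil_restricted=True)
    sync()  # perform one ghost-layer exchange

With buffered=True (the default) the flag has no effect, since the buffered CPU path keeps wlb.field.createPackInfo; on the GPU path it is accepted but ignored.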