Commit ba7b20ac authored by Stephan Seitz
Browse files

Add 'cuda' compiler config (with preferred_block_size and always_autotune)

parent 0800d84a
Pipeline #20503 passed with stage
in 3 minutes and 14 seconds
......@@ -175,9 +175,15 @@ def read_config():
('object_cache', os.path.join(user_cache_dir('pystencils'), 'objectcache')),
('clear_cache_on_start', False),
])
default_cuda_config = OrderedDict([
('always_autotune', False),
('preferred_block_size', (16, 16, 1)),
])
default_config = OrderedDict([('compiler', default_compiler_config),
('cache', default_cache_config)])
('cache', default_cache_config),
('cuda', default_cuda_config)
])
config_path, config_exists = get_configuration_file_path()
config = default_config.copy()
......@@ -219,6 +225,10 @@ def get_cache_config():
return _config['cache']
def get_cuda_config():
    """Return the ``'cuda'`` entry of ``_config``.

    NOTE(review): presumably this is the section seeded from
    ``default_cuda_config`` (``always_autotune``, ``preferred_block_size``)
    in ``read_config`` -- confirm against the full file.
    """
    cuda_section = _config['cuda']
    return cuda_section
def add_or_change_compiler_flags(flags):
if not isinstance(flags, list) and not isinstance(flags, tuple):
flags = [flags]
......
......@@ -4,6 +4,7 @@ import numpy as np
import pystencils
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.cpu.cpujit import get_cuda_config
from pystencils.data_types import StructType
from pystencils.field import FieldType
from pystencils.gpucuda.texture_utils import ndarray_to_tex
......@@ -88,7 +89,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer)
args = _build_numpy_argument_list(parameters, full_arguments)
indexing = kernel_function_node.indexing
if kernel_function_node.do_cudaautotune:
if kernel_function_node.do_cudaautotune or get_cuda_config()['always_autotune']:
block_and_thread_numbers = (
indexing.autotune_call_parameters(partial(func, *args),
shape,
......
......@@ -7,6 +7,7 @@ from sympy.core.cache import cacheit
from pystencils.astnodes import Block, Conditional
from pystencils.cache import disk_cache
from pystencils.cpu.cpujit import get_cuda_config
from pystencils.data_types import TypedSymbol, create_type
from pystencils.integer_functions import div_ceil, div_floor
from pystencils.slicing import normalize_slice
......@@ -130,7 +131,7 @@ class AbstractIndexing(abc.ABC):
current_best = block_and_thread_numbers
print(f'{current_best} is the best out of {self._autotune_block_sizes or self.AUTOTUNE_BLOCK_SIZES}')
self._block_size = current_best
self._block_size = current_best['block']
return current_best
return _autotune_call_parameters(self,
call_shape,
......@@ -157,7 +158,10 @@ class BlockIndexing(AbstractIndexing):
AUTOTUNE_NUM_CALLS = 10
def __init__(self, field, iteration_slice,
block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
block_size=tuple(get_cuda_config()['preferred_block_size']),
permute_block_size_dependent_on_layout=True,
compile_time_block_size=False,
maximum_block_size=(1024, 1024, 64)):
if field.spatial_dimensions > 3:
raise NotImplementedError("This indexing scheme supports at most 3 spatial dimensions")
......@@ -304,6 +308,7 @@ class LineIndexing(AbstractIndexing):
self._coordinates = coordinates
self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape)
self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
self._autotune_block_sizes = None
@property
def coordinates(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment