Stephan Seitz / pystencils · Commits

Commit ba7b20ac authored Dec 12, 2019 by Stephan Seitz

Add 'cuda' compiler config (with preferred_block_size and always_autotune)

parent 0800d84a
Pipeline #20503 passed with stage in 3 minutes and 14 seconds
Changes 3 · Pipelines 1
pystencils/cpu/cpujit.py
```diff
@@ -175,9 +175,15 @@ def read_config():
         ('object_cache', os.path.join(user_cache_dir('pystencils'), 'objectcache')),
         ('clear_cache_on_start', False),
     ])
+    default_cuda_config = OrderedDict([
+        ('always_autotune', False),
+        ('preferred_block_size', (16, 16, 1)),
+    ])
 
     default_config = OrderedDict([('compiler', default_compiler_config),
-                                  ('cache', default_cache_config)])
+                                  ('cache', default_cache_config),
+                                  ('cuda', default_cuda_config)
+                                  ])
 
     config_path, config_exists = get_configuration_file_path()
     config = default_config.copy()
```
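The new 'cuda' section sits alongside 'compiler' and 'cache' in the defaults. As a sketch of how a user could override it, assuming the JSON locations that get_configuration_file_path() checks (for example a pystencils.json in the working directory) and that the loaded file is merged over these defaults:

```python
# Hypothetical override file; the 'pystencils.json' location and the
# merge-over-defaults behaviour are assumptions based on read_config().
import json

override = {'cuda': {'always_autotune': True,
                     'preferred_block_size': [32, 8, 1]}}
with open('pystencils.json', 'w') as f:
    json.dump(override, f, indent=4)
```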
```diff
@@ -219,6 +225,10 @@ def get_cache_config():
     return _config['cache']
 
 
+def get_cuda_config():
+    return _config['cuda']
+
+
 def add_or_change_compiler_flags(flags):
     if not isinstance(flags, list) and not isinstance(flags, tuple):
         flags = [flags]
```
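The accessor mirrors the existing get_compiler_config() and get_cache_config(). A minimal usage sketch, assuming this revision is importable:

```python
# Minimal sketch: inspecting the new config section at runtime.
from pystencils.cpu.cpujit import get_cuda_config

cuda_config = get_cuda_config()
print(cuda_config['always_autotune'])       # False unless overridden
print(cuda_config['preferred_block_size'])  # (16, 16, 1) by default
```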
pystencils/gpucuda/cudajit.py
```diff
@@ -4,6 +4,7 @@ import numpy as np
 import pystencils
 from pystencils.backends.cbackend import generate_c, get_headers
+from pystencils.cpu.cpujit import get_cuda_config
 from pystencils.data_types import StructType
 from pystencils.field import FieldType
 from pystencils.gpucuda.texture_utils import ndarray_to_tex
```
```diff
@@ -88,7 +89,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
                                tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer)
         args = _build_numpy_argument_list(parameters, full_arguments)
         indexing = kernel_function_node.indexing
-        if kernel_function_node.do_cudaautotune:
+        if kernel_function_node.do_cudaautotune or get_cuda_config()['always_autotune']:
             block_and_thread_numbers = (indexing.autotune_call_parameters(partial(func, *args),
                                                                           shape,
```
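Since the flag is read from the config on every call, autotuning can be forced for all kernels in a process. A sketch, under the assumption that get_cuda_config() returns the live _config['cuda'] dict (as the accessor above suggests) rather than a copy:

```python
# Assumption: the returned OrderedDict is the live config entry, so
# mutating it affects every subsequent CUDA kernel call in this process.
from pystencils.cpu.cpujit import get_cuda_config

get_cuda_config()['always_autotune'] = True
```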
pystencils/gpucuda/indexing.py
```diff
@@ -7,6 +7,7 @@ from sympy.core.cache import cacheit
 from pystencils.astnodes import Block, Conditional
 from pystencils.cache import disk_cache
+from pystencils.cpu.cpujit import get_cuda_config
 from pystencils.data_types import TypedSymbol, create_type
 from pystencils.integer_functions import div_ceil, div_floor
 from pystencils.slicing import normalize_slice
```
```diff
@@ -130,7 +131,7 @@ class AbstractIndexing(abc.ABC):
                     current_best = block_and_thread_numbers
             print(f'{current_best} is the best out of {self._autotune_block_sizes or self.AUTOTUNE_BLOCK_SIZES}')
-            self._block_size = current_best
+            self._block_size = current_best['block']
             return current_best
         return _autotune_call_parameters(self, call_shape,
```
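call_parameters() in this file returns both a block and a grid, so storing the whole dict in _block_size was a bug; only the block part belongs there. Illustrative values (the concrete tuples are invented):

```python
# Shape of current_best as returned by autotuning; values invented.
current_best = {'block': (16, 16, 1), 'grid': (128, 128, 1)}
block_size = current_best['block']  # what self._block_size should hold
```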
```diff
@@ -157,7 +158,10 @@ class BlockIndexing(AbstractIndexing):
     AUTOTUNE_NUM_CALLS = 10
 
     def __init__(self, field, iteration_slice,
-                 block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
+                 block_size=tuple(get_cuda_config()['preferred_block_size']),
+                 permute_block_size_dependent_on_layout=True,
+                 compile_time_block_size=False,
                  maximum_block_size=(1024, 1024, 64)):
         if field.spatial_dimensions > 3:
             raise NotImplementedError("This indexing scheme supports at most 3 spatial dimensions")
```
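BlockIndexing's default block size now follows the config instead of a hard-coded (16, 16, 1); an explicit block_size argument still overrides it. A sketch, assuming create_kernel forwards gpu_indexing_params to BlockIndexing as in upstream pystencils and that pycuda is set up:

```python
# Sketch: config default vs. explicit block size. The stencil is invented
# for illustration; gpu_indexing_params is upstream pystencils API.
import sympy as sp
import pystencils as ps

src, dst = ps.fields('src, dst: float64[2D]')
update = [ps.Assignment(dst.center, (src[1, 0] + src[-1, 0]) * sp.Rational(1, 2))]

# Picks up tuple(get_cuda_config()['preferred_block_size']) as the default:
ast_default = ps.create_kernel(update, target='gpu')

# An explicit block size overrides the config value:
ast_custom = ps.create_kernel(update, target='gpu',
                              gpu_indexing_params={'block_size': (32, 8, 1)})
```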
```diff
@@ -304,6 +308,7 @@ class LineIndexing(AbstractIndexing):
         self._coordinates = coordinates
         self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape)
         self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
+        self._autotune_block_sizes = None
 
     @property
     def coordinates(self):
```