Commit 09a59f7f authored by Martin Bauer
Browse files

Merge branch 'run-opencl-without-pycuda' into 'master'

Run opencl without pycuda

Closes #15

See merge request pycodegen/pystencils!77
parents ef12c745 e9605ab2
......@@ -81,7 +81,10 @@ class OpenClSympyPrinter(CudaSympyPrinter):
# For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter
# since built-in math functions are generic.
# In CUDA, you have to differentiate between `sin` and `sinf`
_print_math_func = CustomSympyPrinter._print_math_func
try:
_print_math_func = CustomSympyPrinter._print_math_func
except AttributeError:
pass
_print_Pow = CustomSympyPrinter._print_Pow
def _print_Function(self, expr):
......
......@@ -41,8 +41,12 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argumen
code += str(generate_c(kernel_function_node, dialect='opencl', custom_backend=custom_backend))
options = []
if USE_FAST_MATH:
options.append("-cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math -cl-finite-math-only")
options.append("-I \"" + get_pystencils_include_path() + "\"")
options.append("-cl-unsafe-math-optimizations")
options.append("-cl-mad-enable")
options.append("-cl-fast-relaxed-math")
options.append("-cl-finite-math-only")
options.append("-I")
options.append(get_pystencils_include_path())
mod = cl.Program(opencl_ctx, code).build(options=options)
func = getattr(mod, kernel_function_node.function_name)
......
......@@ -1315,13 +1315,16 @@ def implement_interpolations(ast_node: ast.Node,
substitutions = {i: to_texture_map[i.symbol.interpolator].at(
[o for o in i.offsets]) for i in interpolation_accesses}
import pycuda.driver as cuda
for texture in substitutions.values():
if can_use_hw_interpolation(texture):
texture.filter_mode = cuda.filter_mode.LINEAR
else:
texture.filter_mode = cuda.filter_mode.POINT
texture.read_as_integer = True
try:
import pycuda.driver as cuda
for texture in substitutions.values():
if can_use_hw_interpolation(texture):
texture.filter_mode = cuda.filter_mode.LINEAR
else:
texture.filter_mode = cuda.filter_mode.POINT
texture.read_as_integer = True
except Exception:
pass
if isinstance(ast_node, AssignmentCollection):
ast_node = ast_node.subs(substitutions)
......
import numpy as np
import pytest
import sympy as sp
import pystencils
import sympy as sp
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.backends.opencl_backend import OpenClBackend
from pystencils.opencl.opencljit import make_python_function
......@@ -40,6 +40,8 @@ def test_print_opencl():
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_fixed_size():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
......@@ -92,6 +94,8 @@ def test_opencl_jit_fixed_size():
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [2d]")
assignments = pystencils.AssignmentCollection({
......@@ -144,6 +148,8 @@ def test_opencl_jit():
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_with_parameter():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [2d]")
a = sp.Symbol('a')
......@@ -195,5 +201,35 @@ def test_opencl_jit_with_parameter():
assert np.allclose(result_cuda, result_opencl)
if __name__ == '__main__':
test_opencl_jit()
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_without_cuda():
    """Smoke-test the OpenCL JIT path using only pyopencl in the test body.

    A kernel computing ``z = x * log(x * y)`` on fixed-size 20x30 fields is
    generated, printed as OpenCL code, compiled through
    ``make_python_function`` and launched once on pyopencl device arrays.
    Only successful creation and launch of the kernel are checked; the
    numerical result is not compared against a reference.
    """
    z_field, y_field, x_field = pystencils.fields("z, y, x: [20,30]")

    update_rule = pystencils.AssignmentCollection({
        z_field[0, 0]: x_field[0, 0] * sp.log(x_field[0, 0] * y_field[0, 0])
    })
    print(update_rule)

    kernel_ast = pystencils.create_kernel(update_rule, target='gpu')
    print(kernel_ast)

    # Render the kernel with the OpenCL backend instead of the default one.
    generated_source = pystencils.show_code(kernel_ast, custom_backend=OpenClBackend())
    print(generated_source)

    # Host-side input data, drawn in the order x, y, z.
    host_x = np.random.rand(20, 30)
    host_y = np.random.rand(20, 30)
    host_z = np.random.rand(20, 30)

    import pyopencl.array as cl_array
    context = cl.create_some_context(0)
    command_queue = cl.CommandQueue(context)

    device_x = cl_array.to_device(command_queue, host_x)
    device_y = cl_array.to_device(command_queue, host_y)
    device_z = cl_array.to_device(command_queue, host_z)

    opencl_kernel = make_python_function(kernel_ast, command_queue, context)
    assert opencl_kernel is not None

    # Keyword names must match the field names declared above.
    opencl_kernel(x=device_x, y=device_y, z=device_z)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment