Commit 0df63c2d authored by Martin Bauer
Browse files

Additional tests for packinfo generation & fast approximation for div and sqrt

parent f780194d
...@@ -4,7 +4,7 @@ from pystencils import Field, Assignment ...@@ -4,7 +4,7 @@ from pystencils import Field, Assignment
from pystencils.simp import sympy_cse_on_assignment_list from pystencils.simp import sympy_cse_on_assignment_list
from pystencils.gpucuda.indexing import LineIndexing from pystencils.gpucuda.indexing import LineIndexing
from pystencils.slicing import remove_ghost_layers, add_ghost_layers, make_slice from pystencils.slicing import remove_ghost_layers, add_ghost_layers, make_slice
from pystencils.gpucuda import make_python_function, create_cuda_kernel from pystencils.gpucuda import make_python_function, create_cuda_kernel, BlockIndexing
import pycuda.gpuarray as gpuarray import pycuda.gpuarray as gpuarray
from scipy.ndimage import convolve from scipy.ndimage import convolve
...@@ -145,3 +145,8 @@ def test_periodicity(): ...@@ -145,3 +145,8 @@ def test_periodicity():
periodic_gpu_kernel(pdfs=arr_gpu) periodic_gpu_kernel(pdfs=arr_gpu)
arr_gpu.get(gpu_result) arr_gpu.get(gpu_result)
np.testing.assert_equal(cpu_result, gpu_result) np.testing.assert_equal(cpu_result, gpu_result)
def test_block_size_limiting():
    """Check that an oversized block size request gets reduced.

    Passes a (4096, 4096, 4096) block size to
    ``BlockIndexing.limit_block_size_to_device_maximum`` and requires every
    entry of the returned block size to be strictly below 4096.
    """
    requested_block_size = (4096, 4096, 4096)
    limited_block_size = BlockIndexing.limit_block_size_to_device_maximum(requested_block_size)
    # Each returned extent must have been shrunk below the requested value.
    for extent in limited_block_size:
        assert extent < 4096
...@@ -10,11 +10,18 @@ def test_fast_sqrt(): ...@@ -10,11 +10,18 @@ def test_fast_sqrt():
assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1 assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1
assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1 assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1
ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu')
code_str = str(ps.show_code(ast))
assert '__fsqrt_rn' in code_str
expr = 3 / sp.sqrt(f[0, 0] + f[1, 0]) expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
assert len(insert_fast_sqrts(expr).atoms(fast_inv_sqrt)) == 1 assert len(insert_fast_sqrts(expr).atoms(fast_inv_sqrt)) == 1
ac = ps.AssignmentCollection([expr], []) ac = ps.AssignmentCollection([expr], [])
assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1 assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1
ast = ps.create_kernel(insert_fast_sqrts(ac), target='gpu')
code_str = str(ps.show_code(ast))
assert '__frsqrt_rn' in code_str
def test_fast_divisions(): def test_fast_divisions():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment