Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 0 additions and 3082 deletions
try:
import pycuda.gpuarray as gpuarray
except ImportError:
gpuarray = None
import numpy as np
import pystencils
class PyCudaArrayHandler:
def __init__(self):
import pycuda.autoinit # NOQA
def zeros(self, shape, dtype=np.float64, order='C'):
cpu_array = np.zeros(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def ones(self, shape, dtype=np.float64, order='C'):
cpu_array = np.ones(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def empty(self, shape, dtype=np.float64, layout=None):
if layout:
cpu_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout)
return self.to_gpu(cpu_array)
else:
return gpuarray.empty(shape, dtype)
@staticmethod
def to_gpu(array):
return gpuarray.to_gpu(array)
@staticmethod
def upload(array, numpy_array):
array.set(numpy_array)
@staticmethod
def download(array, numpy_array):
array.get(numpy_array)
def randn(self, shape, dtype=np.float64):
cpu_array = np.random.randn(*shape).astype(dtype)
return self.to_gpu(cpu_array)
from_numpy = to_gpu
class PyCudaNotAvailableHandler:
def __getattribute__(self, name):
raise NotImplementedError("Unable to initiaize PyCuda! "
"Try to run `import pycuda.autoinit` to check whether PyCuda is working correctly!")
#pragma once
extern "C++" {
#ifdef __CUDA_ARCH__
template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
DTYPE_T *data;
DTYPE_T shape[DIMENSION];
DTYPE_T stride[DIMENSION];
};
#else
#include <array>
template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
DTYPE_T *data;
std::array<DTYPE_T, DIMENSION> shape;
std::array<DTYPE_T, DIMENSION> stride;
};
#endif
}
This diff is collapsed.
#ifndef OPENCL_STDINT
#define OPENCL_STDINT
typedef unsigned int uint_t;
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef signed long int int64_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long int uint64_t;
#endif
"""
Test of pystencils.data_types.address_of
"""
import sympy as sp
import pystencils
from pystencils.data_types import PointerType, address_of, cast_func, create_type
from pystencils.simp.simplifications import sympy_cse
def test_address_of():
x, y = pystencils.fields('x,y: int64[2d]')
s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))
assert address_of(x[0, 0]).canonical() == x[0, 0]
assert address_of(x[0, 0]).dtype == PointerType(x[0, 0].dtype, restrict=True)
assert address_of(sp.Symbol("a")).dtype == PointerType('void', restrict=True)
assignments = pystencils.AssignmentCollection({
s: address_of(x[0, 0]),
y[0, 0]: cast_func(s, create_type('int64'))
}, {})
ast = pystencils.create_kernel(assignments)
pystencils.show_code(ast)
assignments = pystencils.AssignmentCollection({
y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64'))
}, {})
ast = pystencils.create_kernel(assignments)
pystencils.show_code(ast)
def test_address_of_with_cse():
x, y = pystencils.fields('x,y: int64[2d]')
s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))
assignments = pystencils.AssignmentCollection({
y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + s,
x[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + 1
}, {})
ast = pystencils.create_kernel(assignments)
pystencils.show_code(ast)
assignments_cse = sympy_cse(assignments)
ast = pystencils.create_kernel(assignments_cse)
pystencils.show_code(ast)
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import itertools
import numpy as np
import pytest
import sympy
from sympy.functions import im, re
import pystencils
from pystencils import AssignmentCollection
from pystencils.data_types import TypedSymbol, create_type
X, Y = pystencils.fields('x, y: complex64[2d]')
A, B = pystencils.fields('a, b: float32[2d]')
S1, S2, T = sympy.symbols('S1, S2, T')
TEST_ASSIGNMENTS = [
AssignmentCollection({X[0, 0]: 1j}),
AssignmentCollection({
S1: re(Y.center),
S2: im(Y.center),
X[0, 0]: 2j * S1 + S2
}),
AssignmentCollection({
A.center: re(Y.center),
B.center: im(Y.center),
}),
AssignmentCollection({
Y.center: re(Y.center) + X.center + 2j,
}),
AssignmentCollection({
T: 2 + 4j,
Y.center: X.center / T,
})
]
SCALAR_DTYPES = ['float32', 'float64']
@pytest.mark.parametrize("assignment, scalar_dtypes",
itertools.product(TEST_ASSIGNMENTS, (np.float32,)))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_complex_numbers(assignment, scalar_dtypes, target):
ast = pystencils.create_kernel(assignment,
target=target,
data_type=scalar_dtypes)
code = pystencils.get_code_str(ast)
print(code)
assert "Not supported" not in code
if target == pystencils.Target.GPU:
pytest.importorskip('pycuda')
kernel = ast.compile()
assert kernel is not None
X, Y = pystencils.fields('x, y: complex128[2d]')
A, B = pystencils.fields('a, b: float64[2d]')
S1, S2 = sympy.symbols('S1, S2')
T128 = TypedSymbol('ts', create_type('complex128'))
TEST_ASSIGNMENTS = [
AssignmentCollection({X[0, 0]: 1j}),
AssignmentCollection({
S1: re(Y.center),
S2: im(Y.center),
X[0, 0]: 2j * S1 + S2
}),
AssignmentCollection({
A.center: re(Y.center),
B.center: im(Y.center),
}),
AssignmentCollection({
Y.center: re(Y.center) + X.center + 2j,
}),
AssignmentCollection({
T128: 2 + 4j,
Y.center: X.center / T128,
})
]
SCALAR_DTYPES = ['float64']
@pytest.mark.parametrize("assignment", TEST_ASSIGNMENTS)
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_complex_numbers_64(assignment, target):
ast = pystencils.create_kernel(assignment,
target=target,
data_type='double')
code = pystencils.get_code_str(ast)
print(code)
assert "Not supported" not in code
if target == pystencils.Target.GPU:
pytest.importorskip('pycuda')
kernel = ast.compile()
assert kernel is not None
@pytest.mark.parametrize('dtype', (np.float32, np.float64))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
@pytest.mark.parametrize('with_complex_argument', ('with_complex_argument', False))
def test_complex_execution(dtype, target, with_complex_argument):
complex_dtype = f'complex{64 if dtype ==np.float32 else 128}'
x, y = pystencils.fields(f'x, y: {complex_dtype}[2d]')
x_arr = np.zeros((20, 30), complex_dtype)
y_arr = np.zeros((20, 30), complex_dtype)
if with_complex_argument:
a = pystencils.TypedSymbol('a', create_type(complex_dtype))
else:
a = (2j+1)
assignments = AssignmentCollection({
y.center: x.center + a
})
if target == pystencils.Target.GPU:
pytest.importorskip('pycuda')
from pycuda.gpuarray import zeros
x_arr = zeros((20, 30), complex_dtype)
y_arr = zeros((20, 30), complex_dtype)
kernel = pystencils.create_kernel(assignments, target=target, data_type=dtype).compile()
if with_complex_argument:
kernel(x=x_arr, y=y_arr, a=2j+1)
else:
kernel(x=x_arr, y=y_arr)
if target == pystencils.Target.GPU:
y_arr = y_arr.get()
assert np.allclose(y_arr, 2j+1)
import pytest
import pystencils as ps
import numpy as np
# This test aims to trigger deprication warnings. Thus the warnings should not be displayed in the warning summary.
def test_create_kernel_backwards_compatibility():
size = (30, 20)
src_field_string = np.random.rand(*size)
src_field_enum = np.copy(src_field_string)
src_field_config = np.copy(src_field_string)
dst_field_string = np.zeros(size)
dst_field_enum = np.zeros(size)
dst_field_config = np.zeros(size)
f = ps.Field.create_from_numpy_array("f", src_field_enum)
d = ps.Field.create_from_numpy_array("d", dst_field_enum)
jacobi = ps.Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
ast_enum = ps.create_kernel(jacobi, target=ps.Target.CPU).compile()
with pytest.warns(DeprecationWarning):
ast_string = ps.create_kernel(jacobi, target='cpu').compile()
# noinspection PyTypeChecker
with pytest.warns(DeprecationWarning):
ast_config = ps.create_kernel(jacobi, config=ps.CreateKernelConfig(target='cpu')).compile()
ast_enum(f=src_field_enum, d=dst_field_enum)
ast_string(f=src_field_string, d=dst_field_string)
ast_config(f=src_field_config, d=dst_field_config)
error = np.sum(np.abs(dst_field_enum - dst_field_string))
np.testing.assert_almost_equal(error, 0.0)
error = np.sum(np.abs(dst_field_enum - dst_field_config))
np.testing.assert_almost_equal(error, 0.0)
import sympy
import pytest
import pystencils
from pystencils.astnodes import get_dummy_symbol
from pystencils.backends.cuda_backend import CudaSympyPrinter
from pystencils.data_types import address_of
from pystencils.enums import Target
def test_cuda_known_functions():
printer = CudaSympyPrinter()
print(printer.known_functions)
x, y = pystencils.fields('x,y: float32 [2d]')
assignments = pystencils.AssignmentCollection({
get_dummy_symbol(): sympy.Function('atomicAdd')(address_of(y.center()), 2),
y.center(): sympy.Function('rsqrtf')(x[0, 0])
})
ast = pystencils.create_kernel(assignments, target=Target.GPU)
pytest.importorskip('pycuda')
pystencils.show_code(ast)
kernel = ast.compile()
assert(kernel is not None)
def test_cuda_but_not_c():
x, y = pystencils.fields('x,y: float32 [2d]')
assignments = pystencils.AssignmentCollection({
get_dummy_symbol(): sympy.Function('atomicAdd')(address_of(y.center()), 2),
y.center(): sympy.Function('rsqrtf')(x[0, 0])
})
ast = pystencils.create_kernel(assignments, target=Target.CPU)
pystencils.show_code(ast)
def test_cuda_unknown():
x, y = pystencils.fields('x,y: float32 [2d]')
assignments = pystencils.AssignmentCollection({
get_dummy_symbol(): sympy.Function('wtf')(address_of(y.center()), 2),
})
ast = pystencils.create_kernel(assignments, target=Target.GPU)
pystencils.show_code(ast)
This diff is collapsed.
import numpy as np
from pystencils import Assignment, Field
from pystencils.cpu import create_indexed_kernel, make_python_function
def test_indexed_kernel():
arr = np.zeros((3, 4))
dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
index_arr = np.zeros((3,), dtype=dtype)
index_arr[0] = (0, 2, 3.0)
index_arr[1] = (1, 3, 42.0)
index_arr[2] = (2, 1, 5.0)
indexed_field = Field.create_from_numpy_array('index', index_arr)
normal_field = Field.create_from_numpy_array('f', arr)
update_rule = Assignment(normal_field[0, 0], indexed_field('value'))
ast = create_indexed_kernel([update_rule], [indexed_field])
kernel = make_python_function(ast)
kernel(f=arr, index=index_arr)
for i in range(index_arr.shape[0]):
np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13)
def test_indexed_cuda_kernel():
try:
import pycuda
except ImportError:
pycuda = None
if pycuda:
from pystencils.gpucuda import make_python_function
import pycuda.gpuarray as gpuarray
from pystencils.gpucuda.kernelcreation import created_indexed_cuda_kernel
arr = np.zeros((3, 4))
dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
index_arr = np.zeros((3,), dtype=dtype)
index_arr[0] = (0, 2, 3.0)
index_arr[1] = (1, 3, 42.0)
index_arr[2] = (2, 1, 5.0)
indexed_field = Field.create_from_numpy_array('index', index_arr)
normal_field = Field.create_from_numpy_array('f', arr)
update_rule = Assignment(normal_field[0, 0], indexed_field('value'))
ast = created_indexed_cuda_kernel([update_rule], [indexed_field])
kernel = make_python_function(ast)
gpu_arr = gpuarray.to_gpu(arr)
gpu_index_arr = gpuarray.to_gpu(index_arr)
kernel(f=gpu_arr, index=gpu_index_arr)
gpu_arr.get(arr)
for i in range(index_arr.shape[0]):
np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13)
else:
print("Did not run test on GPU since no pycuda is available")
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.