Skip to content
Snippets Groups Projects
Commit 4bc5ff44 authored by Frederik Hennig's avatar Frederik Hennig
Browse files

add cuda tests; add CUDA requirement to CI

parent b1b71c32
1 merge request!24Extend Support for CUDA and HIP kernel invocations
Pipeline #75633 canceled with stages
in 3 minutes and 4 seconds
This commit is part of merge request !24. Comments created here will be created in the context of that merge request.
......@@ -38,10 +38,13 @@ typechecker:
coverage_format: cobertura
path: coverage.xml
# NOTE(review): this scrape interleaves removed and added diff lines — the job
# appears to have been renamed from "testsuite-py3.10" to "testsuite-py3.10+cuda";
# only one of the two keys below should exist in the real .gitlab-ci.yml.
"testsuite-py3.10":
"testsuite-py3.10+cuda":
extends: .testsuite-base
script:
- nox --session testsuite-3.10
tags:
# Runner must provide Docker and a CUDA 11 capable GPU for the CUDA tests.
- docker
- cuda11
"testsuite-py3.13":
extends: .testsuite-base
......
#include "CudaKernels.hpp"
#include <cuda/cuda_runtime.h>
#include <experimental/mdspan>
#include <random>
#include <iostream>
#include <functional>
#undef NDEBUG
#include <cassert>
namespace stdex = std::experimental;
using extents_t = stdex::dextents<uint64_t, 3>;
using field_t = stdex::mdspan<double, extents_t, stdex::layout_right>;
/// Terminate the test with exit code 2 if a CUDA runtime call failed.
///
/// \param err  Status returned by a CUDA runtime API call.
///
/// Prints the human-readable error string (instead of the raw enum value)
/// before exiting. The message previously said "HIP Error", which was a
/// copy-paste leftover — this file uses the CUDA runtime.
void checkCudaError(cudaError_t err)
{
    if (err != cudaSuccess)
    {
        std::cerr << "CUDA Error: " << cudaGetErrorString(err) << std::endl;
        exit(2);
    }
}
/**
 * Driver that exercises the four generated CUDA kernel wrappers
 * (linear3d, blockwise4d, each in automatic and manual launch-grid variants).
 *
 * For each variant: fill src with uniform random doubles, poison dst with
 * NaN, invoke the generated wrapper, then verify dst == 2 * src elementwise.
 * Exits with EXIT_FAILURE on the first mismatch; CUDA API failures abort
 * via checkCudaError.
 */
int main(void)
{
    /* Field geometry shared by source and destination arrays. */
    extents_t extents{23, 25, 132};
    size_t items{extents.extent(0) * extents.extent(1) * extents.extent(2)};

    /* Managed (unified) memory so both the host verification loops and the
       device kernels can access the same allocation. */
    double *data_src;
    checkCudaError(cudaMallocManaged<double>(&data_src, sizeof(double) * items));
    field_t src{data_src, extents};

    double *data_dst;
    checkCudaError(cudaMallocManaged<double>(&data_dst, sizeof(double) * items));
    field_t dst{data_dst, extents};

    /* A single stream reused by every invocation. The previous version
       created a fresh stream inside each test case and never destroyed any
       of them, leaking four streams. */
    cudaStream_t stream;
    checkCudaError(cudaStreamCreate(&stream));

    std::random_device rd;
    std::mt19937 gen{rd()};
    std::uniform_real_distribution<double> distrib{-1.0, 1.0};

    /* Randomize inputs, poison outputs, run the given kernel invocation,
       then verify every output element against the expected 2 * src. */
    auto check = [&](std::function<void()> invoke) {
        for (size_t i = 0; i < items; ++i)
        {
            data_src[i] = distrib(gen);
            data_dst[i] = NAN; /* ensures untouched elements are detected */
        }

        invoke();

        for (size_t i = 0; i < items; ++i)
        {
            const double desired = 2.0 * data_src[i];
            if (std::abs(desired - data_dst[i]) >= 1e-12)
            {
                std::cerr << "Mismatch at element " << i
                          << "; Desired: " << desired
                          << "; Actual: " << data_dst[i] << std::endl;
                exit(EXIT_FAILURE);
            }
        }
    };

    check([&]() {
        /* Linear3D indexing; launch grid derived automatically from the
           block size by the generated wrapper. */
        dim3 blockSize{64, 8, 1};
        gen::linear3d::scaleKernel(blockSize, dst, src, stream);
        checkCudaError(cudaStreamSynchronize(stream));
    });

    check([&]() {
        /* Blockwise4D indexing; launch configuration fully automatic. */
        gen::blockwise4d::scaleKernel(dst, src, stream);
        checkCudaError(cudaStreamSynchronize(stream));
    });

    check([&]() {
        /* Linear3D indexing with a manually specified launch grid. */
        dim3 blockSize{32, 8, 1};
        dim3 gridSize{5, 4, 23};
        gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
        checkCudaError(cudaStreamSynchronize(stream));
    });

    check([&]() {
        /* Blockwise4D indexing with a manually specified launch grid:
           one thread per innermost element (132), one block per (y, z) line. */
        dim3 blockSize{132, 1, 1};
        dim3 gridSize{25, 23, 1};
        gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
        checkCudaError(cudaStreamSynchronize(stream));
    });

    /* Release the stream and the managed allocations before exiting. */
    checkCudaError(cudaStreamDestroy(stream));
    checkCudaError(cudaFree(data_src));
    checkCudaError(cudaFree(data_dst));
    return EXIT_SUCCESS;
}
......@@ -5,19 +5,79 @@ import pystencils as ps
# NOTE(review): this is a flattened diff scrape — old (removed) and new (added)
# lines are interleaved and leading indentation has been lost. The duplicated
# src/dst/asm definitions and the extra "invoke" function below are diff
# artifacts; the real post-commit file contains only one of each. Verify
# against the merge request before reusing this text.

# Configure the std::mdspan binding to use the Kokkos reference implementation
# living in the std::experimental namespace.
std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")
# (pre-change lines — superseded by the definitions inside the `with` block)
src, dst = ps.fields("src, dst: double[3D]", layout="c")
asm = ps.Assignment(dst(0), 2 * src(0))
with SourceFileGenerator() as sfg:
sfg.use_cuda()
sfg.namespace("gen")
# Row-major 3D double fields; the kernel scales src by 2 into dst.
src, dst = ps.fields("src, dst: double[3D]", layout="c")
asm = ps.Assignment(dst(0), 2 * src(0))
# (pre-change lines — replaced by base_config and per-namespace cfg copies)
cfg = ps.CreateKernelConfig(target=ps.Target.CUDA)
khandle = sfg.kernels.create(asm, "scale", cfg)
# Shared CUDA kernel configuration; each variant copies and specializes it.
base_config = ps.CreateKernelConfig(target=ps.Target.CUDA)
# Launch-configuration parameters exposed on the generated wrapper functions.
block_size = sfg.gpu_api.dim3().var("blockSize")
grid_size = sfg.gpu_api.dim3().var("gridSize")
stream = sfg.gpu_api.stream_t().var("stream")
# Variant 1: linear3d indexing, grid size derived from the block size.
with sfg.namespace("linear3d"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, stream=stream),
)
# Variant 2: blockwise4d indexing, launch configuration fully automatic.
with sfg.namespace("blockwise4d"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d"
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, stream=stream),
)
# Variant 3: linear3d indexing with a caller-supplied launch grid.
with sfg.namespace("linear3d_manual"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
cfg.gpu.manual_launch_grid = True
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
)
# Variant 4: blockwise4d indexing with a caller-supplied launch grid.
with sfg.namespace("blockwise4d_manual"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d"
cfg.gpu.manual_launch_grid = True
khandle = sfg.kernels.create(asm, "scale", cfg)
# (pre-change function — replaced by "scaleKernel" below in the new revision)
sfg.function("invoke")(
sfg.map_field(src, std.mdspan.from_field(src)),
sfg.map_field(dst, std.mdspan.from_field(dst)),
sfg.gpu_invoke(khandle, block_size=block_size),
)
sfg.function("scaleKernel")(
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment