diff --git a/tests/cuda/codegen/CodegenJacobiGPU.cpp b/tests/cuda/codegen/CodegenJacobiGPU.cpp index a8ebd370e44f6304ec2f0f6c17c8028b65ff10e1..d2e719e5a23e0197d83b3f5f929d38960bee592b 100644 --- a/tests/cuda/codegen/CodegenJacobiGPU.cpp +++ b/tests/cuda/codegen/CodegenJacobiGPU.cpp @@ -78,7 +78,7 @@ void testJacobi2D() // Create blocks shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction - xSize, ySize, uint_t(1), // how many cells per block (x,y,z) + xSize, ySize, uint_t(1), // how many cells per block (x,y,z) real_t(1), // dx: length of one cell in physical coordinates false, // one block per process - "false" means all blocks to one process true, true, true ); // no periodicity @@ -87,7 +87,8 @@ void testJacobi2D() BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" ); BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); - + // Initialize a quarter of the field with ones, the rest remains 0 + // Jacobi averages the domain -> every cell should be at 0.25 after sufficiently many timesteps for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { auto f = blockIt->getData<ScalarField>( cpuFieldID ); @@ -96,8 +97,6 @@ void testJacobi2D() f->get( x, y, 0 ) = 1.0; } - - typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; typedef cuda::communication::GPUPackInfo<GPUField> Packing; @@ -110,7 +109,7 @@ void testJacobi2D() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::CudaJacobiKernel2D(gpuField, 1.0), "Jacobi Kernel" ); + << Sweep( pystencils::CudaJacobiKernel2D(gpuField, 2.0), "Jacobi Kernel" ); cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID ); @@ -141,7 +140,8 @@ void testJacobi3D() BlockDataID cpuFieldID = 
blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" ); BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); - + // Initialize a quarter of the field with ones, the rest remains 0 + // Jacobi averages the domain -> every cell should be at 0.25 after sufficiently many timesteps for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { auto f = blockIt->getData<ScalarField>( cpuFieldID ); @@ -151,8 +151,6 @@ void testJacobi3D() f->get( x, y, z ) = 1.0; } - - typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme; typedef cuda::communication::GPUPackInfo<GPUField> Packing; diff --git a/tests/cuda/codegen/CudaJacobiKernel.py b/tests/cuda/codegen/CudaJacobiKernel.py index 0015677f320bc6588c1f0098d0fd6425fb4cd9d8..9d1362066c7684c2ca6af5de48d4dc552097d054 100644 --- a/tests/cuda/codegen/CudaJacobiKernel.py +++ b/tests/cuda/codegen/CudaJacobiKernel.py @@ -7,11 +7,11 @@ with CodeGeneration() as ctx: h = sp.symbols("h") # ----- Jacobi 2D - created by specifying weights in nested list -------------------------- - src, dst = ps.fields("src, src_tmp: [2D]") - stencil = [[0, 1, 0], - [1, 0, 1], - [0, 1, 0]] - assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=0.25 * h**2) + src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx') + stencil = [[0, 4, 0], + [4, 0, 4], + [0, 4, 0]] + assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h ** 2)) generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu") # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format ----- diff --git a/tests/field/codegen/JacobiKernel.py b/tests/field/codegen/JacobiKernel.py index b2da5369dddee8223a3035bff10174d27f52fe70..4aa645e1d85e5664411a38561647b90ad03fc66e 100644 --- a/tests/field/codegen/JacobiKernel.py +++ b/tests/field/codegen/JacobiKernel.py @@ -21,6 +21,6 
@@ with CodeGeneration() as ctx: def kernel_func(): dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] + src[0, 1, 0] + src[0, -1, 0] + - src[0, 0, 1] + src[0, 0, -1]) / (h ** 2 * 6) + src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2) generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src, dst)])