From 6fc7b5590903320dbcd8fa03e1cadcad7fd41cd5 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Sat, 20 May 2017 14:26:09 +0200 Subject: [PATCH] CUDA support --- CMakeLists.txt | 41 +++ apps/tutorials/CMakeLists.txt | 1 + apps/tutorials/cuda/01_GameOfLife_cuda.cpp | 134 +++++++++ apps/tutorials/cuda/01_GameOfLife_cuda.dox | 139 +++++++++ apps/tutorials/cuda/01_GameOfLife_kernels.cu | 40 +++ apps/tutorials/cuda/01_GameOfLife_kernels.h | 12 + apps/tutorials/cuda/CMakeLists.txt | 7 + apps/tutorials/cuda/GosperGliderGun.png | Bin 0 -> 228 bytes cmake/waLBerlaFunctions.cmake | 18 +- src/cuda/AddGPUFieldToStorage.h | 72 +++++ src/cuda/AddGPUFieldToStorage.impl.h | 91 ++++++ src/cuda/CMakeLists.txt | 9 + src/cuda/ErrorChecking.h | 53 ++++ src/cuda/FieldAccessor.h | 112 +++++++ src/cuda/FieldAccessorXYZ.h | 79 +++++ src/cuda/FieldCopy.h | 209 +++++++++++++ src/cuda/FieldIndexing.cpp | 225 ++++++++++++++ src/cuda/FieldIndexing.h | 92 ++++++ src/cuda/FieldIndexingXYZ.cpp | 123 ++++++++ src/cuda/FieldIndexingXYZ.h | 79 +++++ src/cuda/GPUField.cpp | 187 ++++++++++++ src/cuda/GPUField.h | 122 ++++++++ src/cuda/GPUTypesExplicitInstantiation.h | 8 + src/cuda/HostFieldAllocator.h | 81 +++++ src/cuda/Kernel.h | 294 +++++++++++++++++++ src/cuda/doc/cuda.dox | 80 +++++ src/cuda/doc/drawing.svg | 285 ++++++++++++++++++ src/cuda/doc/fieldAccess.png | Bin 0 -> 46057 bytes src/cuda/ideasForCommunication.txt | 40 +++ src/field/Field.h | 22 +- src/field/Field.impl.h | 26 -- src/field/Layout.h | 43 +++ src/field/communication/MPIDatatypes.h | 66 ++--- src/field/communication/MPIDatatypes.impl.h | 69 ++--- src/field/iterators/FieldIterator.h | 11 +- tests/CMakeLists.txt | 1 + tests/cuda/CMakeLists.txt | 15 + tests/cuda/CudaMPI.cpp | 144 +++++++++ tests/cuda/FieldTransferTest.cpp | 65 ++++ tests/cuda/Kernels.cu | 33 +++ tests/cuda/SimpleKernelTest.cpp | 115 ++++++++ 41 files changed, 3126 insertions(+), 117 deletions(-) create mode 100644 apps/tutorials/cuda/01_GameOfLife_cuda.cpp create mode 100644 apps/tutorials/cuda/01_GameOfLife_cuda.dox create mode 100644 apps/tutorials/cuda/01_GameOfLife_kernels.cu create mode 100644 apps/tutorials/cuda/01_GameOfLife_kernels.h create mode 100644 apps/tutorials/cuda/CMakeLists.txt create mode 100644 apps/tutorials/cuda/GosperGliderGun.png create mode 100644 src/cuda/AddGPUFieldToStorage.h create mode 100644 src/cuda/AddGPUFieldToStorage.impl.h create mode 100644 src/cuda/CMakeLists.txt create mode 100644 src/cuda/ErrorChecking.h create mode 100644 src/cuda/FieldAccessor.h create mode 100644 src/cuda/FieldAccessorXYZ.h create mode 100644 src/cuda/FieldCopy.h create mode 100644 src/cuda/FieldIndexing.cpp create mode 100644 src/cuda/FieldIndexing.h create mode 100644 src/cuda/FieldIndexingXYZ.cpp create mode 100644 src/cuda/FieldIndexingXYZ.h create mode 100644 src/cuda/GPUField.cpp create mode 100755 src/cuda/GPUField.h create mode 100644 src/cuda/GPUTypesExplicitInstantiation.h create mode 100644 src/cuda/HostFieldAllocator.h create mode 100644 src/cuda/Kernel.h create mode 100644 src/cuda/doc/cuda.dox create mode 100644 src/cuda/doc/drawing.svg create mode 100644 src/cuda/doc/fieldAccess.png create mode 100644 src/cuda/ideasForCommunication.txt create mode 100644 src/field/Layout.h create mode 100644 tests/cuda/CMakeLists.txt create mode 100644 tests/cuda/CudaMPI.cpp create mode 100644 tests/cuda/FieldTransferTest.cpp create mode 100644 tests/cuda/Kernels.cu create mode 100644 tests/cuda/SimpleKernelTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b5a05f3ad..5a1148ba7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,8 @@ option ( WALBERLA_BUILD_WITH_PYTHON_LBM "Include LBM module into python modu option ( WALBERLA_BUILD_WITH_LIKWID_MARKERS "Compile in markers for likwid-perfctr" ) +option ( WALBERLA_BUILD_WITH_CUDA "Enable CUDA support" ) + option ( WALBERLA_BUILD_WITH_FASTMATH "Fast math" ) @@ -1013,6 +1015,45 @@ endif() +############################################################################################################################ +## +## CUDA +## +############################################################################################################################ +if ( WALBERLA_BUILD_WITH_CUDA ) + # set ( BUILD_SHARED_LIBS ON ) + set ( CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON ) + set ( CUDA_PROPAGATE_HOST_FLAGS OFF CACHE BOOL "" ) + + if ( (NOT DEFINED CUDA_HOST_COMPILER) AND (${CMAKE_C_COMPILER} MATCHES "ccache") ) + string ( STRIP "${CMAKE_C_COMPILER_ARG1}" stripped_compiler_string ) + find_program ( CUDA_HOST_COMPILER ${stripped_compiler_string} ) + endif () + + find_package ( CUDA REQUIRED ) + + if ( CUDA_FOUND ) + include_directories ( ${CUDA_INCLUDE_DIRS} ) + list ( APPEND SERVICE_LIBS ${CUDA_LIBRARIES} ) + + if ( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=" ) + list ( APPEND CUDA_NVCC_FLAGS "-std=c++11" ) + endif () + + # Bug with gcc5 and cuda7.5: + #list( APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D__STRICT_ANSI__") + + # NOTICE: exisiting cuda flags are overwritten + #set ( CUDA_NVCC_FLAGS "--compiler-bindir=/usr/bin/g++-4.3" ) + #set ( CUDA_NVCC_FLAGS "-arch sm_20" ) + else() + set ( WALBERLA_BUILD_WITH_CUDA FALSE ) + endif ( ) +endif ( ) +############################################################################################################################ + + + ############################################################################################################################ ## ## Testing Coverage diff --git a/apps/tutorials/CMakeLists.txt b/apps/tutorials/CMakeLists.txt index 91dae24cd..1a6e321a6 100644 --- a/apps/tutorials/CMakeLists.txt +++ b/apps/tutorials/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(basics) +add_subdirectory(cuda) add_subdirectory(lbm) add_subdirectory(pde) add_subdirectory(pe) diff --git a/apps/tutorials/cuda/01_GameOfLife_cuda.cpp b/apps/tutorials/cuda/01_GameOfLife_cuda.cpp new file mode 100644 index 000000000..6f8da2197 --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_cuda.cpp @@ -0,0 +1,134 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file 03_GameOfLife.cpp +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "01_GameOfLife_kernels.h" +#include "cuda/HostFieldAllocator.h" +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformDirectScheme.h" + +#include "core/Environment.h" + +#include "cuda/HostFieldAllocator.h" +#include "cuda/FieldCopy.h" +#include "cuda/GPUField.h" +#include "cuda/Kernel.h" +#include "cuda/AddGPUFieldToStorage.h" + +#include "field/AddToStorage.h" +#include "field/communication/UniformMPIDatatypeInfo.h" + +#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h" +#include "geometry/structured/GrayScaleImage.h" + +#include "gui/Gui.h" + +#include "stencil/D2Q9.h" + +#include "timeloop/SweepTimeloop.h" + + +using namespace walberla; + +typedef GhostLayerField<double,1> ScalarField; +typedef cuda::GPUField<double> GPUField; + + +ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new ScalarField ( + storage->getNumberOfXCells( *block ), // number of cells in x direction per block + storage->getNumberOfYCells( *block ), // number of cells in y direction per block + storage->getNumberOfZCells( *block ), // number of cells in z direction per block + 1, // one ghost layer + real_t(0), // initial value + field::fzyx, // layout + make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory + ); +} + +class GameOfLifeSweepCUDA +{ + public: + GameOfLifeSweepCUDA( BlockDataID gpuFieldSrcID, BlockDataID gpuFieldDstID ) + : gpuFieldSrcID_( gpuFieldSrcID ), gpuFieldDstID_( gpuFieldDstID ) + { + } + void operator() ( IBlock * block ) + { + auto srcCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldSrcID_ ); + auto dstCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldDstID_ ); + + auto myKernel = cuda::make_kernel( &gameOfLifeKernel ); + myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *srcCudaField ) ); + myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *dstCudaField ) ); + myKernel(); + + srcCudaField->swapDataPointers( dstCudaField ); + } + private: + BlockDataID gpuFieldSrcID_; + BlockDataID gpuFieldDstID_; +}; + + +int main( int argc, char ** argv ) +{ + walberla::Environment env( argc, argv ); + + geometry::GrayScaleImage image ("GosperGliderGun.png"); + + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1) , uint_t(2), uint_t(1), // number of blocks in x,y,z direction + image.size( uint_t(0) ), image.size( uint_t(1) ) / uint_t(2), uint_t(1), // how many cells per block (x,y,z) + real_t(1), // dx: length of one cell in physical coordinates + false, // one block per process - "false" means all blocks to one process + false, false, false ); // no periodicity + + + BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" ); + + // Initializing the field from an image + using geometry::initializer::ScalarFieldFromGrayScaleImage; + ScalarFieldFromGrayScaleImage fieldInitializer ( *blocks, cpuFieldID ) ; + fieldInitializer.init( image, uint_t(2), false ); + + BlockDataID gpuFieldSrcID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); + BlockDataID gpuFieldDstID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Dst" ); + + typedef blockforest::communication::UniformDirectScheme<stencil::D2Q9 > CommScheme; + CommScheme communication( blocks ); + communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(10); // number of timesteps for non-gui runs + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( communication, "Communication" ) + << Sweep( GameOfLifeSweepCUDA(gpuFieldSrcID, gpuFieldDstID ), "GameOfLifeSweep" ); + + timeloop.add() << Sweep( cuda::fieldCpyFunctor<ScalarField, GPUField >(cpuFieldID, gpuFieldDstID) ); + + GUI gui ( timeloop, blocks, argc, argv ); + gui.run(); + + return 0; +} diff --git a/apps/tutorials/cuda/01_GameOfLife_cuda.dox b/apps/tutorials/cuda/01_GameOfLife_cuda.dox new file mode 100644 index 000000000..4e654d4c0 --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_cuda.dox @@ -0,0 +1,139 @@ +namespace walberla{ + +/** +\page tutorial_cuda01 Tutorial - CUDA 1: Game of Life on GPU + + +\image html tutorial_cuda01_nvidia_titan.png + +> _Note:_ This tutorial required a CUDA aware MPI library. +> If you get a SEGFAULT when executing this tutorial, make sure that your MPI library was built with +> CUDA support! For instructions how to build OpenMPI with CUDA see this [page](https://www.open-mpi.org/faq/?category=building#build-cuda). + +\section cuda01_fields Creating Fields + +To run a simulation on a NVIDIA graphics card, we have to allocate data on the GPU and +write a CUDA kernel that operates on this data. In this tutorial we first allocate a field on the GPU +and learn about functionality to transfer data between CPU and GPU fields. + +Since initialization and output routines are usually not time critical, they are implemented +for CPU fields only. In waLBerla we set up the complete simulation using +CPU fields, copy the initialized fields over to the GPU, do the complete computation there, and, in the +end, copy everything back to do the output from the CPU field. +So only the time critical kernels have to be written in CUDA. + +Thus the setup code of the GPU GameOfLife program is very similar to its CPU version, which was implemented +in a previous tutorial ( \ref tutorial_basics_03 ). +One difference is, that fields which are often transfered from/to the GPU should be allocated with +a different field allocator: cuda::HostFieldAllocator . This allocator uses cudaHostAlloc() instead of "new" , +such that the memory is marked "pinned", which means that it is always held in RAM and cannot be swapped out to disk. +Data transfer from pinned memory is faster than from normal memory. The usage of this allocator is not +mandatory, the data transfer functions work (slightly slower) also with normally allocated fields. + +\code +ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new ScalarField ( + storage->getNumberOfXCells( *block ), // number of cells in x direction per block + storage->getNumberOfYCells( *block ), // number of cells in y direction per block + storage->getNumberOfZCells( *block ), // number of cells in z direction per block + 1, // one ghost layer + real_t(0), // initial value + field::fzyx, // layout + make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory + ); +} +\endcode + +Now we initialize the CPU field just like in the previous tutorial \ref tutorial_basics03 . +Then two GPU fields are created: "source" and "destination" field. The helper function +cuda::addGPUFieldToStorage() creates a cuda::GPUField field of the same size and layout of the given +CPU field: +\code +BlockDataID gpuFieldSrcID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); +BlockDataID gpuFieldDstID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Dst" ); +\endcode +The contents of the new GPU fields are initialized with the contents of the given CPU field. + + + +\section cuda01_kernels Writing and calling CUDA kernels + +For a basic understanding of the CUDA support in waLBerla please read \ref cudaPage first. + +After reading this page you should know what a FieldAccessor is and how to call CUDA kernels from +cpp files. So we can now start with writing +a CUDA kernel for the Game of Life algorithm. We place this in a separate file with ".cu" extension. +The build system then automatically detects that this file should be compiled with the CUDA C++ compiler. + +The kernel gets two field accessors as arguments, one for the source and one for the destination field. +Both accessors have to be configured using the CUDA variables blockIdx and threadIdx, such that afterwards +the get() and getNeighbor() functions of the accessor class can work correctly. +\code +__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst ) +{ + src.set( blockIdx, threadIdx ); + dst.set( blockIdx, threadIdx ); + int liveNeighbors = 0; + if ( src.getNeighbor( 1, 0,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( -1, 0,0 ) > 0.5 ) ++liveNeighbors; + // normal Game of Life algorithm .... + // ... +} +\endcode + +To call this kernel we write a thin wrapper sweep which only has to get the GPU fields out of the blockstorage +and passes them to the CUDA kernel. We use the cuda::Kernel class from waLBerla here, so that we can write this +sweep in a normal cpp file. +Here are the contents of this sweep: +\code +auto srcCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldSrcID_ ); +auto dstCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldDstID_ ); + +auto myKernel = cuda::make_kernel( &gameOfLifeKernel ); +myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *srcCudaField ) ); +myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *dstCudaField ) ); +myKernel(); + +srcCudaField->swapDataPointers( dstCudaField ); +\endcode + +All the computations are done on the GPU. The CPU field is not updated automatically! It was just used for +setup reasons. + +To see if our kernel works, we copy the contents back to the CPU field after every timestep: +\code +timeloop.add() << Sweep( cuda::fieldCpyFunctor<ScalarField, GPUField >(cpuFieldID, gpuFieldDstID) ); +\endcode +Of course this makes no sense for real simulations, since the transfer time is much higher than the +time that was saved by doing the computation on the GPU. For production runs, one would usually transfer the +field back every n'th timestep and write e.g. a VTK frame. + + +\section cuda01_comm Communication + +In waLBerla there are two types of communication: _buffered_ and _direct_ communication. +While buffered communication first collects all data in a buffer and sends only one message per communciation step and neighbor +the direct communciation strategy, which is based on MPI datatypes, uses no intermediate buffers and therefore has to send +more messages than buffered communication. For details see \ref walberla_communication . + +In the tutorials up to now, only the buffered approach was used. In this tutorial, we switch to the direct communciation strategy +because then we can use the CUDA support of the MPI library to directly communciate from/to GPU memory. + +The usage of the two different communication schemes is very similar. Instead of creating a blockforest::communication::UniformBufferedScheme +we create a blockforest::communication::UniformDirectScheme. +Then we register a field::communication::UniformMPIDatatypeInfo instead of the field::communication::PackInfo. + +\code +typedef blockforest::communication::UniformDirectScheme<stencil::D2Q9 > CommScheme; +CommScheme communication( blocks ); +communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) ); +\endcode + +This scheme also supports heterogenous simulations, i.e. using a CPU field on +some processes and a GPU field on other processes. + +*/ + + +} diff --git a/apps/tutorials/cuda/01_GameOfLife_kernels.cu b/apps/tutorials/cuda/01_GameOfLife_kernels.cu new file mode 100644 index 000000000..399f705c8 --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_kernels.cu @@ -0,0 +1,40 @@ +#include "../cuda/01_GameOfLife_kernels.h" + +#include <iostream> + + + +namespace walberla { + + +__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst ) +{ + src.set( blockIdx, threadIdx ); + dst.set( blockIdx, threadIdx ); + + // Count number of living neighbors + int liveNeighbors = 0; + if ( src.getNeighbor( 1, 0,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( -1, 0,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( 0,+1,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( 0,-1,0 ) > 0.5 ) ++liveNeighbors; + + if ( src.getNeighbor( -1, -1, 0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( -1, +1, 0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( +1, -1,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( +1, +1,0 ) > 0.5 ) ++liveNeighbors; + + + // cell dies because of under- or over-population + if ( liveNeighbors < 2 || liveNeighbors > 3 ) + dst.get() = 0.0; + else if ( liveNeighbors == 3 ) // cell comes alive + dst.get() = 1.0; + else + dst.get() = src.get(); +} + + + + +} // namespace walberla diff --git a/apps/tutorials/cuda/01_GameOfLife_kernels.h b/apps/tutorials/cuda/01_GameOfLife_kernels.h new file mode 100644 index 000000000..e0e30c85a --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_kernels.h @@ -0,0 +1,12 @@ +#include <iostream> + +#include "cuda/FieldIndexing.h" + + +namespace walberla { + + +__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst ); + + +} // namespace walberla diff --git a/apps/tutorials/cuda/CMakeLists.txt b/apps/tutorials/cuda/CMakeLists.txt new file mode 100644 index 000000000..efa4d2a55 --- /dev/null +++ b/apps/tutorials/cuda/CMakeLists.txt @@ -0,0 +1,7 @@ +waLBerla_link_files_to_builddir( *.prm ) +waLBerla_link_files_to_builddir( *.png ) + +waLBerla_add_executable ( NAME 01_GameOfLife_cuda + FILES 01_GameOfLife_cuda.cpp 01_GameOfLife_kernels.cu + DEPENDS blockforest core cuda field lbm geometry timeloop gui ) + \ No newline at end of file diff --git a/apps/tutorials/cuda/GosperGliderGun.png b/apps/tutorials/cuda/GosperGliderGun.png new file mode 100644 index 0000000000000000000000000000000000000000..f70d7036c6ae62d1f8cc2d1ea198fe90797b8747 GIT binary patch literal 228 zcmeAS@N?(olHy`uVBq!ia0vp^MnJ5>!2%=`cP`umq!^2X+?^QKos)S9<Zu>vL>4nJ za0`PlBg3pY5<o%r5>H=O_Pgw?ywb)$w`^Dg6e{y{aSZV|{&sR8UxNaVbM(7<llLL@ zCJ&th+Ag=0$a{O3-Mhu8xWrvDKhR7?ZT4F`#)Rj8nuHo3Jid8#t#<B_wO0gH_iBcu z$~naDI``vIWNJ(KrIN=cJ_oy-*K4zWdfh+2Dd)7t$!=Bg{0_x7i6kIV!+eu*`!^}J UN!iO>fi7V1boFyt=akR{0O$cvS^xk5 literal 0 HcmV?d00001 diff --git a/cmake/waLBerlaFunctions.cmake b/cmake/waLBerlaFunctions.cmake index 2bd75e087..cf594b58f 100644 --- a/cmake/waLBerlaFunctions.cmake +++ b/cmake/waLBerlaFunctions.cmake @@ -84,13 +84,17 @@ function ( waLBerla_add_module ) set( hasSourceFiles FALSE ) foreach ( sourceFile ${sourceFiles} ) - if ( ${sourceFile} MATCHES "\\.(c|cpp)" ) + if ( ${sourceFile} MATCHES "\\.(c|cpp|cu)" ) set( hasSourceFiles TRUE ) endif( ) endforeach( ) - if ( hasSourceFiles ) - add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} ) + if ( hasSourceFiles ) + if ( CUDA_FOUND ) + cuda_add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} ) + else() + add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} ) + endif( CUDA_FOUND ) else( ) add_custom_target( ${moduleLibraryName} SOURCES ${sourceFiles} ${otherFiles} ) # dummy IDE target endif( ) @@ -194,7 +198,13 @@ function ( waLBerla_add_executable ) endif ( ) endif() - add_executable( ${ARG_NAME} ${sourceFiles} ) + if ( CUDA_FOUND ) + cuda_add_executable( ${ARG_NAME} ${sourceFiles} ) + else() + add_executable( ${ARG_NAME} ${sourceFiles} ) + endif() + + #add_executable( ${ARG_NAME} ${sourceFiles} ) target_link_modules ( ${ARG_NAME} ${ARG_DEPENDS} ) target_link_libraries( ${ARG_NAME} ${SERVICE_LIBS} ) diff --git a/src/cuda/AddGPUFieldToStorage.h b/src/cuda/AddGPUFieldToStorage.h new file mode 100644 index 000000000..67968b93e --- /dev/null +++ b/src/cuda/AddGPUFieldToStorage.h @@ -0,0 +1,72 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file AddGPUFieldToStorage.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "GPUField.h" + +#include "domain_decomposition/StructuredBlockStorage.h" + +#include <boost/bind.hpp> + + +namespace walberla { +namespace cuda { + + + + //******************************************************************************************************************* + /*! Adds a cuda::GPUField to a StructuredBlockStorage + * + * - Similar to walberla::field::addToStorage() functions + * - created field is uninitialized + */ + //******************************************************************************************************************* + template< typename GPUField_T> + BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, + const std::string & identifier, + uint_t fSize, + const Layout layout = fzyx, + uint_t nrOfGhostLayers = 1 ); + + + + //******************************************************************************************************************* + /*! Adds a cuda::GPUField to a StructuredBlockStorage using data from a CPU field + * + * - adds a GPU field to a StructuredBlockStorage using a CPU field + * - sizes, number of ghostlayers and layout are the same as the CPU field + * - GPU field is initialized with the data currently stored in the CPU field + * @tparam Field_T type of the CPU field, the created GPUField will be of type cuda::GPUField<Field_T::value_type> + */ + //******************************************************************************************************************* + template< typename Field_T> + BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs, + ConstBlockDataID cpuFieldID, + const std::string & identifier ); + + + +} // namespace cuda +} // namespace walberla + + +#include "AddGPUFieldToStorage.impl.h" diff --git a/src/cuda/AddGPUFieldToStorage.impl.h b/src/cuda/AddGPUFieldToStorage.impl.h new file mode 100644 index 000000000..dfde01a84 --- /dev/null +++ b/src/cuda/AddGPUFieldToStorage.impl.h @@ -0,0 +1,91 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file AddGPUFieldToStorage.impl.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + + +namespace walberla { +namespace cuda { + + + namespace internal + { + template< typename GPUField_T> + GPUField_T * createGPUField( const IBlock * const block, + const StructuredBlockStorage * const bs, + uint_t ghostLayers, + uint_t fSize, + const field::Layout & layout ) + { + return new GPUField_T( bs->getNumberOfXCells( *block ), + bs->getNumberOfYCells( *block ), + bs->getNumberOfZCells( *block ), + fSize, ghostLayers, layout ); + } + + template< typename Field_T> + GPUField< typename Field_T::value_type> * + createGPUFieldFromCPUField( const IBlock * const block, + const StructuredBlockStorage * const, + ConstBlockDataID cpuFieldID + ) + { + typedef GPUField< typename Field_T::value_type> GPUField_T; + + const Field_T * f = block->getData<Field_T>( cpuFieldID ); + auto gpuField = new GPUField_T( f->xSize(), f->ySize(), f->zSize(), f->fSize(), + f->nrOfGhostLayers(), f->layout() ); + + cuda::fieldCpy( *gpuField, *f ); + + return gpuField; + } + + } + + + template< typename GPUField_T> + BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, + const std::string & identifier, + uint_t fSize, + const Layout layout, + uint_t nrOfGhostLayers ) + { + auto func = boost::bind ( internal::createGPUField<GPUField_T>, _1, _2, nrOfGhostLayers, fSize, layout ); + return bs->addStructuredBlockData< GPUField_T >( func, identifier ); + } + + + template< typename Field_T> + BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs, + ConstBlockDataID cpuFieldID, + const std::string & identifier ) + { + auto func = boost::bind ( internal::createGPUFieldFromCPUField<Field_T>, _1, _2, cpuFieldID ); + return bs->addStructuredBlockData< GPUField<typename Field_T::value_type> >( func, identifier ); + } + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt new file mode 100644 index 000000000..6959180c2 --- /dev/null +++ b/src/cuda/CMakeLists.txt @@ -0,0 +1,9 @@ +################################################################################################### +# +# Module cuda +# +################################################################################################### + +waLBerla_add_module( DEPENDS core domain_decomposition field stencil BUILD_ONLY_IF_FOUND CUDA ) + +################################################################################################### \ No newline at end of file diff --git a/src/cuda/ErrorChecking.h b/src/cuda/ErrorChecking.h new file mode 100644 index 000000000..49b216744 --- /dev/null +++ b/src/cuda/ErrorChecking.h @@ -0,0 +1,53 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ErrorChecking.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/Abort.h" + +#include <sstream> +#include <cuda_runtime.h> + + +namespace walberla { +namespace cuda { + + +#define WALBERLA_CUDA_CHECK(ans) { ::walberla::cuda::checkForError((ans), __FILE__, __LINE__); } + + + +inline void checkForError( cudaError_t code, const std::string & callerPath, const int line ) +{ + if(code != cudaSuccess) + { + std::stringstream ss; + ss << "CUDA Error: " << cudaGetErrorString( code ); + Abort::instance()->abort( ss.str(), callerPath, line ); + } +} + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldAccessor.h b/src/cuda/FieldAccessor.h new file mode 100644 index 000000000..bf45831bd --- /dev/null +++ b/src/cuda/FieldAccessor.h @@ -0,0 +1,112 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file SimpleFieldAccessor.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include <cuda_runtime.h> +#include "core/DataTypes.h" + + +namespace walberla { +namespace cuda { + + + + template<typename T> + class FieldAccessor + { + public: + enum IndexingScheme { FZYX, FZY, FZ, F, + ZYXF, ZYX, ZY, Z + }; + + FieldAccessor( char * ptr, + uint32_t xOffset, + uint32_t yOffset, + uint32_t zOffset, + uint32_t fOffset, + IndexingScheme indexingScheme ) + : ptr_(ptr), xOffset_(xOffset), yOffset_(yOffset), zOffset_(zOffset), + fOffset_(fOffset), indexingScheme_(indexingScheme ) + {} + + __device__ void set( uint3 blockIdx, uint3 threadIdx ) + { + switch ( indexingScheme_) + { + case FZYX: ptr_ += blockIdx.z * fOffset_ + blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break; + case FZY : ptr_ += blockIdx.y * fOffset_ + blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break; + case FZ : ptr_ += blockIdx.x * fOffset_ + threadIdx.x * zOffset_; break; + case F : ptr_ += threadIdx.x * fOffset_; break; + + case ZYXF: ptr_ += blockIdx.z * zOffset_ + blockIdx.y * yOffset_ + blockIdx.x * xOffset_ + threadIdx.x * fOffset_; break; + case ZYX : ptr_ += blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break; + case ZY : ptr_ += blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break; + case Z : ptr_ += threadIdx.x * zOffset_; break; + } + } + + + __device__ unsigned int getLinearIndex( uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim ) + { + return threadIdx.x + + blockIdx.x * blockDim.x + + blockIdx.y * blockDim.x * gridDim.x + + blockIdx.z * blockDim.x * gridDim.x * gridDim.y ; + } + + + __device__ T & get() { return * (T*)(ptr_); } + __device__ T & get( int f) { return * (T*)(ptr_ + f * fOffset_); } + + + __device__ T & getNeighbor( int cx, int cy, int cz ) const + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) ); + } + + __device__ T & getNeighbor( int cx, int cy, int cz, int cf ) + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) + + cf * (int)(fOffset_) ); + } + + + protected: + char * ptr_; + uint32_t xOffset_; + uint32_t yOffset_; + uint32_t zOffset_; + uint32_t fOffset_; + IndexingScheme indexingScheme_; + }; + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldAccessorXYZ.h b/src/cuda/FieldAccessorXYZ.h new file mode 100644 index 000000000..4e43dd199 --- /dev/null +++ b/src/cuda/FieldAccessorXYZ.h @@ -0,0 +1,79 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldAccessorXYZ.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + + +#include <cuda_runtime.h> + +#include "core/DataTypes.h" + +namespace walberla { +namespace cuda { + + + template<typename T> + class FieldAccessorXYZ + { + public: + FieldAccessorXYZ( char * ptr, size_t xOffset, size_t yOffset, size_t zOffset, size_t fOffset ) + : ptr_(ptr), xOffset_(xOffset), yOffset_(yOffset), zOffset_(zOffset), fOffset_(fOffset) + {} + + __device__ void set( uint3 blockIdx, uint3 threadIdx ) + { + ptr_ += threadIdx.x * xOffset_ + + blockIdx.x * yOffset_ + + blockIdx.y * zOffset_ ; + } + + __device__ T & get() { return * (T*)(ptr_); } + __device__ T & get( int f) { return * (T*)(ptr_ + f * fOffset_); } + + + __device__ T & getNeighbor( int cx, int cy, int cz ) const + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) ); + } + + __device__ T & getNeighbor( int cx, int cy, int cz, int cf ) + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) + + cf * (int)(fOffset_) ); + } + + protected: + char * ptr_; + size_t xOffset_; + size_t yOffset_; + size_t zOffset_; + size_t fOffset_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldCopy.h b/src/cuda/FieldCopy.h new file mode 100644 index 000000000..e51f26252 --- /dev/null +++ b/src/cuda/FieldCopy.h @@ -0,0 +1,209 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldCopy.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "ErrorChecking.h" +#include "GPUField.h" + +#include "domain_decomposition/StructuredBlockStorage.h" +#include "field/Field.h" +#include "field/GhostLayerField.h" + +#include "core/Abort.h" +#include "core/logging/Logging.h" + +#include <cuda_runtime.h> + +namespace walberla { +namespace cuda { + + + template<typename DstType, typename SrcType> + void fieldCpy( const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID dstID, ConstBlockDataID srcID ) + { + for ( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + DstType * dst = blockIt->getData<DstType>( dstID ); + const SrcType * src = blockIt->getData<SrcType>( srcID ); + fieldCpy( *dst, *src ); + } + } + + template<typename DstType, typename SrcType> + boost::function<void()> fieldCpyFunctor( const shared_ptr< StructuredBlockStorage > & blocks, + BlockDataID dstID, ConstBlockDataID srcID ) + { + return boost::bind( fieldCpy<DstType,SrcType>, blocks, dstID, srcID ); + } + + + + template<typename DstType, typename SrcType> + void fieldCpySweepFunction( BlockDataID dstID, ConstBlockDataID srcID, IBlock * block ) + { + DstType * dst = block->getData<DstType>( dstID ); + const SrcType * src = block->getData<SrcType>( srcID ); + fieldCpy( *dst, *src ); + } + + template<typename DstType, typename SrcType> + boost::function<void(IBlock*)> fieldCpyFunctor( BlockDataID dstID, ConstBlockDataID srcID ) + { + return boost::bind( fieldCpySweepFunction<DstType,SrcType>, dstID, srcID, _1 ); + } + + + + + + template<typename T, uint_t fs> + void fieldCpy( cuda::GPUField<T> & dst, const field::Field<T,fs> & src ); + + + + template<typename T, uint_t fs> + void fieldCpy( field::Field<T,fs> & dst, const cuda::GPUField<T> & src ); + + + + + //=================================================================================================================== + // + // Implementation + // + //=================================================================================================================== + + + + + template<typename T, uint_t fs> + void fieldCpy( cuda::GPUField<T> & dst, const field::Field<T,fs> & src ) + { + cudaMemcpy3DParms p; + memset( &p, 0, sizeof(p) ); + + + if ( dst.layout() != src.layout() ) { + WALBERLA_ABORT( "Cannot copy fields with different layout" ); + } + + bool canCopy = ( src.layout() == fzyx && + dst.fAllocSize() == src.fAllocSize() && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() ) + || + ( src.layout() == zyxf && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() && + dst.xAllocSize() == src.xAllocSize() ); + + if ( !canCopy ) { + WALBERLA_ABORT("Field have to have the same size "); + } + + if ( dst.layout() == fzyx ) + { + p.srcPtr = make_cudaPitchedPtr( (void*)(src.data()), // pointer + sizeof(T) * src.xAllocSize(), // pitch + src.xAllocSize(), // inner dimension size + src.yAllocSize() ); // next outer dimension size + + p.extent.width = src.xAllocSize() * sizeof(T); + p.extent.height = src.yAllocSize(); + p.extent.depth = src.zAllocSize() * src.fAllocSize(); + } + else + { + p.srcPtr = make_cudaPitchedPtr( (void*)(src.data()), // pointer + sizeof(T) * src.fAllocSize(), // pitch + src.fAllocSize(), // inner dimension size + src.xAllocSize() ); // next outer dimension size + + p.extent.width = src.fAllocSize() * sizeof(T); + p.extent.height = src.xAllocSize(); + p.extent.depth = src.yAllocSize() * src.zAllocSize(); + } + + p.dstPtr = dst.pitchedPtr(); + p.kind = cudaMemcpyHostToDevice; + WALBERLA_CUDA_CHECK( cudaMemcpy3D( &p ) ); + } + + + + template<typename T, uint_t fs> + void fieldCpy( field::Field<T,fs> & dst, const cuda::GPUField<T> & src ) + { + cudaMemcpy3DParms p; + memset( &p, 0, sizeof(p) ); + + if ( dst.layout() != src.layout() ) { + WALBERLA_ABORT( "Cannot copy fields with different layout" ); + } + + bool canCopy = ( src.layout() == fzyx && + dst.fAllocSize() == src.fAllocSize() && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() ) + || + ( src.layout() == zyxf && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() && + dst.xAllocSize() == src.xAllocSize() ); + + if ( !canCopy ) { + WALBERLA_ABORT("Field have to have the same size "); + } + + if ( dst.layout() == fzyx ) + { + p.dstPtr = make_cudaPitchedPtr( (void*)(dst.data()), // pointer + sizeof(T) * dst.xAllocSize(), // pitch + dst.xAllocSize(), // inner dimension size + dst.yAllocSize() ); // next outer dimension size + + p.extent.width = dst.xAllocSize() * sizeof(T); + p.extent.height = dst.yAllocSize(); + p.extent.depth = dst.zAllocSize() * dst.fAllocSize(); + } + else + { + p.dstPtr = make_cudaPitchedPtr( (void*)(dst.data()), // pointer + sizeof(T) * dst.fAllocSize(), // pitch + dst.fAllocSize(), // inner dimension size + dst.xAllocSize() ); // next outer dimension size + + p.extent.width = dst.fAllocSize() * sizeof(T); + p.extent.height = dst.xAllocSize(); + p.extent.depth = dst.yAllocSize() * dst.zAllocSize(); + } + + p.srcPtr = src.pitchedPtr(); + p.kind = cudaMemcpyDeviceToHost; + WALBERLA_CUDA_CHECK( cudaMemcpy3D( &p ) ); + + } + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexing.cpp b/src/cuda/FieldIndexing.cpp new file mode 100644 index 000000000..a61e50a0d --- /dev/null +++ b/src/cuda/FieldIndexing.cpp @@ -0,0 +1,225 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file SimpleFieldIndexing.cpp +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "FieldIndexing.h" +#include "GPUTypesExplicitInstantiation.h" +#include "GPUField.h" + +#include "core/cell/CellInterval.h" +#include "core/debug/Debug.h" +#include "core/logging/Logging.h" +#include "field/Layout.h" + +#include <cuda_runtime.h> + +#include <limits> +#include <cmath> + +namespace walberla { +namespace cuda { + +template< typename T> +FieldIndexing<T>::FieldIndexing ( const GPUField<T> & field, + uint3 _blockDim, uint3 _gridDim, + const FieldAccessor<T> _gpuAccess ) + : field_( field ), + blockDim_( _blockDim ), + gridDim_( _gridDim ), + gpuAccess_( _gpuAccess ) +{ + WALBERLA_DEBUG_SECTION() + { + cudaDeviceProp prop; + int count; + cudaGetDeviceCount(&count); + int threadsPerBlock = std::numeric_limits<int>::max(); + for (int i = 0; i < count; i++) { + cudaGetDeviceProperties(&prop, i); + threadsPerBlock = std::min( prop.maxThreadsPerBlock, threadsPerBlock ); + } + WALBERLA_ASSERT_LESS( int_c( blockDim_.x ), threadsPerBlock, + "InnerCoordThreadIndexing works only for fields where each dimension x,y,z is smaller " << + "than the maximal thread count per CUDA block." ); + } +} + + +template< typename T> +void shiftCoordinatesWhileFastestCoordHasSizeOne( typename FieldAccessor<T>::IndexingScheme & indexing, dim3 & gridDim, dim3 & blockDim ) +{ + bool runLoop = true; + while( blockDim.x == 1 && runLoop ) + { + blockDim.x = gridDim.x; + gridDim.x = gridDim.y; + gridDim.y = gridDim.z; + gridDim.z = 1; + + + switch ( indexing ) { + case FieldAccessor<T>::FZYX: indexing = FieldAccessor<T>::FZY; break; + case FieldAccessor<T>::FZY : indexing = FieldAccessor<T>::FZ ; break; + case FieldAccessor<T>::FZ : indexing = FieldAccessor<T>::F ; break; + + case FieldAccessor<T>::ZYXF: indexing = FieldAccessor<T>::ZYX ; break; + case FieldAccessor<T>::ZYX : indexing = FieldAccessor<T>::ZY ; break; + case FieldAccessor<T>::ZY : indexing = FieldAccessor<T>::Z ; break; + + // iteration goes only over a single element - stop the loop + case FieldAccessor<T>::Z: runLoop = false; break; + case FieldAccessor<T>::F: runLoop = false; break; + } + } +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::interval ( const GPUField<T> & f, const CellInterval & ci, int fBegin, int fEnd ) +{ + unsigned int xOffset, yOffset, zOffset, fOffset; + + if ( f.layout() == field::zyxf ) + { + fOffset = sizeof(T); + xOffset = uint32_c( f.pitchedPtr().pitch ); + yOffset = xOffset * uint32_c( f.xAllocSize() ); + zOffset = yOffset * uint32_c( f.yAllocSize() ); + } + else + { + xOffset = sizeof(T); + yOffset = uint32_c( f.pitchedPtr().pitch ); + zOffset = yOffset * uint32_c( f.yAllocSize() ); + fOffset = zOffset * uint32_c( f.zAllocSize() ); + } + char * data = (char*)f.pitchedPtr().ptr; + + // Jump over ghost cells to first inner cell + cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); + data += ( ci.xMin() + gl )* int_c(xOffset) + + ( ci.yMin() + gl )* int_c(yOffset) + + ( ci.zMin() + gl )* int_c(zOffset); + + + dim3 gridDim; + dim3 blockDim; + typename FieldAccessor<T>::IndexingScheme firstCoord; + if ( f.layout() == fzyx ) + { + firstCoord = FieldAccessor<T>::FZYX; + blockDim = dim3 ( (unsigned int)ci.xSize(), 1, 1 ); + gridDim = dim3 ( (unsigned int)ci.ySize(), (unsigned int)ci.zSize(), (unsigned int)( fEnd - fBegin) ); + } + else + { + firstCoord = FieldAccessor<T>::ZYXF; + blockDim = dim3 ( (unsigned int)(fEnd - fBegin), 1, 1 ); + gridDim = dim3 ( (unsigned int)ci.xSize(), (unsigned int)ci.ySize(), (unsigned int)ci.zSize() ); + } + + shiftCoordinatesWhileFastestCoordHasSizeOne<T>( firstCoord, gridDim, blockDim ); + + return FieldIndexing<T> ( f, blockDim, gridDim, + FieldAccessor<T>( data, xOffset, yOffset, zOffset, fOffset, firstCoord ) ); +} + + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::xyz ( const GPUField<T> & f ) +{ + CellInterval ci ( 0,0,0, + cell_idx_c( f.xSize() ) - 1, + cell_idx_c( f.ySize() ) - 1, + cell_idx_c( f.zSize() ) - 1 ); + + return interval( f, ci, 0,1 ); +} + + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::withGhostLayerXYZ( const GPUField<T> & f, uint_t numGhostLayers ) +{ + cell_idx_t gl = std::min( cell_idx_c(numGhostLayers), cell_idx_c( f.nrOfGhostLayers() ) ); + CellInterval ci ( -gl,-gl,-gl, + cell_idx_c( f.xSize() ) + gl - 1, + cell_idx_c( f.ySize() ) + gl - 1, + cell_idx_c( f.zSize() ) + gl - 1 ); + + return interval( f, ci, 0, 1 ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::ghostLayerOnlyXYZ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice ) +{ + CellInterval ci; + f.getGhostRegion( dir, ci, cell_idx_c(thickness), fullSlice ); + return interval( f, ci, 0, 1 ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::sliceBeforeGhostLayerXYZ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice ) +{ + CellInterval ci; + f.getSliceBeforeGhostLayer( dir, ci, cell_idx_c(thickness), fullSlice ); + return interval( f, ci, 0, 1 ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::allInner ( const GPUField<T> & f ) +{ + CellInterval ci ( 0,0,0, + cell_idx_c( f.xSize() ) - 1, + cell_idx_c( f.ySize() ) - 1, + cell_idx_c( f.zSize() ) - 1 ); + + return interval( f, ci, 0, cell_idx_c( f.fSize() ) ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::allWithGhostLayer ( const GPUField<T> & f ) +{ + cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); + CellInterval ci ( -gl,-gl,-gl, + cell_idx_c( f.xSize() ) + gl - 1, + cell_idx_c( f.ySize() ) + gl - 1, + cell_idx_c( f.zSize() ) + gl - 1 ); + + return interval( f, ci, 0, cell_idx_c( f.fSize() ) ); + +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::all ( const GPUField<T> & f, const cell::CellInterval & ci ) +{ + return interval( f, ci, 0, cell_idx_c( f.fSize() ) ); +} + + + + +GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing ) + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexing.h b/src/cuda/FieldIndexing.h new file mode 100644 index 000000000..437b03453 --- /dev/null +++ b/src/cuda/FieldIndexing.h @@ -0,0 +1,92 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file SimpleFieldIndexing.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +//! \brief Indexing Scheme that executes all elements of inner coordinate within on thread block +// +//====================================================================================================================== + +#pragma once + +#include "FieldAccessor.h" + +#include "stencil/Directions.h" +#include <cuda_runtime.h> + +namespace walberla { namespace cell { class CellInterval; } } + +namespace walberla { +namespace cuda { + + // Forward Declarations + template< typename T> class GPUField; + + template<typename T> + class FieldIndexing + { + public: + + //** Kernel call ****************************************************************************************** + /*! \name Kernel call */ + //@{ + uint3 blockDim() const { return blockDim_; } + uint3 gridDim () const { return gridDim_; } + + const FieldAccessor<T> & gpuAccess() const { return gpuAccess_; } + //@} + //**************************************************************************************************************** + + + + + //** Creation ********************************************************************************************* + /*! \name Creation */ + //@{ + static FieldIndexing<T> interval ( const GPUField<T> & f, + const cell::CellInterval & ci, + int fBegin=0, int fEnd=1 ); + + + static FieldIndexing<T> xyz ( const GPUField<T> & f ); + static FieldIndexing<T> withGhostLayerXYZ ( const GPUField<T> & f, uint_t numGhostLayers ); + static FieldIndexing<T> ghostLayerOnlyXYZ ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice = false ); + static FieldIndexing<T> sliceBeforeGhostLayerXYZ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice = false ); + + static FieldIndexing<T> allInner ( const GPUField<T> & f ); + static FieldIndexing<T> allWithGhostLayer ( const GPUField<T> & f ); + static FieldIndexing<T> all ( const GPUField<T> & f, const cell::CellInterval & ci ); + //@} + //**************************************************************************************************************** + + protected: + FieldIndexing ( const GPUField<T> & field, + uint3 _blockDim, uint3 _gridDim, + const FieldAccessor<T> _gpuAccess ); + + const GPUField<T> & field_; + uint3 blockDim_; + uint3 gridDim_; + FieldAccessor<T> gpuAccess_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexingXYZ.cpp b/src/cuda/FieldIndexingXYZ.cpp new file mode 100644 index 000000000..e1bb2cbb2 --- /dev/null +++ b/src/cuda/FieldIndexingXYZ.cpp @@ -0,0 +1,123 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldIndexingXYZ.cpp +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "FieldIndexingXYZ.h" +#include "GPUTypesExplicitInstantiation.h" +#include "GPUField.h" + +#include "core/cell/CellInterval.h" +#include "core/debug/Debug.h" + +namespace walberla { +namespace cuda { + + +template< typename T> +FieldIndexingXYZ<T>::FieldIndexingXYZ ( const GPUField<T> & field, + uint3 _blockDim, uint3 _gridDim, + const FieldAccessorXYZ<T> _gpuAccess ) + : field_( field ), + blockDim_( _blockDim ), + gridDim_( _gridDim ), + gpuAccess_( _gpuAccess ) +{ + WALBERLA_DEBUG_SECTION() + { + cudaDeviceProp prop; + int count; + cudaGetDeviceCount(&count); + int threadsPerBlock = std::numeric_limits<int>::max(); + for (int i = 0; i < count; i++) { + cudaGetDeviceProperties(&prop, i); + threadsPerBlock = std::min( prop.maxThreadsPerBlock, threadsPerBlock ); + } + WALBERLA_ASSERT_LESS( int_c( blockDim_.x ), threadsPerBlock, + "InnerCoordThreadIndexing works only for fields where each dimension x,y,z is smaller " << + "than the maximal thread count per CUDA block." ); + } +} + +template< typename T> +FieldIndexingXYZ<T> FieldIndexingXYZ<T>::interval ( const GPUField<T> & f, const CellInterval & ci ) +{ + size_t xOffset, yOffset, zOffset, fOffset; + + if ( f.layout() == field::zyxf ) + { + fOffset = sizeof(T); + xOffset = f.pitchedPtr().pitch; + yOffset = xOffset * f.xSize(); + zOffset = yOffset * f.ySize(); + } + else + { + xOffset = sizeof(T); + yOffset = f.pitchedPtr().pitch; + zOffset = yOffset * f.ySize(); + fOffset = zOffset * f.zSize(); + } + char * data = (char*)f.pitchedPtr().ptr; + + // Jump over ghost cells to first inner cell + cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); + data += ( ci.xMin() + gl )* int_c(xOffset) + + ( ci.yMin() + gl )* int_c(yOffset) + + ( ci.zMin() + gl )* int_c(zOffset); + + dim3 gridDim ( (unsigned int)ci.xSize(), 1, 1 ); + dim3 blockDim( (unsigned int)ci.ySize(), (unsigned int)ci.zSize(), 1 ); + return FieldIndexingXYZ<T> ( f, gridDim, blockDim, + FieldAccessorXYZ<T>( data, xOffset, yOffset, zOffset, fOffset ) ); +} + + +template< typename T> +FieldIndexingXYZ<T> FieldIndexingXYZ<T>::xyz ( const GPUField<T> & f ) +{ + CellInterval ci ( 0,0,0, + cell_idx_c( f.xSize() ) - 1, + cell_idx_c( f.ySize() ) - 1, + cell_idx_c( f.zSize() ) - 1 ); + + return interval( f, ci ); +} + + +template< typename T> +FieldIndexingXYZ<T> FieldIndexingXYZ<T>::withGhostLayerXYZ( const GPUField<T> & f, uint_t numGhostLayers ) +{ + cell_idx_t gl = std::min( cell_idx_c(numGhostLayers), cell_idx_c( f.nrOfGhostLayers() ) ); + CellInterval ci ( -gl,-gl,-gl, + cell_idx_c( f.xSize() ) + gl - 1, + cell_idx_c( f.ySize() ) + gl - 1, + cell_idx_c( f.zSize() ) + gl - 1 ); + + return interval( f, ci ); +} + + +GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexingXYZ ) + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexingXYZ.h b/src/cuda/FieldIndexingXYZ.h new file mode 100644 index 000000000..19d3f98c4 --- /dev/null +++ b/src/cuda/FieldIndexingXYZ.h @@ -0,0 +1,79 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldIndexingXYZ.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "FieldAccessorXYZ.h" +#include <cuda_runtime.h> + + +namespace walberla { namespace cell { class CellInterval; } } + + +namespace walberla { +namespace cuda { + +// Forward Declarations +template< typename T> class GPUField; + + + template<typename T> + class FieldIndexingXYZ + { + public: + + //** Kernel call ****************************************************************************************** + /*! \name Kernel call */ + //@{ + uint3 blockDim() const { return blockDim_; } + uint3 gridDim () const { return gridDim_; } + + const FieldAccessorXYZ<T> & gpuAccess() const { return gpuAccess_; } + //@} + //**************************************************************************************************************** + + + //** Creation ********************************************************************************************* + /*! \name Creation */ + //@{ + + static FieldIndexingXYZ<T> interval ( const GPUField<T> & f, const cell::CellInterval & ci ); + + + static FieldIndexingXYZ<T> xyz ( const GPUField<T> & f ); + static FieldIndexingXYZ<T> withGhostLayerXYZ ( const GPUField<T> & f, uint_t numGhostLayers ); + //@} + //**************************************************************************************************************** + + protected: + FieldIndexingXYZ<T> ( const GPUField<T> & field, uint3 _blockDim, uint3 _gridDim, const FieldAccessorXYZ<T> _gpuAccess ); + + const GPUField<T> & field_; + uint3 blockDim_; + uint3 gridDim_; + FieldAccessorXYZ<T> gpuAccess_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/GPUField.cpp b/src/cuda/GPUField.cpp new file mode 100644 index 000000000..326e5c0b7 --- /dev/null +++ b/src/cuda/GPUField.cpp @@ -0,0 +1,187 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file GPUField.cpp +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "GPUField.h" +#include "ErrorChecking.h" +#include "GPUTypesExplicitInstantiation.h" + +#include "core/logging/Logging.h" + +namespace walberla { +namespace cuda { + + +template<typename T> +GPUField<T>::GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, + uint_t _nrOfGhostLayers, const Layout & _layout ) + : nrOfGhostLayers_( _nrOfGhostLayers ), + xSize_( _xSize), ySize_( _ySize ), zSize_( _zSize ), fSize_( _fSize ), + layout_( _layout ) +{ + cudaExtent extent; + if ( layout_ == zyxf ) + { + extent.width = _fSize * sizeof(T); + extent.height = (_xSize + 2 * _nrOfGhostLayers ); + extent.depth = (_ySize + 2 * _nrOfGhostLayers ) * ( _zSize + 2 * _nrOfGhostLayers ); + } + else + { + extent.width = (_xSize + 2 * _nrOfGhostLayers ) * sizeof(T); + extent.height = (_ySize + 2 * _nrOfGhostLayers ); + extent.depth = (_zSize + 2 * _nrOfGhostLayers ) * _fSize; + } + + WALBERLA_CUDA_CHECK ( cudaMalloc3D ( &pitchedPtr_, extent ) ); +} + + +template<typename T> +GPUField<T>::~GPUField() +{ + cudaFree( pitchedPtr_.ptr ); +} + + +template<typename T> +void GPUField<T>::getGhostRegion(stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const +{ + const cell_idx_t sizeArr [] = { cell_idx_c( xSize() ), + cell_idx_c( ySize() ), + cell_idx_c( zSize() )}; + + WALBERLA_ASSERT_GREATER( thickness, 0 ); + WALBERLA_ASSERT_LESS_EQUAL( uint_c(thickness), nrOfGhostLayers() ); + const cell_idx_t ghosts = cell_idx_c ( thickness ); + + cell_idx_t fullSliceInc = fullSlice ? cell_idx_c( nrOfGhostLayers() ) : 0; + + for(unsigned int dim = 0; dim< 3; ++dim) + switch ( stencil::c[dim][d] ) + { + case -1: ci.min()[dim] = -ghosts; ci.max()[dim] = 0 - 1; break; + case 0: ci.min()[dim] = -fullSliceInc; ci.max()[dim] = sizeArr[dim]+fullSliceInc - 1; break; + case 1: ci.min()[dim] = sizeArr[dim]; ci.max()[dim] = sizeArr[dim]+ghosts - 1; break; + } +} + + +template<typename T> +void GPUField<T>::getSliceBeforeGhostLayer(stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const +{ + WALBERLA_ASSERT_GREATER( thickness, 0 ); + + const cell_idx_t sizeArr [] = { cell_idx_c( xSize() ), + cell_idx_c( ySize() ), + cell_idx_c( zSize() )}; + + cell_idx_t fullSliceInc = fullSlice ? cell_idx_c( nrOfGhostLayers() ) : 0; + for(unsigned int dim = 0; dim< 3; ++dim) + switch ( stencil::c[dim][d] ) + { + case -1: ci.min()[dim] = 0; ci.max()[dim] = thickness - 1; break; + case 0: ci.min()[dim] = -fullSliceInc; ci.max()[dim] = sizeArr[dim] +fullSliceInc- 1; break; + case 1: ci.min()[dim] = sizeArr[dim]-thickness; ci.max()[dim] = sizeArr[dim] - 1; break; + } +} + +//******************************************************************************************************************* +/*! Implementation of equality operator, GPUField are equal if identical +* +* operator== is required in order to store GPUFields as block data. +* Implementing a correct equality check would require to call a kernel, +* which should be done manually. +* Therefore only identical GPUFields are considered equal. +*/ +//******************************************************************************************************************* +template<typename T> +bool GPUField<T>::operator==( const GPUField & o ) const +{ + return pitchedPtr_.ptr == o.pitchedPtr_.ptr && + nrOfGhostLayers_ == o.nrOfGhostLayers_ && + xSize_ == o.xSize_ && + ySize_ == o.ySize_ && + zSize_ == o.zSize_ && + fSize_ == o.fSize_ && + layout_ == o.layout_ ; +} + + + +template<typename T> +uint_t GPUField<T>::xAllocSize() const +{ + if ( layout_ == field::fzyx ) + { + // allocation size is stored in pitched pointer + // pitched pointer stores the amount of padded region in bytes + // but we have to return the size in #elements + WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); + return pitchedPtr_.pitch / sizeof(T); + } + return xSize_ + 2 * nrOfGhostLayers_; +} + +template<typename T> +uint_t GPUField<T>::yAllocSize() const +{ + return ySize_ + 2 * nrOfGhostLayers_; +} + +template<typename T> +uint_t GPUField<T>::zAllocSize() const +{ + return zSize_ + 2 * nrOfGhostLayers_; +} + +template<typename T> +uint_t GPUField<T>::fAllocSize() const +{ + if ( layout_ == field::zyxf ) + { + WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); + return pitchedPtr_.pitch / sizeof(T); + } + return fSize_; +} + +template<typename T> +void GPUField<T>::swapDataPointers( GPUField<T> & other ) +{ + WALBERLA_ASSERT_EQUAL( xAllocSize(), other.xAllocSize() ); + WALBERLA_ASSERT_EQUAL( yAllocSize(), other.yAllocSize() ); + WALBERLA_ASSERT_EQUAL( zAllocSize(), other.zAllocSize() ); + WALBERLA_ASSERT_EQUAL( fAllocSize(), other.fAllocSize() ); + WALBERLA_ASSERT_EQUAL( layout(), other.layout() ); + std::swap( pitchedPtr_, other.pitchedPtr_ ); +} + + + +GPU_CLASS_TEMPLATE_INSTANTIATION( GPUField ) + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h new file mode 100755 index 000000000..52cc002b3 --- /dev/null +++ b/src/cuda/GPUField.h @@ -0,0 +1,122 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file GPUField.h +//! \ingroup moduleName +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "field/Layout.h" +#include "stencil/Directions.h" + +#include <cuda_runtime.h> + + + +namespace walberla { +namespace cuda { + + using field::Layout; + using field::fzyx; + using field::zyxf; + + + //******************************************************************************************************************* + /*! GhostLayerField stored on a CUDA GPU + * + * Basically a wrapper around a CUDA device pointer together with size information about the field + * i.e. sizes in x,y,z,f directions and number of ghost layers. + * + * Internally represented by a cudaPitchedPtr which is allocated with cudaMalloc3D to take padding of the + * innermost coordinate into account. + * + * Supports Array-of-Structures (AoS,zyxf) layout and Structure-of-Arrays (SoA, fzyx) layout, in a similiar way + * to field::Field + * + * To work with the GPUField look at the cuda::fieldCpy functions to transfer a field::Field to a cuda::GPUField + * and vice versa. + * When writing CUDA kernels for GPUFields have a look at the FieldIndexing and FieldAccessor concepts. + * These simplify the "iteration" i.e. indexing of cells in GPUFields. + */ + //******************************************************************************************************************* + template<typename T> + class GPUField + { + public: + typedef T value_type; + + GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, + uint_t _nrOfGhostLayers, const Layout & _layout = zyxf ); + + ~GPUField(); + + Layout layout() const { return layout_; } + + cudaPitchedPtr pitchedPtr() const { return pitchedPtr_; } + + + inline uint_t xSize() const { return xSize_; } + inline uint_t ySize() const { return ySize_; } + inline uint_t zSize() const { return zSize_; } + inline uint_t fSize() const { return fSize_; } + + cell_idx_t xOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + cell_idx_t yOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + cell_idx_t zOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + + + uint_t xAllocSize() const; + uint_t yAllocSize() const; + uint_t zAllocSize() const; + uint_t fAllocSize() const; + + + void swapDataPointers( GPUField<T> & other ); + void swapDataPointers( GPUField<T> * other ) { swapDataPointers( *other); } + + + inline uint_t nrOfGhostLayers() const { return nrOfGhostLayers_; } + + bool operator==( const GPUField & other ) const; + + void getGhostRegion( stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const; + void getSliceBeforeGhostLayer(stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const; + + + void * data() { return pitchedPtr_.ptr; } + const void * data() const { return pitchedPtr_.ptr; } + + protected: + cudaPitchedPtr pitchedPtr_; + uint_t nrOfGhostLayers_; + uint_t xSize_; + uint_t ySize_; + uint_t zSize_; + uint_t fSize_; + Layout layout_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/GPUTypesExplicitInstantiation.h b/src/cuda/GPUTypesExplicitInstantiation.h new file mode 100644 index 000000000..bdc4b5846 --- /dev/null +++ b/src/cuda/GPUTypesExplicitInstantiation.h @@ -0,0 +1,8 @@ +#define GPU_CLASS_TEMPLATE_INSTANTIATION(ClassName)\ + template class ClassName< double >;\ + template class ClassName< float >;\ + template class ClassName< int >;\ + template class ClassName< uint8_t >;\ + template class ClassName< uint16_t >; + + diff --git a/src/cuda/HostFieldAllocator.h b/src/cuda/HostFieldAllocator.h new file mode 100644 index 000000000..0c08bfadb --- /dev/null +++ b/src/cuda/HostFieldAllocator.h @@ -0,0 +1,81 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CudaHostFieldAllocator.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +//! \brief Allocator that allocates a CPU! field using cudaHostAlloc +// +//====================================================================================================================== + +#pragma once + +#include "ErrorChecking.h" +#include "field/allocation/FieldAllocator.h" + +#include <cuda_runtime.h> + + +namespace walberla { +namespace cuda { + + + //******************************************************************************************************************* + /*! + * Allocator that allocates a CPU! field using cudaHostAlloc without padding + * + * Uses cudaHostAlloc for the allocation - which allocates page-locked memory that is faster to transfer to the GPU + * This allocator should be used for CPU fields that are often transfered to GPU and back + * + * \ingroup cuda + * + */ + //******************************************************************************************************************* + template<typename T, unsigned int cudaHostAllocFlags = cudaHostAllocDefault> + class HostFieldAllocator : public field::FieldAllocator<T> + { + public: + virtual ~HostFieldAllocator() {} + + virtual T * allocateMemory ( uint_t size0, uint_t size1, uint_t size2, uint_t size3, + uint_t & allocSize1, uint_t & allocSize2, uint_t & allocSize3 ) + { + allocSize1=size1; + allocSize2=size2; + allocSize3=size3; + void * result; + WALBERLA_CUDA_CHECK( cudaHostAlloc( &result, size0*size1*size2*size3*sizeof(T), cudaHostAllocFlags ) ); + return (T*)(result); + } + + virtual T * allocateMemory ( uint_t size ) + { + T* result; + cudaHostAlloc( &result, size*sizeof(T), cudaHostAllocFlags ); + return result; + } + + virtual void deallocate(T *& values) { + WALBERLA_CUDA_CHECK( cudaFreeHost( values ) ); + } + }; + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/Kernel.h b/src/cuda/Kernel.h new file mode 100644 index 000000000..3b6acf0a1 --- /dev/null +++ b/src/cuda/Kernel.h @@ -0,0 +1,294 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Kernel.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/Abort.h" +#include "core/debug/Debug.h" + +#include "ErrorChecking.h" + +#include <cuda_runtime.h> +#include <boost/type_traits.hpp> +#include <vector> + + + +namespace walberla { +namespace cuda { + + + //******************************************************************************************************************* + /*! Wrapper class around a CUDA kernel, to call kernels also from code not compiled with nvcc + * + * Example: + * \code + // Declaration of kernel, implementation has to be in a file compiled with nvcc + void kernel_func ( double * inputData, int size ); + + auto kernel = make_kernel( kernel_func ); + kernel.addParam<double*> ( argument1 ); + kernel.addParam<int> ( 20 ); + kernel.configure( dim3( 3,3,3), dim3( 4,4,4) ); + kernel(); + // this code is equivalent to: + kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >> ( argument1, 20 ); + * \endcode + * + * Why use this strange wrapper class instead of the nice kernel call syntax "<<<griddim, blockdim >>>" ?? + * - This syntax is nice but has to be compiled with nvcc, which does not (yet) understand C++11 + * - C++11 features are used all over the place in waLBerla code + * - all *.cu files and headers included in *.cu files have to be "C++11 free" + * - thus there should be as few code as possible in *.cu files + * + * Drawbacks of this class compared to kernel call syntax: + * Type checking of parameters can only be done at runtime (is done only in Debug mode!). + * Consider the following example: + * \code + // Declaration of kernel, implementation has to be in a file compiled with nvcc + void kernel_func ( double * inputData, int size ); + + auto kernel = make_kernel( kernel_func ); + kernel.addParam<float*> ( argument1 ); + kernel.addParam<unsigned int> ( 40 ); + kernel.configure( dim3( 3,3,3), dim3( 4,4,4) ); + kernel(); + // this code is equivalent to: + kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >> ( argument1, 20 ); + * \endcode + * The parameter types of the kernel and the parameters added at the cuda::Kernel class do not match. + * This is only detected when the code is run and was compiled in DEBUG mode! + * + * + * Advantages of this class compared to kernel call syntax: Integrates nicely with waLBerlas field indexing and + * accessor concepts: + * \code + void kernel_func( cuda::SimpleFieldAccessor<double> f ); + + auto myKernel = cuda::make_kernel( &kernel_double ); + myKernel.addFieldIndexingParam( cuda::SimpleFieldIndexing<double>::xyz( gpuField ) ); + myKernel(); + * \endcode + * When using at least one FieldIndexingParameter configure() does not have to be called, since the thread and grid + * setup is done by the indexing scheme. If two FieldIndexingParameters are passed, the two indexing schemes have to + * be consistent. + */ + //******************************************************************************************************************* + template<typename FuncPtr> + class Kernel + { + public: + Kernel( FuncPtr funcPtr ); + + template<typename T> void addParam( const T & param ); + template<typename T> void addFieldIndexingParam( const T & indexing ); + + + void configure( dim3 gridDim, dim3 blockDim ); + void operator() () const; + + + protected: + template<typename T> size_t determineNextOffset(); + + + //** Members ********************************************************************************************** + /*! \name Members */ + //@{ + FuncPtr funcPtr_; + + bool configured_; + dim3 gridDim_; + dim3 blockDim_; + + struct ParamInfo { + std::vector<char> data; + size_t offset; + }; + std::vector< ParamInfo > params_; + //@} + //**************************************************************************************************************** + + + //** Type checking of parameters ********************************************************************************** + /*! \name Type checking of parameters */ + //@{ + typedef typename boost::remove_pointer<FuncPtr>::type FuncType; + + #define CHECK_PARAMETER_FUNC( Number ) \ + template<typename T> \ + bool checkParameter##Number( typename boost::enable_if_c< (boost::function_traits<FuncType>::arity >= Number ), T >::type * = 0 ) { \ + return boost::is_same< T, typename boost::function_traits<FuncType>::arg##Number##_type >::value; \ + } \ + template<typename T> \ + bool checkParameter##Number( typename boost::disable_if_c< (boost::function_traits<FuncType>::arity >= Number ),T >::type * = 0 ) { \ + return false; \ + } + + CHECK_PARAMETER_FUNC(1) + CHECK_PARAMETER_FUNC(2) + CHECK_PARAMETER_FUNC(3) + CHECK_PARAMETER_FUNC(4) + CHECK_PARAMETER_FUNC(5) + CHECK_PARAMETER_FUNC(6) + CHECK_PARAMETER_FUNC(7) + CHECK_PARAMETER_FUNC(8) + + template<typename T> bool checkParameter( uint_t n ); + //@} + //**************************************************************************************************************** + }; + + + template<typename FuncPtr> + Kernel<FuncPtr> make_kernel( FuncPtr funcPtr ) { + return Kernel<FuncPtr> ( funcPtr ); + } + + + + + + + + //=================================================================================================================== + // + // Implementation + // + //=================================================================================================================== + + template<typename FP> + Kernel<FP>::Kernel( FP funcPtr ) + : funcPtr_ ( funcPtr ), + configured_( false ) + {} + + template<typename FP> + template<typename T> + void Kernel<FP>::addParam( const T & param ) + { + ParamInfo paramInfo; + paramInfo.data.resize( sizeof(T) ); + std::memcpy ( &(paramInfo.data[0]), ¶m, sizeof(T) ); + paramInfo.offset = determineNextOffset<T>(); + + WALBERLA_ASSERT( checkParameter<T>( params_.size() +1 ), + "cuda::Kernel type mismatch of parameter " << params_.size() +1 ); + + params_.push_back( paramInfo ); + } + + + template<typename FP> + template<typename Indexing> + void Kernel<FP>::addFieldIndexingParam( const Indexing & indexing ) + { + configure( indexing.gridDim(), indexing.blockDim() ); + addParam( indexing.gpuAccess() ); + } + + template<typename FP> + void Kernel<FP>::configure( dim3 gridDim, dim3 blockDim ) + { + if ( ! configured_ ) + { + gridDim_ = gridDim; + blockDim_ = blockDim; + configured_ = true; + } + else + { + if ( gridDim.x != gridDim_.x || gridDim.y != gridDim_.y || gridDim.z != gridDim_.z || + blockDim.x != blockDim_.x || blockDim.y != blockDim_.y || blockDim.z != blockDim_.z ) + { + WALBERLA_ABORT( "Error when configuring cuda::Kernel: Inconsistent setup. " ); + } + } + } + + template<typename FP> + void Kernel<FP>::operator() () const + { + // check for correct number of parameter calls + + if ( params_.size() != boost::function_traits<FuncType>::arity ) { + WALBERLA_ABORT( "Error when calling cuda::Kernel - Wrong number of arguments. " << + "Expected " << boost::function_traits<FuncType>::arity << ", received " << params_.size() ); + } + + // set the number of blocks and threads, + WALBERLA_CUDA_CHECK( cudaConfigureCall( gridDim_, blockDim_ ) ); //TODO extend class to support streams + + // register all parameters + for( auto paramIt = params_.begin(); paramIt != params_.end(); ++paramIt ) { + const void * ptr = &(paramIt->data[0]); + WALBERLA_CUDA_CHECK( cudaSetupArgument( ptr, paramIt->data.size(), paramIt->offset ) ); + } + + // .. and launch the kernel + static_assert( sizeof(void *) == sizeof(void (*)(void)), + "object pointer and function pointer sizes must be equal" ); + // dirty casting trick to circumvent compiler warning + // essentially the next two lines are: cudaLaunch( funcPtr_ ); + void *q = (void*) &funcPtr_; + WALBERLA_CUDA_CHECK( cudaLaunch( (const char*) ( *static_cast<void **>(q) )) ); + } + + + template<typename FP> + template<typename T> + bool Kernel<FP>::checkParameter( uint_t n ) + { + switch (n) { + case 1: return checkParameter1<T>(); + case 2: return checkParameter2<T>(); + case 3: return checkParameter3<T>(); + case 4: return checkParameter4<T>(); + case 5: return checkParameter5<T>(); + case 6: return checkParameter6<T>(); + case 7: return checkParameter7<T>(); + case 8: return checkParameter8<T>(); + default: + WALBERLA_ABORT("Too many parameters passed to kernel"); + } + return false; + } + + + template<typename FP> + template<typename T> + size_t Kernel<FP>::determineNextOffset() + { + size_t currentOffset = 0; + if ( !params_.empty() ) + currentOffset = params_.back().offset + params_.back().data.size(); + + size_t alignment = __alignof( T ); + return (currentOffset + alignment-1) & ~(alignment-1); + } + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/doc/cuda.dox b/src/cuda/doc/cuda.dox new file mode 100644 index 000000000..96652834d --- /dev/null +++ b/src/cuda/doc/cuda.dox @@ -0,0 +1,80 @@ + +namespace walberla{ +/*! + +\page cudaPage Overview of waLBerla CUDA support + +\brief waLBerla CUDA concepts + + +\section cudaField Fields on GPU + + +\subsection cudaFieldOverview Creating GPU fields and copy them between host and device + + \code + // create a CPU field and a GPU field of same size and with same layout + GhostLayerField<double,4> h_f ( 16,20,30, 1, 42.0, field::fzyx ); + cuda::GPUField<double> d_f ( 16,20,30, 4, 1, field::fzyx ); + + cuda::fieldCpy( d_f, h_f ); // copy from host to device + some_kernel_wrapper( d_f ); // run some kernel + cuda::fieldCpy( h_f, d_f ); // copy field data back to host + \endcode + + Similarities and Differences of CPU and GPU field + - cuda::GPUField corresponds to field::GhostLayerField + - fSize is a template parameter for CPU fields and a normal parameter for GPUFields + - CPU field iterators correspond to FieldAccessors (see next section) + +\subsection cudaFieldAccess Writing CUDA kernels operating on GPUFields + + \image html cuda/doc/fieldAccess.png "Accessing fields in CUDA kernels" + + When writing a kernel that operates on a field, the first task is to distribute the data to CUDA threads and blocks. + We need a function $(blockIdx, threadIdx) \\rightarrow (x,y,z)$ or $(blockIdx, threadIdx) \\rightarrow (x,y,z,f)$. + The optimal mapping depends on many parameters: for example which layout the field has, the extends of each coordinate, + hardware parameters like warp-size, etc. + Thus this indexing function is abstracted. A few indexing strategies are already implemented which can be + substituted by custom strategies. + A indexing strategy consists of two classes: and somewhat complex Indexing class, which manages the + indexing on the host-side and a lightweight Accessor class, which is passed to the CUDA kernel. + + An indexing scheme is very similar to the iterator concept, it defines the bounds of the iteration, which is not necessarily the + complete field but could also be a certain sub-block, for example the ghost layer in a certain direction. + + + Lets start to write a simple kernel that doubles all values stored in a field: + \code + #include "cuda/FieldAccessor.h" + + __global__ void kernel_double( cuda::FieldAccessor<double> f ) + { + f.set( blockIdx, threadIdx ); + f.get() *= 2.0; + } + \endcode + We do not have to care about indexing, the cuda::FieldAccessor takes care of that. So this is a generic kernel that operates + on double fields. Using the cuda::FieldAccessor the current and neighboring values can be accessed and manipulated. + + This kernel can be called like this: + \code + cuda::FieldIndexing<double> indexing = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( field, 1, stencil::E, true ); + kernel_double<<< iter.gridDim(), iter.blockDim() >>> ( iter.gpuAccess() ); + \endcode + In the example above we only iterate over a slice of the field. Of course we can also iterate over the complete field, there are + various static member functions in a Indexing class to create certain iteration patterns. + The Indexing class encapsulates the information of how to launch the kernel (blockDim and gridDim) and holds the Accessor class that + is passed to the kernel. + + Two indexing strategies are currently provided: + - cuda::FieldIndexing and cuda::FieldAccessor (general, but slow ) + - cuda::FieldIndexingXYZ and cuda::FieldAccessorXYZ ( optimized for cell based iterating over bigger chunks, for fields where xSize bigger than warpSize ) + + \section cudaKernelWrapper Calling CUDA kernels from CPP files + \copydoc cuda::Kernel + + + +*/ +} diff --git a/src/cuda/doc/drawing.svg b/src/cuda/doc/drawing.svg new file mode 100644 index 000000000..4e356d3f3 --- /dev/null +++ b/src/cuda/doc/drawing.svg @@ -0,0 +1,285 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="744.09448819" + height="1052.3622047" + id="svg2" + version="1.1" + inkscape:version="0.48.4 r9939" + sodipodi:docname="drawing.svg"> + <defs + id="defs4"> + <marker + inkscape:stockid="Arrow1Lend" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow1Lend" + style="overflow:visible;"> + <path + id="path3859" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;" + transform="scale(0.8) rotate(180) translate(12.5,0)" /> + </marker> + <marker + inkscape:stockid="Arrow1Lstart" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow1Lstart" + style="overflow:visible"> + <path + id="path3856" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt" + transform="scale(0.8) translate(12.5,0)" /> + </marker> + <marker + inkscape:stockid="Arrow1Lend" + orient="auto" + refY="0" + refX="0" + id="Arrow1Lend-7" + style="overflow:visible"> + <path + inkscape:connector-curvature="0" + id="path3859-2" + d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" + style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" + transform="matrix(-0.8,0,0,-0.8,-10,0)" /> + </marker> + </defs> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="1.4" + inkscape:cx="310.32854" + inkscape:cy="651.81828" + inkscape:document-units="px" + inkscape:current-layer="layer1" + showgrid="true" + inkscape:window-width="1600" + inkscape:window-height="1180" + inkscape:window-x="2558" + inkscape:window-y="-3" + inkscape:window-maximized="1"> + <inkscape:grid + type="xygrid" + id="grid2987" /> + </sodipodi:namedview> + <metadata + id="metadata7"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + <rect + style="fill:#88bfff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757" + width="240" + height="60" + x="40" + y="53.076469" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="50" + y="92.362183" + id="text3759" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761" + x="50" + y="92.362183" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">GhostLayerField</tspan></text> + <rect + style="fill:#88bfff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-9" + width="240" + height="60" + x="40" + y="182.36218" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="50" + y="222.36218" + id="text3759-2" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-6" + x="50" + y="222.36218" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">cuda::GPUField</tspan></text> + <rect + style="fill:#c7ffea;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.71999997px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-0" + width="166.62854" + height="40.114277" + x="277.37146" + y="126.6479" + ry="7.1999998" /> + <text + xml:space="preserve" + style="font-size:28.79999924px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="300" + y="152.36218" + id="text3759-7" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-1" + x="300" + y="152.36218" + style="font-size:17.27999878px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">FieldCopy.h</tspan></text> + <path + style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow1Lstart);marker-end:url(#Arrow1Lend)" + d="m 150,182.36218 0,-70" + id="path3850" + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="160" + y="152.36218" + id="text4482" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484" + x="160" + y="152.36218" + style="font-size:11px">cpu-gpu transfer</tspan></text> + <rect + style="fill:#ffe693;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-9-2" + width="240" + height="60" + x="42.142857" + y="293.07648" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="90" + y="332.36218" + id="text3759-2-2" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-6-3" + x="90" + y="332.36218" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">*Indexing</tspan></text> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="293.57144" + y="316.64789" + id="text4482-4" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484-2" + x="293.57144" + y="316.64789" + style="font-size:11px">represents iteration over a field </tspan><tspan + sodipodi:role="line" + x="293.57144" + y="330.39789" + style="font-size:11px" + id="tspan4550"> i.e. complete field, only slice, only ghost layer..</tspan></text> + <path + style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:1,1;stroke-dashoffset:0" + d="m 150,292.36218 0,-50" + id="path4552" + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="81.428574" + y="275.21933" + id="text4482-6" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484-26" + x="81.428574" + y="275.21933" + style="font-size:11px"><<< can be created with >>></tspan></text> + <rect + style="fill:#ffe693;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-9-2-7" + width="240" + height="60" + x="42.14286" + y="403.07648" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="90" + y="442.36218" + id="text3759-2-2-5" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-6-3-3" + x="90" + y="442.36218" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">*Accessor</tspan></text> + <path + style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 1;stroke-dashoffset:0;marker-end:url(#Arrow1Lend)" + d="m 150,402.36218 0,-50" + id="path4552-9" + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="81.428574" + y="385.21933" + id="text4482-6-8" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484-26-6" + x="81.428574" + y="385.21933" + style="font-size:11px"><<< can be created with >>></tspan></text> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="290" + y="432.36218" + id="text4482-4-0" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + x="290" + y="432.36218" + style="font-size:11px" + id="tspan4550-2">class to be passed to GPU kernel</tspan><tspan + sodipodi:role="line" + x="290" + y="446.11218" + style="font-size:11px" + id="tspan5222">can access a single value or cell , supports neighbor access</tspan></text> + </g> +</svg> diff --git a/src/cuda/doc/fieldAccess.png b/src/cuda/doc/fieldAccess.png new file mode 100644 index 0000000000000000000000000000000000000000..d6dc372b737cea71bb0e4128f11a8bf72edafab3 GIT binary patch literal 46057 zcmbTe1yEIC*T=gF={O(~(jhHK2na|VQc@6*?viehMnVt)5fJGTDe3N%?v@UvyPNx* z_x-+mXYQRlbLXBJ2hZ92?0EKi*0a`s{niP4tt5ks^%x6+Alz57lBy7d90);3-56-# zljmeh4d4%|GyIhr26%a6n1+J)m=3a9&JcuefcS?r^=YODd`RXZrRnm<-rU9A$jJ<H zcX#J_XJ_qfV&q`PVee#-v?u%+f@q*ulHzI}$vgAzx-nhU=m$f=jixNfpHLi?=e+jS z-xg|EpH~##<`vG@tJfETm+N@GYlW*drTVr85&DM}!@dRbuXFoRVsZ+|7_Btk3&kXy zG3$G^*NilmPDArA;c}QJL9nNgOJzyLmYtdPV(~N1oI*J*OOq2+y1HV`SwRLCc6Q{W zGM-3e49G!4F2QQACNy+ofgt3oIIZMMHuCy7)k3P}%~_FVmDvS1Ty2-CgR^Z^9drwx zpjVRWw9h#xxTc2aS^p~dsvWSHd8&TSRx4POXK9YlzRi}Elzbkd&&?e4-T0l^v3x<Q z$uFjX`<}_B$od30eWus0XN!o<jf;l9ubsJ}k^W%2YOjNHm!zbmx;ztEMU}H=#%s;~ z<eM$5Bs`RyL@stZ?)aNzoSln3zu<uDseHBGwo=vJz_rM*(9rxyJ~AS>LaDQhmXNz^ zNnuQVd_p@Noip}yIiXH-Ldq}p?&BEMp9w-#baeTTvBY>}hDUqX;#sgYxCj{NAh9VG z<@AJmLoYV#db732V8j8Dp<gyT8dZBd3w5@Om}`+AL-V6qif@gX*x4B|X*&~zi2rja zT1l@wji~Pe=Q29Z=n|3>;W0@}olOVX5tS+J>;FAe2(lRekU8biPRd+AB{qSUfs zjFGRMGcD>l=@YvA03s~xK0C#k(@K7K8cDU6wVr$daS@G#V(89Ya7oFZIVEMK+fya# zZ?T=x;gS>a)do3mNN<1=oq!-AH4{A1;A#UV=Pq%{3C%3Nmz3=6g*X4R5niaFrS<24 zm5YGP!gia6i1=INT4Z2osDiT59ux{*-QM4E2nh%XCX5x(z%YhdaWMvh2OwEV8Y50e zN0+JnxrYX9R(aZK`UPyxB{ty(PgPPaoQsSBY|sDwzhD2hhUcHl*DKqEt8Nsq;E29n zS5mdAw}`MME-miwM3kRG(;s%rMSLvG!rtPu2?%*w-_qF>*?J|~$PJ5b%~=w)VPtHs zYg03t<3Dod{u1p%4lSuR6L|P-)W?&$;(jU9b*0pOX}JGa!zx9Y+9ovlgaOBA#|Ovz z!u3s%<v3??q?otnmxaTgV?XI?F6D$DQR>A}tpmsCBUOu%3Vs%WuVxtNG81z~dzN;| zNlH$WNMP^({m@T{@S!|OJV3ru_EO&u)XZb|FMCGKYG!06@vMFu>6(Dr_kogK>V|-a zcI{xTlMOlF)8q$`$5la~<(g;>tHiJ9Cf{FA$F0<Bw<6!)VV@)k7LN<&w|O&TA7^N= zC=%%Bd271{N>IA*9_x@qeQ&_8%zu6C*WZG{00y;G?nxjmliO1D`A@OzaqsErDaVh> z+f>Zm18uwH0o_AAEz76jOB26LYGZy}kNo!Ym1#+hvz*S?iSSJ^6?~(wljrkGqu^i- zHaJZ~M|Yfi!1Nj}S;9w9QR?2-zHcYlN0n9aPgM?;dNHKA$#ak)fRJqL=edXQMQ5Vw zw&tJdxIcr|+xEal;tM#y<tVBwn>uXsmuILFw~Y3p4S740;wHLJvXH>j!340J$)GM9 zi(#c?j^-lPm@IL2$#W6vT&o7wGhzBj4sEslgTr>45hRu*g1_<P!;?SYW_-eGRXYE! zo6_AJsm$RBqq01CT($AR4drJ`+i3py@wnBaHV!KBxx&6seEg8Wav|ZQDw&uwpXHt9 zH$_v<;AoO)X>coV)bU=pK&@AXT|2Smhz}}-p76;QleC@$Tr%|Ry|y<hM8nC3`!>>c zqF}MY<er;^Q!OZ#u(~)+=Hj1;tnz1RH&4R3hQv(YF*nQMOl;k4em)O<(*qXMpB=58 zg9N^MA>U?X9QAZQ3QDg(H)rgkqq|^OvD1_GH0Bk4Tpd!KCh9GO8wAT7KjZmo4_qA% zl&(HBQc_YOZKW%KjQ>l*%7*lp&F=D_3Q9rT_tSq1Y)s%ov`=Z}_C0PHZC_U#g*#Qp z#3XzG6CkwSm5im@zO6BUy5UwZLu<Vi9EQ)V+w(^(M!g5B3T;j_A$dqhVO<7rMm6uB z7moUdh32m1zUKBg9G2c)<)Ei7aDh2ibC_Hv-*pejjlEd#*gpS$dZ-~FF!kI+U&Bh6 z&zqN$9|eCiPHR@saHZkj0kvSeWiwe%b?IIsI)UTApjIbZ)Spd<Y&RbV#$Whu4m0i; zoEGWd{@B>oydIQnTpjXrr-+z6<9jS46=Wb1;!3R`W$yH=)hW0QnLhOj^){#RXZ#l_ z-4_9ReQV+v5A+vFk=8wNajSsY7@m@wM8`ybUWn1>{*IG6e~azLo)Y+>!KSf{cwTxb zgMX^6H9K`=ih#fcd3(VR1|95GF`(|~hc${eXH-A?yKk`2DdhKr)Ha7M()JQ9;M#9Y zNaQ9g>e)qfb>D<^cTdXn(VvIiO7i1u*5FB+H#B3~={LniPJC&*p}4v0QP6#H6@NCp zN!e#)^e$M7di!1l_)k;W;IFM4)<`p51A=eu9@zkk(P-i$RN7nE5ZTk3o_?{im8Tbq zBN7~%zA8|&hVt3(ecr<R_kT7o&Nc_Tfky*(PKow12)Hado+o&%UDdQ)TaWpCxgWmR ze6_*zK62sGs!}cqLtA^ct$G#WwHkXkNF^d0fi0u3@jfW^Pxwu;LZdTz<z~j%+NuAo zY$!%ZmbFhHw(M4o*W0a^+zdP~h!c8BOWd6T4dOE#?3d6<@TpBL80dFanro;^@V$IJ zDNFe|jb>)IT?TiBy@}86PSDhPk&_qC+0X_&#tf-xncV4^=*b`)JNKjY_@R~l!88!P zB)UuYrDiQ}tOQ&fun9EVKZOt7MeG}Fgw+-lw~WqI^-mFb7*e`%`r&HuogKW}!;g|K z`~vKBHe)?HmB_i;UL`@>g*=1D%Ez<Ii5dH-*XIuSqmuJ=eh6|U=aZRjdbkWc^w&%{ z0XU8^e8^~%NAH=tB1ZC@u;SzTSVFcnd>P4MM@Z}%>CJGBt-UG3V&}j1q{CUSH@(m4 zl)*BRVfw6egD|f#hMc|7hV@ZPVYtk2{yJ&@jbS*S5h?Gn;x8Wy!VgjQ?SJOkxK{F% zPu83%?@hw^2$Bz5i7R%6#C7<9X_RiAj$kx<<DmQ9V$4g6=DCM)iLlEG(FuTUTpnvW zo~2I~o8CEe6uh5Wwp?(nIlKHelqw0Ae5Sj_-&RA?oOhArNRqt$yyNesXx7*;*%daG zc~h`SQgi2^nX}{ak|$2$CF2(VyMm`~4n^TrK1@ra%ZeD&<-L~N9=^rqD$?y5^Gp9` z#dFwqMw@3OOYSS)avg03zh-8y_~*ko)QZn}DlWJ4l;Yk56HUS3^_sO{&6))7?lAt? zf_9E`m_ecH%Fdp_=hS#Gb-I_{NcU0dZne(WCoDML8}YbG`*2cdzTGo@c6p`<rYaSb zKKA7!*S|^w7s3T3e9o4m7bssSoKes<nxn#;{n+2u|9j+QT+c_~Z8+$@@!Y7Y>eu4< z`-{!EfJ7fsu*CPi&X^r}#EIdUuX9Pj2~2y{;7(&bPz~e3m4Yvs|C*|%Q@>)0GuE-Q zgU#_2<gf9vYlGnfWN5M<<jDCHDg70ZlI_$zO_R^+hcn0Z<tWn61K?8i3Dxv?t!A<3 zJB7_${#mE7I9Fi0Z2W0H?X8dUc?=;g&?r7~E6yVO$<o&5Jmhbel{U^2qiWoxI?$a9 z7Cq9_e!3J%7z8}-B;)kQ$*1g7zW(rxmep{#?SWG@P0h@%8jFZjcb)^-w}CyglkJwR z^S+&HyWKk@5il2OChb4JI{vmD;XUbi-kEDbf&vZhX$SPj3dEfGS<b^Du}0}a7eaB! z!RfeTNo)Umg(!$#YWD=mVpR(%pYrg=b8=r00k@#KU*N&nCiMQ&m0KVuKUG<NclD&h zGNr5|_x0`#+>bs?H*hTl#KTk&?2(gpal_vy6-y?wI8~0K58+7XXo({w6etC?Nv>3x zK8r)Udf9J%jqU|=)#bo)n>F@jQ_zeoY-Gl??)rNUP0%uR7I2Huu<ocbf641rrCaJm zgA6G;IJ4n9Gn12oUsAH}@`W19TP%v1bMH2Avbd_5AHUQnPzN4_Pw$@fiSehORxT7L zUs?rsuM}CQvfqP%t-eWtJqTHY&yBf%NJ`(WPXE1zRYPkubDjpd&CvA9gKNSt^TUQp z<KG4DNG**4X(O8R4c-Lq+uW-1<D8Qf5~2ci9yDxEk%XF(0<qDjvTU2fEa$B$ww_zP z<895eD@s`iY`JZweJ+B$iKzlY6o2+QDIIY;Vu*BZMg}9m&Lp*Ck!_(ZyFH2P?q7CE zmFw`gv}+ybu$uPtJkR(Id41php`MPoEI)BLvZ~e9H$p<yS<!T|0GCr6)h+9H(`B>s zjTdygSITXl&-veO%zXlWbn4DAOQYa(ldmkMwGKMbUo-j_Yd?GTxMNa@vMArV9`iK_ z{O;|<bDe99CiLoK@F65n8Bou75$L95$J`$!u91JYrKG%gMk=J^jLCnpvxxDsyn4LK zhB6tKinA7315C3&<24SuChK*$pR0B^v<-@Vdwi*V{T}+>d!TWDHHMLSV}TI0#NrZp zdRiz$asQC?;$S;>u|hBJFiBp?98060{L}0dN@Z8j>z2-iSH(r8?;Z4HL?Uks79_yf za8NJ&(j)V|?tIX((10O!Psjx^NPo1V-q+=;);(N!QD+@0e=GgZs&DRV=AGBA3?Dnk z)Qin9v#8JC-fDaQICDH4s-Xo@C?Z79RRFQp_GN{bv?@;77$Hse_^Y|O2JaE~UN(p~ z9=%TvjV<$UZc1#33`Mw{ylod)kot8VekjOsTb|jR$Ed&HR9&V2k`@!zg0c;BIZ+nA z+QhqXr83U!R+NwnTNL#qtu97y1EVQn-WZM-NcAE7BR&!Fy^nmeUlV#}Q7YF=WK2DY z_9;c+(&{X%#3GK5Ot{+pIW&-c#khODmGaif*RVP7*yW1%r|_#v7QV<P67coLJ75^B zQ~7!l`&lek{Cq2Tv8R3@50TA+_oRaW%MOj|gPd0AbP~z(?o`jXV_fg=<K}@_p09S~ zl$1SkVN>J97T(A2$i0|cD1;U3pD2LH4me<}^|K<lkuS&H)sa?1-ChW0R&hh@w9btD z;lp(f{yK>9DedUe;nzCgf)J}Vh(Z|_n=nbDq-z`;UPA4&Jy4<)_HWNfzoK#Z)8{HZ zQ;SARYP|b`JE>>;D~F31RvL+ijU?r&tNT&{l9DcI1;ZURlRll+)M>L>%@^tUamij9 zGiPM4xTcWO=iKR#c<BY+=wCme{KUA%gjjJTmcM=?VJ2Gp7h9j!%l!UbjqM^j&ScMk z!qX|NN#c2#wpx7&Sr$jTeJfp|M)E)J!d?SEcO0sPHgw1wjr04XL2cj0zTe=fx1pH{ zehY80K}N~2QoL*!E?c>8O5MWD$aL&1Kkg(;|B!e{G;R;@?sECBxu+{6{C0QiKxA6K zP7iY>=bc(u*u810MBA1OPM%!g9?Wr#s^80U-FKYnYHQ1kO}u}Sgh|lq8LpQP@9FdF z{oozwdN+R|RXd}~%s%Wo{t)5~mqvCgMujT2xMr3LFQ5V8N)QYo58E_&uO|;CDq#d@ zG<HHzr2QpY@MUeBUAc$L>Ev(ve;WoFjXU0lX^Xg%wJw%pdba6`AhmoO473GQs_$8~ zY_dwy+`b;AQLO|KpMT88g%4t|kqe#p9QZ;0+ZOkoz>B!_I+u@@tO^Y@NO>{wpQH-t z6i0k{C<Lpy=MSX*uA1*yWZ7;ubrsRqQ0y=tG7vr(gIJh1_ym8hO#I3^E6sRGR7^4Z z$wa4#!`%z4PMEYrvdW?GgRotww`pMMBEBgYhK!ip$Jh5{-qaITkBY$VyNgX~dzyPB zS!uU8-zKYMBcfW?q@rl~c_s`SlLGDySDAQlzs0F}Go@`5SP*!Kl@}_%Lle0Xga)3% zqU`SEa@b$eVxdoZzfO%82VqX>i$MQ14hls0>{sl8-BWG9(x+9pv^#|<V(TeZs7l9u z5qswJGf@5|hSX-3wR^$P-*`u>>!@zoPpu*NIjf_h<Rj^3Z%wm1X#f6I+GXvfjcO{W zevSn(NQM{=9X*&Xb-}x-!;xx~eyY+nPoBWySmSLPes;8Yz$D7S^9@8SE%WtcU+ot= z_NWbhw;G5B*>@q8u+HS0T|A_=l2OIgx}53et}ff;bwT^t@Us?M=4IM9%PD)p50Q6a zz==tnOz<3>QlpxQQ)PZoroZTL8@HXNAlBdu#m8pl@dXpga>2I7tu!joS+8R2dDXb{ z<I3G&k(l+R>2LN9H&IV8UX)UTJS!7qaA3_4Q$2B&L`@OXhMQLEM*Enr4~1gB%yDLK zwq#3QdN_NkA$p#6mOrScp!+h(@b4IQsr4UWUBL#-YLF~~oc8rl&J(cYGjz>~sdbYv zNb8OLDeL+kFh;V}ADQ+gJ|!|Dt-mAnA9lqohwJ03XIBcIS~<SqcJxcL?`Z7w1U`ej zWm#j17KAJ02|d*WE+?FS&2ri+eN6&qS%KCOr?zYggFs;YkOQPM=lI9b34|YW3oKkT zim_;iT~A_2Pj`%C>~E}14A7l5bP<NJ3=#+o!7C;(TzOBcon2agfI#xrp9O8Gcy$jK ztXY3ee{MvuBPRw~ZkX(E&lUBPV#`}2T<^~1JEWr2xiF9-1SQYqJ>BQI{HGct9#>Y< zFEY+LFZgi%sf@23aS{5C1jv2_Zzt?WiOWS6J}F{TKP{g(p6?PVR7AV|IKV!2ZoRWp zybNyf?rQ7BkB*RKkOZ7=3Dk^BBrUAsCfn*yp`0xrML$LB%PdX>542pc3y}fVXX`Dp z6u0E4^V{g(pnlDis4OUvFDoq$bh~AWBa8B(bZ@v=B?BrQaVA`eo}LAbOzPZ(uZr;U zP^!N4-7>qbNSV~SMGIIkqBv(FB(_D&5CV}IA@uhHu92A?hVJ9IG6<X+<juTgqlC+U zwW(8^c9-Yp7MJFwpaHtjh8-7y+V@+(2H6?2B{fHG@;oE7TYCn^Ebm9AkYQe{$SZ9h zmX>r$76xvY$Lc<|3jNsV)C=uiiV3ZC!y{uU1@;sK)~LD0qzv`q(tnRfYGcZ1@beA{ zfY>x;cRi9WWzjg&^K0B=H@WlZ*_p_^fFEq+?GPP^V9u26EXRK`X}eG(VasoGFRp&& zpqvRkrB(`rAv;e6ZOkG4WWRio0F3h^$e?6DH(;@mdkJ`5v=nHe^S)cG9#<LKsgT@d zwgH+&U!|gADUf&|)wYod6sm~X*lS@ip60P(k%UeU{&W61rk_Aq;sUosP4zh_)DR=X zzx*RUh<2rU?!|!!&{Qz(#PcSa^!=k=_<;B8^~|(a5<g-?u|$G_>3tz2M@yTWANe&# z19TADv&koa&409jRC=&6k8K4+!j6B>hxmy8{u9>C3jr$K4A(1e*>irDC*Oo82!LA9 zUflOu`1H50^GC@E93bpvCT`!=tUp%I`|@xW5&J>uQ>eGU`4GaMv%1wF&3rRqRuQex zQv&edR{J>ybVd0&4N{)EA{4+i6K+lUU2Q)6x5~StBJLoZUJZBOp0kQXsD~nqft6~& zcXTT$e*nG!3=#>*oKg9Uf3D$$gB|vOyjVH@ourA^9RqPTTn8k~|4;HF7B<`VFA&ZD zSLFmUv#QpYAp;^HFn?(SLay;iCG_7-lEa6>EE6|U;*x48EJx1ecM(`3>V|t_p}C%_ zEZAR&D6P=_fuyN|`@=sLiQuggQKArxE<e@SUjGA(_!25gLdd$@FCX~Fyu(!C1F;Z7 z=gkip+`m?TXJqCImtz?K5-!kP;c{fyETY=?%M}!HUr~Z2Ptv;Cm>fN?qw+t@?T{dJ zUIKO82T<-rzVcX1ru}XC8GT$W3SH)wxYn1Cr__o=Dh08{@+|MezK7<9s6zg+s#@RU zd3F!3e&>V8s-N;9L4p<{w7IxpF>~`Y{u1jHR=Y||mzhs5r-VWXSE#6{x~%lMXPI1G z_qMQI-!SeR?v4eq-I8{5X>EJ@wb<XdnCXL&lewMGf73tA{d%R^Yx!SsU1lARLAAFz z_Q9p^+emw%li&So@8j|8;clK^YmMn23ZBg7bPp&zcPCYg0C{tmo@oD@r+#LJ;@ZY1 zNe^nZUVg_QB$)7Sn?@F=?NG4R`8b4qRQ57uZfpIjVvqK5^{%b;_L6s$1jC!@CKtS! z6Ds^>4vBtBEOmO8(rVL-f2)6hn4!sree_79>MZBjnVWi@0+{4XyCrvSAx#juKL=E& zJuB$)5DUA@i;yhgvv#2qYk%Kf-fPVtntue7dEAB7^kjh^k)_uA;SDs1RKuSA_Zu<@ zEEE!YXy)&OVD`utsA9zaf^(b*qx{6uTLgdJ5}WB~i*%9114GEx`rmOT9pD&mA<FR7 ze5B8OCLp|%R2z6k_UkePrJK#u2G_8R%!C%?Q&u58h6CpiR8Kf0L=-`c(8{^Ue)R;w zkXuNh%#LMUsJ;Io_>e%IUjfa!ies4a5h~<mDUFR#(-CLM%FyCf{R%y+vm8e#=vFTn z1E0?lL40VDvw=GOFz`*VmnIy;9kP|D_zJ~JMb-Ys0iPlna%;v!wD{@t5<#-T=7+Mh zyh+N=;bh?Qjkz@g6@$P>BK}iI49MScX;2ag;C5UR;3mLD(9s4VCq|1SE#b2Ko05`J zfjm)UKdyNVJvIN&`Q@F_#ndy&AtuOK9whb<i&-u#lf?fa)QL208gh|~#+(>G1FO5% zBvj`ogW7(?j?M+Z)Zc;w|0IC!n6hs=UuVP~d8)u`^f72J<iGazEsPu@S-4MKvc~mR zyG|*6i_qpRH)NHa(9$;k#45}{bn5e%Avd;nx5p8aT9yU7pOQqywav7|e1<7oSY7)8 z%RgM&4hcvY>R3!F+3<Z5<ltOjU|)IfukW+Yp|>&9Qe9drL08B2F06;j%Wrn2`m8*A zEEx7CI~EDlesb$z6h!%yL9n+-!r*u|dzu?}bl?z0+ish2#iIG5PHTZTC|~!iqciwR z4coHvN|&RR|Ni@H_uO4rasr9u3rr<%Zfi>CdT=XS0X0wBay*mFSERIW;?KNo0<3Ld z?hvMq4l=K9W+C+JJUz72-9Hxn&lo?bnF!Pgn6w^o3U7H%+z-r^{_k!~B<oWKcSk6Z zsjnl(yiQ(Yqpu6j2f;w$CAfPIluM#jtSalo_3V|MKZ>h85j@*9!d%En2?b7xj4{Y^ zCSB_xVuRTY`iYa9ljENCo<L}S1Y+x0?yGHLjWxUvx%s>&$B)jGB2)S)(W7e-r5KZ- zrI1mz)mOfFs)b$msK5~Z-%B{FdJjT#wL_ymi5n{<j~ofwae#YHSUBt-=k2HBCGhEr zXyKu5)ZY|nGB}`hEB=$~v$v%}(yS-wG_`AU>HDPWzJZw9PBW6RP-i2qAXYBu!N--E zOL-tT$ViA9T;Oj!!0x-gpE`;(m^3;bedMv*#W`GLLE`*TLei|}-ftVG5^>OV&Q|Jn zsG9Zfmg#fbO7r9f<y#i@8s9^#8^H>$R}?R$ao~EzSCId6?0A~zw0ZDdhOW-ltKV$h zeLWLw_3vMxVl#?-`ku5b*fqa3)lOZFY-nx&HbbN!d_0Ijx7p*tAF_lsyx`E@05EXs zY^!OBVR$}u+mz=vbHj!{UQ99#jaxdrC)QS9@9OdPJ{ARDBSClCcWm5m#uP^FO4Hx0 zAB=u^1%oe5n5AVLw=97F^==+(UM`coW{G3mjS9_W2PToLy*XWtE-iZXx^`P<cE+&Z zWGw#~-PqM;N-((2-(~GO%cAMkXCA8>16(#UlA4-_Q``jeI=W(T$@6SZ18+)BoaFS| zE7#?(s@oPne=Z-B=(qRdp*)+;W1-sSCfXLcr&8^e>vCG&U}@lW;SQ<g>xy|Pz19DM zeRHceT)=C|pkjAKi-~~U+{^?w7=AwG<mtppEb@|MO#8g`^}4x7)xM^UUXfp|@TMDu zKB)i_dHhX~U-e$A^KGZm_i*Y|>H|pW-bXga&T`4D;h6(6pU-u9tHAx0BQ9d^w@PW8 z`#Of>X`;V6#~P)La-DgE7yX2b3^4Ltk|a&FZyv^OP(6fSppfFy$ja4%nKD-_Onxnv zr<Os(iC;fu6J(KvCcQ1K*BnfOt_*k5MMe`9pt_|d4PGh_xY(=2ci>@0-7mT<YI8)T z5dKt~mDAq(2Q!Q6JuLV^@Ws|Y{^gNjonpI#1Tjc#(UHD)!VMfEbw>m_jH{HC`xFW1 z`!g(yGwRlrb^YHF@~z+RZ;fr|Z{~zisa^yCGw6_!HtCjlI9!(`diSS+(z#3cbFhv^ z=6#R~<Zr&KL(h7`ILV>^iKRTvzpV{E8yQR3>{CSq499YLv8Zhy8?w=BLYv~$7vCR| zh>nj5$CgoE{!JTG`kWi)hIbZcl@zM*v4>`Y6&|_=EkvtE^1QhZgl=$_*r3C;(2Tlk zi(|K-k3n0MX{T3x4la(9B58+PHjtRMaoI?8jqzf`5#Lw4uH|b9zl@EAnxX$?0p8r} z&#W3WouE~DpJOu$^YkqV|311{9#Dp$zNkI1tZ%-MztI?@;aPf^r>jXjJ(5_xST?&g ziBI@NE-CxPp|gU=nF`c0M$OT}Mo-m-cy=A@R1)Q9g|35`_?W-jL^X_-esDql@9w>b zd1g+#%J@a37Z2&vUo#sTy$j=tQtnDrZf;Gul^J&aYq)z!5|!GvTOuob(pz3qR@Kz! zF7Z~=f!P=L1|9ex$UlnOkE~wfJ~C_e%<!T;hHZk6AOVl_;hTgYm>V2}joxe$juCT1 zfk|s4N#@@E2(IRqkp0kqvJ)%ZxP;?yF5|DW$vWu^JC-1ahSdXRDCtLR-;dZFx$#GE zjE~$`j;w_ZKLwhsMZts0KaY#kFOADvrp*v2y@t?~9%sd1Ah*zC>mx%q<z_O$Gm%XM zAxk{#BNjW)Z0Rtd6!bIkZM!?q(^YyC2;w-M>zlYMdQQF!eA#Mha>h&ZcV@vK*P9`h z>{f>9f-rnE1&Tr+{ly$4G)1zHp#@Ww^fA!0y&f_c+;BY~x6Y^Nj?&+pR@v<3!HEJm zzNLm0ikOXuTJ|g>-Ix)#tTZO=6$UMG23hMTV)%`!i}weZaWQN~2I7KLgJDB)0{d@i zJKj7=ttNoaF=ta-({@-vn-E<vj1!BNFbIYl-uo7dR)iRy`~b!ogh2pzVR!URgtcXL z98U$q49GUR9<>NU5y7x!DT7pa=-W?M)2K^){_8=o#$Z?m1c!cL)4CC-7|dz>fURiJ zN(%UE(sn4rF|ISYy=XhqAUY&naAHDu4Hj(^%7L)|diWKUcI-0_<S*JMROnc=GenhH z%$~N&n6#NtGTU*I21-NS4$qEqW6`;Kb8KHrHYR>89`u?u?&A34#D&QpR-4N`5*z(G z<d1jLn8EOX*=jo0@w44$1F~uMvl^aeAysuON}rFMk9%!|*j}P&z1y@LtqWdQ2>hKD zz*XIbdooMxey8|vw(1E)!?DuL@GHRH#D#2*<lb&JOOGoB%s0QP9kKdUy^3`;=|J;3 zwuYq2sRpLap1e3a#LSBi>!G0y67r{nM9>c!W4R?TMjCRbv05CUePm0>5W~L^9{NVL z7aQ6^ilun^266~-<b*bG(&>4_)~lf{B<4FwXXqJu3o2v*c_S~m(@F}lWasDZeogbO zf=;Ai*|B->P%`M7JWFM}fE2w~6tV|S3l5Z@IW-|8m%tq4fL21)Vht@Y3l5OANJ2hH zM34Yv$n3uhQK6@!4ZoJlBZhk*b$?TB(&TM>OrL#IlDq#Nl1ARZqG`yDI+Z%c3cpTJ z8&@tQRVuBl9;>C$)%uM&eA&(fbKw>%p36@43MPsE(ZPfaCF`E97Ot*ky8rU<N?ti; zovSMNX)BTBQtvtb92UN||6S-@f#QB#0pUY$o9%!9Sf_4Z7Imj(;^||^LKJQ+3XXL% zhHv<v|IVbO<ECT`H0@`G<e>2ESS#p@sy?SmD*h5Siy4ytOL!;`#E4S!T00Alp#%L+ z`zps`M(FQ2tSi{4p|-9AMa?;GiL?BR&OG`J-Jz%ZXx+{S7pV4DK1r3qq}{@x?Sw3` zX&#Cd3k_cC%aQ33jV(eU;xRbp;?69dzhK7AvD{U$z7|LqDE{>RZV(=l2em)G{|4c$ zn$5<i3hWp9cPl<AeT6@l_VUQS3@Y>uv-K$`P6@3|bf7~pV(-HKs`*b}>)0i1bKcDq zZU4)7`|Vq-4Gr-?cUpOU!Qf%Q!>*P;^N)ny+O{CZplE{JaDBr+17DGuM}#i_g-62o z&xdFZr2+)JOChmke=Cb<psO^6$4_K-JH=vJ^!&~L5<qiEu?oDnKB`vdSzlB;QDoV( zn?DVcaTCH%P+&NUs>%2?xc5&=Lsk?5)?A@Pm5ySl;lWZB%s33zHchSno4#P5#OEng zLS9Fobk(k<X7{`LLqRtm%ivy-th}7KlKHn*r~RK15VVZ;wO&V@*0byCGbP?fK_!6_ zM{=yABl+Fgk>6x^uE{XRQ^9f8I3=y=M~Z&#`XLYelj1*|Mg~8yCjRSjv7v8OT~^xJ zE%TG^uMLgT?k)N{K4n+QyN5>WmflzSWhF2WYRg8-`)k|oOra3WD}TrN%4Gz3ek=3H zbRGFOX8iJS(X5};Qo8Ds>r-Ev>>CNWO|27k;Ze7TVm*peS7aN-7KbRbL7cqt(eh_g z-LL)7{z^C}Tr0WjS0`c*<Zgtk_CA45_F2q>V#{93U53Y&p+kO_986K%sxOgSd|Cbl z2{p-?liYL^qAt1>4*N_)JKDEG9BBdtZ>gNw+BRfU?CU3A7pMu8xkj}fp$#Oa+eQrv zR5_;&7XAZ!g{Qx{dLvAkvTb;@!}KK~F(g>|UU}V)UR7@A);4_5yN!T{k^Th-QoY~& zkC#7HSxfKw%n-}s>09=$xlb*JDc2Q4F~+NhL*FPZmu5+Ky*BTsJeJ-KJXyv!9|+Md z?40h(A@?r){Xr?EU9;GYbbA7i#St@m@=4}33g^QZE};xEpKEc9L$i@p4%Oa9sef{i z0XD4&+5sqsLcVOQv)G1VuoZmB6%u|UZLo+m%^nmQV!kkkJ*@2Si9wtC-~iu07*djD z3IC{nhWwpA$OFHH#*$Fp#47?7av*_!4m9T-oo@T)0Nn<`bRZmm#tgP8ACOeWWV||p zr`O(CKUn$}y(JH^6T&6oJgQqbhCgDDx{glNb#CizIhFQaU_i{>V+_h5MC$_K&%|WH z^)Ay)p~~7*z0f~_Ov|-*t6YX{%onaU;X5Y^y3M3P&i54U)}p?VKW$G)6oiHM-{0KM zntKaa^YQtXQ-H;B1SB=W7hm0ch*Iwik=7zo0@kl0+CvlGq_Z=3fMs(eSBgh6WMSO; zc!YnAW~cW#coNuP{3O-`d}xANvR5s@48^_ej2a%SOginPptv$&LWH6ucP}xE8ASw6 z&uLLpa9Y0P;irQX(Q}vo#QKx_&zFHCQ6i`yK@vE)Ht6o@?%3{Y9@Ys(h7@NjB<uC{ z2wb?K2<@v3pM5*`<3gIu$3VLwW617*1jh+7N9v)nl<u4qkMTfC45o-rfAzQ=>2thl z2uU1ekU3|g`f$IO7kW#0_KB-3#vudmF{%#o1x_L=d$wdHXS$Z;A)3D!^hPe$2YH|^ zyiux@!=Dk8XZ&p{%vg}OCxz1=Jwxl%YyBIa0R*0&BeagxfMe(#c4PuM$q7fHzHV?X z=HbOG!uJn{7JwIJVZ{z+Eaiw?e@iPV!n;322RUBszx)P8Ld?7;+E$P47dsEAhjoPT zi+SILy$Y?lTI~O@QvW_ySi|X8wTir`O^p+gXk$I;IzDw*Ddn}HiTy|Er-{p9?&}#- zxwL54hwt{nC)V){$Ero?S7va5)+eErd%e5CWejWjQ%@k;TurLebo~I=n|w7m*uH(V zm(Xj{Zt{=zgW+Gy`VFqQh;TA;_ktX;T$_m_J+8KjBoXyew%}R262UDwTQTxJ-?|p? zyAb$lyt+JF^+^9_=cPj0c|+@WuSn9MZ+e2pV_(x?j=N|eLHo_N=>6GuRpzmeD7w<) z@bnlYQiI4BaY`J6Gv~9T-6BD!<j?)@{TDpLEp2vp5jjxL9=)REPuiON0yh|3_rXDO z>`@8kj3=x`UbXiw4o_h$6*)0YN`2&*A4!-l+eYuio8Es_wbx#8ggPz?X1s)=Jalt> zn%3m>wEpZ?N-nSZR?gL~T6ni>4yTK#dG9;-HD>>{-H6`rW3{1Zj7C@xC0G0@al*K% zc+TMRZl1DTSor7n!yrdbcqoIGC+L9YkY_37q+_{;qQr7bl=-P3Kd6xPVe6*~gVd`o z_r3PGv30JkXmo3JI$6nk5Zpa-BuaeY$f#0rZ(w1`K|q3x5v}@dPI>iBtaB3;LEKPr zc++phnhU&f^1R;Rw9Y+`yR4)sqXODy(JPkK^n_XSUcQGa#vJpMW+aM#NK_O}!LpnX zH8oeQdm8uPfrEonheWY(_@VN^cx?(5!2}1Q)@$Wow~(so=3!mCEqb_j!}>WSbdPDy z;{OR^nv7dLKTpj}l((|W$zd;`Qc>&zxaQ~D_iP(hh0_O#Ryp0){Nq}i2^Rkm3><;z z3TDygys4J(bE%(?*$x(C-hsr2>AzbMUj=^Wx0Ga(5Kbnm<KgtX=e*Su4EYoPQ7`f^ zU)lQ|btIbt!PV3X1poS==2EUKmtei8`9Dg9y5{SWd-LImMvw&d^SiT4{`^W2?@zsW z!yv-Kcc%rYRyE3JWDJ?A*^CS#^)7eU1lA;+197^#3g@+m+}ovi<P{6%t0-BP@_tHu z6<3;zzQ+H{aB595uA^XWGZR^R@jzNqNW0$6*?W33+uk`iIcG!=7p`3>?cu=eRy6bF z@cc^D?nlQHg|DiKJ9k@>=R!bz`vPlImrL;M;%YQ~jbVh;A}*KU7$x!JIu@3x(}<Q@ zEE`tD=T<d%`q%^0XfHifE;>3f<m5*za&WektcG^yT5|udM_;MEB?W6L6$W@^FiKVA z$c%r%Rp>4r`p56-X}UDWsHQ%6h5`k_zH#xl&m%pf{;|a#9v>Jc*9BCsI%uf`X6zq( zw3QMp6}Gew>Hdg)aV{*FPw*pF4&;sqK9}_)wyf39KoE{3!{8OHTCZa<?2w;5Xi?(n zkOtDu1V&8jV_CTbOd7}^+dt+BAsib@Li)_&+0=!z475X)=WP!dp2xzBMf~xRrlD@= z3mOfYm<8kpt^A19UkgWAP(Df|&ZKTVlBUG*%OaF?O#h-`h7YXJY(Fs>Ld_%xdLJ{3 z*)#u@+0aLRX||v42GC1ZQ_-amr^kzFO|%u_&GL6ILw4ApLO{xr6R`h$=#PxjrR*$^ zkSURYu<3Fy#)GA)isui+M?h7F=F@@)@I98Byk%1ePbW~P7YLv#1mLqpVoPb1@F%ZW zBnBQK<WF@@Z~;y87xs_1s$+o=OGgRb*JP@+bY$iPFMZ<%XH-LuX`L~$9%-Ia1x1fg z7ogr!R*~G2_dhiQZ*v*?j?@U@7h=wA<#@w$gP->}e=n}XX*=l3rQs8b!u}8xpxVoh zV74XCQ@^M_=7423M>LLoWHrlZ&M#@{87O#Xx9ZkdxSKrJ8z-iEO6YCFmgq(S2{FBO z{s^CtH6E{Q0M*~<qy%FV+jY`H4VLpqYwH|eR2k!a_y81(>e-VHYa0aB31`g9{v|Sd z?JHL@)`q0(_TJ6~1=_DHLCMnrlZ{;oLOA(9x!<h79(v+2-RUO!ZLdV@nu#x%sLB-y zGtuQga6zE>$^gYz1ZAc1l)FK+`a0aCBraY$?vKaUzP~4n(>TAV+CA+b^3j${FxvYL zZhp%$Mw~XNIymmMayiQH@37yDH!bSaH`s&*f&{(0&3g~9{e711z68@fkN@n?Bf>_0 z{rpbCTod>E-Q$Pinc78>_59{Ob@P*>z2>p76>;Y-FjVR?NDw@<%XWw2`m@(2c~Y4Y zP3BDNLJ#g6;$LNz6=hnqK?l!(vMQ<eoCuD=56U+CRG!v_DFCxZe1Ad~P=mvDLmuSd zY}x-O9?!|d&*^$x)Q+gFs7nA2%AS5AU*$hjAs^85Q_{AcPWxhX+}(ZEC~Em|bXrN1 z|3%}goOfn(6_o*e5779S?8>KejfwxD4P)SpLc33hRI|56K?(4G==iFag|w1y5pMuh z&&=NK>UwBt3Ix{N$Iiz`l~rYBw#MKc_+Cv04MAMgGj|Bw$fN&d+z~z*DER+>5&8ey z*S}}iqU5py=wtd~lOFP4R`&f2po+v7nfdY2-k>sF`dU*v!wy9hkYh<gKfJ7Vg&wMW zPYd5ONt3R>iz%omPSYm~1>92Os)@l3n(ejHvscvhzTcxRN@#eug-KC<HkyCs0~frf zW)jod_CYr&^_CnOwm72Tk$L0#Lv3}uirT?xo0~xr)jA5Bc4_3t(<@P4>N^PwF{6y@ zLXna+l;oMSKz%R1GbM-Pqu`{sZ(K-EV2PLbqXjXbOxb5Zh5?cCuHt6x?*rni0}z@> z5H{c3=qn`vQHZ3Brq2~SgW~;&>-cK+WGz(*^@I0+0DY-hJ6o80@`Ln8r_2T=XZHZN z7B#@034AX}DU!Dne<^~Ri_pZhX$c>Kf8XNoq}v%)+2xXFBWGr3SNRej0dCJm|Ah2w zW|2|Ho3sT-JH|5A)zg`Nxd<7jeq5*pz`r96+YZ7fliC1fv)l&k-EQq=HiPK<KV&nW zs&eyi7Thy?1tVXj+U*7G;yX9J>lzwP^8H9BUK3v6RUUPj9*8{2eq69fnxlw1i~s$D z@0;}u@$f;w=Fo8+v4U!S@NL23N4aeF%0jI)o4`UJN=Qs!35~EV(9vo8902E?r~W?E zWfz#jRSp($jl_$QD8m-LY}u2CR+FXuZa<r^g~nN*0M?yH-4dMeIaN&S_4wv)AA#GZ z0VGz8Sq{iKd)f}a?Ep|74v<;pvz)c319)xRj7E%`kD)=A-gN_D&^BIfKDoJ2HJEF1 zL;|%~1kz$T=H(sNKx~0e9n)WKJ#YOBGZVPZd_9Fk9P{i>dXXEf(T7>fD<c?GiZdI! z`s}Z=+5OcDXo-kH0h(J=M(YbY1dL8RqvY9wy*6?td{8W-L<|(S4d3wgs#CF=A%R-_ zmuS+!xnF8E{0D&+V{YyfosEpy9nrYqNDwHelL}q4rAU@AoTK>yj@f<62Xq)2j+ciY z_7%`W5ZBb8x)!&lO7P{2*Tlpdlpu!<%#It-tBgyF38>MDN3id}v$c;^-@erG&tmUx z2b~&Uh^Yer5Z^vfu=#Q`J~R~LnF_ZDAd9A(PRxEVNpDlU1&~tRxd4vWjfBTl)f9`) zU9Xs={)@1|b_=5|a_f^BH5TlHGYlc9C3Z^Vv92%zaXzIM4kBO2+c^1S_Sb~;rh7KK zlLENk`<t^c^O1mn_xActl+R8JD-Vu#Ut0<NwnMuMY}mnp{B!KRYP|T_rr!TXfFyIj zcp<U>BL3J}uI(IDBOLl0yeCGQn%ax%2LJx;t{3|x!^l*h_^5SuDqGNQEQ+2)M*UBl zh_3CA&R~<VHkrz-U%!K1r|-wUeRh-}C4H~S6--Y~Y_izvo~}x46*XZ!j-~8-=N@<Z z3iu+`?62aL+Q9?iR1!;fix4#F7I|PZg9m~YIu=%<FqJ8Xf72Ql6?+-+I2sz}PVC>B zxuI7hQ=lS<pf1)Xh(<Ej9o)`2L4lt2^yVOawZ`d$6B46zx3A&1L}PNdH_drMbEOR# z+v~NJk$4*STC<Hf9#n9z{;B%=WY=nei_<&I7k{v^V5%HIIUTy9_z>ygI1}&#idnAo zMaknLcC+>?dZ*6(SlCXLQH8&D^M9p%ayquk=ZSx|H)$rD$8mny%2Z{t$`nN-%7pSw zgYV#I8x;WW43hd%-kPvTyE_DHdC<@Qv4Da0X1ieB0nq1AVp=OO0$M;-f#R|8?}iLd zwie*q^5NMcwo10w;z<~8kB^=)4(Q-n*WYWXubJhT04@p!SL2XF!7IEb0%KtG&Mc6O z+GZqyv&v+Xg5v&oR&}Vl%2n*VbSQo3NBryi$Oq^z5?C&7SIGb%N;2D&5W(j|<sYYb z`YHspgN%x}6|KK9yA9I2v9IXQa@=O!&dHP(J+B=+c9(gPNyAyMyS`@n)37hID;k(H z+S5A2D<N{w{o!o;zB`N(l;T|^LDR(8te@omE6xPoGeCL+R(w0Ri|caY(Tw~pKgxY; zB7ax>btQ<2)TM)CT7Olb?igI+LFrP}1mDlj1RDjb@CFf`W<+c2TXE{r=5iE+JK9Ll z#`8m|#$c3HeeDm!{yu<Hv8spwe6?iti1K6ha(Acpj#^RU!wSak9)(umnk>iGk;WSR z<$C25hee83_tc;GFh~X&PT?OTS)Q?<?DCe#T_O2nqff3LEe}Zd)kpdwKh)PU9~C(- z_9g)%R08NAEo%E<=f)7he}!IA?JXY-x|-tlmzIjDGiSf&C|sepidu`JLF0L|P|Vu> zFP`;s(9UAJ2%MdIVO!5qoZtQUpd<NN$H(l+=cl<dpebVJ_mdQves0YiFdn5Ammjds zm$jR@c9L~Lc=f6O?=<&z^{x<-NMRik1VDVRsoToEmr7nN^W~r4f2vJj0qE;L`N$6B z#9I_)yIcMJ<iU1o@szp{{`VqxU4*;F2)+t{ViMh9yEE@?d~$B^BW6w->PcKkeyB-k zeYdY@`0FkB=G47vBPwjqBz06CHQbf#lJu<58~xAbD+KwozjlYVJ^!>)F3dXT92Afw z98kiu>OOnpT)%BBXcna>AcziA7a&vkjQ1eD)+AWDV2=?yS9p|=4#|@7l3cz4wEM&r z6n_3e3c9f(D}rQ!>!KHBdZY7`eVBpSY(Ny1_xOg@1khpe^>&9J2;u_bD&A|oo4!6K z_v}h?+p*^*6TaA%L5tD>9QopSEh%?_l}s)mm%=ilnk*{jMkWqEoqUpu2p?ul1Edrf zH7ra_*}PXxKZ1gSGA*;~0bTodmS6E3&QB(3>vg`zN%Gse#k4!yHSyaPQ6K>1W=+On zl(F7xSz3F)U)?GjKE)7tHn=BUo$TmRXkbMSxX$S+Q$FPL;y}5)G_^Q2Zt-7uJ6knK z!@p_<2jl`%vcjwSM?00gUI~F78L*=F*%X^ajm$hC<a3L3$Xg^WN{txFpVAz6JTLn2 zHodR4)-Enmc!7mwD-rO`w*`G)ACsGx-G$$ON0PgL;p%d1ir~o9i`se&_1y%amd@pc z{jRr~yroSQkBZFn67LY6!FRD31`nvZWz5uTe&uj>5KtIcUNyKi)kQ;pS@rG%A1wew zE299v>-tq3bR;7BqyX^?vtD5JvTymgaHDQ9wppfT8fWdQO_mfNli{;7Ki915ML3gm zaMu$2{(fG&xFT(AtHKhv8ml5KKY0Cneu?SqEWo7m8wuRAbU$zUI{HVV`~#oR%Mr_z z9o|(l;GR})nhcOEs{%HymozL@<^B>43>4VO=_4zDt_$%LdAk4ZUZd*Xwv&CLv)W0e zKpoib@xI%`jeIpH{H|BWzVP;MdSIU?>&)NIoD=noGY~0~I+yqCCnx5G5PJGew_uJe z;zjJK;gBInPLW$r_%YJ3?$%TN4yPO;{uT}vMz!*^wBYRLo{Ao=hAI8An!9vyTb%#_ z+g)w(vEF}w|AeG4_KfP0pZ&h>Ut_ydZ*9wJAvxB>Yi%;%j%PNpUXb;eO}ghT%3ce+ zWa%`d1V-OKej(QdQctySSWJPxof|;>*kK^Qe|57mLSXW0HpM-Nxu@WdUx964ls%8; zI^YbYhlZ5KKG$e!&yqHJ$+YanNe}V=-6?t`(BMmeS8NajQm6ebT}YtXz4Gj)n=na^ zd}W~pA2gn!L5L~xtHMj@gX#WXG%u!shTo$cSvgFSNdAL&J`;%6Shw=6`dbn5%#xBI z`H}4F5_y@vU-l*jGG2M2e>>a)UExZ5zpDH`!$2c|>auAG@IDn=FVLWa{Z3AhW(^uP z0!m!4I-E_zyLoQI+9Dd3;Z;x$mUGae6E@OBUcH^A*Xj+pRnMg3g(fOX-dCtlROX{@ z00W|lHXu430dIF28Nar%R$L1Dp%6V$Ft^&Bpd}@c!R5caUkjEno1_MuC<ZN9#FygV zw|_|>%z3+Rx7X=d4Q4GSgmHC_FRAx3>n)X&F?t&(H40R*!<FB=lAF7NTVS6o8X7bE zdEe-JQd|g+kRL~R<4RnoQT*LgSG79L#k@qJ@i_ta*Y?p-Wi-lO`|m)^oUf6mb&FU5 zk^(+{0Y1~L+$(E8@LpoIs&NTS^sgp%G=e7-N_VF(L;5{s)<+rB|4=*J=evWhJ>%au zMx<98UJl1+QmSgqh!q%*gG!dg?7@1v%tBkS{Y!t)+)!$K_v|F$q-@R2WeT~(?_YgM zMHNq-XbK>S!ORo)gcD6R-qVDLTv0u)-i`ociD({_X!gMZ&^`!=;<n4uULbhDK=Xz$ zKi_NA(rS+7`t`aD#WeKcTw6{MDAI_qV{}z{XB8Say)k)1dVE8I@uK$O<qI-RE_>3t zsP|a7I<A4hj{){mpx*ic=pIGz!NWMWg?6lLAWJZva|}~2R7D=X;c@`Xxoa=zUPA)e z`mQqo&_;^xO;K;J(W`uT$fuNcr9X<lQ2`t=7WNUMHwi=>4$Cc`Al1sap3aevusG<i zcKElE23id}Eby;0Z~f6Lopo*n@ufkM>bm7leN2RRd&UAX4=^kaAoN5*bp3&j7=c<_ zG62f9cP`}36q+(28je+a((B!M`n3o`6+zSns61&Io}KV2I7=SLuh9m)u@RkHU}3nq z{KqrHT`g=X`oAmy=OUiRNsR4<$Jd;Zrr8rJM1NOD)e}y-quy63@&%C`RNwFR&~A>l zFap<SriF~QwyjIfQo;cE+BIqKvW`0=RJ$CFf8WxnOr2NNLj&RX9hraz7Nh&E^XOF4 zh!~ybsac1akxG%yhJ);$YCA`#^R^TB$<-mwRB`DeTZ;LcIQ<mhvA9kL3@XXBlSV<9 z?|3@S5oi{GK1qAvzenk)uTABTrljW*spw~c0;YAS-=yzn&}>3xJ2Ws`-R}Y94wK#w z>eE$!rHKBTFSoUx&TwO6%Qu=1_jXbZdHZ01G%GN6`q|FSouA;pRf8*HKq8Jt76Hyq zJx*@_T&3CwTe$gzl!Ic<|GPC(D1P$?(2gZAbsPXfaToI2A0yW@cs?gDJucl^K)1jb z7}aOM4<j0DEkzBnY$)|A$O*`2cd9}2m`d|29aPHOhbpudhDJtVKPk~${H7HI>O3un z_<1b~6YgpH4ZLt=eD5hr`8jA_ay^bLq5@6NqMo1q5b<^9(BBsTi<CmJzTVWHiphm~ zw|{CBi=7(X1&hsY%DZJp`<tytn+$4)$e;Nj3F0M09$!zc0MOi&p{uQP0W(!`wzXUf z-Gxd--eNn!_-P&PN5zr0$WM&cg`SbEH<eQkPP5z{L8&sTF3g}&KX-kMFk<Eme$J(A zea_hs^P0J0F>U42%N=wme%!ZuRFZK^_>{fyndxLbg49QJEg&G}P>hj&q?3dHrj72% zkBR5CezNOH3x6Wk`$!KOn+fTQtz>>CxjnnNJ+Yj7%*iek3>=KKOE=k%Cz5SBuK0l8 zCoc4g2(C<_zY)X_uqYqhmj$|y8X2;`ZV4?;QL5s9e#77m!fNT8{i*=#vu^ZXF~)2r z%oN0Uk}d{7G6#SEpT83Df0ID|-+pA^4MxW6g4hQL{U@Ib@Z(mMtc!DwFoNh!mS4LR zcE1-I{wvyq*LgC^sB1cm{Le<U;0Ik<#VK)XctN50#T(Y^L=ZC2wEu(aYJK+;ycr## z_>WHX2Dq!EvN2l-^*=x3fZ%=oXZbm_fq?g4-WPb}z{*02pjhr%P&GiD7)pMK9DlEW zXr56UNSnEpO~a2Z6hAQVw0l6?6rnZcS~35K-TKgwyvbDUTO8ToGz!Q;mFH#7f95_k z?0kk;{yF_;l++~x?#ta54^`ARsk=lMgN9XLr4SY?-6uM%PY*`aOSfmXXI!WBee2FD zi+x<DD&40z&OSZB{uHsGCW#b>mw7_?b^W~S#X~zro>%@YsB7<=_20j6->bD4oFnUx zS?6u(myZ9&-n3Io9+|K{|9kL-e;S%!N6UipHzLktcip1dm1JkT-z+*VGyQiw?rRkR z?kQ#eZVt1&N7J1zdw<0adCr?f>DAdg_N+)*eE(BiYZv1@So=uETwWtCS}i9%{Q&C` zk?zE5ZaDUt@a9Zgo1m-nwD8+E#DhtSjUkFo^Ehw&6sbiM*X@PfT4NpQ{n+#c?n!yB z*{5us247Sg$6=%OrN#<Any|lsH=-%d*p%(=x$@05=%{QD)Qvig*)r!(JwqRG>Dz8+ zDl)5H<;YP|Y1HNaJwaFQm(+1VjmEwFG2lixbMb~tLbQ<nWMgF|nB;oLXg#?PpDa)u zdL`8&^0lg9${~U(Je<VtjcP+l7#5cNTf2vPip)=0Sk%PD^B4>a=i*97zkaC0eSB|~ zoo(<``&l*hL0-b868+$dc$qS^Kr+w~fQ+l5>pX`wO1Ob8jIJBKT2(V+mS_Q`ICC^l zXeG9p^O;FK?<f@bdWME`5t9(n=jfd5Jow^mIFXuOM<Lq(!P{F$MHzkXqXR>CNJ%3h z9V6XXjFh5uBMj2rEeIm%kVC5|(jYz500II-h;$B&fJk@4eev`8-rx6+yYBt(t~IPR zoH=Ko+Go$+`#jG(6kxoXq*1AQE%!m$Ra57sc8r6zu37$L1tEf?0jvL1?e`OFI9tal z;Jq9rq!_R=`Pcqmp*+>yCoJ1sy%Xs-%~>$*mP#^_d$ez!gqwMc(ysy)$0XRu%DcIp z@oJs~A}QQdC^E4?X`}Ivi1x9W5Y+3M)ptZwYBx1yDG7j-H5hS3>9nbVmH~<=6c|mN zp~aLN*?t0z@tB~X7^a(wi|v@dPSTd{LCqjWkkGrH_#GbDP5WrVh>~h>I5JXYN77Qn zjb8&-&BD4{7HFbTFI{EwnMIT6$bv8dQmqSuF7&MarB2H@6|)oex|#T(Kg>;%+x~ez z&oHUXIQj`su&BK>WS#s1G5luZKcn#na1rZL4YzsMOf3m9Cl4OEIa{V7gx~d?gQT;j zq#7NIfHACk3_v1E3Kzm@b;B*aC@XFMa=?g0lWXStTyBQA>OeU}wn7XI1l=ES3izkx zgN@UjkciWlf%*33%!@#!#_6tz_KT{dJyIR{o9X(CWjt;rT=|Wp2HIzrm?<!#*&oxU zW!<cJwjh>PeJ`>=ALR<G<Ct*%w~3pJi1v$J3M-nJn;uP>hREb@<SdK->pBlQ%|Ef} zcJs;R5~5u%t^lc0CA>_?R|FQDN9*@8b-_u(aUp`{337P1KG*&3*ZtC(SOP3kx{d;* z;V%>MMl;z&4!EXl>dTA7f-mqRvq1+D5SzuI91`b~E+IUiTQTc-&p_CzIK~I)WbZX$ z#3>eH`6$6WkQP(K(&96sgX)UG5{7BZI>Z74G!Pu^^#<$XL7l_eK(y$Mm?wRb_z9rt z%&FAW6hYi0aKgQ6tN{N6wG5RZGb3)ZC9KQ6bbbqEjH*Xn>zbv)eL%Nq=p&UtC%AqU z?_4lyP(EB%>nR-?XDT<DN>(v^zgaX4<e0R$pH?I5Mm3DfczdQ<eWVe!kIKVdHa^j1 z#TZ%&V(c*DkSvCEd*+4n?p^OZiE831#U2njw;7ei5kKTyc>cN#<h*nlAa&S^^$x|U z11S~%Hs@HA!Dv93?-b$eNMl$b=bW(9LA)(|5^S)_$Q3FRWd)bRv7n`2AuxPC@=Qe$ z)9Z9Dt1+!KpIC@?vgFw;dHlmc3BTDD#*>=TlDs3^++6Il2JE>>ZK)JO73%mzPHUtP z#i17VQ~7=)7nBhC_aV`%Bf>LSktZSa7?(C&gJ`POsUnn{Gyl&6A^{&xyf+{_aCLYU zY8T}gqj8M0Mc_h652~lZP4BxPoMgU>I}i7ATAJ-g(V<?bsmQ44(yAqmi<J#T<1T<^ zzDsT6l``ur;51Ws5q{|w56lCF#vi}5!0a<}s2Pq12wUO}voQ)3J*<=Fuvb(AH3XJ! ztM&PU7YJw7h~O0j+FyT%9B|_$P;H&FqM*Begcu=+iRK^RaXIh+-P{;kX~OO&#&<Lt z#Zjgh9_Ydor0GZ$Qy(4e*8U+159$k@meR-x4*z7`Xve)tyWIk)M!j{$l)(A@9|tT% z%|sUz2e=dje_?l<6x}E})z4C>M(_1Mb-AO4+5vOM`tnphlbn{ok!iZUoJoWGmK?iV z8)^BKgUuHlQuh1oC+bcN)-$YX*L}#3{WjlVC|Nf9Lg=p=WH%0jjepJ?)N%}#mbhKV zYN_!VirT1rI|s?G!eALNeR>V5;7!QtnV7|Ug)c-<r<r|?<0see$VwSR!@lr{NTBUq z&jk3mt4F?IOHbj7cv8`QmP`!u^}DA9PPG0m;+<H+e(TBQ8KVsTEBFVeKn(IkHUUor zY;1dXf-9uXqI<h12Q}V!CB3SAg}r%|EDROTYc-4S5OCg|>Or@b7i?syn5m_0$wK<a z$pZ;LI3B<-k02(7dUveTN_ckk)Ak?JAfMFp*~GjfLKIq9zds%*y{E)RQIj^>f&OCa z-%I(6?OGaEfy4#8(fgu;pmkcL_c1SC?!tE|Nj@7LNS$ne=x&4K#dd!Ta=i-~2%UaM z<+XJ?&jQk#?u4X4CL|k;oTAcD3YM9Oc=hactse@oJ?b5|4X$~x4AURh?OwJ>6$kqR zZ3Q+8C(>qkncIcPA@~f6B=*DeyLA^QS5xpD>=s`z6UH2#73PDow_pB(bF7Y}l%Z>g z6?^faQ@51?uutW>C0=5$V4rwu`D~(clMe`Ygm#({@j8O@?}t<UmE5WeR*6*?lWIJA zsJn+*DdW-)cz0NK-rKUTr;;#c733w=&|7`%$VG{%(c9nVEI~*p@5<2K(jR}baCftc z`Zb!yQi1c2MkbCXug#3EZhy&(SDtDfrKZF?W>Lt(?&~X3KL(iv^h)=+iP15PpYv^? z+~%pfik(`%VMfDhe6^m9mD7@8$7)?Gf;@RNT?{W@kn?%{y%FWaC(+Np=kGsNL$#5A zC<(EHoO6|#Qdb%X)r)?ZxqRBOCAQ}6!rspc{qf_9ofEtTSmLp-dLih1AaPq=LOd6{ zijzgtT@ox%^!Vef7o@2Njig3CXTYiGt}-v==@S;KW*FE|>G=Jo9i>eCvOxltf!^>{ zOW&hr(sq6Phl|z5YIfQ@vUF;1Qh9S%43b3|r2WHs;9fOn5xbf+R*b`68Ai)1VB_G) zo7c16^Lb2Bl0a4-yxpx*NzECXx<@+94cpRHx)aC1lJ?b6MhtRQ?@~$&JtRg9^c`^G z73Xm$R1iEe%uq@0HMDFISn#KZPpbzIpQKW~RRYyWRYQ`)^9$6V$Fxe({lwcXIS5u5 ztis58?ml!?<LtfIlnue~9NI^`IvTx^XzN@8oa*!efgC2ycV^ysw7inpk24Ti?wHVx z;`8W&5@K3zIT*h*`HAQUW*<lg>dHx*&^+A>#7SEV=Ioy&{CcNInseA*<Hx*kVicSJ zH#uyM2+J#8Ey_JdLY96YV#V@Ox&tqD^KRbmYcZF-fd&FgU=fb^VMi~|NHeE2;72$i zX$Nv*v0_G81<3h}5!y%$Qj~L&Ev3OHv*0*flqZ<-B4=1}IwvVOIDW@$mh_K})tB8` z)Vce|A?--ZCW&C8`yiLZ9(G(xo&<<LTf@T<tKHq`)Rch&X!A-%Dqc2~L_u<x3ry<E zJ2`Th@^_q+H(#{zyR7OCPfbyN1|8h*+hJ07l`I})NSdnsF!jcJlr_cf4o%~NFthg4 zQ%Sg83U#_LSDm}vvf3~t%&$PiYYA5Kb28#HTEjYdT1YN_p$Q67&0?vZmeci9yd63Q zwQ93Nn0g3&c+f#?Ne;dW6K}*+KZ;t-=A$<Xzn`W>qBp}bR5i7lC$p3A&u7tJxcgim z{}>F>{`1b&8n*CxA=$7FhjWM323Eb~+4TKfL6)ro$0?VrhFg-`OGAenZ*4AI=lRPg zSh2{ZgTM_yeal1u7lam$1TW~nZwI{PRnuIf<sLGm!%Q;II@>>A1BZ=lC3#H<p?Xta zme5ZuLx-2D`}D>a8%*7`6bohP@DTb0`1pg3W63%ky|i#C+%FkZ&8GETH+5>G?FjaJ z9EGNI0qKArxP#6I#H^Z{@1reqlMb{@9fxnq%j1A*kp&beKS);cdfko7SbnsFw!CT} znv|J{|F4hJZh3W*RJl|0PXZGq!Tw{?e3$?2YW*bnnz#PwUu&P6u{D!x4tU?W8Mr|c zFmmN`(Dd^EIA1~b2nEg}VzC7@{VG!UW^^Tr3XlxtGjQvle?9aS`M{p)QdiOqe^$Nx znDg!($&CFQsev(Wh+xVK5<h`Sy{8W7Y(+a&WlNCQFdFt7sRc;UPY{Sug|aeB7a#w) z!0$3+9v2l@YqnkQlJ}nb`fP?gv!Z}kZdJbqQN0SW{2@x(#<sV=v`c&aYO+%G=V;?T zy_qd6@i)@>MpS8OdwxO3YuwA0e3gkj&%JVdhO0xJH_UYob?_{}xtLl(oBeov8G<>8 z%`*}61yW}#gC<^psU7le0w!I?2*f8)=6g<0j|b=nm|c%Hg89Z4iq;>=zRL6zNrw%X zqx`S^?0<SzkefEY`ni-mu+e2&-wW){KnWGu6WvY%?>0G`g|hnKlZ2Ol?|rpM`Sb<w zdn2Uh*#{R3m62Z?8f;7;%!u_PV8u&}=wP%Oa%~UyyG@k#L@DudJl%%PO?HnV$9J73 z^SAcle}5wtfmu1|H(EX&`QrWe*6iNJC7*Pu#SmC^)zod^;W=HMdD~j|n>Y9dzyIyX zP1VY0oyg<7VBN&mtjL;yXFwQJ`)ux`nz`T3?<g7rsKbAIm$hxr=cZ#w-|*Y}Ij6w$ zC%YuyvuE6?d2yOFjMa1QCV-LTLom%w#s!!-AwvdlW3x-<e5PHn0dV9BlJ#a>+7gVj zKGi<!=!)k(zF$2DZO5Inq`ZNRtk)=?0o?iWm6V4eZ}9QWe72D8m!?wPfKqwS1>C0> zHg5L*pBnfP-s|)(4f9CHlW)|6jitsF3n_16>7@s~<$)a;W7(#?aiz2SU$Xs=D^E#~ zbED6D8H5oxpLqJCBUFJ3)X<qH4mnsUue2Ifk(sB=>H<llTDlC-b!hQ;sXx@2+q?L8 z5Fgapq5SjUn8J+^7g=?r%7|3I-)SwXwJLWIE>c<*Wy@jD4(%yWyrjgS=uVNfIR!A3 z4r*D2(oz{S=wKdl*UgA$RW#N%yQDUVp9#<@rN`jpu=akpO?y=HNkeeydiiY!173NM zbJ!})P!5ieVR%KOz$7JL!=gdz4rI+aGrY3(dw#HC*?L`dqzLTG7W^Q-Af87O?p0kN z%H6`ZmjB>1;^p|JDKroIm&ZQ&`>7z$rs$dv)Iww0R2K}DR{zVl28KrS^;HP3Wg1ST z!^<_Y9R=1F;H~h3gaT!r!5ogYThP$NiL2+j{eD|yYE-^e1rtl+aEj!E=7M&(dFrTA zzq2g)r1<!;ujMbpAxHS%ZX`w00W2u-8a@hG=v)2X?+>yI>aqI_8fQ;OIG{Qj4h(tQ zkBFpv6DJfIXWU4J6?a3LdZa>Gx1qnjH=c8Ea;;HA*)`;Z#|DM+PSfD#Nuw4co$&F* zp>5u^qnd9s(15${&GiKnSl{IZcqaE?i$GCHfgN{n(sFG@qBG0gwVzINyKrq#IbNOC zTp-3Cv+)4Y4R?uM$eM>ARs2@;O*(j2&|0(|;aQslY<y#?tsO?y@-`>4B)K2$Bv}KV zAB~^7u0vCkPo&GJGg0`66NVN_zI4iJ9b6p_36DW(01l1sLMJMe`!H@<6h|tiBA8}J zIIKVo^D>{wO=(AB2VRh{i%dXmqHd!;qrRY=cV|sdCnv}WTvz@$$|g!bA{lO_K^S4a z@DB4ZX$IALBmLbpuUA2d6}YRo%;3Z>N7685uy4<~pKVYGjvKWbml#i75!DIX2_7TO z`n9i6n_}g&W`>;bc@)a9hkjS_%Lef*I0$E!W_t|>F`ofMvgxRys!d1SU>-W0&T!8c zA*D-9--U__E{PVRrNgVk(jq~^%b3Z8V{2xKQ?N`lm0LQ^mMbqiD3}N_=ZGD$>^PQO zX6;Z!-mSyBL*;G!hT_9?!i%cR=7^0RuR2I>^ZE>yX$B8$@PeQ-V6$*?v=c0-{EnO4 zj**ZnY7M5(Riw=M>I||Iq;Q>zFk`QnBRQe1{D9C(9I8z`3|1jpYQ+zcsK!TqHSAGB zA$Ps$aV&a-ruic+uD&~7QLkqcHj6@GQc7%zkZR7|3m24-WgH?N+^y&R9PRX>z#Y~6 zLb0k`Biv+QqslLcVyC)5(rBdi+yWA)^lU;A{($JWIrxge$pN;Ykx4eH(fTN}I!p(> z(R~t7*fp`9-LP`{ATak%oPQ9{eY3*uKsTsRAeR_;at1yB4uQsoXIY1Z%2YZwAL(MW zA%WZgFiPy%h#X8xGR+@_zB1@lY=IE8vxYN-C-X>3S1*k8X~FLizsfivtJlg&uYpw3 zRpucEa9W>ch<V8E=uP*tk)>oB3EWqByOZNxuD@>yoLGrqxL-5VQ#>;AD$?)R#TW^T zOe10yGNLwao1SlD1|Tx^Q3Jvgaivy@69zH8k#*D@Y&>sxpow?q{t^<2dK`mnQ&+fR zdWCb36EAq}jOKMECp?7P=5C%?^&6%>{m+VSMFrOy3evG+TcUL65O}l7NE;E()-AQD z;#-D^Nlc|rnAOs5fgtA(M;O%}Nz##hS%}@TspHgm9e{RfEpW9qqml^q1xZuC{#q3i z<_S?tKQ)Ru)7_S*BMZ@;Vt#+XI2@nkoP(1>wbeO7V3&n+7*1`!@Jp4c&js&5gEeF7 z<H^lj``0aZ&e;paJja8O&NEUIT1C{-bC*iNZD;SD>zoD#3|L+q;>)HvxUMFYH7@r} zFPIzLfz#53-0|A2OJr))9O;>^<-;?c`fCDma)Ist5_}il%~s{OcMB6C6SXO=#^4zU zsTSJ2JFpk@*VHJ*680Ijz+;BL@sP-mB<x(U2&_hbZ_X8Uh>);^|Luw<r5SQ2xWJ$M zfnMn%TdH|*{;j3$_j>A_=bM;YWkxh%s>{b*_5>gLmGshy!u5*FeZ!+)`mPg4V#KBr zLbIkiQ%Cc>M`8<dh>W_Ab`#%9p`LLo6@C`W8D5?pMAI8Q^C6&Xt7awI9&0oftmq-T z+E?CopOw|oH={b)SkE(bT^B4q^z2t;>VZ_a-G7v@+M?5n&EiyE{e76OU)*vTU4DDh zTe6&;v$BDMZt|`s2nxHBdY@t(U~6Fs##{;=2Hz(V5lwrcZ2nwFgZHTyu2{7NG4_;% zXsU&hc<y`QlVaL_sb_~1QOtNLWl%o*J5Vj)xFA}fjG^`VlI!*iJvpD7akKgCJ0Dz{ z=<7a)sG$sQFZ4G?3hD1j*1g3ZN%%8QJ|2O=Nr+yGhbC@Y*|(5;rH}Hkv1>Feu!Fy& z{xWrp2sU-~@Qc!R->TD6eAacYJ;8D}VPfn~9KM-;9?lz7qp?I$q1E*9y=AbaXBV-$ zJKAo(V)2hdTk{~9Nc_XDScHVLPi@~IqDiPlb#R#hlEuGfiJ``jK$WSHyCUH3@7Ath z11OT+kX@!9-3=>pEibrl9n7F{o3)}bYFIDGf=q-LfDvEZk-QQTe};uYvEjAE*wy0w z^gb-g*?o5EI_y2l3?4#on}#>zg>1<vI^^BUov5y`N?|LB`_D0tp$pwT$~b{GMOZ;7 zJ8m15gxxl-GWWZn2aJWs$pyWzx?p3vA>0{i<X=8o++Hl&iA|#yt(58owu+l8m^?}z zaPs&#nT&dhiT`e>LS)u6Kx}rrK>hj$G$n8J8NxTLjRyHvBdpM{yDAWN6nJ5-NM_2~ zG%&%V>2Nz)-^XiXB-COAufy-BZ`!hVjtL^=FP%N$rB$bAx2%d!nAZeo2L)g+7Zr|X z4KB{lI$t%Zp4_=S@x1Oy2lwn@874qtjz_rVjab&q@}FUjBGdJ=hrdFy?zdfOT+LnI zbCF}PxE28Mu}#HC92<QDUBa?TJ6E#G>pw2&;y=FH6c1>T&6sPdFC2<pEkBsBFushw zKu7jM?0+_k%e$tpB{~_*Xy(er9?Dz9AcQr1P4wa8Jb2n|Uyw`OW@Jvzu$~@75k$iK z8rT@teD~4{ncW~yODza<@-k9CQfN4G)UeAoNZFhC<q9uiYSy-l@0rD=;erjM|DLvZ z%Z0H@6+&-Ybn@MVG&i0CR$xu8!o}>6$ZP<cc9h%gI0JgFn;e(6cSetzwIzmCkJhtU zU0_c7kZ~`?+P)yPN@FIxT^{ylp)s@mu_x9j#=6PFM<U9;q9U?^LmsPn{<ff-=|k)W zR=;@$=r^<iKJhYcYj|gfaA5N?Y&~6mwb-s{(0Nh>h-_+KBeaT^E+Rjdx(Y1!G=9## zj(;NYK;v94b+q~|53Pa2$5#B5p&dQFkZzCGc?FJ_1U^iEjn#j^MAmD^LKc|H^Xf_` zfMxI)`>d9B+i*)6H`~N$kC#zl1tY>zxtccX17!6)h6oj=)*Ww4m-e8INGA`6uZJtH z8MPIx1IycNLXFENYW|X>>$<xWd#*-{?>R~pC32(V78i7Eet+%tdQ~rC;!>yd@Zp=W zV#i5e+EXj^i@G@nu+)rA&S#gQ*KfLl&O2>dnHAsYub1_Xvzq*L8i-=*PfJOWkoO6e zc6yqQOs#QWDLrL;T38v@9sf5~zW!@Nr2oF)Ano1a=jxxLj3&zP^J_*kB>CbPTt~jL zy?JBRu&#ssvOkcNXW!Ir?9}p5@V3;~=G@V9rqfRDM+uo|eOBWU_v%A+G>unn&qU;w zHsCo6KAp#l0BPNxz(#3MU`lBN-F|vk?94h^X?j5coWF8&TtZe^TZj31{BgAB$yy?= znDh66g8)x4gM}-KUv_AI!CxQdkj-b4Yrvdc#sk=bw=@P@zA@}Gq#OoSsrJK~&(Vl6 zhn~_YQknHV2EVIA)s&Q!yjy-dN;!TT<yZl{H?bN|rY?7ym=vrPW?X+o4Oujay&OL4 zlfN&!QS%sdQ%UCZ>z?B`rAv=c?aMYh0aJq<m(6F&T2`@U*Tks8itM2qotohp+F$>^ z(BD^D?L%uGv~$a6uF7mYARzQYxF`3JyKTsH#IZ*IS5))G5Bs2tu2<!4`)1=7zT~_V zS9YH_-34<I!TR3(z>@R}Zo_C97}AH=%i4p6eP2v@f}=0{o{Ju?CK`?1PN<p91(em! z;%96W`MXKiUHSDl|APfcOsH9qu{mnJb|(pB4HoU{LPU?5o>%zFY<{#uwB=_20i~zP zknmcavk9+G*2AGs+l#BasmEmG=PtM=+y0QDW{I#7-Iwda#_lILIaB4H%s>F`Uv?~3 zx8^TM%X|H73mM?OzO!_{aJax-v6blpVlcZ0dxPxJIg9uu5<nk@rCexL;~*MtHo9Lo z|50yB=a=N|7k8htT2Qd`1JP$6pm!Vlg~ESkt0dtWa=}u8?4F{@Uu@WP57B~P8B8SM zY4C~ypf&yF@^zo=5-DzpiY(?Qv)SzISU}8yr5P+6+~bA3f;I1SbCfqP3-fB{_R_6x zc2#ckiurx_D{t}(6=}WFV+;AhZjjq1ZI?G25#VhwTh{moY0!AdPU!iQsVQC2_dGbb z;dl4MiTBdB!uem2w!$})M#qrb(mBl6muw)yqgl8)WuEbPw`ymoZN;bU<*fE8KBcs@ z+k6%~WY~}#)sn-^Js>V0c^S>aRKt0v$8)Eba)1AX=>j`m?+@rppV>mWGbvju)-XW{ z>wa~Jdc5(oMe+Q?KK2_jQC{9=Mry%JajR|lYn=1R{(KJbr_rtdg|}I#tlu>txsA0A za^12^^mmjvbg22Xo#-lP<i3z;O~$pt9`ubj>FR*R_cs&Gn!x$kls1p1v`~m9H<ki- z{LY_CY{-&UW~>_hcN3-W?VrrlytE=MyMpGM4EhCt6?^F}upPl)EkuQCy=Om|owjc4 zvH1S+G4;T%vv@5<=#AC|QA~RFdxUeK9@=eaXN_B>?H5|;v-pPWhb|Nr6r?;NU%#1o zDye?`oI`{FgLBAIR|V6(3j^-m14X_RQal_vU#Ec<X;xxXa!c#VIlExOs`B4_XIX6I zRt6Ja?)AOo!YDFRhuuE}hp~RU?azx@uIrlttHiq1vi9Sf3YA*eY;_L|6m&NBhh#$G zz)ShdB|Y+YUgPn1;6q@Fd{@C2B+k_(>LbrdW?IYAxLLW^5**UXDtH1#MAz3hIo8CX z7Eoi25k7Z)Z@mf0m<b!xz^5Ze@Lc2EI=%FQV{Ydc5r>j-+rqV5zvJNvUyIHhjZGQ$ zOKNI)9gDa4cs30?z3HDAe|jfT2<@v2ttsKTG0ed5n~zS`nh)C`lMMsYbb}R*kHA~4 zyKZM~=f34g%RIX2v|kBjC)-u|Do#y$hFSMovgq&j8?)Ahu9DC~KnZeRH9jZvu?7A0 z1Oaw|wP~})zFXpad+L4dgoCDi(Qa25nwEGQx%KpB{YKZSPy1!BKUrLhoBG-rK3dPs zO0emt5OkTf?c;yEt*)xtMa*tiPqDt2kY~7i)iNGvXoPstSRj%kW%%LcBeO>@jauwX z%kNLRf0Dbxxv-P9*nZX__xdqyoeMMJi`th2k!&AbY!-4p@mGFJ)A&OoWbicmOVZA~ zZreI94gJsZvInoW-~Gv)ez#*ZUQqr*ai&SqDoe>Ac3Df>J!{-C)plN>dLXULz|TYi z{}X7vj25_UQ>eWdu_UFZhj<70xs*@{L(FFY8zE)iB-Pp0-&{KOHLpW}dB%G_X~Ax= zQ^+ePn!WF8CgAdmW}bL-Ubn6J`wt;4T;@T4ZXq{<1%s8?AGTe+G;D52X=ABlEo3os z&&Ir~{24U>QEIzFI^v4?8+VCE6z!&%xc(GyY%%eE)%)XFvP1rx^2}V$3y;COTlqf` zm6SWEnujqZw|w7DzOKwMydJ&n>%NIBS7|x9*f<H|Tx3{zYN^rDF>2AZi5@4A!_Dj3 zT+wyBc2=9*_ciG_^R(cZkVFrE@wT2~LV+NH@h076r1z3XnG9dS7P|Jx!$hjKTqVlH z85f2qP%Dx66<OVRp|n0W&{RR(I?jzK2^9K^!Ka-5dp=w)u@e-G_Ao5XkgMk8bQ#5> zd1$Wv5DcmnHePg2l_$u7_jv&}-6~cd-vHsvznx_3pHmM#2Xk*dt(LVCUB!tRs;5{C zDPzsED7JOrc78Y+3;-6jo+z|2Ee5vnF0uuu*<W@w=>Ikba_viv0g;=VQMSg{;vwls zYI+TJQZKP;0NHL#U57pw3NHKJus)cE(t#ZXPd7>uANwi|=ee;<<G))vrpGiDA7K&@ z-(L(XkD|MKR<m#%v!?z$m_kN|c6QR__3KAUN=Ii+iN<_mCDvdpBlUXMblu?4mt`rI ze51(ghmIjkh~k=8^D@}|H{vsTy2c@C+oH5-7o1HlvAjm>X1<%eWJ5a_DfZO}Ccd88 z5DRvfwhEn8XO?q~qpmaQxv%(?^FCz27bYH{OTR*}-hP<zPbH<*UtaiMKk2(x{stJK z-DI^oF*-Lp-6`36wPuhhNppG#m&n#RuNA|)XQ%{tf>iaqo?daK_Hbw9rAr{#JG>pW zTt&KP)^U9f!x82IlI;sruM65yzyq=Jg~c9>+(ESwt<y^-kKQSluIJz3-=VV#Umpnj z2|wzq`B7EyoEXQr&CsW`AecyXMI;h)V62w!rhoDISdKIQ96b4AWFD4wH59~(@ld1k z(poxIrVX*bDz40#%Zju>VZ+Md6q40C5;dOE7;-hsDUv@~%ldZQYn&ZdKZ3)Rp~zlz zd2Hc!%(ksv#}=!Jx}3m$`FCMBa4DUOj&10q*%#-i3566l*ETLEcRzlFuYF@qWy@^n zOSe;QH||CGpHwF_4Ucp+IxZbZ;s}skKCl@Q@u@Vq$jt*E9z3lkb-4w!E;iAeJ7~y< zCup4c$@Bzz1iW2~ReY6dsBj^QVuI;g#f&qO1^<Aze2vD9roBp1m(8qveB_s3R&Y7s z^s4zvKrQO*Wqx$QDtcqoZwnXG>=*hMydRbv{ww@Z<hC$Mwrb1i_zU$SWt=GSHOwdB zcm>+g!X{Kyi};N61@2qLtF?D2KLLU9?}^Cm@rSUZr$RH)l`|STmW+%0o?|H!?HIEO z*39GyvuU19xZ}R$*Sr?N2GvXSO#GMeRq(5}UAfXRq5Xi@P(9mcp||U~i^Q;r{ufG~ z^Cawo8Q*G-D+)&YaKHbS>GVp?@f4gZi`*(~%Rt!o`nZoxiJmZGEc3Ml?$Czrf@dF{ zJapGYpInkQB<_&%+D0OB1d7D1!v7*wnhdX;Eko7Pg(6$R79^{$yDhvdz;?8+6C}j3 z9+Z`z5acnNYlPV@u;V8I#ILMip<1R#VmmehNpLTGH@?MmGbYHn-Uxr>oIZti^MXy? znl@(r#U)&u0}nsTt|9aq-vv_5{PJiYOJ*wp_u{akT#5(cK{O*d{J?{2(w-2=ImL3D zfmsNH;77SI^#CRr@&-AjO9RYLp{9&%u&EPFwA<&!>NO1?ZXhl*cO8#ru#O9l4M$?) z5WDKhm0n@f{ynuvO=XBnPWw>ciob%7zRWz9XQ3!MtW&+eBXf!penz`xCy=C|xGOU^ zfGUaHW{<qA|5ZlrL}PIrqE;#M!fIh_J!}vrWjb;eDNJP{UR@tf8rC`Bq|(g(?pj0? z$6P0PyV%M+=yJ66XCQT*ThIu)`^jI8lRH~mWZTErM4>Eo64hP1iJvRDTT*`DO5;iu zJNYuE8<_*ppm)D*qF?yB^F9yFO5Qz<mwKiB>=|cQ7msPs3R`x_takrcu1+iCK<zaN z7H`6+iTHWp(8?bO+iG(9P(><^9&mjyZed5xtBo*PyBZmr3!E;yrATa4wU6CC<jSut zAV-%LONJeB;Jvan^DGkK&6jtx;>!1<Xb^KoU3u<i%EF_-qC9<}&>w#kGVd-3rsWB) z<@mLliV+=f;H5Qnp_wDgmvX`be=VqNJXt4bW~i2+LYDbzUUHQ$u@$TnE4=?&f-}tN z^Du3Ut_ioE8;>tz3Kv>IC80Z31sRs6JNS+|mhTAG!Iu~1LHf6G<K-b{b%;Ke^Wou~ z<E+~8pYyog--rrn|F&U~1Jl8j{oaZAerzL(S^j=mOj;9Tge>+cTBB&r`{8oP>s98` z19uuDwO=T5f=se;*aWl3wGXX)-?k%=r@8jSecSF8(6Uw$=n&LQyK`S)-3gY=9jtaT z-^$vr#IXj&JS?&9$#=2_P4N?i&k-$jdh?Xgfsuqn=ei55O~0BzMN{tssgE+1_^8)K zIlPt&kxk}1DsEGwtqZf|uU7_cr)p6kWO&YxvT1qsvSn~Mv9(Z^!jsO7&$oT_+5yWg zyHd(`m!E(6kFxLLFN48Cs|e)5x^k2tq*U3^q(CC4Hx7mrSGH4L)kjK(|8A(X#&C6r zjuqeIjl1qE!1=%|%ro{mO*5VE`n?d^EqlX9kN|#tx3F+56@C>0QsVGRwS(WTPHKip zxY14V1=!qb5@x2J&Xr(qNZOmaZ!LSb&30iVF4a-Jfe`wO=J3lzec0Y{bsQp?X<ZH^ zLwv#^*(!8@N-!KkG`9`rR@9HLlj1r*wju9ZsCXEOe&O93O0GT|!5DPeEintmNLmI% zXQ;o~3>hD_l&|CV#;JVSP^p^`o>+P1@Nu9AN17%I5(oEkPLp~eKyiHn%D|gd`;(^K zq7;epB!|k=&To^}+35T|7{bF4be#N1v7J5|2GDG&KkZ}Ff+XEoD%zyum_-%yJ5DPI z;{}-+(t8ukq?Ulp=1#|YM8nq7WpS;u334>xb#mdLHCD=TALIPE`cd-PY%q2)+vjKX zY)B}3z{YUG<?%+tsCUIHMW0vy@;T4i*iURbww)%6jT-lXpya(bL2ZA`Vr0y%Yx5|_ zXzCDuv+Tv$yOzFbHZ%da&j*|`3+VfIj2}iiO-ApY=%9R=`r<h>dHv=kwq51HlQQlK z;bQCF-w=wKAA)MTL~6Ig8xdP}EDd=QF;uf#j~I^$IPW|+TcDLNDnc^TO6W_A+Ezbj z<G}OJqCyH(OH?O@C%#09tBx#3y7W3l(IPJ@`rKUb`mZiEf_p%3u`gz-$dcXBrG_J) zBIEvq^ael}HBYV%V|z6A8{={7(nrO_=WF>mL7KLj7GU$_8ZhU}V8zA2nOFSZ#vX(7 zGN))k@C0aX+}s2uc$H?}aW8$2D8E-Cgtwd|m&x>@soz;1Kr*~6Ac?(`o|59x*(S(t z5IVPTA9Mo5<Tv4;QFFt+myN)+-u|e@Lds0J@N!gBz^~Q)XSe9+ZtgUz9wqNQUM}j{ z`0<R;ITqpG;eRbCz4*DVVW%SWqv|EslP6DPkNeDlIIMurYIjLjq<O_Qt#x%^>=sm! zho{;y9bEV%Kz$^SCx>fo_ra++TzgP=PEqAPl>LdE{s<JV9k=QN8cYK&*ffY$`}Q^z zW!M{_BSs{&T0VGNPegC=sW;z0;R*QAaXWGX`lM|5if#D$2ptKnE!jz6={W_IP+iVs zq^y5(P$PVwdyc{4{2r99JGX11C1+U6x3wUU*v#9OTamV>(RPo!(r`o`uFYi=9)+u? z-TY(8UaitBU99)c9pDm8PBLMrDO;?hskj^+3KqUKE~OwB(xY3lX+e-Jvey3nxI$>= zYh@HxlF5-AJ16!2pt+;qgLIg*geFx{c{DNB@^RKw(vv<juxW8+$m*}iFs1$LKZ1X? zh<KIbuFdCpY~u6aqlP1-@Pu?PDl4vDW|;nMyDM$*)_bq0f?}9W2xvoOO%s|<-5v1d zI`Q~fhuE7KcWZ+_ZxvtY5L8ZW#C_MjTXhFc07Z(qD`<@@jy1FU_dp`X?%DkI6FFaH zw(Y5gi6T;Ve^guSEZS+*)yo4}GddqCBlBqXN^;iemd;~k?Zvmh*q1Y7k3RQMY-SwI zu&@PgY0O`_aXa2CvM4u$GS@hD-{cMS>RY?O*XP;5(Zt5lB5zVjqr%0ViP$gVKySGL zManl{L+LmZ=`)`ihE_i{T$S%!FJ+0yCvD25v-eyBXLbJ&;~sbJP4GBxR|a_YOi1Hp zY$%I#JkI@4eqSvIi$(rlu>Fg5aDUEB@%{eWX2-5`^jZzSHxwJUEVlQ$&pW4F7G`8$ zRg}GY+V*y0fq8KFJ$9-kFi`2NxtOQglso{WE}XSX%lQ<nmc|F;(8f*RLL)U@oG#fr zL@P+pPG1U~!>Wl8dHks7z+EBy=+1?QGetJs@U^wbB7^o``mawZLkEX<(A`>y`;6&_ z25JoBB6T+WNvbIp-M=l>y1x-x#iUq8PW{N3B7!#2eB%^e@=Sbk8;3<T<AGR|jFNc7 zRMn<!Jtx~xoOg?`efs41Ct`?FUcBvU&o`c`4P_oJ4m{fX#4pc$G)7yfjk-^}+>_0S z5IKrOw70jQHu1TfXkG17#9sS;88=d5z^SgFf_SQ+4>wDi=(v{Hir!NIWx=>$;?S69 zzAGtSy>N@SA)A~<{ZCY=MDF^hN|k-6&DE7N8XhCOc5LZ!ag_sQR6gTa<_n(HIo(1p z3Nb|>W$NlTLVv@KsteeED7ka%Z9Rv|Q*WL7seyv;fBX><iTKHXhd7*<K27F~cF}x8 zHG{}I=t6;%Sk=_G0~znym6yUwj1`@EFZ<;ra&pcIc889aiB;<)JCQ@Pf!{=;7tQ<R zHsUH|VB_%{reC2v7ba)%NiEvma|@~`WXapUua-`vFf(MEo?*JzkC>&to^SBjZTq8Z z88*|gykpuxDBG3$c>a0WHy|VB`=)$0eSxiGu`oJx``4pc^t7X%Dcka_V4(XDHh8UJ z>MZW$<Q>UsXwHfJIs3IrcP(CpjH~FQ#GFSJ(uCE4$43YLyR&i5=N1*N4Vb^_5ZIOO zxWjbP_L+H?oMW`D6R%Fr!`0k+G23aUqtjpX&HD#TcROubDhLu@rr+PPzOh?!FMYQf z&XT@)9xLuL*;(vbk56d&UM?K3vNMPnU7R=lOrEly*ttH|v5;-+pb!8ZE1xpinO$+g z@8-DqzS#%)&Mri2T%XE){=9t6c)2jwvyW|9GYwsBsxfclZ=-1D#2%iy?2Q_gE}T9= zJMpi2RfH^sT`9rzmqpl1Vl&>Xk#Fcfbkl!cF%eUlbhJ{JvVKu^mju#zU40AkD*gEI zhzrk|6)m)$@$gNr3UEFAy5J~s>ieC6jSZ)X(MP+9AML^h1en;@95U<(O6;-Z6Dz<u zFVml5n-d@qFsZSq*f?{-9v;nnx-&50@Lg^IQo+*z<be+YFR#^Lt~)Z15)qHmwV5j? zTv%n!hRpsPu2|N+YGY$|&G3Kb7)F~gnVYL9w}TeEyh&TPURp`)Gu*1SDuLBF&b$T( zxb(WnyBNFZ?_j90H%Tob_m!AA2*GQRyqA5y=W!1D?K4_xYanr6+W3f(q1kMeyd?OV zV<QDV`q3ZzZM|AJu-a@@%GC2`>sR>hAwymxVf03m<mFkS)$gxRr@1P+rme{Y{dLLZ z<`Q$<eDD6$wxa^ilGif1r9FLq!f%9kj<`!927<V%L<l)Gn8Tao15PVlv{5Sl>u01G z;Wvmshs3IZUv`sf(>k0kv{C+SMCGQrOkN^#2cwVu4(C3w1gab40@<`;(I5ALasU99 zsKen-aRTIBl`e!fT1P+sB9L{72hc45Kt$}%UN-<V`kg61&p%TrV|x}So9#CL9GzR8 z1Y{hsXj<RiXQTiUrZII00H8IC`~MZX$!fbal=x0D_A-T{mz#b*H>|A80MdpY3j>c~ zbK^q*T*ideACUZS6IMmCfYevjg+j<9qo_*vPXQa%zvyT8hwe-kI3M*&1K^t3zds^W z)1_}*AWCzYXgFLKNEr>(EUW-Y<~zm}+tty)0G3553C!k!U#ZPsMmmmo%t?CA-2&3Z zBCA2T|3Tri-CUjDlq3RO19@cG^d1Gq8|ndo>alIdEWt;VVM9i*8lB=i%YDvYjt7yD zlD_ktqX3#n778&6iHI=mEQeLKir09GlmM4Pp8=oulgEEP=bO?}c=e~A_HU-Y$FOYr zlypQ-D%zs*&%SAW=M63kN)T~}K}1O8!hQB2Tk-~h6nu23v(F$Zaw+$Z3Si*b_gWPq z=rWwY^_5cTQYn4YeQ-<{xZJS%{uO!{e%0s#AoUwFpSwuyW;c?Gwg9PX2JKuqL4$ww zLYsb{;(L>j24qeB_y^!K9pGmm%bYU#{P}Z5_Fm`^dPpw<Kwu4NxRNh!Qa%>RY|B4M z`ESkQ%0Uc(c(V(hbv<bA$-Nd}xREJATZsNW8k=j>;P!I~a2<`yX8_trIZTg;U|=H= zpENd6X8#+*N)F(jc2%zmp&vGxl4r@gJ}b|{=&Ophiod03nr)~oC^&G31OjXafEHn} zFR`U!4PzHIu4%X=t)M*ncsV#Bda?tt(09`FU5#L``r5!fG2mA6*1m0E5%ZkQ+;0S5 z=UORn@O9@Z+RP7DKJ)vAlJrz>orIqLR<?ejc6+GFJy+dn02_cW?=$r^1US>*fCQm| zF2LcLJkJPhT={r82>|CBUEY<vfeq#kg?(t-8NlReoE@@PV*m$y)b(lGpITW@hFlj| z+bS{`&<6c%00;<zYK3oxm&Y@Yt(zM=m@$YL&pN%S9~{eo!$;)cPe@5#WUd1&pTf#~ z%V)rYxm$sm|Fq-b`Ft6E1(O1H^oGH-q$kMeRU!o~1XJQB50u*eTR?zYatE8CT0bpI zMpao}K<+umcyp&Q#s+Y7R=CITed42I!-72~S^kyYFn6wyd~6c{=|}JWjm{dJ-!OsX zL3cG-w&|<i+-R>Sfb<N%AO7*XJOA>3N^VeU0OO|?M@k)F{8WbiKQMlF-2b80VgY_n z3oFSz^v*?8>-V@DKpP661*H`Ps3d-W-kbAy1^Ztj5I_yGYx>3wG6luqy~DcSX)c+( z=^Gf0oP3y=k{ix2VFtcw4?Ju*J>!N0!{Y5R2deL2Q6oB1#-$Liwgy2zg{_7KMDoCt z`iyQYhHSbg;I@-Pr}18qHxsfr!Mfm4PySlLL~(kkkV(P+m_{N)y=ad)0)tNOoQD}4 z9)?_ZCP_{P(DdK2o#7?uQqL{pzrd<An(m`9f9-MH`~;}tTnOX}D6FG)jG^mg8~?aR z{}8FUNnS)dpAlMv-v1#_1^U6nLY*G0;D7x$6u=aq*&Q*}J^%je2IxP#Ok#KbH-c4$ z@Kx6TjZr1$_E#I3MgjDSl>d!-Jz<&T|G(QH+7+meInAA5pZ^`+|1ysfeZx0B=ZOAo zuLv;zuJ2?P|Hwb~ffz!_`OB^vr!M#lqFV)<#oubYiS8CV%@!1#6$zi;%rgiB(1nK7 zEmv(Dw(jQJlGlGW=9WyY+4j))#MwFV82?Gmm@1bv3ehfw0>ACL(i1jwW8Fvr1xefv zmz53J)Cw!~IGBbi<1p|gu!H25zuo|7P0K2cx70a{$afelUn3NsK^woIs8viLP2b=i z4^bhg_b?w*o@F^)LR?4i{V;}`08ShAzv(|5PPEd5GXL-NAJTA=<j3636fAf(pr~#s zQMi}Lj>68WUuZe}1)&T*u&qpfXaHCfFx`FJ>K-%)@N?9hhQw9CP=@d6!F}kE*yC$a zl`OxuT&<REzd4IA;jl!3Mh11cgv#wbMksk$8_ssd6s35-FRz>EQ}xLhd?0u3h#L+4 zix;Ftlz}sz;&tmOrQZnpi=F@6Cic>=4^5yy(i#w1y7C-#4%63#PRSGJb@s#3N-%T4 zny?u;OGLndR+$Al;i{3KPoYsB58)K>(O9bC4a$Q7-Bk+s7CtX-I+f4vkE654G`R1y zBlE2p)SeQ#Nr^EtlCn@Xr!{!7V_YykmN;&LoNh(v(6E+pE{rg=hj*}r%oInH2QL!L zEy=0LW}i8}azETP%-K527k50qa2sS!hy*{!X0+&pwRN9YiSGE5QQ;);;x*j~J3>8$ z3V&<Cfv!{IqUK%(Gw8N5pD1DI)slczgBs8ACq7QMNVkP;koLPCd5|4q%DUs@jDdl! zP9LOdV+Cahb+qA^CdeRd=3X;_rfW=#-EXw+2ySJ|;8+uZ&cKSQML%eSz6LOn|9?<` zD4obl-|J7vU|tJVR8uu5xCe1@p73ibNR!BL^&hr8KL-E7)zp!{X2*<uci%tB%mS5# zb)nK?_LV_8cK&79aGfRW43X-x!R|=_ruD=xdbH&o0MBXSOUS|@U`h4c62uG(C_Sg{ zcAsZxvtS1og0v)3oGDEilzZNzY*k9gmW{s;9rd4FHqcJWO<b>B^n~8L><-V3#+qWJ z3FbNQc+NAXCQ?&^X}@ax7*Vu?O(CteIlSWM$!1@{NJ|_Zpgn5w=KDOZ)YS~ujv|Gg zQFHDeXbA1K3L9O!$_yLpl0vf5mU)yl11}abTK<xt33Dl0N<pPMf5sJA8d8SI;|o?I zH|)Em9&huo`Nm%d%RcjMG4vIMnoz$UG{f#Yb|1sF4AdsvB{byNCuuSWH6hj1F`g~N zh#pNPT($(W?|~pba0^y(DuPN~IDY;t%IM+$U;$!!I>VjA4ZhJOEPM`8QHDz6B!Uug zc~HsrwxOVM;Fr_g?zqtoQ^*LH&ECVXL630?aeb+y1iTFUj&FzodJd@+1OrYrfb)$W z_Jm)7a`AJet9R|Y<Y|x+Lxe7A2MU~bS>XCxG98RaZ1^sG9xoTH^ljs1i!RQ~<O5px zIG5&&Nm|B7Ke)8+*hoHK;bSX&-O-2KVg9TrrIuub8^>qEoD9pD615-toMuooyb}4_ zu#pDSMCPp%e|^bdCEU3`;N;TWmg?=AM)SI%@K-K7p%*XSyEid`^D-t6>AQIoc!FHk zv!=wL!Nm7LNE!)mjXRh7U%g}=lF3o>;9bm6A^-YHRf~@F(cuJkd7RTA=P2>#d!Mcz zRn5MSjN7=TmcZN?z<6VTJ&`%}3euE0+~UVp$#2y>T?h~0?$eU44n&tSe~|a4-f>`A z_cs1Giwg<-iKgJzG-o8cNEV9{d>Pe?4&n3SXMewV;1ek_qDtRgw)yZ!rf}lrFE&a@ z^=YK=Y8^f-b1G?+5&(i8!+kVr{H3fus(q2N+mjszGp6ewk23z=AZum@w?6{FbbX>K z$r+6rr*UHDZiXMr7g6Yw1!R>r_ZVN?`%+DIwfr;a(usp*<Qw99h&cT5lD!-|&Z78& z^e_wJ@IPP`M~FOxiRg(yikI?-relweKvz3hG-Apz&G7i?$SDBx)RaQtcu7~Ap*J{9 z9!|UkJYa*SvJpgk!KB`?td(x3e<Y%jiSVkY92u&P%mkP$FVj4Jg<aDit)%aeZ49k= zVZ5P)R1(kRslLSphsnVMXmNAuA~)vtUfs3l5F?(97jvP3oU4tLq1>R`G~MDpcQl_) zflq|Xa@%lSyCp&ARLCkF@oM&87Q<+Bcnu|hznus-itN3;ikK>d=W^gp1QWL(<$tgJ z#oa;~#J?7Z*^b=$xfQvw?ijhfWUj(T3W=}4pHHL>_Tj>tNnN|oi0c-pa7E>J*FC#; zA0-T@h)q;3EpX!1T9OPc<HkfLapvH8L8`^|M(t7VdSw1LtQ3d1Z57>!CEV^N8r~Q( zscO~TQ?8EEMH;o0;yYph<NdA1w_Av*KZbQ-N#W~uOg=Mj=b}Fmz3h<#1rInm86&ia z+wY>NAk1t{&aN7@{Lg%Z)l?ewh2%Mu7QiQR!Dt&6=E+pCeSekWxm{C+4!oGwNcRYd z>O7iN6B>znur-0W<gT@+8vTYRBny7Gjg?dWT=s)tYYZYdJw(TPiXal@n}is6H?&E! zz5m;<nV`~NVi-P}G%T$6o~po?P)RGEKfaUd-LLH`z>%s)?pTWHs81(SMQW9OtGw~z z_PPa>VZ(Ye<2GwjzBej<wfBFbpA6+F8s6X=y3K#6K-r+0!T+J#oZ!I$dJWx97DD*P z|8|uQta?2lfS_#8FuVUUA2R=Oe}=d>Q`UWMgq8d3-jIW8B*mm^<)d##!2&p0c_&WR zH$CYA)by(z?|*F#!v7CxqWxkqkoJaVU5=Zl0_Xd0v?c+Fx&n+c*Z*;+0Col70x7Hj zeii3hAhc=uAN>QMV;AQzrMv)nfsmv8LX)lLEg#7X=%%F%BG9Gf1zjrk0LR-QuPat# zRmjj#fr=3X?6xfMu!8gJqe1!RVu!WPZEjAlf+q}ANxTn>)oF{~eVnAjukw9HO;_Gx zefjje&t}-`*CV%X;p5{K9CPUw98>T|o?c~mcMSH*LZ6D6R?n~Qwr}ViZPXr5t@^^o zjj_9a3fE+acdFDyhGL;x$Fww*^>1U|PiESsJb5*Ka?qSAcq{QT+UdYgfiFWbkTyfk zbFAHH|2l4}<@o6q?&;F?tS?7ZdT~hVT%ErHTjJ_>_y@+gn3zUIgYwA<*VZrgvpLc= zZ%fQSb5~UX_ICFoYpT*;T3Op%b*LwrkC$DWss37JNvjxnr)hJ~Pl0VHhVvuit@k7N zd#h~vUsZCWA{PGUuFiTE!y@e268Yq(s@6fi3JUk!4<a8D6BGBE>3KW(%=`wXk&D%l z6Tiy^7j?Zz(ROx_Jo|H9-k+*$5hR6J+4a*Vh>Fkb^ga&C%*f8xHPB19t5;-8?7_;m z7Q~*u`;qgIWVPhjJyaA=k&V)Hs!{29&dbHAwRzX#?b|%Tjhj`H7<sEoEvqOZvXaW_ z>OGV$TEXxdkl=Upo)r5e?XICP@1p?Qdg1>HrH(k=@1krzo%Xag$u;9?x)H(0*@(rS zEN!y+zqJ0%M9rT%cLdBharNZ=BQjogrIF?yJuMI0ba9~Q_vc+By1NEQbs9GxpP8k9 z(|cSDEX)&r{>OXcMMhHcn>P~02X5>2;on2M$OD3UJTHg>2-XeCUMyK|oisQ>y0N?2 zv-g~z^0%s7%?f{;p#Ubl%c88?WvLl=@74I9gV`zjIA9GL?G2Cb!IPg4_Wh3>b-D;( ztO99*&nJf%fX#na^tAkL3^bY)R?qrxh9M`j9yf1*LC7+S68u}sT|W;obFzS&kzjX! zTQ)NS4R7A}w)=_y0V)FTlOuMmhi@7Idr64x%Ks;TO^2d8R`ox^a#`XSBK|)Fd(<$R z)_IUZR!VzaAPTuG(Qh@W-v7G#;JHlyJ}C54um&~2Ih*$S#>G|pz~%wL@|%Yb5<2~- z->QpJ`#*Z%LiOG{{M)aGzkQA^D5>q~<qF^acJCp5b7)}kKqStiT!3qDbD{RskAv&e z!vbTUnZ@16<WF4~&&!@)hg&*Vx--#vtk5E4@Lj!n%6L(~Wf_x|-HUa5yx*tMgJZNO zJFYdNB-#GiPO?+{MA>vjDi5r~%~Yn^rW@bhVt7xCQpvxUL}m&qz4d;&eA);lsG^v! zC`O-gm&;uR2jiUoz;J>Abo@}(St-BWqV`ngIs1>ixkr8qi{EYQajr{(#66@;>Ndfa z{dK`YMpX#loc6&=6mpYEse9vti_$^#U;pMk`>O{o#*q(Scp=IElvjl{AGcCXFCD*6 z8$Z*M%l1n#Z`7d)6)o$-2L)_NrNyykMmU-&yx{5=lAOtoz{B~UxE)G9rDtduoGM02 zM(lni5o(WARkF-1<|X`_;yID^r`+Dvki8IG8-9o~)l;Nt7>8{IjMR9VPfnu(^m5q8 zEySfMG$6>3jVsV7&HE*kvEJPB*;kzGggLA?)4{GlzRDcNFZKr$>VrzlTya2?U<pwv zt{ey=Q6;YNR$bUKtJuYxt6t<ONyew*Eb!r99MbSUm4}uRPl*kq=Gwr;$a|%(?cGQP zL}GN0r*6{A@0hRK410^KzfRi<ML$`eexzRCm`g{|Rs?A*s`>jbweNpAl62h-xSFjb z4bv^8x85Ibi8~AV04SOcXm#!M_C81(*PQ0Pb}>?})Z7(AW68799z*@!^M!Xht<1i- zcqQ9cVJ_+7zd_V%lr><O)qE4aN}d?x|ElY(<C=crKTabhj+B-msB}n|;1Cp0i3!r7 zf`rmtlN#+1aI`dz93b7@6A&0NLb_v80`mLtef)lZ{`Th{pSa!K?q2Wb`*nAyTDby( z7gpKmaQH9sC&V+NDtus<W>jgki);ZwOZ~BSkTcjN5ru*$xcMixl8noOT}n_-D79a? zTxyX-T7s=(;8l9#m*7zlK60oFVoeHqUxuQ>8EU&73wL!Mkhk6e)iHW8AY+>*_XvbN z9W#hHNdkyc;JseZoz;F;;(5X%-0kNPE|&x~4S{<-BnMmIL#q7;Kf8W73W4LkZ)z^a zsO8@ONn=YV+z547BU>|#i}5|6-h4v=iXi+;JPsR3E940EmS-j$Qp-(6#kvs%3h#q{ zva`+Xw905DFp)iHMkJ$rBDR#3MTzL~66k|k1E})fhH77hd-3c%WVGyBzb9W86;5rw z=4>iZY}&I+&G*}fzy9O%*Zz20v6IKMW}v(bvBX65GRN+LTy<&N)}Hp8<x|q+YP^@r z?LDF;m@ib4RJSF%V}px<7A$_VmT{fjghEhvd>H29>`L&Fu)k<C_UFNVEY!Ue?njxH zhcY0KoFfcFZ4wVlJnT&0tGGX@OT3<onwNVqy@w=VC2S0e80UM=ZbsB704^YH3MZr_ zQZCxG-<1m>-6hQ+=R?`qX&6ENT7vhCK=0uZeb2H2h$sEHoPtsR*cF7qL!#V<uc0+E zm}P#@Mj>h`Xc9)N|Dy)JOeCIS?5XTT>r@FWmbo;ia3kVJZF+#-Q))3GK%fwJ=*eDu z>ro=1D*mcEbT_Xdq#b#qrE5Gn3Fep#273p!wW=%-LA99>HL#&a5}tc<L|jq5$VX#M zk0cGuxaco43pWiuD0sp(NTH@w3Lz0kE~u2>n^)W2)|h$p{E{qU`L>wX5HC$_`A%}< z!+mly_I=_^KiOcSGnY!6k3=QHJD6X|Me6}-))~9Fh>AQ)8|-B70TsbIww{b*SW-;J zdf2GiVMe`=j<nK2&uY~2)df0{x8)<Bl7V2N0Mpu_-CLkGrZ2X0ml^C^tmvJcvySs3 zVut+qZB?zrHD5Wr+t@R{Z@kH{Il-@q(5q1D3FtzV6dN_94;i26<T{!4Cn$v0|L6y0 z3P@@01e!O+K<#TP-w&8kX42W2C(Rzb_PtxPgT=Jxu}xLEi;Nc~<gT*h^`_o4NSfZA z^1VTNxZ0QK(+u1s5!gp|%%zcQb!L@j+5)bt@Dj@SBSYIa^$7F%7Rim!haGTw#Bgh! zVIdm)ax8I(#EcOEYc)U`>MneP3zCIJ_TE}3+IO-et6m{v{k@8;6WOsuK8o(<)WT+? z<kP}S8TrT_<EM9BSp{!WX&VqsKsG)ix3<2~bC!sTAqBx}4d}ucinyP_O$_vE!D`C_ z7Fu`GlxWsPz?l|c&m8%=o2SIJeBTl$iH9J|>VpF;tl@7a_jp*MAIl8EGWFdXwyaa& z#U1VAi&mm;91mdDwAPFBF37@wVGfa)7*lXLQLPr#K<34qIk&VZAr=0Z0{+LN6~o@- zDTWNMqLlhT@VFgvNjR7aNAX4WvVY^<XE_!uCf}uq_M}{WkaofBc;fY3M)nCc5rg38 zR2v`j-gKPt;esB|DWI(ga&y@&|AnX*LQCB8{flR6qw1n2h<~EpwT&Kr*UFf!)6md* zHyENe>!v-Xb&hRzRrWkNi!bXQ=Hcbt@w5(gQLEbuOHy#NNiE#*+>z~Vx;l##c{04Z zzgV?1@by8v<D_)Xo<9e;8rL8Kljq{-XXaVUl|SL-*xFy*GL>Km)eATep2I}Hncg1X z=%-}r9eSI-c`&jysphEcf6VHmd`|Pl<^oYF7})G3S5r3h872*JY<Y)!URr<oI3Tq_ zYUluYJ8Mm-WMlHg>i2JPrj7zhpR}fPy7iOHsJa|ust>!8AtLG+-qU6g^ZXm;&7M5k zn+AjBA}{n035g(S^32^GhF+(pgB>37A7DW!VW-8_R2U)B$zf^LQj)l|tueeTvNsr& zk{Am1PIH_g?x~hb%|XlGw=<o2d8vxDrEtEz9vTs*kxS5!^JkpYi`|Rql--tamh>Bn zeAcgxv-AoEb_qp2?{2hVCNPEFgDjgu{cq!o^ODs!{-%?65~FiD!W~D*?ofJnOo#u4 zS84qAJlSJ0T<3x|(cT%PCok854ydg!5lF3-6RCHx?(X@sV83G(MRxzu_+vYI)^7Y{ zqrc*AH-F{($<+w`+1pl3|Fn`oO40H$8KAJIp2r$%r=_K}f3;pc><}5yiyJa4kHs{n z^zAl<i`%}BX8p+hyV2Wa<Z!$H`xQ|%d(EA(V`c5vDkZL&HtU75?6TSVrG{z4JN9GM zYLd^=jo?4FJP&`_kMKxdx17<MQqQ+V30c3>8;Q>ay47}ub4>>}6NF#G-Pt@3P6w=c zs!hw?h}~zJTNgl;GA@ws#+8C!%?;z89Pxh^Fc6%NHg0{r^A~N2--Yjd{nwm~Z)i^E zCn?ewY#V2mUdZ;a(ZxsWZqH=)o_tACbD%1B8IqFo;6U<|yeNB_RwOrB;Cep&3-u}` zI6R(3`N3cI<M)*YdM*CSK-WmBt-ZZDDU1R7+RH2xomh4qGvAYY!%fA?6cK$o=QrZx zT|ztzXf3dXvW5J-nhd$Bb=jSrH@2j9t<Jb!kBDVx6mCjcP~GxB+|j8gzSMV5KnWAE zh9CWS4$XWyp3-WxK<TOQ6soK<{@n0XeA;d_-OyI**F2|<@z$gC<JRBQ4#juCi9~@l ztLvYl)UfRdS5)UBDdmw}3QxI*ss4;}oR*;izN7G>ak~7ddSj@$!MJOX3R2@q3>xJl z#CK=UmGfsPvcTN}>G(zeh+wr#PQ)oPdbOUt4WcW0fmXoI4N*BPbl;^x)ss>^K8au$ z#ZaPrZs2cMQw!KVy>r11ySvS4K&IrZN9^Rm8R4`irM~mPnu*XG{HJD9<aq7if@k?Q z<{L`f?HBvsW|^><)RUiUbH{>)^)O%z!0VSizn+)pxD*nbPuv$uIfZh<1k~lku&DRY z!{zEi^>A1F{Y%UMQq)_PoR$|M^3kue`keOe=*}DlTkp?Rz$DW9jSy!$S>cl+H-A|b zV)i1C1AUnl@}{(9O;5^kO7kF_^fdR%-Q;Kfs}wnUK9RLy^{{+<-(~n{&YHzU@yydH z-B1?sa(N+4X}pmVpvQza4wURC_m<HC)@S=ir5k^}R*#&wl|O6vl^QZ>Z)%P|d++IK z*yu+etUefNE8mI-Rx@Gd9(Q(iiSC8}$-ur9sM#N39?PlTJ3Dpq*pbyhgU`-9esVli zDpF;g;SBT^)Uzr5-K5guvZq~=eN1olb3gf7G&lHEEgziAyVyCO$I2&s9&C-fr+1pr zn0&n0_j#`MsxC2G*$TX83Z*%TzaaR-h!{oM6yLA@MhL27rsu+=e_EO@Jg@ONA{6}d zH9DfOmy7+B(@PD)+3~71Aky1D!xeu;N#IE={l-bi7$yQ)R&<CpA#^w(!=^QZtbFHV zNFxbK^d*e~BU--<4;YYBfut#-yGp<n#4_X>G?KM1eOv|arqSX_j-BzMEuE@g<zbqz zJ(C-*w^m=L-2rD3%8SNrN)P8?X}v~b9zU?rgv_SZahY5?W3er?e;5!5?CCs>7IfS^ z+s*Y}wZ+?PICW2B<al}crNPKBNB3AOo0UMWjivHUO5%@ie)W34lDEvd#Hi&8R4>>T z!<R|BEi&4xQLKuM2(cGLdXP7w>dY@pNlXf0j7yxDz9+E<)W_bI4<~02#3WHkZw_*m z6g&cZ^u;ERM#BWpSCrkFsz6OZgtWH1_hj1|XfCakcZk^768l`vJ*U#kE1<*hX_;}N zg^(5$-ni}n4_`U%8&3bcC6gsfJv)y@Zl1y<w>*gUkHV!3=syuS8tYsCi;K&b|94Q% zGWfdoCdl$<(1-UScOEqvt$1yn2@swx53mRt*YTQiRj$sI-Z3zpy_0^61egFHVw8P! zATw<W<pid~Wo%-1&6}P*{d>9g8}8V0{{E?*?dkZv`drW_f;il~*pbp7w*ALRjlqD_ zS&bma*tEJD<Wj1LmhA=CWU8+n|B$fV>7%|^FWjbJX<$u%;938hS=5rKx^<ClP(YHE z$~|DCpU={~aFT!*7K{2TzdB+R<n236^s!EfX9~?kpDiDxheLaoCzddZ5jr7IPw2l4 zh>kdkRHygaIiPzvCnqsT<V<8}vArb~W43dzskNG5?yPRyhp%{HCh&)j5jB2dlPMDi zs3Wr$WG)9aU+ayuE|*O)Er)hU%gw}#s+`I431d7jg`HN2XWhr!8}rg(R|BobY_KaU z%ZtVj)WhxV!;a;lGCMhv9w*tHQZ1z^mFh6O{*kqTBgMWEnZIB3ki_gp=??j~{3lbx zJ*JZbr90dURt~~;*3k4YZ*gg>?24PK&ZPJdrBERm3N*zG=e=H=R)@(eafi}1mu<6$ zL}6AZER!C3RrYwViYWFamc}My*WnK@5mEBLA`AnbWi>V)Y_I&O{p)bdbtCi8(ofo% zqOVWN@%~+fR>(_Wc(`UmJUv2iez4N%EK`T+uU#$HiI@yO?rBvxZJ6PCwGe8g(Rsz% z?rPVj+8R=n50yDdLv8dE%Ka!&1IC|gLDMp9q<zHCJC+ch|4YcIT1<6oP)AzZW~9P8 zkU8KyM{WOFl_aKbsTaG>91Y!8e*b{2X4`Tun<fqS%__`4!>I8NEp9}rLoTBw9Dgeh zPxpN@3U)Og?0A>`O_+dj5WTSF&I#d1&R~uKw8;7bdQVgY`p)sI<tNec(i`rti2UQ) zGUn0Vfr~v~L=SGc?OpBmPk>DK140~Obl(Lj*yi`eZYBm2`hjGADHAuGSbp+%Tfj;! zMA$lJWAQ3vE1G?8Fpuy3uBfWVh8CDpT%?E4hwtu<w_1ayIocHb+pJS>(4$a)Ux&!O zP)GBkdXKy+z5rES{Ib+mpZoT5vBbbg_R}|6j42|9StcWHs^9$u@P{$U<wzd&(LFx2 z>xqiTcE#DUBaat}#8PWpu6~(S!>gV-rH5A~I!$BPA8QsU#Tn0Co0-^42V8TU(9E$; zX;n$?KWpv38_!EpQly=C!Bh_dkH<f5PrHhmHra+WA~XZ%2Sagli%VG&x;=L#CW@uQ zb$|5p=0vXe{D>WvHgB-B>6BBsmZ1Oq5Rd=<$9I+)T%v76>AIqrG#HhW5rC$tQJ%V3 zwt&k(!Y3q8ZP(qZKg{<;*VzN-$LppXGQzrU6lA^<Z1m1AriNEOTZt>VOmaNOE;02q zD(tJ9ah_V|JHEQA<H4cVxSHS(S}xlTm8->FE{LAQnm;lzp?0@R$b*jYyESAxv#Kx+ zkI}kYjh3}kdY$ogKQ|o2Qba|>#AJCKiB~B!h)<f&$)I}NO6Y^%KFgr{akWsbWIvHt zqnt*$0{*D9dysZN?JHX2nhkj=r~o+1@AA85rOC<KhJYUFc=1`F%hAqz9`SRctQ8>o z&8H9cJ>kMRwOGKcU-Z@AD<7@1fX(*&lJeS>c&?n7ttLMILNxxR+z&66Rc)`38s+46 zAaXl(pRfav9J11h!<DHt=PK~t!7LB68qTl5cF}K;b3XNiOnW8OnF=YsL>nKkhph2Z z#50wx4bcWJMvxE3Aur!%HiShhcze67sX|^tk9XqOH(&TFl8Tr7hu<(Q>Fh#eaiI&< zVwKFrnx@pC2%UM6<*w%RG4>*p#dWjq_7giAG<EM1`x)b^%^LzH#eS6NH{Y&v>55NF zQ!SI9cG8@5js5QbS)tC=`KqL(?rLJ<E+5Cci`+av1vfHlK0&aoY^ah*biUe`{)hH% zKApz^QLJ6-el_cAEpT1p{Q+?$R=ISYCFb~qwDvD$0h&>KiLzy9Ebph&Usc}+-v4H7 zV*Q-*;|yBay{xq&FWdLl<BKkw(0@?z%wVz0f{Ep*X^7cDzl><o<NYo4aMhIkZ7I8( z*WIySa;8xCU_EjsawRICvO+_^T_VTfd;2nm83PM`+B2&pEF9L)7+LAC!t|<GF2Hwf zB{nVxP@ire%u^9d%uY`MvoEy!qWnJUHZ@(QKoZsTuiu}feq4_i9Az9gu!iALZJ!Io z8H8gYPJx|=Z~g8L<M-4d8H8qf<JRY)$mYyOEcmk$tE}cAg$hKN<YrM`_qo9xFip^O z*7ykckZ8Zr*l$TT7Or9ctq^q+ql)|csL7UXEUI^<)dQOD=2vntf2T+A&YAq6abt$E zG`8Wmy9kpHgho97CQq7k<oQDmnZw~gpS9#jr9Ej|8<TMojj-meO7HJ^Tu?y4W)u1n z>$Bb~%dX(Iy*3;T48czs9bQj0%BS62oA_q5qDP3W6!AX?2qKY#u=RanEt!rdL)w>D zn^4Us#Y+bl1GS%er<7t78x8q@ZKlbs*#wW~S|a4IOpm!ayN~lOCc$7H6VC0x6eH}h z;_ED__j{T$%(t+U5(U={bAz%0IMHtL*LNK$$uj5OFSnNy@73P{JItUn$urmy!intI z)t^@!hxVQS%ma$4#KhWx0H${DVDT<*L2gkFs!>_S|Ga>+KFH5|>2-}WGp*8*d#7tK za3V_eGq3HCt_@vm=&5T*sXoReo%Jnv;_SGmO9#{Neqip`z9;a_SmONX5j*8_O<g?Z z-+Zl`NAc?#3HPmx>uOj^JbEJnrsDx`G*jaaRFkWQMoC_aiKnsM0h98)ZFX|x;bUY* z%!28}J^|<kau-LkP>iK3^nLcK(itL`iF#!)?$#13&MLh|I!FhmW7}YjV+2J+Np%OJ zTLh^V1~(%$<{mwX*H~du<nk7`^KFz#TDz+{Q+P6CYWxKAER24TcuAK-vy1urm7qV9 z$tAU;Gh1;%Hi}sP2vP1MmfR!_x!d7*SDdBNj|E2sioh((QNlHrg~T-c^TE}gj$<2A z(!-!keDp5N_g*)QIrz2JvRA5z;SXI3{<jr$GF78qn}g7eYHis9OI{z7w-ID>Y!4He zOwHC>9tXSyBJ&HGx3V7tP9=s!0@5?$fTeTNg?btC^)&<wUo0hTC(b0VuQ#_TdH4I* zJj?!wNL786a*gtZT7C7(07Dk0X|>L8ZS29r=7qQaU6^0?=I^#7HLKDWASM`kPs3F( z)0C+;YmXk#-qXt(q5lKuMf2E<feYTGwv`T%{ITPH>FW>^82T5o@1F~r`S+-w9VJZ> z5YY2Kg{bJKO{|M?dVgR5`r}hmnZv$vmNd^Uw!3%v^lD$432z_+sJ>aL9lB1^B*E-D zYw&f2=M8?cKEgmgU@`qg!Ovp6y~BJR7ih81>~&wzs_Y0{L+2}@wMe-Z<P}`wfq@jm z%=!r}XzD)LlxT;O0dZm)7Fhy5Ooc;mw{#(9&i+F52ve|&f#I1cxB@ib+xQw%>e+*j z>XkQ){P7lCVFW?dUFQ$(Iq+<l4rh27yn;gbYXAMah3JF1(zNQX4lyHWz@ws(Lviq4 zGC|TgCG__Fk<zkf*t2}HeyOOpzaK&7ZXuW<Z|KTRAps0jF`=7MOo+q})bApCCBTPI z6I_{?5Zs10SBAicC2)qgmoFf1vfycKGE7^$?gA$#BCT>3CGcWuj<}x&<e=o+9uC=X zw;O3iW$<m1iKyOD+oi3|8Y+{$cc2Z>ecz`LGtfX`1>p?4VxvDd=qxA)v&Z#YmoBoG zWw&06xIt}MDB>_@ac{xFB>3&Z)E)2;?tmLSs`#=j7A{D0p{NJpbee3@hVYRp`4&X> zegM_coQ_RheR%l|-Wv#c!z|_Ds<vGGXv*&-*=WOol|Y|Ri3uSIK7<#YWNZaC!;6^_ zaNmZAUfNxWmfPozfe*kdL@P9qH#a>pJ)(NYU=`$Q%Q3C<HI}J}kGBwQrDdG3u`am= zsi<BnP#t(MO?Manqf4<UhRA=C4#9%WUj$10+PPf|Qs3a}4{wU%q}XBx$zo!r=q$?M zenK!V7O?IMNWSH3?+JLXG_1lN=UP5770@Ejc@KP9KD0g=)yoe0&SJ}a;C~nV72a!N zGw=lDJ%M2v3;`fVUKp>$A!QhVP5HR|lwuMRc%swGksl0lpH}vtmziiD@#lGM=ZR@$ zHid0ePUdbcRaK0vH8dRzuIPU^OarF7cM})NdX=9@9_R?VK(YZ=b|2tWid<a7vuay) zC7wPbH>x+Kb>)=WR#%$#S5H7QiT7UxS?<l<c5mPWS&k(&{qfxjhp%)+F5C3BvSm~5 z+W2^?LcPy+xXyM9c>PZFruB2LWATEICYg)JQW=2>(&%U|8mN0RJOQB3`G3D}Lc!qH z&{iApVK8d9M?7{m=&e&N-GKZBNzy+-y5OL2sAQM_`%8ZClKv4b`j0h+Xwur-d8xe0 zg8*YWC2EA)RJiA%J4+QHlhj=*!StJLciW3mv+^VgRZV&e5xG0RYM3#H5@s~v&c+OI zQZ=GbePCTuBrkZr$SzmA#FtR79kV7AnxnjlC!Dn0zyx&Y4&i7`_CosC46CD=(Z$?3 zL76#NX@cIEl|w}m>Q1Q-&D@54d4+5=e_AWLf<goEMBE6!7Su2k{u_?k@6%gzihIl( z9p6c1<-%|{?-c9Qpm{QCdc@`jo(=u9eHK`#?R<x7ka<L}0yKTAd-I9lAgANf!B?0> z5Rk&jf+vvNtV3UrX=YA>G()B7YR6;7oK+llEt$WCp*ZRArQwZLXXtZ!Fq1EnDnn=K z9$3r7FUX&>tkrC#z#`dqxLy3*FD7P<07T)xDrcGeHiImTyu0p}2bVBmQsE{DBune{ z({VW#Ue%!soKQD@R}7>7@S~B+c3nOD`G_b<Et6MYM4lMd=1B1y`+hKdP-pz(tk<&Z zS~1KK89;GdT1zowt9UwROFa82&$f}oD<Wqq*2z)0Ed$qsUMpfvig_m^I((!v5Hsbn zEx|}@6808fN34lp8Rjxv&E^2>lK2Mr5Mkin7U~W5d$}liwOmv~>66xQ!|vR&bfGK; z6Ge2iGL6{C<jj%I3y6`0?62Ux9K(}gf5!EElpbz=rRnxy2*6l%YveXu0&rSeH6Njh zKLSSDHx##liTSZ-^<wt1jkw>oLQ*kyFrG2BFHs-~&`z!@sVO=V4k6Pgjd_yk(;0mR z)~gkOo(8Vcawry}+W`JV2Ih24{xt%uW2JB1$>NZGzZoZvU#f~1Bnqh}M)KB{<C4>6 zs`#CqH`2T91AJ2`zI+vP-rOsG-4cAASGW`Sfj|9Kv@7eY-$|uwkf8zUa;yF&Eo5U_ zgq}gU5ANnQclzgdpf6W)TYDJQ3*c!@8d7zzL8&1S=vxUpc=B1_6MQ%K`^aIoBHYL( zA%8CU?M8qa{j>BG%U_@%^}Y8mRIOZ+nCiU#u?`P7@&1%eu+xSHf45(id!gdGJ)jnp zf`k(yOHtM)=E3OMc2XYgc2Ct(6td^L%xcN|kS4zdBJ0Qggio=W^ajV@P{Xd54xVd8 zQK3z{?FyV04=}>*X4&VkRq!a?Ku1IO_*7iPd=cbL0*cdgBw=nf6s)T<nDdGi|Lq~K zOV}4;AT#8zN3<5zc^jI(S-M1}wGWGNBy~VPB@)#h6iC`7-rB{DfACXvRq6&Qp6vCs zI)Lg1BF;Xv+Q35Z<7b(T^tPi~(%=aczMK`R(w>S3*v7{BAgk8o*0l2a9rQz&k427R zqR$}jtjzyp^>uv#%oAj54NSE<P-ay-K6=Qq!#4%K^~VA@J_}5bGkq`?uTNcT1bKqm zeC{b&MD0FhqRN?#j-T*7%lBQaiR%4|e90aZk@69GWH5P5x~;CJ7#pCZ3>{ZX#+s4U zszSO$bfw?5+7KGIv_+^0s=cKYW%Lar<g>EA)HdW|CU8}aLZp$4D$oxaK`&bq17o}; zit3{uSztOz4F1s_TW510vlNq6d0<Yd(SEaEt*mZsH{p}Qs;h21hE7I$&&m@^ikA7> z##(7^<Botjbvt@2Wg6_i8%|npyz*}+ETW$X`6&H{0ymS;ji$I#2E<6)2?EYeBFv0P z|1f*?$4Zpm4l2MQ7Wd&HCF5Yn7h1E=@0&KE&qhz0Zw5}4q7t9&uPf^41-;YQh!0(N zS5IyEwflE~&Gm96Mxu-z7SBsL3Q#Y%riXTK?rB!p>dM+BA&YN0azQZ8c`~ab6j$*# z#aO7;2HRE95y)^}h^YJQ^H?9H(FhgL8G{TB^3#f-((U!4hp=DoVN!KzF^Fw}D1XAx zdla;F*_oXImo2o*^oU9QBlmmHK6XNuAD!eKKgvz310Rb2q&x-De<E)$Rqh>Y-7%c| zO$0uyM}@8R(14Uaa7+7&SrnnjPTm=XMWCFXSuWui@v0#}ZmUjTGG_8I)BxVAx*(4J z8Ne(UF7ez?umjQw7E}f{$C$8Y41Oyn@^ar`76N|Z$xkRqd2W?~(J2$fRx*Ru5d4q+ ztS#NRl|iE9WA(QMNUty>>_tUzsbFz3-wbe!@!c|Gr{90U+jzr=!G}M)8+b)On)JqU zfGaWtW7F*=v?04ByCicU1PB=}GwpF}Bx9xm*(0NXv&=o$rT$t!GW88y0iO2OKPrcB z)7aPCf7z@KnXvq{Nl-m1gVc0aT769;CZBSPB$W*2Ev-%@fZlkJB_>28NJY^m`i%Zk z(TEAJPp0^2p2%AtNEpwO%+fUO%&DI={6Uxokq2d}y%G93dF?cD1!F&jG8+@NnODIz z^ad+OM?;bQ`^<{`U|l9(bRu-T$qN71j9UNoj>8Siy;MOdn=(e22tkjL;NX-*JoB)( ztQm<J1A?zr1T3x!okVTkAEbh^pLYcaFy-HzHX)$u8`m9>j)sR&VfOLB_Vy7P)>;WM zPO|Qm{rumm;QBAeKigE19)~gv^9c?fec;Qeypw5>r9{Y7xble%-|46~ZqF^b%qZkn z&gm=c<c_!GOt@u_E`Ngz_J0$vf650=CJ$u#?*PR&o9aepgUdSgq@)jRfLIzH-1 z8JXSM@-0u{;#1k<$2lUF)})Z>y={kBAIvZrU63QPmV)D4c>gME$aS)zh42>Q5F1yr zfM-(jh(Ov>O)C-I<^~tY$@^Gj99dfmG2LvDihM~~K}jVUzC9e}eMH95ah5_d{k1(` zqPU~e!Ws0v7?rh_#Zu1qQwnv<O@TmxV_&P8?Y=SbFD67bhy-t^wXbF+v>%L8V*4(- z?f;P7mrJoAN%QhKz|*N5TQ-<KUz@jLsX)!70Bn!ydexSG53Gj%YVZ8*J6yjfWZE3J zXM2y&WI9TN)WOWyskF`b5gTZOSIW5=#h%XakL;%&q?E}R>KO}9fR1+|dp3kX?-w0D z3WAvl;Jrg&-Jys?RqXOkI(T&Y(%=T3f2B=<V3EDGIYB-`<29P%ATQhW8+`UIcpP^| zX|<R)h!-MrB64D2V%oYZwb5vlP#=z~RpbFP*KL(O0Gmq4bTL1&b_iJNfi2m?`mLzg zH;G>mhL1^;4~}JnT^itNu%Rg`pT@t$IJIRa&;w!c3S{|tM0q_O-hqR}j0q9>+`)jD z%*Kgkv5`;#kf04~IeFN|wkboIE60;ThFggIVZ5sry!S2ma2_~Dvx})M?Gx@~0srtO zRh!hU)J#v!5Mg1@%qB9!T6~TuW;__IYwmC`k%(<n4Z0%YfGj^jd)t>0R_ePGj+{-U z12R707NQzF%D-Saq`8byeHY!+UPi38EQnT9Ea?);rLey56bWzS0q>xFITSTj-fTwn z;=sBr^riUnpLlUW{mY8;ZO;zXanOlibUW*}M0f%nVynDvlslj;Afb6__bCW{Hf(XS zqy_SxZ4%)pDQ!{a+pMpXVgx)^LAHB51^{;CT0BE@NeMJvd&2HTs-yd+Fcw+u<OIK} zRqL325&mJJl4REHH?<y-bUcalpUO4<U5W}9jr)Z|5*Wr&w8xbCNmbuiR^9GWBB9T= zuv!ulWN)OVx(chNsAHg_YM~MA;wukt))26m8(zVN;J0ul-G&Qdle-dHet8Fxa;Xf) z8laSQ&ZX%RiZV8tdFEZe8Y}vi$L20A7MxrrXVZ@w9_O0gaS;-w$3NR`Qy}HmN#5QJ zD?Q+LD9#{wwV?W2XPe4f<lLJ3DhswGfp+GM7P9c7YMAF1!91J2xF;<<X?e0GV6$bz z&6fW4P|lMBkCycvhc6&>#))hD@YsY9s-R;OtC91G{Ch=HgcsR)#a@&maU7A@@GzC> z`3d^`@-EXqGI|k}#fK5=${w5X9*gRqZ)<f5bvJ%ec6JMxK512NEx`IUUlA<&Mk)s5 zwidApedHbe=Iy&ZQLUONIfh5jz_yGnWAqwc)XDi9&TTZRcoKc!MS8xy+#Uizdg}1P z&EETyvgF3gbXM}h)Fr40tWrWB)>U%e^y9ke=f9;+nQctixHjYRHBSj(h4}tCk<_~^ zdy1x<VV6mNN~ty3KYs3Q`QM{^R8KsM>}(}h_(sEUJO>I=Ni9|w3~qN?XPm(yV~cxx z>WaoUKxZ8KfjXsOX?~%x9rZVYEe?3@8xnQJZf7hQ$%}P8q&Zf4*0yT(yj{uqZP58$ zxAe2aZrTlhgBkk@@%bUZL#QS75vRG~<F5nF9M}t_rWib-pZVe-#w6~e>?<@-2X}no z8iyKI9=;SN{-8JRR@9a5>56YTkYGQK&J2x6-xL<y>?2UNxpo}7kolXOI>=X-^u7V6 zY{e@DocL>wU0JDSM#P)JC>*s{J5^ip;(w0BW$KJJ)t3cCO~g?O3R9gF44<T}Gv+n6 ziT;(y<2}2pS^KGqFQ{giR}BBNIA*X18ULz8u7lg~A}Y{=H!qVygdUwuF0S6f0KUwB z%m-8&{}Ido-yWG8*#-ofzBf6Z4LguKUb`fx8kAvJEYdcW%on(KE%39x7&0ME+!ug1 z<P~GLff>w2!e&wKa3zKobFu+Y=!`4NPnN%B0R)U;{E-iME)ND%PSy}1A<N&V9K)|| zma$waVzui4@UK;t-tL#O2E4X?LkY=6H}7&iPe$N8{@04iPX?8IV4iWBNY8ifuc65K z!pCDzvGby9%XR56?AqJ8Hm{O0-g!>Tr{r@*-$4(UdNNG`A8W>!1i%lHbeo=D;=pT7 z=LmS*i{tnw)TqtLzP<-JA2*J?RXtb$qwaI1C;ly;7p>QLxfBxI9{-=aRkPlojF$KA zyi~ww{NrHtQ;p9(OR$oe6Xy>9V7mDDoAW4CYURv-nFDrnKtVxq$)CMWr#ThLd2u)! z>>@Gwmx>NypknAm|AwpSe>`wdwHjSqME&*ln7Jn28B1}6Pv&d@c(XddcBMAber9)* z^{3(sAcyqNattV}2ij(jdw2Kz?P&Rk>z6OU8%t5a6*GL-zSOka*^w#)pooYF1i6)! zRYs9XmZ9z#S)z+H%7EM8a(d;vVe`7OEl}^Wi0!Gbt&Lq<$qi2F|K#kCU{h2RT~VTE zK*etXyLx-ftX&s|UOmi8^hdnL({E1!D=xp`ofiY0xgAGzbn#T_UU;BslvNxYv|L~p xbq0KVd@r7AC@63s^H5Y3Gr&&-%H#EqYarRmnB^nkCj#K}RP8yWRMq0s{{Tumal-%r literal 0 HcmV?d00001 diff --git a/src/cuda/ideasForCommunication.txt b/src/cuda/ideasForCommunication.txt new file mode 100644 index 000000000..44f43cc5c --- /dev/null +++ b/src/cuda/ideasForCommunication.txt @@ -0,0 +1,40 @@ + +HybridUniformNeighborScheme scheme ( blockstorage, blockSelectorForGPU); + +# Add field that is communicated +scheme.addPackInfo( FieldPackInfo(cpuFieldID), GPUFieldPackInfo(gpuFieldID) ); +# possibly add further fields which are sent in same message +scheme.addPackInfo( .... ) + + +GPUFieldPackInfo Interface + + MemberVariable: the GPUField + - pack ( direction, device_ptr, offset ) # -> calls packing kernel that copies the slice before ghost layer to the device_ptr buffer + - getConstantByteSizeForDirection( d ) # returns the required buffer size to pack for a neighbor in the given direction + - unpack (direction, device_ptr, offset ) + + +# Pseudo code for scheme.startCommunication +startCommunication: + 1) Pack everything into buffers + For all blocks + For all neighbors of current block + if currentBlock=gpu: + if neighbor is localcpu + -> call localToCPU + if neighbor is localgpu + -> + if neighbor is somewhere else + -> call pack on gpu + else currentBlock=cpu: + similar to above + + +How to send on GPU? + - use blocking send on GPU and different streams for all blocks + - use ISend ... how to manage the exchange of MPI_Request and the wait? + + + + + diff --git a/src/field/Field.h b/src/field/Field.h index 38c402dc5..c7785e80e 100644 --- a/src/field/Field.h +++ b/src/field/Field.h @@ -181,17 +181,6 @@ namespace field { //**************************************************************************************************************** - //** Low Level Functions ********************************************************************************** - /*! \name Low Level Functions - only use when absolutely necessary */ - //@{ - T * data(); - const T * data() const; - - shared_ptr< FieldAllocator<T> > getAllocator() const; - //@} - //**************************************************************************************************************** - - //** Equality Checks ********************************************************************************************* /*! \name Equality Checks */ //@{ @@ -297,6 +286,17 @@ namespace field { //**************************************************************************************************************** + //** Pointer to internal memory - use with care! ********************************************************** + /*! \name Pointer to internal memory - use with care! */ + //@{ + T * data() { return values_; } + const T * data() const { return values_; } + T * dataInner() { return valuesWithOffset_; } + const T * dataInner() const { return valuesWithOffset_; } + + shared_ptr< FieldAllocator<T> > getAllocator() const; + //@} + //**************************************************************************************************************** //** Static cached end iterators ********************************************************************************* diff --git a/src/field/Field.impl.h b/src/field/Field.impl.h index 2feeefaf0..39b9b227d 100644 --- a/src/field/Field.impl.h +++ b/src/field/Field.impl.h @@ -1136,32 +1136,6 @@ namespace field { //=================================================================================================================== - //******************************************************************************************************************* - /*! Returns a pointer to the memory region where field data is stored - * - * For additional information about memory layout, have a look at the xOff() and xStride() functions - * If you store a pointer to the internal field data, you have to make sure the data is not freed by the field class. - * See documentation of getAllocator() for this - */ - //******************************************************************************************************************* - template<typename T, uint_t fSize_ > - T * Field<T,fSize_>::data() - { - return values_; - } - - //******************************************************************************************************************* - /*! Returns a pointer to the memory region where field data is stored - * - * see documentation of nonconst version - */ - //******************************************************************************************************************* - template<typename T, uint_t fSize_ > - const T * Field<T,fSize_>::data() const - { - return values_; - } - //******************************************************************************************************************* /*! Returns internal data allocator * diff --git a/src/field/Layout.h b/src/field/Layout.h new file mode 100644 index 000000000..ed78ba0e9 --- /dev/null +++ b/src/field/Layout.h @@ -0,0 +1,43 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Layout.h +//! \ingroup field +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + + +namespace walberla { +namespace field { + + + /** + * \brief Layout for field ( + * \ingroup field + */ + enum Layout { + fzyx = 0, //!< Value-sorted data layout (f should be outermost loop) + zyxf = 1 //!< Cell-sorted data layout, (f should be innermost loop) + }; + + + +} // namespace field +} // namespace walberla + + diff --git a/src/field/communication/MPIDatatypes.h b/src/field/communication/MPIDatatypes.h index 0aaff22d4..3e1fe5f88 100644 --- a/src/field/communication/MPIDatatypes.h +++ b/src/field/communication/MPIDatatypes.h @@ -65,8 +65,8 @@ namespace communication { * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatype( const Field< T, fSize > & field ); +template<typename Field_T> +MPI_Datatype mpiDatatype( const Field_T & field ); @@ -92,8 +92,8 @@ MPI_Datatype mpiDatatype( const Field< T, fSize > & field ); * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSlice( const Field< T, fSize > & field, +template<typename Field_T> +MPI_Datatype mpiDatatypeSlice( const Field_T & field, const cell_idx_t xBeg, const cell_idx_t yBeg, const cell_idx_t zBeg, const cell_idx_t fBeg, const cell_idx_t xEnd, const cell_idx_t yEnd, const cell_idx_t zEnd, const cell_idx_t fEnd ); @@ -115,8 +115,8 @@ MPI_Datatype mpiDatatypeSlice( const Field< T, fSize > & field, * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, cell_idx_t f = 0 ); +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, cell_idx_t f = 0 ); @@ -137,8 +137,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ); +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ); @@ -158,8 +158,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, const std::set<cell_idx_t> & fs ); +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const std::set<cell_idx_t> & fs ); @@ -176,8 +176,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field ); @@ -196,8 +196,8 @@ MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field, const uint_t numGhostLayers ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field, const uint_t numGhostLayers ); @@ -217,8 +217,8 @@ MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice = false ); @@ -238,8 +238,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice = false ); @@ -259,8 +259,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice = false, const cell_idx_t f = 0 ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice = false, const cell_idx_t f = 0 ); //====================================================================================================================== @@ -280,8 +280,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ); //====================================================================================================================== @@ -300,8 +300,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ); //====================================================================================================================== @@ -320,8 +320,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness = 1, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness = 1, const bool fullSlice = false ); //====================================================================================================================== @@ -341,8 +341,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness = 1, const cell_idx_t f = 0, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness = 1, const cell_idx_t f = 0, const bool fullSlice = false ); //====================================================================================================================== @@ -363,8 +363,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t fEnd, const bool fullSlice ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t fEnd, const bool fullSlice ); //====================================================================================================================== @@ -384,12 +384,12 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ); } // namespace communication } // namespace field } // namespace walberla -#include "MPIDatatypes.impl.h" \ No newline at end of file +#include "MPIDatatypes.impl.h" diff --git a/src/field/communication/MPIDatatypes.impl.h b/src/field/communication/MPIDatatypes.impl.h index 3a801a671..1bb4bf3a6 100644 --- a/src/field/communication/MPIDatatypes.impl.h +++ b/src/field/communication/MPIDatatypes.impl.h @@ -28,11 +28,12 @@ namespace field { namespace communication { -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSlice( const Field< T, fSize > & field, +template<typename Field_T> +MPI_Datatype mpiDatatypeSlice( const Field_T & field, const cell_idx_t xBeg, const cell_idx_t yBeg, const cell_idx_t zBeg, const cell_idx_t fBeg, const cell_idx_t xEnd, const cell_idx_t yEnd, const cell_idx_t zEnd, const cell_idx_t fEnd ) { + typedef typename Field_T::value_type T; int sizes[4]; int subsizes[4]; int starts[4]; @@ -101,8 +102,8 @@ MPI_Datatype mpiDatatypeSlice( const Field< T, fSize > & field, -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatype( const Field< T, fSize > & field ) +template<typename Field_T> +MPI_Datatype mpiDatatype( const Field_T & field ) { return mpiDatatypeSlice( field, cell_idx_t( 0 ), cell_idx_t( 0 ), cell_idx_t( 0 ), cell_idx_t( 0 ), @@ -111,8 +112,8 @@ MPI_Datatype mpiDatatype( const Field< T, fSize > & field ) } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, cell_idx_t f /*= 0*/ ) +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, cell_idx_t f /*= 0*/ ) { return mpiDatatypeSlice( field, interval.xMin(), interval.yMin(), interval.zMin(), f, @@ -120,8 +121,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ) +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ) { return mpiDatatypeSlice( field, interval.xMin(), interval.yMin(), interval.zMin(), fBeg, @@ -129,9 +130,11 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, const std::set<cell_idx_t> & fs ) +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const std::set<cell_idx_t> & fs ) { + typedef typename Field_T::value_type T; + MPI_Datatype newType = MPI_DATATYPE_NULL; int sizes[3]; @@ -206,14 +209,14 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field ) { return mpiDatatypeWithGhostLayer( field, field.nrOfGhostLayers() ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field, const uint_t numGhostLayers ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field, const uint_t numGhostLayers ) { const cell_idx_t xBeg = - cell_idx_c( numGhostLayers ); const cell_idx_t yBeg = - cell_idx_c( numGhostLayers ); @@ -231,14 +234,14 @@ MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & fiel } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice /*= false*/ ) { return mpiDatatypeGhostLayerOnly( field, field.nrOfGhostLayers(), dir, fullSlice ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice /*= false*/ ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -250,8 +253,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & fiel } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice /*= false*/, const cell_idx_t f /*= 0*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice /*= false*/, const cell_idx_t f /*= 0*/ ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( field.nrOfGhostLayers() ), fullSlice ); @@ -259,8 +262,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f return mpiDatatypeSliceXYZ( field, ci, f ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( field.nrOfGhostLayers() ), fullSlice ); @@ -268,8 +271,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f return mpiDatatypeSliceXYZ( field, ci, fBeg, fEnd ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( field.nrOfGhostLayers() ), fullSlice ); @@ -277,8 +280,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f return mpiDatatypeSliceXYZ( field, ci, fs ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const bool fullSlice /*= false*/ ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -289,8 +292,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > return mpiDatatypeSliceXYZ( field, ci, fBeg, fEnd ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const cell_idx_t f /*= 0*/, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const cell_idx_t f /*= 0*/, const bool fullSlice /*= false*/ ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -298,8 +301,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz return mpiDatatypeSliceXYZ( field, ci, f ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t fEnd, const bool fullSlice ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t fEnd, const bool fullSlice ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -307,8 +310,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz return mpiDatatypeSliceXYZ( field, ci, fBeg, fEnd ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -319,4 +322,4 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz } // namespace communication } // namespace field -} // namespace walberla \ No newline at end of file +} // namespace walberla diff --git a/src/field/iterators/FieldIterator.h b/src/field/iterators/FieldIterator.h index 6a831fb54..a1c328e7a 100644 --- a/src/field/iterators/FieldIterator.h +++ b/src/field/iterators/FieldIterator.h @@ -26,6 +26,7 @@ #include "core/DataTypes.h" #include "core/cell/Cell.h" #include "core/debug/Debug.h" +#include "field/Layout.h" #include "stencil/Directions.h" @@ -39,16 +40,6 @@ namespace walberla { namespace field { - /** - * \brief Layout for field ( - * \ingroup field - */ - enum Layout { - fzyx = 0, //!< Value-sorted data layout (f should be outermost loop) - zyxf = 1 //!< Cell-sorted data layout, (f should be innermost loop) - }; - - template<typename T, uint_t fSize_> class Field; // forward for friend declaration diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 00c3bf12b..55b690d6c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -5,6 +5,7 @@ include_directories( ${walberla_BINARY_DIR}/src ) # for generated headers add_subdirectory( blockforest ) add_subdirectory( boundary ) add_subdirectory( core ) +add_subdirectory( cuda ) add_subdirectory( domain_decomposition ) add_subdirectory( fft ) add_subdirectory( field ) diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt new file mode 100644 index 000000000..89d914803 --- /dev/null +++ b/tests/cuda/CMakeLists.txt @@ -0,0 +1,15 @@ +################################################################################################### +# +# Tests for cuda +# +################################################################################################### + +waLBerla_compile_test( FILES FieldTransferTest ) +waLBerla_execute_test( NAME FieldTransferTest ) + +waLBerla_compile_test( FILES SimpleKernelTest.cpp Kernels.cu DEPENDS blockforest timeloop gui ) +waLBerla_execute_test( NAME SimpleKernelTest ) + +waLBerla_compile_test( FILES CudaMPI DEPENDS blockforest timeloop gui ) +waLBerla_execute_test( NAME CudaMPI ) + diff --git a/tests/cuda/CudaMPI.cpp b/tests/cuda/CudaMPI.cpp new file mode 100644 index 000000000..ffcb89260 --- /dev/null +++ b/tests/cuda/CudaMPI.cpp @@ -0,0 +1,144 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CudaMPI.h +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + + +#include "blockforest/Initialization.h" + + +#include "core/debug/TestSubsystem.h" +#include "core/Environment.h" +#include "core/logging/Logging.h" +#include "core/mpi/Datatype.h" + +#include "cuda/GPUField.h" + +#include "field/communication/MPIDatatypes.h" +#include "field/AddToStorage.h" +#include "timeloop/SweepTimeloop.h" + +#include "gui/Gui.h" + + +using namespace walberla; + + +void fullFieldTransfer() +{ + Field<double,4> h_f1 ( 3, 4, 2, 42.0, field::fzyx ); + Field<double,4> h_f2 ( 3, 4, 2, 27.0, field::fzyx ); + + cuda::GPUField<double> d_f ( 3, 4, 2, 4, 8.0, field::fzyx ); + + + // Transfer h_f1 from CPU to GPU d_f + + auto h_f1_datatype = mpi::Datatype ( field::communication::mpiDatatype( h_f1 ) ); + auto h_f2_datatype = mpi::Datatype ( field::communication::mpiDatatype( h_f2 ) ); + auto d_f_datatype = mpi::Datatype ( field::communication::mpiDatatype( d_f ) ); + + WALBERLA_LOG_DEVEL("ISend"); + MPI_Request request1; + MPI_Isend( h_f1.data(), 1, h_f1_datatype, 0, 0, MPI_COMM_WORLD, &request1 ); + + WALBERLA_LOG_DEVEL("IRecv"); + MPI_Request request2; + MPI_Irecv( d_f.data(), 1, d_f_datatype, 0, 0, MPI_COMM_WORLD, &request2 ); + + MPI_Wait( &request1, MPI_STATUS_IGNORE ); + MPI_Wait( &request2, MPI_STATUS_IGNORE ); + + // Transfer GPU field d_f back to CPU into h_f2 + + MPI_Request request3; + WALBERLA_LOG_DEVEL("ISend"); + MPI_Isend( d_f.data(), 1, d_f_datatype, 0, 0, MPI_COMM_WORLD , &request3 ); + + MPI_Request request4; + WALBERLA_LOG_DEVEL("IRecv"); + MPI_Irecv( h_f2.data(), 1, h_f2_datatype, 0, 0, MPI_COMM_WORLD, &request4 ); + + MPI_Wait( &request3, MPI_STATUS_IGNORE ); + MPI_Wait( &request4, MPI_STATUS_IGNORE ); + + WALBERLA_CHECK_EQUAL( h_f1, h_f2 ); +} + + +void blockStorageAndGui( int argc, char ** argv ) +{ + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid( + uint_c(1) , uint_c(1), uint_c(1), // number of blocks in x,y,z direction + uint_c(5) , uint_c(7), uint_c(3), // number of blocks in x,y,z direction + real_c(1), // dx: length of one cell in physical coordinates + false, // one block per process? - "false" means all blocks to one process + true, true, true ); // no periodicity + + typedef GhostLayerField<real_t,1> ScalarField; + BlockDataID cpuFieldID1 = field::addToStorage<ScalarField>( blocks, "CPUField 1", real_c(42), field::fzyx, uint_c(1) ); + BlockDataID cpuFieldID2 = field::addToStorage<ScalarField>( blocks, "CPUField 2", real_c(0), field::fzyx, uint_c(1) ); + + typedef cuda::GPUField<real_t> GPUField; + BlockDataID gpuFieldID = blocks->addStructuredBlockData< GPUField >( + [&] ( IBlock * block, StructuredBlockStorage * const s ) { + return new GPUField( s->getNumberOfXCells(*block), + s->getNumberOfYCells(*block), + s->getNumberOfZCells(*block), + 1 , 1.0); + }, + "GPU Field" ); + + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + // get the field stored on the current block + ScalarField * h_f1 = blockIt->getData<ScalarField>( cpuFieldID1 ); + ScalarField * h_f2 = blockIt->getData<ScalarField>( cpuFieldID2 ); + GPUField * d_f = blockIt->getData<GPUField> ( gpuFieldID ); + + auto h_f1_datatype = mpi::Datatype ( field::communication::mpiDatatypeSliceBeforeGhostlayer( *h_f1, stencil::W, 1, true ) ); + auto h_f2_datatype = mpi::Datatype ( field::communication::mpiDatatypeSliceBeforeGhostlayer( *h_f2, stencil::W, 1, true ) ); + auto d_f_datatype = mpi::Datatype ( field::communication::mpiDatatypeSliceBeforeGhostlayer( *d_f , stencil::W, 1, true ) ); + + MPI_Sendrecv( const_cast<double *>( h_f1->data() ), 1, h_f1_datatype, 0, 0, + d_f->data(), 1, d_f_datatype , 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); + + MPI_Sendrecv( d_f->data(), 1, d_f_datatype, 0, 0, + h_f2->data(), 1, h_f2_datatype, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); + + + } + + SweepTimeloop timeloop( blocks, 4 ); + GUI gui( timeloop, blocks, argc, argv ); + gui.run(); + +} + + +int main( int argc, char ** argv ) +{ + debug::enterTestMode(); + walberla::Environment walberlaEnv( argc, argv ); + + fullFieldTransfer(); + //blockStorageAndGui(argc, argv); + + + return 0; +} diff --git a/tests/cuda/FieldTransferTest.cpp b/tests/cuda/FieldTransferTest.cpp new file mode 100644 index 000000000..e37768d9d --- /dev/null +++ b/tests/cuda/FieldTransferTest.cpp @@ -0,0 +1,65 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldTransferTest.h +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + + +#include "core/debug/TestSubsystem.h" +#include "core/Environment.h" + +#include "field/Field.h" + +#include "cuda/GPUField.h" +#include "cuda/FieldCopy.h" + + +using namespace walberla; + +void simpleTransfer() +{ + Field<double,4> h_f1 ( 16, 20, 30, 42.0, field::fzyx ); + Field<double,4> h_f2 ( 16, 20, 30, 0.0, field::fzyx ); + + + cuda::GPUField<double> d_f ( 16,20,30,4,0, field::fzyx ); + + WALBERLA_CHECK_EQUAL( h_f1.xSize() ,d_f.xSize() ); + WALBERLA_CHECK_EQUAL( h_f1.ySize() ,d_f.ySize() ); + WALBERLA_CHECK_EQUAL( h_f1.zSize() ,d_f.zSize() ); + WALBERLA_CHECK_EQUAL( h_f1.fSize() ,d_f.fSize() ); + WALBERLA_CHECK_EQUAL( h_f1.layout() ,d_f.layout() ); + + + cuda::fieldCpy( d_f, h_f1 ); + cuda::fieldCpy( h_f2, d_f ); + + WALBERLA_CHECK_EQUAL( h_f1, h_f2 ); +} + + + + +int main( int argc, char ** argv ) +{ + debug::enterTestMode(); + walberla::Environment walberlaEnv( argc, argv ); + + simpleTransfer(); + + return 0; +} diff --git a/tests/cuda/Kernels.cu b/tests/cuda/Kernels.cu new file mode 100644 index 000000000..fb4228113 --- /dev/null +++ b/tests/cuda/Kernels.cu @@ -0,0 +1,33 @@ + +#include <iostream> + +#include "cuda/FieldAccessor.h" +#include "cuda/FieldIndexing.h" + +namespace walberla { + + +namespace cuda { + template<typename T> + class GPUField; +} + +__global__ void kernel_double( cuda::FieldAccessor<double> f ) +{ + f.set( blockIdx, threadIdx ); + f.get() *= 2.0; +} + +void kernel_double_field( const cuda::GPUField<double> & field ) +{ + using namespace std; + cuda::FieldIndexing<double> iter = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( field, 1, stencil::E, true ); + std::cout << "Kernel call dims " + << iter.blockDim().x << "," + << iter.gridDim().x << "," + << iter.gridDim().y << "," + << iter.gridDim().z << endl; + kernel_double<<< iter.gridDim(), iter.blockDim() >>> ( iter.gpuAccess() ); +} + +} // namespace walberla diff --git a/tests/cuda/SimpleKernelTest.cpp b/tests/cuda/SimpleKernelTest.cpp new file mode 100644 index 000000000..8313947ed --- /dev/null +++ b/tests/cuda/SimpleKernelTest.cpp @@ -0,0 +1,115 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldTransferTest.h +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "cuda/FieldIndexing.h" +#include "blockforest/Initialization.h" + +#include "core/debug/TestSubsystem.h" +#include "core/Environment.h" + +#include "field/GhostLayerField.h" + +#include "cuda/GPUField.h" +#include "cuda/FieldCopy.h" +#include "cuda/Kernel.h" +#include "gui/Gui.h" +#include "timeloop/SweepTimeloop.h" + +using namespace walberla; + +namespace walberla{ +void kernel_double_field( const cuda::GPUField<double> & field ); + +void kernel_double( cuda::FieldAccessor<double> f ); +} + + + +GhostLayerField<real_t,1> * createCPUField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new GhostLayerField<real_t,1> ( + storage->getNumberOfXCells( *block ), // number of cells in x direction + storage->getNumberOfYCells( *block ), // number of cells in y direction + storage->getNumberOfZCells( *block ), // number of cells in z direction + 1, // number of ghost layers + real_t(1), // initial value + field::fzyx); +} + +cuda::GPUField<real_t> * createGPUField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new cuda::GPUField<real_t> ( + storage->getNumberOfXCells( *block ), // number of cells in x direction + storage->getNumberOfYCells( *block ), // number of cells in y direction + storage->getNumberOfZCells( *block ), // number of cells in z direction + 1, // fSize + 1, // number of ghost layers + field::fzyx ); +} + + +int main( int argc, char ** argv ) +{ + walberla::Environment env( argc, argv ); + debug::enterTestMode(); + + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1), uint_t(1), uint_t(1), // number of blocks in x,y,z direction + uint_t(14), uint_t(14), uint_t(14), // how many cells per block (x,y,z) + real_c(0.5), // dx: length of one cell in physical coordinates + false, // one block per process - "false" means all blocks to one process + false, false, false ); // no periodicity + + + + BlockDataID cpuFieldID = blocks->addStructuredBlockData< GhostLayerField<real_t,1> > ( &createCPUField, "CPUField" ); + + + BlockDataID gpuFieldID = blocks->addStructuredBlockData< cuda::GPUField<real_t> > ( &createGPUField, "GPUField" ); + + for ( auto blockIterator = blocks->begin(); blockIterator != blocks->end(); ++blockIterator ) + { + IBlock & currentBlock = *blockIterator; + + // get the field stored on the current block + auto cpuField = currentBlock.getData< GhostLayerField<real_t,1> > ( cpuFieldID ); + auto gpuField = currentBlock.getData< cuda::GPUField<real_t> > ( gpuFieldID ); + + cuda::fieldCpy( *gpuField, *cpuField ); + + auto myKernel = cuda::make_kernel( &kernel_double ); + auto indexing = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( *gpuField, 1, stencil::W, true ); + myKernel.addFieldIndexingParam(indexing); + myKernel(); + + cuda::fieldCpy( *cpuField, *gpuField ); + + WALBERLA_ASSERT_EQUAL( cpuField->get(0,0,0), 2 ); + } + + + //SweepTimeloop timeloop ( blocks, uint_t(1) ); + //timeloop.run(); + //GUI gui ( timeloop, blocks, argc, argv ); + //gui.run(); + + + return 0; +} -- GitLab