Commit aa1ed37e authored by Houman Mirzaalian Dastjerdi's avatar Houman Mirzaalian Dastjerdi
Browse files

tutorials 02 CUDA blur image

parent e8f1f091
Pipeline #10009 failed with stage
in 73 minutes and 37 seconds
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file 03_GameOfLife.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "cuda/HostFieldAllocator.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/FieldCopy.h"
#include "cuda/GPUField.h"
#include "cuda/Kernel.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/FieldIndexing.h"
#include "02_BlurImage_cuda.h"
#include "field/AddToStorage.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
#include "geometry/structured/GrayScaleImage.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "timeloop/SweepTimeloop.h"
#include "geometry/structured/RGBAImage.h"
#include <typeinfo>
using namespace walberla;
typedef GhostLayerField<double,1> ScalarField;
typedef cuda::GPUField<double> GPUField;
const int blurKernelWidth = 3;
const float blurKernelSigma = 1000.;
//------------------------
ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
{
return new ScalarField (
storage->getNumberOfXCells( *block ), // number of cells in x direction per block
storage->getNumberOfYCells( *block ), // number of cells in y direction per block
storage->getNumberOfZCells( *block ), // number of cells in z direction per block
1, // one ghost layer
double(0), // initial value
field::fzyx, // layout
make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory
);
}
class CUDA_BLUR
{
public:
CUDA_BLUR( BlockDataID gpuFieldSrcID, BlockDataID gpuFieldDstID,
int numRows, int numCols,
const int filterWidth )
: gpuFieldSrcID_( gpuFieldSrcID ), gpuFieldDstID_( gpuFieldDstID ), numRows_(numRows), numCols_(numCols), filterWidth_(filterWidth)
{
//create and fill the filter we will convolve with
auto h_filter = new float[blurKernelWidth * blurKernelWidth];
float filterSum = 0.f; //for normalization
for (int r = -blurKernelWidth/2; r <= blurKernelWidth/2; ++r) {
for (int c = -blurKernelWidth/2; c <= blurKernelWidth/2; ++c) {
float filterValue = expf( -(float)(c * c + r * r) / (2.f * blurKernelSigma * blurKernelSigma));
(h_filter)[(r + blurKernelWidth/2) * blurKernelWidth + c + blurKernelWidth/2] = filterValue;
filterSum += filterValue;
}
}
float normalizationFactor = 1.f / filterSum;
for (int r = -blurKernelWidth/2; r <= blurKernelWidth/2; ++r) {
for (int c = -blurKernelWidth/2; c <= blurKernelWidth/2; ++c) {
(h_filter)[(r + blurKernelWidth/2) * blurKernelWidth + c + blurKernelWidth/2] *= normalizationFactor;
}
}
cudaMalloc(&d_filtter_, blurKernelWidth * blurKernelWidth * sizeof(float));
cudaMemcpy(d_filtter_, h_filter, blurKernelWidth * blurKernelWidth * sizeof(float), cudaMemcpyHostToDevice);
}
void operator() ( IBlock * block )
{
auto srcCudaField = block->getData< cuda::GPUField<double> > ( gpuFieldSrcID_ );
auto dstCudaField = block->getData< cuda::GPUField<double> > ( gpuFieldDstID_ );
auto myKernel = cuda::make_kernel( &gaussian_blur );
myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *srcCudaField ) );
myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *dstCudaField ) );
// typedef typename boost::function_traits<decltype(gaussian_blur)>::arg5_type arg5_type;
// WALBERLA_LOG_DEVEL("Type is" << typeid(arg5_type).name());
// auto equal_res = boost::is_same<const float *const, typename boost::function_traits<decltype(gaussian_blur)>::arg5_type>::value;
// WALBERLA_LOG_DEVEL("Types are equal " << equal_res);
myKernel.addParam<int>( numRows_ );
myKernel.addParam<int>( numCols_ );
myKernel.addParam<float*>( d_filtter_ );
myKernel.addParam<int>( filterWidth_ );
myKernel();
srcCudaField->swapDataPointers( dstCudaField );
}
private:
BlockDataID gpuFieldSrcID_;
BlockDataID gpuFieldDstID_;
int filterWidth_;
int numRows_;
int numCols_;
float * d_filtter_;
};
int main( int argc, char ** argv )
{
walberla::Environment env( argc, argv );
geometry::GrayScaleImage image("image.png");
int numRows = image.height();
int numCols = image.width();
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(2) , uint_t(2), uint_t(1), // number of blocks in x,y,z direction
image.size( uint_t(0) ) /2, image.size(uint_t(1)) /2, uint_t(1), // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
false, false, false ); // no periodicity
BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
// Initializing the field from an image
using geometry::initializer::ScalarFieldFromGrayScaleImage;
ScalarFieldFromGrayScaleImage fieldInitializer ( *blocks, cpuFieldID ) ;
fieldInitializer.init( image, uint_t(2), false );
BlockDataID gpuFieldSrcID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
BlockDataID gpuFieldDstID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Dst" );
typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9 > CommScheme;
typedef cuda::communication::GPUPackInfo<GPUField> Packing;
// Alternative, if CUDA enabled MPI is available
//blockforest::communication::UniformDirectScheme<stencil::D2Q9 >
//typedef field::communication::UniformMPIDatatypeInfo<GPUField> Packing
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuFieldSrcID) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(500); // number of timesteps for non-gui runs
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( CUDA_BLUR(gpuFieldSrcID, gpuFieldDstID, numRows, numCols, blurKernelWidth ), "GameOfLifeSweep" );
timeloop.add() << Sweep( cuda::fieldCpyFunctor<ScalarField, GPUField >(cpuFieldID, gpuFieldDstID) );
// Register VTK output
timeloop.addFuncAfterTimeStep( field::createVTKOutput<ScalarField>( cpuFieldID, *blocks, "game_of_life" ) );
// GUI output
GUI gui ( timeloop, blocks, argc, argv );
gui.run();
return 0;
}
#include <stdio.h>
#include <math_functions.h>
#include <cuda.h>
#include <cuda_profiler_api.h>
#include <cudaProfiler.h>
#include <device_launch_parameters.h>
#include "cuda/FieldAccessor.h"
#include "02_BlurImage_cuda.h"
//------------------------
namespace walberla {
__global__
void gaussian_blur(cuda::FieldAccessor<double> inputChannel,
cuda::FieldAccessor<double> outputChannel,
int numRows, int numCols,
float * filter, int filterWidth) {
inputChannel.set( blockIdx, threadIdx );
outputChannel.set( blockIdx, threadIdx );
/*
int px = blockIdx.x * blockDim.x + threadIdx.x;
int py = blockIdx.y * blockDim.y + threadIdx.y;
if (px >= numCols || py >= numRows) {
return;
}*/
double c = 0.0f;
for (int fx = 0; fx < filterWidth; fx++) {
for (int fy = 0; fy < filterWidth; fy++) {
int imagex = fx - filterWidth / 2;
int imagey = fy - filterWidth / 2;
//imagex = min(max(imagex, 0), numCols - 1);
//imagey = min(max(imagey, 0), numRows - 1);
c += (filter[fy * filterWidth + fx] * inputChannel.getNeighbor(imagex, imagey, 0) );
}
}
outputChannel.get() = c;
}
}
//
// Created by po60nani on 6/18/18.
//
#pragma once
//
// Created by po60nani on 6/13/18.
//
#include <stdio.h>
#include <cuda.h>
//------------------------
namespace walberla {
__global__
void gaussian_blur(cuda::FieldAccessor<double> inputChannel,
cuda::FieldAccessor<double> outputChannel,
int numRows, int numCols,
float * filter, int filterWidth);
}
......@@ -3,5 +3,9 @@ waLBerla_link_files_to_builddir( *.png )
waLBerla_add_executable ( NAME 01_GameOfLife_cuda
FILES 01_GameOfLife_cuda.cpp 01_GameOfLife_kernels.cu
DEPENDS blockforest core cuda field lbm geometry timeloop gui )
\ No newline at end of file
DEPENDS blockforest core cuda field lbm geometry timeloop gui )
waLBerla_add_executable ( NAME 02_BlurImage_cuda
FILES 02_BlurImage_cuda.cpp 02_BlurImage_cuda.cu 02_BlurImage_cuda.h
DEPENDS blockforest core cuda field lbm geometry timeloop gui )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment