Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Houman Mirzaalian Dastjerdi
waLBerla
Commits
aa1ed37e
Commit
aa1ed37e
authored
Jun 20, 2018
by
Houman Mirzaalian Dastjerdi
Browse files
tutorials 02 CUDA blur image
parent
e8f1f091
Pipeline
#10009
failed with stage
in 73 minutes and 37 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
apps/tutorials/cuda/02_BlurImage_cuda.cpp
0 → 100644
View file @
aa1ed37e
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file 03_GameOfLife.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include
"cuda/HostFieldAllocator.h"
#include
"blockforest/Initialization.h"
#include
"blockforest/communication/UniformDirectScheme.h"
#include
"blockforest/communication/UniformBufferedScheme.h"
#include
"core/Environment.h"
#include
"cuda/HostFieldAllocator.h"
#include
"cuda/FieldCopy.h"
#include
"cuda/GPUField.h"
#include
"cuda/Kernel.h"
#include
"cuda/AddGPUFieldToStorage.h"
#include
"cuda/communication/GPUPackInfo.h"
#include
"cuda/FieldIndexing.h"
#include
"02_BlurImage_cuda.h"
#include
"field/AddToStorage.h"
#include
"field/communication/UniformMPIDatatypeInfo.h"
#include
"field/vtk/VTKWriter.h"
#include
"geometry/initializer/ScalarFieldFromGrayScaleImage.h"
#include
"geometry/structured/GrayScaleImage.h"
#include
"gui/Gui.h"
#include
"stencil/D2Q9.h"
#include
"timeloop/SweepTimeloop.h"
#include
"geometry/structured/RGBAImage.h"
#include
<typeinfo>
using
namespace
walberla
;
typedef
GhostLayerField
<
double
,
1
>
ScalarField
;
typedef
cuda
::
GPUField
<
double
>
GPUField
;
const
int
blurKernelWidth
=
3
;
const
float
blurKernelSigma
=
1000.
;
//------------------------
ScalarField
*
createField
(
IBlock
*
const
block
,
StructuredBlockStorage
*
const
storage
)
{
return
new
ScalarField
(
storage
->
getNumberOfXCells
(
*
block
),
// number of cells in x direction per block
storage
->
getNumberOfYCells
(
*
block
),
// number of cells in y direction per block
storage
->
getNumberOfZCells
(
*
block
),
// number of cells in z direction per block
1
,
// one ghost layer
double
(
0
),
// initial value
field
::
fzyx
,
// layout
make_shared
<
cuda
::
HostFieldAllocator
<
double
>
>
()
// allocator for host pinned memory
);
}
class
CUDA_BLUR
{
public:
CUDA_BLUR
(
BlockDataID
gpuFieldSrcID
,
BlockDataID
gpuFieldDstID
,
int
numRows
,
int
numCols
,
const
int
filterWidth
)
:
gpuFieldSrcID_
(
gpuFieldSrcID
),
gpuFieldDstID_
(
gpuFieldDstID
),
numRows_
(
numRows
),
numCols_
(
numCols
),
filterWidth_
(
filterWidth
)
{
//create and fill the filter we will convolve with
auto
h_filter
=
new
float
[
blurKernelWidth
*
blurKernelWidth
];
float
filterSum
=
0.
f
;
//for normalization
for
(
int
r
=
-
blurKernelWidth
/
2
;
r
<=
blurKernelWidth
/
2
;
++
r
)
{
for
(
int
c
=
-
blurKernelWidth
/
2
;
c
<=
blurKernelWidth
/
2
;
++
c
)
{
float
filterValue
=
expf
(
-
(
float
)(
c
*
c
+
r
*
r
)
/
(
2.
f
*
blurKernelSigma
*
blurKernelSigma
));
(
h_filter
)[(
r
+
blurKernelWidth
/
2
)
*
blurKernelWidth
+
c
+
blurKernelWidth
/
2
]
=
filterValue
;
filterSum
+=
filterValue
;
}
}
float
normalizationFactor
=
1.
f
/
filterSum
;
for
(
int
r
=
-
blurKernelWidth
/
2
;
r
<=
blurKernelWidth
/
2
;
++
r
)
{
for
(
int
c
=
-
blurKernelWidth
/
2
;
c
<=
blurKernelWidth
/
2
;
++
c
)
{
(
h_filter
)[(
r
+
blurKernelWidth
/
2
)
*
blurKernelWidth
+
c
+
blurKernelWidth
/
2
]
*=
normalizationFactor
;
}
}
cudaMalloc
(
&
d_filtter_
,
blurKernelWidth
*
blurKernelWidth
*
sizeof
(
float
));
cudaMemcpy
(
d_filtter_
,
h_filter
,
blurKernelWidth
*
blurKernelWidth
*
sizeof
(
float
),
cudaMemcpyHostToDevice
);
}
void
operator
()
(
IBlock
*
block
)
{
auto
srcCudaField
=
block
->
getData
<
cuda
::
GPUField
<
double
>
>
(
gpuFieldSrcID_
);
auto
dstCudaField
=
block
->
getData
<
cuda
::
GPUField
<
double
>
>
(
gpuFieldDstID_
);
auto
myKernel
=
cuda
::
make_kernel
(
&
gaussian_blur
);
myKernel
.
addFieldIndexingParam
(
cuda
::
FieldIndexing
<
double
>::
xyz
(
*
srcCudaField
)
);
myKernel
.
addFieldIndexingParam
(
cuda
::
FieldIndexing
<
double
>::
xyz
(
*
dstCudaField
)
);
// typedef typename boost::function_traits<decltype(gaussian_blur)>::arg5_type arg5_type;
// WALBERLA_LOG_DEVEL("Type is" << typeid(arg5_type).name());
// auto equal_res = boost::is_same<const float *const, typename boost::function_traits<decltype(gaussian_blur)>::arg5_type>::value;
// WALBERLA_LOG_DEVEL("Types are equal " << equal_res);
myKernel
.
addParam
<
int
>
(
numRows_
);
myKernel
.
addParam
<
int
>
(
numCols_
);
myKernel
.
addParam
<
float
*>
(
d_filtter_
);
myKernel
.
addParam
<
int
>
(
filterWidth_
);
myKernel
();
srcCudaField
->
swapDataPointers
(
dstCudaField
);
}
private:
BlockDataID
gpuFieldSrcID_
;
BlockDataID
gpuFieldDstID_
;
int
filterWidth_
;
int
numRows_
;
int
numCols_
;
float
*
d_filtter_
;
};
int
main
(
int
argc
,
char
**
argv
)
{
walberla
::
Environment
env
(
argc
,
argv
);
geometry
::
GrayScaleImage
image
(
"image.png"
);
int
numRows
=
image
.
height
();
int
numCols
=
image
.
width
();
// Create blocks
shared_ptr
<
StructuredBlockForest
>
blocks
=
blockforest
::
createUniformBlockGrid
(
uint_t
(
2
)
,
uint_t
(
2
),
uint_t
(
1
),
// number of blocks in x,y,z direction
image
.
size
(
uint_t
(
0
)
)
/
2
,
image
.
size
(
uint_t
(
1
))
/
2
,
uint_t
(
1
),
// how many cells per block (x,y,z)
real_t
(
1
),
// dx: length of one cell in physical coordinates
false
,
// one block per process - "false" means all blocks to one process
false
,
false
,
false
);
// no periodicity
BlockDataID
cpuFieldID
=
blocks
->
addStructuredBlockData
<
ScalarField
>
(
&
createField
,
"CPU Field"
);
// Initializing the field from an image
using
geometry
::
initializer
::
ScalarFieldFromGrayScaleImage
;
ScalarFieldFromGrayScaleImage
fieldInitializer
(
*
blocks
,
cpuFieldID
)
;
fieldInitializer
.
init
(
image
,
uint_t
(
2
),
false
);
BlockDataID
gpuFieldSrcID
=
cuda
::
addGPUFieldToStorage
<
ScalarField
>
(
blocks
,
cpuFieldID
,
"GPU Field Src"
);
BlockDataID
gpuFieldDstID
=
cuda
::
addGPUFieldToStorage
<
ScalarField
>
(
blocks
,
cpuFieldID
,
"GPU Field Dst"
);
typedef
blockforest
::
communication
::
UniformBufferedScheme
<
stencil
::
D2Q9
>
CommScheme
;
typedef
cuda
::
communication
::
GPUPackInfo
<
GPUField
>
Packing
;
// Alternative, if CUDA enabled MPI is available
//blockforest::communication::UniformDirectScheme<stencil::D2Q9 >
//typedef field::communication::UniformMPIDatatypeInfo<GPUField> Packing
CommScheme
commScheme
(
blocks
);
commScheme
.
addDataToCommunicate
(
make_shared
<
Packing
>
(
gpuFieldSrcID
)
);
// Create Timeloop
const
uint_t
numberOfTimesteps
=
uint_t
(
500
);
// number of timesteps for non-gui runs
SweepTimeloop
timeloop
(
blocks
,
numberOfTimesteps
);
// Registering the sweep
timeloop
.
add
()
<<
BeforeFunction
(
commScheme
,
"Communication"
)
<<
Sweep
(
CUDA_BLUR
(
gpuFieldSrcID
,
gpuFieldDstID
,
numRows
,
numCols
,
blurKernelWidth
),
"GameOfLifeSweep"
);
timeloop
.
add
()
<<
Sweep
(
cuda
::
fieldCpyFunctor
<
ScalarField
,
GPUField
>
(
cpuFieldID
,
gpuFieldDstID
)
);
// Register VTK output
timeloop
.
addFuncAfterTimeStep
(
field
::
createVTKOutput
<
ScalarField
>
(
cpuFieldID
,
*
blocks
,
"game_of_life"
)
);
// GUI output
GUI
gui
(
timeloop
,
blocks
,
argc
,
argv
);
gui
.
run
();
return
0
;
}
apps/tutorials/cuda/02_BlurImage_cuda.cu
0 → 100644
View file @
aa1ed37e
#include
<stdio.h>
#include
<math_functions.h>
#include
<cuda.h>
#include
<cuda_profiler_api.h>
#include
<cudaProfiler.h>
#include
<device_launch_parameters.h>
#include
"cuda/FieldAccessor.h"
#include
"02_BlurImage_cuda.h"
//------------------------
namespace
walberla
{
__global__
void
gaussian_blur
(
cuda
::
FieldAccessor
<
double
>
inputChannel
,
cuda
::
FieldAccessor
<
double
>
outputChannel
,
int
numRows
,
int
numCols
,
float
*
filter
,
int
filterWidth
)
{
inputChannel
.
set
(
blockIdx
,
threadIdx
);
outputChannel
.
set
(
blockIdx
,
threadIdx
);
/*
int px = blockIdx.x * blockDim.x + threadIdx.x;
int py = blockIdx.y * blockDim.y + threadIdx.y;
if (px >= numCols || py >= numRows) {
return;
}*/
double
c
=
0.0
f
;
for
(
int
fx
=
0
;
fx
<
filterWidth
;
fx
++
)
{
for
(
int
fy
=
0
;
fy
<
filterWidth
;
fy
++
)
{
int
imagex
=
fx
-
filterWidth
/
2
;
int
imagey
=
fy
-
filterWidth
/
2
;
//imagex = min(max(imagex, 0), numCols - 1);
//imagey = min(max(imagey, 0), numRows - 1);
c
+=
(
filter
[
fy
*
filterWidth
+
fx
]
*
inputChannel
.
getNeighbor
(
imagex
,
imagey
,
0
)
);
}
}
outputChannel
.
get
()
=
c
;
}
}
apps/tutorials/cuda/02_BlurImage_cuda.h
0 → 100644
View file @
aa1ed37e
//
// Created by po60nani on 6/18/18.
//
#pragma once
//
// Created by po60nani on 6/13/18.
//
#include
<stdio.h>
#include
<cuda.h>
//------------------------
namespace
walberla
{
__global__
void
gaussian_blur
(
cuda
::
FieldAccessor
<
double
>
inputChannel
,
cuda
::
FieldAccessor
<
double
>
outputChannel
,
int
numRows
,
int
numCols
,
float
*
filter
,
int
filterWidth
);
}
apps/tutorials/cuda/CMakeLists.txt
View file @
aa1ed37e
...
...
@@ -3,5 +3,9 @@ waLBerla_link_files_to_builddir( *.png )
waLBerla_add_executable
(
NAME 01_GameOfLife_cuda
FILES 01_GameOfLife_cuda.cpp 01_GameOfLife_kernels.cu
DEPENDS blockforest core cuda field lbm geometry timeloop gui
)
\ No newline at end of file
DEPENDS blockforest core cuda field lbm geometry timeloop gui
)
waLBerla_add_executable
(
NAME 02_BlurImage_cuda
FILES 02_BlurImage_cuda.cpp 02_BlurImage_cuda.cu 02_BlurImage_cuda.h
DEPENDS blockforest core cuda field lbm geometry timeloop gui
)
apps/tutorials/cuda/image.png
0 → 100644
View file @
aa1ed37e
120 KB
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment