From 9cde0d9fa7759409d2d2dc702c5c3ed6a65fc554 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Thu, 21 Feb 2019 08:25:37 +0100 Subject: [PATCH] execution tree module - a more flexible time loop implementation - allows for nested execution flow (i.e. solver loop inside time loop) - timing tree integration - parallel CUDA stream support --- src/cuda/CMakeLists.txt | 3 +- src/cuda/ExecutionTreeGPU.h | 190 +++++++++++++++++++ src/cuda/ExecutionTreeSweepGPU.h | 103 ++++++++++ src/cuda/ParallelStreams.h | 6 +- src/executiontree/CMakeLists.txt | 9 + src/executiontree/ExecutionTree.cpp | 220 ++++++++++++++++++++++ src/executiontree/ExecutionTree.h | 206 ++++++++++++++++++++ src/executiontree/ExecutionTree.impl.h | 109 +++++++++++ src/executiontree/ExecutionTreeSweep.h | 119 ++++++++++++ tests/CMakeLists.txt | 1 + tests/executiontree/CMakeLists.txt | 8 + tests/executiontree/ExecutionTreeTest.cpp | 68 +++++++ 12 files changed, 1038 insertions(+), 4 deletions(-) create mode 100644 src/cuda/ExecutionTreeGPU.h create mode 100644 src/cuda/ExecutionTreeSweepGPU.h create mode 100644 src/executiontree/CMakeLists.txt create mode 100644 src/executiontree/ExecutionTree.cpp create mode 100644 src/executiontree/ExecutionTree.h create mode 100644 src/executiontree/ExecutionTree.impl.h create mode 100644 src/executiontree/ExecutionTreeSweep.h create mode 100644 tests/executiontree/CMakeLists.txt create mode 100644 tests/executiontree/ExecutionTreeTest.cpp diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt index c4ff50133..98aa991f0 100644 --- a/src/cuda/CMakeLists.txt +++ b/src/cuda/CMakeLists.txt @@ -4,6 +4,7 @@ # ################################################################################################### -waLBerla_add_module( DEPENDS blockforest core communication domain_decomposition python_coupling field stencil BUILD_ONLY_IF_FOUND CUDA ) +waLBerla_add_module( DEPENDS blockforest core communication domain_decomposition executiontree 
python_coupling field stencil + BUILD_ONLY_IF_FOUND CUDA ) ################################################################################################### \ No newline at end of file diff --git a/src/cuda/ExecutionTreeGPU.h b/src/cuda/ExecutionTreeGPU.h new file mode 100644 index 000000000..9f458f289 --- /dev/null +++ b/src/cuda/ExecutionTreeGPU.h @@ -0,0 +1,190 @@ +//============================================================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ExecutionTreeGPU.h +//! \ingroup cuda +//! 
\author Martin Bauer <martin.bauer@fau.de>
+//
+//==============================================================================================================================================================
+
+#pragma once
+
+#include "executiontree/ExecutionTree.h"
+#include "ParallelStreams.h"
+
+#include <cuda_runtime.h>
+
+namespace walberla {
+namespace executiontree {
+
+// -------------------------------------- Forward Declarations ------------------------------------------------------------------------------------------------
+
+using executiontree::IFunctionNode;
+using executiontree::IFunctionNodePtr;
+using executiontree::TimingTreePtr;
+
+class SequenceCUDA;
+class IFunctionNodeCUDA;
+template<typename FunctorClass> class FunctorCUDA;
+using IFunctionNodeCUDAPtr = shared_ptr<IFunctionNodeCUDA>;
+
+// -------------------------------------- Public Interface ------------------------------------------------------------------------------------------------
+/*! Creates a FunctorCUDA node: the callable is copied and later invoked with the CUDA stream as its only argument */
+template<typename FunctorType>
+IFunctionNodeCUDAPtr functorCUDA( const FunctorType & t, const std::string &name = "", const TimingTreePtr &timingTree = nullptr );
+
+/*! Creates a SequenceCUDA node; 'priority' is forwarded to cuda::ParallelStreams, 'parallel' selects one stream per child */
+shared_ptr< SequenceCUDA > sequenceCUDA( std::initializer_list< IFunctionNodeCUDAPtr > initializerList,
+                                         const std::string &name, cudaStream_t defaultStream = 0, bool parallel = false, int priority = 0,
+                                         const TimingTreePtr &timingTree = nullptr );
+
+// -------------------------------------- Node Classes --------------------------------------------------------------------------------------------------------
+
+/*! Function node that can additionally be executed on an explicitly given CUDA stream */
+class IFunctionNodeCUDA : public IFunctionNode
+{
+public:
+   virtual void operator()( cudaStream_t ) = 0;
+};
+
+/*! Host callbacks enqueued via cudaLaunchHostFunc; 'data' has to point to the node (of type FunctorClass) that enqueued them */
+template<typename FunctorClass>
+void CUDART_CB functorCUDAStartTimer(void *data)
+{
+   auto functor = reinterpret_cast<FunctorClass *>( data );
+   functor->timingTree_->start( functor->getName() );
+}
+
+template<typename FunctorClass>
+void CUDART_CB functorCUDAStopTimer(void *data)
+{
+   auto functor = reinterpret_cast<FunctorClass *>( data );
+   functor->timingTree_->stop( functor->getName() );
+}
+
+/*! Node around a callable taking a cudaStream_t; with a timing tree, timer start/stop are enqueued on the stream around the call */
+template<typename FunctorType>
+class FunctorCUDA : public IFunctionNodeCUDA
+{
+public:
+   FunctorCUDA( const FunctorType &functor,
+                const std::string &name,
+                const TimingTreePtr &timingTree )
+      : functor_( functor ), name_( name ), timingTree_( timingTree ) {}
+
+   void operator() (cudaStream_t stream) override
+   {
+      if ( timingTree_ )
+      {
+         WALBERLA_CUDA_CHECK( cudaLaunchHostFunc( stream, functorCUDAStartTimer<FunctorCUDA<FunctorType> >, this ) );
+         executiontree::internal::Caller<FunctorType>::call( functor_, stream );
+         WALBERLA_CUDA_CHECK( cudaLaunchHostFunc( stream, functorCUDAStopTimer<FunctorCUDA<FunctorType> >, this ) );
+      }
+      else
+         executiontree::internal::Caller<FunctorType>::call( functor_, stream );
+   }
+
+   const std::string getName() const override { return name_ != "" ? name_ : "FunctorCUDA"; }
+   void operator() () override { (*this)( 0 ); }
+
+private:
+   friend void CUDART_CB functorCUDAStartTimer<FunctorCUDA<FunctorType> >(void *data);
+   friend void CUDART_CB functorCUDAStopTimer<FunctorCUDA<FunctorType> >(void *data);
+
+   FunctorType functor_;
+   std::string name_;
+   shared_ptr< WcTimingTree > timingTree_;
+};
+
+/*! Runs its children in insertion order, either all on one stream or (parallel) each on a stream of a ParallelStreams section */
+class SequenceCUDA : public IFunctionNodeCUDA
+{
+public:
+   SequenceCUDA( std::initializer_list< IFunctionNodeCUDAPtr > initializerList, const std::string &name, cudaStream_t defaultStream,
+                 bool parallel = false, int priority=0,
+                 const TimingTreePtr &timingTree = nullptr)
+      : name_( name ), defaultStream_( defaultStream), timingTree_( timingTree ), parallelStreams_( priority ), parallel_( parallel ), priority_(priority)
+   {
+      for ( auto &e : initializerList )
+         children_.push_back( e );
+   }
+
+   // timer start/stop are enqueued on 'stream' as host functions before and after the children
+   void operator() (cudaStream_t stream) override
+   {
+      if ( timingTree_ ) {
+         WALBERLA_CUDA_CHECK( cudaLaunchHostFunc( stream, functorCUDAStartTimer< SequenceCUDA >, this ));
+      }
+
+      if( parallel_ )
+      {
+         auto parallelSection = parallelStreams_.parallelSection( stream );
+         for ( auto &el : children_ )
+         {
+            ( *el )( parallelSection.stream());
+            parallelSection.next();
+         }
+      }
+      else
+         for ( auto &el : children_ )
+            (*el)( stream );
+
+      if ( timingTree_ ) {
+         WALBERLA_CUDA_CHECK( cudaLaunchHostFunc( stream, functorCUDAStopTimer< SequenceCUDA >, this ));
+      }
+   }
+
+   void operator() () override { (*this)( defaultStream_ ); }
+   void push_back( const IFunctionNodeCUDAPtr &fct ) { children_.push_back( fct ); }
+   void push_front( const IFunctionNodeCUDAPtr &fct ) { children_.push_front( fct ); }
+   const std::string getName() const override { return name_ != "" ? name_ : "ParallelSequenceCUDA"; }
+   const std::deque< IFunctionNodePtr > getChildren() const override {
+      std::deque< IFunctionNodePtr > result;
+      for( auto & c : children_ )
+         result.push_back( c );
+      return result;
+   }
+
+private:
+   friend void CUDART_CB functorCUDAStartTimer< SequenceCUDA >( void *data );
+   friend void CUDART_CB functorCUDAStopTimer< SequenceCUDA >( void *data );
+
+   std::string name_;
+   cudaStream_t defaultStream_;
+   std::deque< IFunctionNodeCUDAPtr > children_;
+   shared_ptr< WcTimingTree > timingTree_;
+   cuda::ParallelStreams parallelStreams_;
+   bool parallel_;
+   int priority_;
+};
+
+
+template<typename FunctorType>
+IFunctionNodeCUDAPtr functorCUDA( const FunctorType & t, const std::string &name, const shared_ptr< WcTimingTree > &timingTree )
+{
+   return make_shared<FunctorCUDA<FunctorType> >( t, name, timingTree );
+}
+
+// 'inline' is required: non-template function defined in a header, otherwise every including translation unit emits a definition
+inline shared_ptr< SequenceCUDA > sequenceCUDA( std::initializer_list< IFunctionNodeCUDAPtr > initializerList,
+                                                const std::string &name, cudaStream_t defaultStream, bool parallel, int priority,
+                                                const TimingTreePtr &timingTree )
+{
+   return make_shared< SequenceCUDA >( initializerList, name, defaultStream, parallel, priority, timingTree );
+}
+
+
+} // namespace executiontree
+} // namespace walberla
diff --git a/src/cuda/ExecutionTreeSweepGPU.h b/src/cuda/ExecutionTreeSweepGPU.h
new file mode 
100644 index 000000000..e5ad3d2a6 --- /dev/null +++ b/src/cuda/ExecutionTreeSweepGPU.h @@ -0,0 +1,103 @@ +//============================================================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ExecutionTreeSweepGPU.h +//! \ingroup cuda +//! 
\author Martin Bauer <martin.bauer@fau.de>
+//
+//==============================================================================================================================================================
+
+#pragma once
+
+#include "domain_decomposition/IBlock.h"
+#include "executiontree/ExecutionTree.h"
+#include "ExecutionTreeGPU.h"
+
+namespace walberla {
+namespace executiontree {
+
+/*! Creates a SweepCUDA node: the functor is copied and called with ( pointer to block, cudaStream_t ) for every block of 'bs' */
+template<typename FunctorType>
+IFunctionNodeCUDAPtr sweepCUDA( BlockStorage &bs, const FunctorType & t, const std::string &name = "", const TimingTreePtr &timingTree = nullptr );
+
+template<typename FunctorType>
+IFunctionNodeCUDAPtr sweepCUDA( const shared_ptr< StructuredBlockStorage > &bs, const FunctorType & t, const std::string &name = "",
+                                const TimingTreePtr &tt = nullptr );
+
+/*! Node that iterates over all blocks of a block storage and calls the wrapped functor for each block on the given stream */
+template<typename FunctorType>
+class SweepCUDA : public IFunctionNodeCUDA
+{
+public:
+   // the block storage is kept as a reference member - it has to outlive this node
+   SweepCUDA( BlockStorage &bs,
+              const FunctorType &functor,
+              const std::string &name,
+              const TimingTreePtr &timingTree )
+      : blockStorage_( bs ), functor_( functor ), name_( name ), timingTree_( timingTree ) {}
+
+   SweepCUDA( const shared_ptr <StructuredBlockStorage> &bs,
+              const FunctorType &functor,
+              const std::string &name,
+              const TimingTreePtr &timingTree )
+      : blockStorage_( bs->getBlockStorage()), functor_( functor ), name_( name ), timingTree_( timingTree ) {}
+
+   // without an explicit stream the sweep is executed with stream 0
+   void operator() () override { (*this)( 0 ); }
+
+   void operator()( cudaStream_t stream ) override
+   {
+      if ( timingTree_ )
+      {
+         // NOTE(review): timer runs on the host around each call - presumably only covers the launch of asynchronous work, confirm
+         for ( auto &block: blockStorage_ )
+         {
+            timingTree_->start( name_ );
+            executiontree::internal::Caller<FunctorType>::call( functor_, &block, stream );
+            timingTree_->stop( name_ );
+         }
+      }
+      else
+         for ( auto &block: blockStorage_ )
+            executiontree::internal::Caller<FunctorType>::call( functor_, &block, stream );
+   }
+
+   const std::string getName() const override { return name_ != "" ? name_ : "Sweep"; }
+
+private:
+   BlockStorage &blockStorage_;
+
+   FunctorType functor_;
+   std::string name_;
+   TimingTreePtr timingTree_;
+};
+
+// parameter types have to match the declaration exactly - a by-value functor parameter would declare a second, unrelated overload
+template<typename FunctorType>
+IFunctionNodeCUDAPtr sweepCUDA( BlockStorage &bs, const FunctorType & t, const std::string &name, const TimingTreePtr &timingTree )
+{
+   return make_shared<SweepCUDA<FunctorType> >( bs, t, name, timingTree );
+}
+
+template<typename FunctorType>
+IFunctionNodeCUDAPtr sweepCUDA( const shared_ptr< StructuredBlockStorage > &bs, const FunctorType & t, const std::string &name,
+                                const TimingTreePtr &timingTree )
+{
+   return make_shared<SweepCUDA<FunctorType> >( bs, t, name, timingTree );
+}
+
+
+} // namespace executiontree
+} // namespace walberla
diff --git a/src/cuda/ParallelStreams.h b/src/cuda/ParallelStreams.h
index 8f6348015..4116e0ef9 100644
--- a/src/cuda/ParallelStreams.h
+++ b/src/cuda/ParallelStreams.h
@@ -35,15 +35,15 @@ namespace cuda { ~ParallelSection(); void run( const std::function<void( cudaStream_t )> &f ); + cudaStream_t stream(); + void next(); + private: friend class ParallelStreams; ParallelSection( ParallelStreams *parent, cudaStream_t mainStream ); void synchronize(); - cudaStream_t stream(); - void next(); - ParallelStreams * parent_; cudaStream_t mainStream_; cudaEvent_t startEvent_;
diff --git a/src/executiontree/CMakeLists.txt b/src/executiontree/CMakeLists.txt
new file mode 100644
index 000000000..46737d9f9
--- /dev/null
+++ b/src/executiontree/CMakeLists.txt
@@ -0,0 +1,9 @@
+###################################################################################################
+#
+# Module executiontree
+#
+###################################################################################################
+
+waLBerla_add_module( DEPENDS core domain_decomposition timeloop )
+
+###################################################################################################
\ No newline at end of file
diff --git a/src/executiontree/ExecutionTree.cpp 
b/src/executiontree/ExecutionTree.cpp new file mode 100644 index 000000000..b27389707 --- /dev/null +++ b/src/executiontree/ExecutionTree.cpp @@ -0,0 +1,220 @@ +//============================================================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ExecutionTree.cpp +//! \ingroup executiontree +//! 
\author Martin Bauer <martin.bauer@fau.de>
+//
+//==============================================================================================================================================================
+
+#include <cmath>
+#include <iomanip>
+#include <sstream>
+#include <iostream>
+#include "core/logging/Logging.h"
+#include "core/OpenMP.h"
+#include "ExecutionTree.h"
+
+namespace walberla {
+namespace executiontree {
+
+using timeloop::ITimeloop;
+
+
+// --------------------------- Logging Integration of Loop node -----------------------------------------------------------------------------------------------
+
+/*! Logging stamp "[<current time step>]", right-aligned to a width derived from the number of time steps of the timeloop */
+class LoggingStamp : public logging::Logging::CustomStamp
+{
+public:
+   explicit LoggingStamp( const ITimeloop & timeloop ) : timeloop_( timeloop ) {}
+   std::string stamp() override
+   {
+      std::ostringstream oss;
+      int indention;
+
+      if( timeloop_.getNrOfTimeSteps() > 0 )
+         indention = int_c( std::ceil( std::log10( real_c( timeloop_.getNrOfTimeSteps() ) ) ) );
+      else if( timeloop_.getCurrentTimeStep() > 0 )
+         indention = int_c( std::ceil( std::log10( real_c( timeloop_.getCurrentTimeStep() ) ) ) );
+      else
+         indention = 0;
+
+      oss << std::setw( indention )
+          << std::setfill(' ') << std::right << timeloop_.getCurrentTimeStep();
+      return std::string("[") + oss.str() + std::string("]");
+   }
+   uint_t maxStampWidth() override
+   {
+      if( timeloop_.getNrOfTimeSteps() > 0 )
+         return uint_c( std::ceil( std::log10( real_c( timeloop_.getNrOfTimeSteps() ) ) ) ) + uint_c(2);
+      else if( timeloop_.getCurrentTimeStep() > 0 )
+         return uint_c( std::ceil( std::log10( real_c( timeloop_.getCurrentTimeStep() ) ) ) ) + uint_c(2);
+      else
+         return uint_c(2);
+   }
+private:
+   const ITimeloop & timeloop_;
+};
+
+/*! Scope guard: registers the stamp at the logging singleton on construction and clears the custom stamp on destruction */
+class LoggingStampManager
+{
+public:
+   LoggingStampManager( const shared_ptr< LoggingStamp > & stamp, const bool useCustomStamp )
+      : useCustomStamp_( useCustomStamp )
+   {
+      if( useCustomStamp_ )
+         logging::Logging::instance()->addCustomStamp( stamp );
+   }
+   ~LoggingStampManager()
+   {
+      if( useCustomStamp_ )
+         logging::Logging::instance()->clearCustomStamp();
+   }
+private:
+   const bool useCustomStamp_;
+};
+
+// --------------------------- Printing ------------------------------------------------------------------------------------------------------------------------
+/*! Prints the node name preceded by 'indentation' spaces, then recursively all children with the indentation increased by 4 */
+void printNode( std::ostream &os, const IFunctionNode &node, uint_t indentation )
+{
+   for ( uint_t i = 0; i < indentation; ++i )
+      os << " ";
+
+   os << node.getName() << std::endl;
+   for ( auto &c : node.getChildren())
+      printNode( os, *c, indentation + 4 );
+}
+
+std::ostream &operator<<( std::ostream &os, const IFunctionNode &node )
+{
+   printNode( os, node, 0 );
+   return os;
+}
+
+// --------------------------- Node class implementation -------------------------------------------------------------------------------------------------------
+
+// 'startValue' is the initial value of the call counter, 'interval' the "n" of every n'th call
+EveryNth::EveryNth( const IFunctionNodePtr &node, uint_t interval, bool onFirst, uint_t startValue )
+   : wrapped_( node ), interval_( interval ), onFirst_( onFirst ), calls_( startValue ) {}
+
+// runs the wrapped node whenever the call counter is a multiple of the interval; counter value 0 is skipped unless onFirst is set
+void EveryNth::operator()()
+{
+   if ( calls_ == 0 && !onFirst_ ) {
+      ++calls_;
+      return;
+   }
+
+   if ( interval_ != 0 && ( calls_ % interval_ ) == 0 ) // an interval of 0 never runs the node instead of dividing by zero
+      ( *wrapped_ )();
+   ++calls_;
+}
+
+const std::string EveryNth::getName() const
+{
+   std::stringstream ss;
+   ss << "every " << interval_ << "th step:";
+   return ss.str();
+}
+
+// children are stored in the order given by the initializer list
+Sequence::Sequence( std::initializer_list< IFunctionNodePtr > initializerList, const std::string &name, const TimingTreePtr &timingTree, bool parallel )
+   : name_( name ), timingTree_( timingTree ), parallel_( parallel )
+{
+   for ( auto &e : initializerList )
+      children_.push_back( e );
+}
+
+void Sequence::operator()()
+{
+#ifdef WALBERLA_BUILD_WITH_OPENMP
+   if( parallel_ && !children_.empty() ) // num_threads() needs a positive value, an empty sequence takes the serial path
+   {
+      if ( timingTree_ )
+         timingTree_->start( name_ );
+
+      // Work-sharing loop instead of indexing the children by thread id: the runtime may grant fewer
+      // threads than requested, and every child still has to be executed exactly once.
+      const int nrOfChildren = int_c( children_.size() );
+      #pragma omp parallel for schedule( static, 1 ) num_threads( nrOfChildren )
+      for ( int i = 0; i < nrOfChildren; ++i )
+         ( *children_[ uint_c( i ) ] )();
+
+      if ( timingTree_ )
+         timingTree_->stop( name_ );
+
+      return;
+   }
+#endif
+   WALBERLA_UNUSED(parallel_);
+
+   if ( timingTree_ )
+      timingTree_->start( name_ );
+
+   for ( auto &el : children_ )
+   {
+      ( *el )();
+   }
+
+   if ( timingTree_ )
+      timingTree_->stop( name_ );
+}
+
+// the iteration counter starts at zero; 'logTimeStep' enables the time step stamp in log messages while the loop runs
+Loop::Loop( const IFunctionNodePtr &body, uint_t iterations, bool logTimeStep )
+   : body_( body ), currentIteration_( 0 ), iterations_( iterations ), stop_( false ), logTimeStep_( logTimeStep ) {}
+
+// executes the body once and advances the iteration counter, independent of the configured number of iterations
+void Loop::singleStep()
+{
+   LoggingStampManager raii( make_shared<LoggingStamp>( *this ), logTimeStep_ );
+   ( *body_ )();
+   ++currentIteration_;
+}
+
+void Loop::operator()()
+{
+   LoggingStampManager raii( make_shared<LoggingStamp>( *this ), logTimeStep_ );
+   // NOTE(review): on stop the counter is not advanced for the step that just ran, unlike singleStep() - confirm this is intended
+   for ( ; currentIteration_ < iterations_; ++currentIteration_ )
+   {
+      ( *body_ )();
+      if ( stop_ )
+      {
+         stop_ = false;
+         break;
+      }
+   }
+}
+
+void Loop::synchronizedStop( bool stopVar )
+{
+   stop_ = stopVar;
+   mpi::allReduceInplace( stop_, mpi::LOGICAL_OR );
+}
+
+const std::string Loop::getName() const
+{
+   std::stringstream ss;
+   ss << "Loop [" << iterations_ << "]";
+   return ss.str();
+}
+
+
+} // namespace executiontree
+} // namespace walberla
\ No newline at end of file
diff --git a/src/executiontree/ExecutionTree.h b/src/executiontree/ExecutionTree.h
new file mode 100644
index 000000000..99b3515a5
--- /dev/null
+++ b/src/executiontree/ExecutionTree.h
@@ -0,0 +1,206 @@
+//==============================================================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version. 
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ExecutionTree.h
+//! \ingroup executiontree
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//==============================================================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "timeloop/ITimeloop.h"
+#include "core/timing/TimingTree.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include <deque>
+#include <string>
+#include <initializer_list>
+#include <functional>
+#include <iosfwd>
+
+namespace walberla {
+namespace executiontree {
+
+
+// -------------------------------------- Forward Declarations ------------------------------------------------------------------------------------------------
+
+class IFunctionNode;
+using IFunctionNodePtr = shared_ptr<IFunctionNode>;
+using TimingTreePtr = shared_ptr<WcTimingTree>;
+
+class EveryNth;
+class Sequence;
+class Loop;
+
+template< typename FunctorType > class Functor;
+template< typename FunctorType > class SharedFunctor;
+template< typename FunctorType > class Sweep;
+template< typename FunctorType > class SharedSweep;
+
+
+// -------------------------------------- Public Interface ------------------------------------------------------------------------------------------------
+
+
+/*! Creates a functor node around any callable object. The wrapped functor is copied.
+ *
+ * \param t any callable object. The object is copied - if its state has to be modified later, pass a shared_ptr to a functor instead
+ * \param name optional name of the functor node
+ * \param timingTree optional timing tree object to time all executions of this functor
+ */
+template<typename FunctorType>
+IFunctionNodePtr functor( FunctorType t, const std::string &name = "", const shared_ptr< WcTimingTree > &timingTree = nullptr );
+
+
+/*! Combine multiple task nodes into a (named) sequence
+ *
+ * \param initializerList list of tasks that are executed in the passed order
+ * \param name optional sequence name, used for printing and for labeling time measurements
+ * \param timingTree optional timing tree object
+ */
+shared_ptr< Sequence > sequence( std::initializer_list< IFunctionNodePtr > initializerList,
+                                 const std::string &name = "",
+                                 const TimingTreePtr &timingTree = nullptr );
+
+
+/*! All subtasks of this region are executed in parallel using OpenMP (sequentially if waLBerla is built without OpenMP) */
+shared_ptr< Sequence > parallelSequence( std::initializer_list< IFunctionNodePtr > initializerList,
+                                         const std::string &name = "",
+                                         const TimingTreePtr &timingTree = nullptr );
+
+
+
+/*! Node that runs its contents only every n'th call
+ *
+ * \param node task that is only run every n'th call
+ * \param interval the interval i.e. "n", has to be greater than zero
+ * \param onFirst if false the task is not run at the first call
+ * \param startValue initial call counter
+ */
+shared_ptr< EveryNth > everyNth( const IFunctionNodePtr &node,
+                                 uint_t interval,
+                                 bool onFirst = false,
+                                 uint_t startValue = 0 );
+
+/*! Runs the child node for the given amount of iterations */
+shared_ptr< Loop > loop( const IFunctionNodePtr &body, uint_t iterations, bool logTimeStep = true );
+
+/*! Prints the name of the node and, indented by four spaces per level, the names of all its children */
+std::ostream &operator<<( std::ostream &os, const IFunctionNode &node );
+
+
+// -------------------------------------- Node Classes --------------------------------------------------------------------------------------------------------
+
+/*! Common interface of all nodes of the execution tree: a named callable with an optional list of child nodes */
+class IFunctionNode
+{
+public:
+   virtual ~IFunctionNode() = default;
+   virtual void operator()() = 0;
+   virtual const std::string getName() const = 0;
+   virtual const std::deque< shared_ptr< IFunctionNode > > getChildren() const { return {}; }
+};
+
+/*! Leaf node that stores a copy of a callable (or a shared_ptr to one) and calls it without arguments */
+template<typename FunctorType>
+class Functor : public IFunctionNode
+{
+public:
+   Functor(const FunctorType &functor,
+           const std::string &name,
+           const TimingTreePtr & timingTree );
+
+   const std::string getName() const override { return name_ != "" ? name_ : "Functor"; }
+   void operator() () override;
+
+private:
+   FunctorType functor_;
+   std::string name_;
+   shared_ptr< WcTimingTree > timingTree_;
+};
+
+/*! Forwards only those calls to the wrapped node for which the internal call counter is a multiple of the interval */
+class EveryNth : public IFunctionNode
+{
+public:
+   EveryNth( const IFunctionNodePtr &node, uint_t interval, bool onFirst = false, uint_t startValue = 0 );
+
+   void operator()() override;
+   const std::string getName() const override;
+   const std::deque< shared_ptr< IFunctionNode > > getChildren() const override { return { wrapped_ }; }
+
+private:
+   IFunctionNodePtr wrapped_;
+   uint_t interval_;
+   bool onFirst_;
+   uint_t calls_;
+};
+
+/*! Ordered list of child nodes that are run one after another, or concurrently via OpenMP if 'parallel' is set */
+class Sequence : public IFunctionNode
+{
+public:
+   Sequence( std::initializer_list< IFunctionNodePtr > initializerList, const std::string &name,
+             const TimingTreePtr &timingTree = nullptr, bool parallel = false );
+
+   void operator()() override;
+
+   void push_back( const IFunctionNodePtr &fct ) { children_.push_back( fct ); }
+   void push_front( const IFunctionNodePtr &fct ) { children_.push_front( fct ); }
+   const std::string getName() const override { return name_ != "" ? name_ : "Sequence"; }
+   const std::deque< IFunctionNodePtr > getChildren() const override { return children_; }
+
+private:
+   std::string name_;
+   std::deque< IFunctionNodePtr > children_;
+   shared_ptr< WcTimingTree > timingTree_;
+   bool parallel_;
+};
+
+/*! Repeats its body until the iteration counter reaches 'iterations' or a stop is requested; usable as timeloop::ITimeloop */
+class Loop : public IFunctionNode, public timeloop::ITimeloop
+{
+public:
+   Loop( const IFunctionNodePtr &body, uint_t iterations, bool logTimeStep = true );
+
+   void operator()() override;
+   void run() override { ( *this )(); }
+   void singleStep() override;
+
+   void synchronizedStop( bool stopVal ) override;
+   void stop() override { stop_ = true; }
+   void setBody( const IFunctionNodePtr &body ) { body_ = body; }
+   void setCurrentTimeStep( uint_t ts ) override { currentIteration_ = ts; }
+   uint_t getCurrentTimeStep() const override { return currentIteration_; }
+   uint_t getNrOfTimeSteps() const override { return iterations_; }
+
+   const std::deque< shared_ptr< IFunctionNode > > getChildren() const override { return { body_ }; }
+   const std::string getName() const override;
+
+private:
+   IFunctionNodePtr body_;
+   uint_t currentIteration_;
+   uint_t iterations_;
+   bool stop_;
+   bool logTimeStep_;
+};
+
+
+} // namespace executiontree
+} // namespace walberla
+
+
+#include "ExecutionTree.impl.h"
\ No newline at end of file
diff --git a/src/executiontree/ExecutionTree.impl.h b/src/executiontree/ExecutionTree.impl.h
new file mode 100644
index 000000000..8bdfb2bed
--- /dev/null
+++ b/src/executiontree/ExecutionTree.impl.h
@@ -0,0 +1,109 @@
+//==============================================================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version. 
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ExecutionTree.impl.h
+//! \ingroup executiontree
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//==============================================================================================================================================================
+
+#pragma once
+
+#include "ExecutionTree.h"
+#include <utility>
+
+namespace walberla {
+namespace executiontree {
+
+namespace internal {
+
+// Call dispatcher: lets node classes invoke a functor object and a shared_ptr to a functor through the same syntax
+template<typename T>
+struct Caller
+{
+   template<typename ... Params>
+   static void call( T &callable, Params&&... params )
+   {
+      callable( std::forward<Params>( params )... );
+   }
+};
+
+template<typename T>
+struct Caller< shared_ptr < T > >
+{
+   template<typename ... Params>
+   static void call( shared_ptr <T> &callablePtr, Params&&... params )
+   {
+      ( *callablePtr )( std::forward<Params>( params )... );
+   }
+};
+
+
+} // namespace internal
+
+
+template<typename FunctorType>
+IFunctionNodePtr functor( FunctorType t, const std::string &name, const TimingTreePtr &timingTree )
+{
+   return make_shared< Functor< FunctorType > >( t, name, timingTree );
+}
+
+inline shared_ptr <Sequence> sequence( std::initializer_list< IFunctionNodePtr > initializerList, const std::string &name,
+                                       const TimingTreePtr &timingTree )
+{
+   return make_shared< Sequence >( initializerList, name, timingTree, /* parallel = */ false );
+}
+
+inline shared_ptr <Sequence> parallelSequence( std::initializer_list< IFunctionNodePtr > initializerList, const std::string &name,
+                                               const TimingTreePtr &timingTree )
+{
+   return make_shared< Sequence >( initializerList, name, timingTree, /* parallel = */ true );
+}
+
+
+inline shared_ptr< EveryNth > everyNth( const IFunctionNodePtr &node, uint_t interval, bool onFirst, uint_t startValue )
+{
+   return make_shared< EveryNth >( node, interval, onFirst, startValue );
+}
+
+
+inline shared_ptr< Loop > loop( const IFunctionNodePtr &body, uint_t iterations, bool logTimeStep )
+{
+   return make_shared< Loop >( body, iterations, logTimeStep );
+}
+
+
+template<typename FunctorType>
+Functor< FunctorType >::Functor( const FunctorType &functor, const std::string &name, const TimingTreePtr &timingTree )
+   : functor_( functor ), name_( name ), timingTree_( timingTree ) {}
+
+
+template<typename FunctorType>
+void Functor< FunctorType >::operator()()
+{
+   if ( !timingTree_ )
+   {
+      internal::Caller<FunctorType>::call( functor_ );
+      return;
+   }
+   timingTree_->start( name_ );
+   internal::Caller<FunctorType>::call( functor_ );
+   timingTree_->stop( name_ );
+}
+
+
+
+} // namespace executiontree
+} // namespace walberla
\ No newline at end of file
diff --git a/src/executiontree/ExecutionTreeSweep.h b/src/executiontree/ExecutionTreeSweep.h
new file mode 100644
index 000000000..e450d9843
--- /dev/null
+++ b/src/executiontree/ExecutionTreeSweep.h
@@ -0,0 +1,119 @@
+//============================================================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file TaskTree.h +//! \ingroup executiontree +//! \author Martin Bauer <martin.bauer@fau.de> +// +//============================================================================================================================================================== + +#include "domain_decomposition/IBlock.h" +#include "executiontree/ExecutionTree.h" + +namespace walberla { +namespace executiontree { + + +// -------------------------------------- Public Interface ------------------------------------------------------------------------------------------------ + + +template<typename FunctorType> +IFunctionNodePtr sweep( BlockStorage &bs, + const FunctorType &functor, + const std::string &name, + const TimingTreePtr &timingTree = nullptr ); + +template<typename FunctorType> +IFunctionNodePtr sweep( const shared_ptr< StructuredBlockStorage > &bs, + const FunctorType &functor, + const std::string &name, + const TimingTreePtr &timingTree = nullptr ); + + +// -------------------------------------- Implementation ------------------------------------------------------------------------------------------------------ + + 
+template<typename FunctorType>
+class Sweep : public IFunctionNode // execution tree node that calls a functor once per block of a BlockStorage
+{
+public:
+   Sweep( BlockStorage &bs, // bound to the reference member blockStorage_ (no copy): bs has to outlive this node
+          const FunctorType &functor,
+          const std::string &name,
+          const TimingTreePtr &timingTree )
+      : blockStorage_( bs ),
+        functor_( functor ),
+        name_( name ),
+        timingTree_( timingTree ) {}
+   // Same, but binds to bs->getBlockStorage(). The shared_ptr is not stored, so it does not keep the storage alive.
+   Sweep( const shared_ptr< StructuredBlockStorage > &bs,
+          const FunctorType &functor,
+          const std::string &name,
+          const TimingTreePtr &timingTree )
+      : blockStorage_( bs->getBlockStorage()),
+        functor_( functor ),
+        name_( name ),
+        timingTree_( timingTree ) {}
+   // Calls the functor once for every block in blockStorage_, passing a pointer to the block.
+   void operator()() override
+   {
+      if ( timingTree_ )
+      {
+         for ( auto &block: blockStorage_ )
+         {
+            timingTree_->start( name_ ); // timer is started and stopped per block, not once around the whole loop
+            internal::Caller<FunctorType>::call( functor_, &block );
+            timingTree_->stop( name_ );
+         }
+      }
+      else
+         for ( auto &block: blockStorage_ )
+            internal::Caller<FunctorType>::call( functor_, &block );
+   }
+   // Returns name_, or the fallback "Sweep" when name_ is the empty string.
+   const std::string getName() const override { return name_ != "" ? name_ : "Sweep"; };
+
+private:
+   BlockStorage &blockStorage_; // non-owning reference, see constructors
+
+   FunctorType functor_;        // copy of the functor passed to the constructor
+   std::string name_;           // used as timer name and by getName()
+   TimingTreePtr timingTree_;   // may be null; then no timing is done
+};
+
+
+// Factory: wraps the arguments into a Sweep< FunctorType > node created with make_shared.
+template<typename FunctorType>
+IFunctionNodePtr sweep( BlockStorage &bs,
+                        const FunctorType &functor,
+                        const std::string &name,
+                        const TimingTreePtr &timingTree )
+{
+   return make_shared< Sweep< FunctorType > >( bs, functor, name, timingTree );
+}
+// Factory overload for a StructuredBlockStorage; selects the second Sweep constructor.
+template<typename FunctorType>
+IFunctionNodePtr sweep( const shared_ptr< StructuredBlockStorage > &bs,
+                        const FunctorType &functor,
+                        const std::string &name,
+                        const TimingTreePtr &timingTree )
+{
+   return make_shared< Sweep< FunctorType > >( bs, functor, name, timingTree );
+}
+
+
+
+} // namespace executiontree
+} // namespace walberla
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 47e3b49ff..e259e3a7d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -7,6 +7,7 @@ add_subdirectory( boundary )
 add_subdirectory( core )
 add_subdirectory( cuda )
 add_subdirectory( domain_decomposition )
+add_subdirectory( executiontree )
 add_subdirectory( fft )
 add_subdirectory( field )
 add_subdirectory( gather )
diff --git a/tests/executiontree/CMakeLists.txt b/tests/executiontree/CMakeLists.txt
new file mode 100644
index 000000000..37fcab24f
--- /dev/null
+++ b/tests/executiontree/CMakeLists.txt
@@ -0,0 +1,8 @@
+###################################################################################################
+#
+# Tests for executiontree module
+#
+###################################################################################################
+
+waLBerla_compile_test( NAME ExecutionTreeTest FILES ExecutionTreeTest.cpp )
+waLBerla_execute_test( NAME ExecutionTreeTest )
diff --git a/tests/executiontree/ExecutionTreeTest.cpp b/tests/executiontree/ExecutionTreeTest.cpp
new file mode 100644
index 000000000..c362dd11f
--- /dev/null
+++ b/tests/executiontree/ExecutionTreeTest.cpp
@@ -0,0 +1,68 @@
+#include "executiontree/ExecutionTree.h"
+
+#include <iostream>
+#include "core/logging/Logging.h"
+#include "core/debug/Debug.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/mpi/Environment.h"
+
+using namespace walberla;
+namespace et = executiontree;
+// Functor with state: every call logs i and then increments it. Handed to the tree as shared_ptr, so main can read i afterwards.
+class MyFunctor
+{
+public:
+   void operator() ()
+   {
+      WALBERLA_LOG_RESULT( "i = " << i );
+      i += 1;
+   }
+
+   int i = 0;
+};
+
+// Builds a sequence of five nodes, runs it in a loop of 20 iterations and checks how often each callable was invoked.
+int main( int argc, char **argv )
+{
+   mpi::Environment env( argc, argv );
+   debug::enterTestMode();
+
+   int counter1 = 0;
+   auto func1 = [&counter1]() {
+      WALBERLA_LOG_RESULT("A");
+      ++counter1;
+   };
+
+   int counter2 = 0;
+   auto func2 = [&counter2]() {
+      ++counter2;
+   };
+
+   int counter3 = 0;
+   auto func3 = [&counter3]() {
+      ++counter3;
+   };
+
+   auto func4 = [] { WALBERLA_LOG_RESULT("B"); };
+
+   auto myFunctor = make_shared<MyFunctor>();
+   // func2 / func3 are wrapped in everyNth with interval 5 and onFirst = true / false; the other three nodes are plain functors
+   auto s = et::parallelSequence( { et::everyNth( et::functor( func2, "func2" ), 5, true ),
+                                    et::everyNth( et::functor( func3, "func3" ), 5, false ),
+                                    et::functor( func1, "func1" ),
+                                    et::functor( func4, "func4" ),
+                                    et::functor( myFunctor, "myFunctor") } );
+
+   auto l = et::loop( s, 20 );
+   myFunctor->i = 42; // set after the tree was built: the node shares the object, so the run starts counting at 42
+
+   std::cout << *l << std::endl;
+   l->run();
+   // expected: func1 and myFunctor once per iteration, func2 on 20 / 5 iterations, func3 (onFirst = false) one time fewer
+   WALBERLA_CHECK_EQUAL( counter1, 20 );
+   WALBERLA_CHECK_EQUAL( counter2, 20 / 5 );
+   WALBERLA_CHECK_EQUAL( counter3, ( 20 / 5 ) - 1 );
+   WALBERLA_CHECK_EQUAL( myFunctor->i, 20 + 42 );
+
+   return 0;
+}
\ No newline at end of file
-- 
GitLab