Commit a8129903 authored by Sebastian Eibl's avatar Sebastian Eibl
Browse files

OpenMP support is now switchable within the generation script

Since MSVC only has beta support for OpenMP 2.0 firstprivate
cannot be used for these compilers. However, firstprivate
is necessary for a reasonably safe implementation of OpenMP
in MESA-PD. The solution is to move OpenMP support to the
generator and disable OpenMP for the checked in version.
The waLBerla OpenMP guards are removed as they are no longer
needed. It is now assumed that if MESA-PD is generated with
OpenMP support it is about to be used. If OpenMP is disabled
with CMake warnings will pop up about undefined pragmas.
parent 01cc9f18
Pipeline #25341 passed with stages
in 187 minutes and 39 seconds
......@@ -15,6 +15,7 @@ if __name__ == '__main__':
args = parser.parse_args()
mpd = Module(args.path)
mpd.enable_openmp(False)
ps = mpd.add(data.ParticleStorage())
ps.set_shapes('Sphere', 'HalfSpace', 'CylindricalBoundary', 'Box', 'Ellipsoid')
ps.add_property("position", "walberla::mesa_pd::Vec3", defValue="real_t(0)", syncMode="ALWAYS")
......
......@@ -17,11 +17,14 @@ class Module:
name of the generated module
"""
self.context = {}
self.context['path'] = Path(path).resolve()
self.context['name'] = module_name
self.context['module_path'] = self.context['path'] / 'src' / self.context['name']
self.context['test_path'] = self.context['path'] / 'tests' / self.context['name']
path = Path(path).resolve()
self.context = {
'path': path,
'name': module_name,
'module_path': path / 'src' / module_name,
'test_path': path / 'tests' / module_name,
'enableOpenMP': False
}
self.components = []
......@@ -29,6 +32,9 @@ class Module:
self.components.append(component)
return component
def enable_openmp(self, enabled):
self.context['enableOpenMP'] = enabled
def rename(self):
for filename in (f for f in self.context['module_path'].glob('**/*') if f.is_file()):
filedata = None
......
......@@ -100,5 +100,6 @@ class ParticleStorage():
def generate(self, module):
ctx = {'module': module, **self.context}
generate_file(module['module_path'], 'data/ParticleStorage.templ.h', ctx, filename='data/ParticleStorage.h')
generate_file(module['module_path'], 'data/ParticleAccessor.templ.h', ctx, filename='data/ParticleAccessor.h')
generate_file(module['module_path'], 'data/ParticleStorage.templ.h', ctx)
generate_file(module['module_path'], 'data/ParticleAccessor.templ.h', ctx)
generate_file(module['module_path'], 'common/ParticleFunctions.templ.h', ctx)
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file ParticleFunctions.h
//! \author Sebastian Eibl <sebastian.eibl@fau.de>
//
//======================================================================================================================
//======================================================================================================================
//
// THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
//
//======================================================================================================================
#pragma once
#include <mesa_pd/data/DataTypes.h>
#include <mesa_pd/domain/IDomain.h>
#include <core/logging/Logging.h>
#include <core/mpi/MPIManager.h>
namespace walberla {
namespace mesa_pd {
/**
* Returns the "surface" velocity at a certain point given in world frame coordinates.
*/
template <typename Accessor>
inline Vec3 getVelocityAtWFPoint(const size_t p_idx, Accessor& ac, const Vec3& wf_pt)
{
return ac.getLinearVelocity(p_idx) + cross(ac.getAngularVelocity(p_idx), ( wf_pt - ac.getPosition(p_idx) ));
}
/**
* Force is applied at the center of mass.
*/
template <typename Accessor>
inline void addForceAtomic(const size_t p_idx, Accessor& ac, const Vec3& f)
{
// Increasing the force and torque on this particle
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getForceRef(p_idx)[0] += f[0];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getForceRef(p_idx)[1] += f[1];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getForceRef(p_idx)[2] += f[2];
}
template <typename Accessor>
inline void addForceAtWFPosAtomic(const size_t p_idx, Accessor& ac, const Vec3& f, const Vec3& wf_pt)
{
// Increasing the force and torque on this particle
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getForceRef(p_idx)[0] += f[0];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getForceRef(p_idx)[1] += f[1];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getForceRef(p_idx)[2] += f[2];
const auto t = cross(( wf_pt - ac.getPosition(p_idx) ), f);
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getTorqueRef(p_idx)[0] += t[0];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getTorqueRef(p_idx)[1] += t[1];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getTorqueRef(p_idx)[2] += t[2];
}
/**
* Torque is directly applied on the particle.
*/
template <typename Accessor>
inline void addTorqueAtomic(const size_t p_idx, Accessor& ac, const Vec3& t)
{
// Increasing the torque on this particle
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getTorqueRef(p_idx)[0] += t[0];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getTorqueRef(p_idx)[1] += t[1];
{%- if module.enableOpenMP %}
#ifdef _OPENMP
#pragma omp atomic
#endif
{%- endif %};
ac.getTorqueRef(p_idx)[2] += t[2];
}
} //namespace mesa_pd
} //namespace walberla
......@@ -369,9 +369,9 @@ inline void ContactStorage::forEachContact(const bool openmp, const Selector& se
{
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
{%- if module.enableOpenMP %}
#pragma omp parallel for schedule(static) firstprivate(selector,func) if (openmp)
{%- endif %}
for (int64_t i = 0; i < int64_c(len); ++i)
if (selector(uint64_c(i), acForPS)){
func( uint64_c(i), std::forward<Args>(args)... );
......
......@@ -171,9 +171,9 @@ void LinkedCells::clear()
{
const uint64_t cellsSize = cells_.size();
//clear existing linked cells
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
{%- if module.enableOpenMP %}
#pragma omp parallel for schedule(static)
{%- endif %}
for (int64_t i = 0; i < int64_c(cellsSize); ++i)
cells_[uint64_c(i)] = -1;
infiniteParticles_ = -1;
......@@ -185,9 +185,9 @@ inline void LinkedCells::forEachParticlePair{%- if half %}Half{%- endif %}(const
{
static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
WALBERLA_UNUSED(openmp);
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
{%- if module.enableOpenMP %}
#pragma omp parallel for collapse(2) schedule(static) firstprivate(selector,func) if (openmp)
{%- endif %}
for (int z = 0; z < numCellsPerDim_[2]; ++z)
{
for (int y = 0; y < numCellsPerDim_[1]; ++y)
......
......@@ -490,9 +490,9 @@ inline void ParticleStorage::forEachParticle(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
{%- if module.enableOpenMP %}
#pragma omp parallel for schedule(static) firstprivate(selector,func) if (openmp)
{%- endif %}
for (int64_t i = 0; i < int64_c(len); ++i)
{
if (selector(uint64_c(i), acForPS))
......@@ -512,9 +512,9 @@ inline void ParticleStorage::forEachParticlePair(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
{%- if module.enableOpenMP %}
#pragma omp parallel for schedule(static) firstprivate(selector,func) if (openmp)
{%- endif %}
for (int64_t i = 0; i < int64_c(len); ++i)
{
for (int64_t j = 0; j < int64_c(len); ++j)
......@@ -542,9 +542,9 @@ inline void ParticleStorage::forEachParticlePairHalf(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
{%- if module.enableOpenMP %}
#pragma omp parallel for schedule(static) firstprivate(selector,func) if (openmp)
{%- endif %}
for (int64_t i = 0; i < int64_c(len); ++i)
{
for (int64_t j = i+1; j < int64_c(len); ++j)
......
......@@ -26,6 +26,7 @@
#pragma once
#include <mesa_pd/common/ParticleFunctions.h>
#include <mesa_pd/data/DataTypes.h>
#include <mesa_pd/data/IAccessor.h>
......@@ -129,31 +130,9 @@ inline void ForceLJ::operator()(const size_t p_idx, const size_t np_idx, Accesso
const real_t force = real_t(48) * sr6 * ( sr6 - real_t(0.5) ) * sr2 * getEpsilon(ac.getType(p_idx), ac.getType(np_idx));
const Vec3 f = force * dir;
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[0] += f[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[1] += f[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[2] += f[2];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(np_idx)[0] -= f[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(np_idx)[1] -= f[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(np_idx)[2] -= f[2];
// Add normal force at contact point
addForceAtomic( p_idx, ac, f );
addForceAtomic( np_idx, ac, -f );
}
}
......
......@@ -44,51 +44,23 @@ inline Vec3 getVelocityAtWFPoint(const size_t p_idx, Accessor& ac, const Vec3& w
template <typename Accessor>
inline void addForceAtomic(const size_t p_idx, Accessor& ac, const Vec3& f)
{
// Increasing the force and torque on this particle
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[0] += f[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[1] += f[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
// Increasing the force and torque on this particle;
ac.getForceRef(p_idx)[0] += f[0];;
ac.getForceRef(p_idx)[1] += f[1];;
ac.getForceRef(p_idx)[2] += f[2];
}
template <typename Accessor>
inline void addForceAtWFPosAtomic(const size_t p_idx, Accessor& ac, const Vec3& f, const Vec3& wf_pt)
{
// Increasing the force and torque on this particle
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[0] += f[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[1] += f[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
// Increasing the force and torque on this particle;
ac.getForceRef(p_idx)[0] += f[0];;
ac.getForceRef(p_idx)[1] += f[1];;
ac.getForceRef(p_idx)[2] += f[2];
const auto t = cross(( wf_pt - ac.getPosition(p_idx) ), f);
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getTorqueRef(p_idx)[0] += t[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getTorqueRef(p_idx)[1] += t[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
const auto t = cross(( wf_pt - ac.getPosition(p_idx) ), f);;
ac.getTorqueRef(p_idx)[0] += t[0];;
ac.getTorqueRef(p_idx)[1] += t[1];;
ac.getTorqueRef(p_idx)[2] += t[2];
}
......@@ -98,21 +70,12 @@ inline void addForceAtWFPosAtomic(const size_t p_idx, Accessor& ac, const Vec3&
template <typename Accessor>
inline void addTorqueAtomic(const size_t p_idx, Accessor& ac, const Vec3& t)
{
// Increasing the torque on this particle
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getTorqueRef(p_idx)[0] += t[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getTorqueRef(p_idx)[1] += t[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
// Increasing the torque on this particle;
ac.getTorqueRef(p_idx)[0] += t[0];;
ac.getTorqueRef(p_idx)[1] += t[1];;
ac.getTorqueRef(p_idx)[2] += t[2];
}
} //namespace mesa_pd
} //namespace walberla
} //namespace walberla
\ No newline at end of file
......@@ -601,9 +601,6 @@ inline void ContactStorage::forEachContact(const bool openmp, const Selector& se
{
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
if (selector(uint64_c(i), acForPS)){
func( uint64_c(i), std::forward<Args>(args)... );
......@@ -615,9 +612,6 @@ inline void ContactStorage::forEachContact(const bool openmp, const Selector& se
{
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
if (selector(uint64_c(i), acForPS)){
func( uint64_c(i), std::forward<Args>(args)... );
......
......@@ -175,9 +175,6 @@ void LinkedCells::clear()
{
const uint64_t cellsSize = cells_.size();
//clear existing linked cells
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
for (int64_t i = 0; i < int64_c(cellsSize); ++i)
cells_[uint64_c(i)] = -1;
infiniteParticles_ = -1;
......@@ -187,9 +184,6 @@ inline void LinkedCells::forEachParticlePair(const bool openmp, const Selector&
{
static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
WALBERLA_UNUSED(openmp);
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int z = 0; z < numCellsPerDim_[2]; ++z)
{
for (int y = 0; y < numCellsPerDim_[1]; ++y)
......@@ -282,9 +276,6 @@ inline void LinkedCells::forEachParticlePairHalf(const bool openmp, const Select
{
static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
WALBERLA_UNUSED(openmp);
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int z = 0; z < numCellsPerDim_[2]; ++z)
{
for (int y = 0; y < numCellsPerDim_[1]; ++y)
......
......@@ -1045,9 +1045,6 @@ inline void ParticleStorage::forEachParticle(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
{
if (selector(uint64_c(i), acForPS))
......@@ -1064,9 +1061,6 @@ inline void ParticleStorage::forEachParticle(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
{
if (selector(uint64_c(i), acForPS))
......@@ -1083,9 +1077,6 @@ inline void ParticleStorage::forEachParticlePair(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
{
for (int64_t j = 0; j < int64_c(len); ++j)
......@@ -1110,9 +1101,6 @@ inline void ParticleStorage::forEachParticlePair(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
{
for (int64_t j = 0; j < int64_c(len); ++j)
......@@ -1137,9 +1125,6 @@ inline void ParticleStorage::forEachParticlePairHalf(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
{
for (int64_t j = i+1; j < int64_c(len); ++j)
......@@ -1161,9 +1146,6 @@ inline void ParticleStorage::forEachParticlePairHalf(const bool openmp,
WALBERLA_UNUSED(openmp);
const uint64_t len = size();
#ifdef _OPENMP
#pragma omp parallel for schedule(static) if (openmp)
#endif
for (int64_t i = 0; i < int64_c(len); ++i)
{
for (int64_t j = i+1; j < int64_c(len); ++j)
......
......@@ -26,6 +26,7 @@
#pragma once
#include <mesa_pd/common/ParticleFunctions.h>
#include <mesa_pd/data/DataTypes.h>
#include <mesa_pd/data/IAccessor.h>
......@@ -138,31 +139,9 @@ inline void ForceLJ::operator()(const size_t p_idx, const size_t np_idx, Accesso
const real_t force = real_t(48) * sr6 * ( sr6 - real_t(0.5) ) * sr2 * getEpsilon(ac.getType(p_idx), ac.getType(np_idx));
const Vec3 f = force * dir;
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[0] += f[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[1] += f[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(p_idx)[2] += f[2];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(np_idx)[0] -= f[0];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(np_idx)[1] -= f[1];
#ifdef _OPENMP
#pragma omp atomic
#endif
ac.getForceRef(np_idx)[2] -= f[2];
// Add normal force at contact point
addForceAtomic( p_idx, ac, f );
addForceAtomic( np_idx, ac, -f );
}
}
......
......@@ -102,21 +102,6 @@ int main( int argc, char ** argv )
lj(0, 1, accessor);
WALBERLA_CHECK_FLOAT_EQUAL( p1.getForce(), -p2.getForce() );
// thread safety test
auto singleForce = p1.getForce();
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
for (int i = 0; i < 100; ++i)
lj(0, 1, accessor);
WALBERLA_LOG_DEVEL(p1);
WALBERLA_LOG_DEVEL(p2);
WALBERLA_CHECK_FLOAT_EQUAL( p1.getForce(), -p2.getForce() );
WALBERLA_CHECK_FLOAT_EQUAL_EPSILON( p1.getForce(),
real_t(101) * singleForce,
real_t(1e-13) );
return EXIT_SUCCESS;
}
......
......@@ -107,23 +107,6 @@ int main( int argc, char ** argv )
WALBERLA_CHECK_FLOAT_EQUAL( ps->getForce(0), -ps->getForce(1) );
WALBERLA_CHECK_FLOAT_EQUAL( ps->getForce(0), Vec3(1,1,1).getNormalized() * ((std::sqrt(real_t(12)) - 4) * sd.getStiffness(0, 0)) );
// thread safety test
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
for (int i = 0; i < 100; ++i)
sd(contact.getIdx1(),
contact.getIdx2(),
ac,