diff --git a/src/blockforest/BlockForest.cpp b/src/blockforest/BlockForest.cpp
index dc67a03e3a3e5f0a29ff9b9cdf26c026a395e06b..5aa3323e8c7cd26a90354f404b2a702fd221410c 100644
--- a/src/blockforest/BlockForest.cpp
+++ b/src/blockforest/BlockForest.cpp
@@ -20,6 +20,7 @@
 //======================================================================================================================
 
 #include "BlockForest.h"
+#include "BlockForestFile.h"
 #include "BlockNeighborhoodSection.h"
 #include "SetupBlockForest.h"
 #include "core/Abort.h"
@@ -355,7 +356,7 @@ BlockForest::BlockForest( const uint_t process, const char* const filename, cons
    uint_t offset = 0;
    std::vector< uint8_t > buffer;
 
-   if( broadcastFile )
+   if( broadcastFile && (mpi::MPIManager::instance()->numProcesses() > 1) )
    {
       std::ifstream file;
       uint_t length = 0;
@@ -2752,8 +2753,9 @@ void BlockForest::update( PhantomBlockForest & phantomForest )
 }
 
 
-
-/// ATTENTION: 'suidMap' and 'suidBytes' must be identical for every process!
+/// For a description of the file format see BlockForestFile.h
+/// \attention 'suidMap' and 'suidBytes' must be identical for every process!
+/// \see BlockForestFile.h
 void BlockForest::saveToFile( const std::string & filename, FileIOMode fileIOMode,
                               const std::map< SUID, boost::dynamic_bitset<uint8_t> > & suidMap, const uint_t suidBytes ) const
 {
@@ -2764,7 +2766,7 @@ void BlockForest::saveToFile( const std::string & filename, FileIOMode fileIOMod
    uint_t dataSize = uint_t(2) + blocks_.size() * ( blockIdBytes + suidBytes ) + uint_t(2) + neighborhood_.size() * processIdBytes_;
    if( MPIManager::instance()->rank() == 0 )
    {
-      dataSize += uint_c(89); // header
+      dataSize += internal::FILE_HEADER_SIZE; // header
       ++dataSize; // number of SUIDs
       for( auto suid = suidMap.begin(); suid != suidMap.end(); ++suid )
          dataSize += uint_t(1) + uint_c( suid->first.getIdentifier().length() );
diff --git a/src/blockforest/BlockForestFile.h b/src/blockforest/BlockForestFile.h
new file mode 100644
index 0000000000000000000000000000000000000000..af8619b711a876b0ef7c6e316b177f846d95d800
--- /dev/null
+++ b/src/blockforest/BlockForestFile.h
@@ -0,0 +1,95 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BlockForestFile.h
+//! \author Florian Schornbaum <florian.schornbaum@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+namespace walberla {
+namespace blockforest {
+namespace internal {
+
+//**********************************************************************************************************************
+/*!
+ *   \file BlockForestFile.h
+ *   \brief Description of the BlockForest save file format.
+ *
+ *   \section FileFormat File Format
+ *
+ *   \subsection HEADER HEADER
+ *
+ *   BYTES                      | DESCRIPTION
+ *   ---------------------------|-----------------
+ *   6 x ( 3 + sizeof(real_t) ) | domain AABB
+ *   3 x 4                      | number of coarse/root blocks in each direction ( max 2^32 - 1 = 4 294 967 295 )
+ *   3 x 1                      | domain periodicity
+ *   1                          | block forest depth (= number of levels - 1)
+ *   1                          | treeIdDigits (= number of bits used for storing the tree ID [tree ID marker + tree index])
+ *   1                          | processIdBytes (= number of bytes required for storing process IDs)
+ *   1                          | insertBuffersIntoProcessNetwork? ( 0=no, 1=yes )
+ *   4                          | number of processes ( max 2^32 - 1 = 4 294 967 295 )
+ *
+ *   --> 23 + 6 x ( 3 + sizeof(real_t) ) BYTES
+ *
+ *   \subsection SUID SUID MAPPING:
+ *
+ *   1 | number of SUIDs (= \#SUIDs)
+ *
+ *   \code{.unparsed}
+ *   for each SUID:
+ *      1                | length of the UID identifier string
+ *      length-of-string | UID identifier string
+ *   \endcode
+ *
+ *   --> 1 + \#SUIDs + number-of-characters-of-all-identifiers-combined BYTES
+ *
+ *   How the mapping works:\n
+ *   SUID #1 is assigned bit #1 ( -> [...]0 0000 0001 )\n
+ *   SUID #2 is assigned bit #2 ( -> [...]0 0000 0010 )\n
+ *   SUID #3 is assigned bit #3 ( -> [...]0 0000 0100 )\n
+ *   ...\n
+ *   For every block a bit mask containing information about all SUIDs (i.e., is the corresponding SUID set at this block?) is saved.
+ *   -> The number of available SUIDs determines the size that is needed to store this bit mask (= SUID-mask-bytes).
+ *   One byte is enough to hold 8 SUIDs, two bytes are enough to hold 16 SUIDs, ...
+ *
+ *   \subsection BLOCKDATA BLOCK DATA
+ *
+ *   \code{.unparsed}
+ *   for each process:
+ *      2 | number of blocks (can be '0' -> buffer process!; max 2^16 - 1 = 65 535 )
+ *      if( number-of-blocks > 0 ):
+ *         for each block:
+ *            block-ID-bytes  | ID of the block (the number of bytes required for storing the block ID largely depends on the size
+ *                              of the simulation, the total number of blocks, and the number of refinement levels)
+ *            SUID-mask-bytes | state of the block = bit mask containing information about all SUIDs (see "How the mapping works" for SUIDs,
+ *                              SUID-mask-bytes can be equal to 0 bytes if no SUIDs exist!)
+ *      2 | number of neighbor processes
+ *      for each neighbor process:
+ *         process-ID-bytes | process ID / rank of the neighbor process (one byte if there are fewer than 257 processes,
+ *                                                                          two bytes if there are fewer than 65 537 processes, ...)
+ *   \endcode
+ */
+//**********************************************************************************************************************
+
+static const uint_t FILE_HEADER_SIZE = uint_t(23) + uint_t(6) * ( uint_t(3) + sizeof( real_t ) ); ///< size of the file header in bytes: 23 + 6 x ( 3 + sizeof(real_t) ), see section HEADER
+
+}
+}
+}
diff --git a/src/blockforest/SetupBlockForest.cpp b/src/blockforest/SetupBlockForest.cpp
index c2d003d9adea6fa0e65e15e3db5453115dc36865..9ae11ce9cb13d6b669c3792acde13e74ae02199f 100644
--- a/src/blockforest/SetupBlockForest.cpp
+++ b/src/blockforest/SetupBlockForest.cpp
@@ -19,6 +19,7 @@
 //
 //======================================================================================================================
 
+#include "BlockForestFile.h"
 #include "BlockNeighborhoodConstruction.h"
 #include "BlockNeighborhoodSection.h"
 #include "HilbertCurveConstruction.h"
@@ -1615,57 +1616,9 @@ void SetupBlockForest::calculateProcessDistributionFinalization( const bool reor
 
 
 //**********************************************************************************************************************
-/*!
-*   File Format:
-*
-*   HEADER:
-*   -------
-*     BYTES            |     DESCRIPTION
-*   6 x ( 3 + real_t ) | domain AABB
-*   3 x 4              | number of coarse/root blocks in each direction ( max 2^32 = 4 294 967 296 )
-*   3 x 1              | domain periodicity
-*   1                  | block forest depth (= number of levels - 1)
-*   1                  | treeIdDigits (= number of bits used for storing the tree ID [tree ID marker + tree index])
-*   1                  | processIdBytes (= number of bytes required for storing process IDs)
-*   1                  | insertBuffersIntoProcessNetwork? ( 0=no, 1=yes )
-*   4                  | number of processes ( max 2^32 = 4 294 967 296 )
-*
-*   --> 23 + 6 x ( 3 + real_t ) BYTES
-*
-*   SUID MAPPING:
-*   -------------
-*   1 | number of SUIDs (= #SUIDs)
-*
-*   for each SUID:
-*      1                | length of the UID identifier string
-*      length-of-string | UID identifier string
-*
-*   --> 1 + #SUIDs + number-of-characters-of-all-identifiers-combined BYTES
-*
-*   How the mapping works:
-*   SUID #1 is assigned bit #1 ( -> [...]0 0000 0001 )
-*   SUID #2 is assigned bit #2 ( -> [...]0 0000 0010 )
-*   SUID #3 is assigned bit #3 ( -> [...]0 0000 0100 )
-*   ...
-*   For every block a bit mask containing information about all SUIDs (i.e., is the corresponding SUID set at this block?) is saved.
-*   -> The number of available SUIDs determines the size that is needed to store this bit mask (= SUID-mask-bytes).
-*   One byte is enough to hold 8 SUIDs, two bytes are enough to hold 16 SUIDs, ...
-*
-*   BLOCK DATA:
-*   -----------
-*   for each process:
-*      2 | number of blocks (can be '0' -> buffer process! - 2^16 = 65 536 )
-*      if( number-of-blocks > 0 ):
-*         for each block:
-*            block-ID-bytes  | ID of the block (the number of bytes required for storing the block ID largely depends on the size
-*                              of the simulation, the total number of blocks, and the number of refinement levels)
-*            SUID-mask-bytes | state of the block = bit mask containing information about all SUIDs (see "How the mapping works" for SUIDs,
-*                              SUID-mask-bytes can be equal to 0 bytes if no SUIDs exist!)
-*      2 | number of neighbor processes
-*      for each neighbor process:
-*         process-ID-bytes | process ID / rank of the neighbor process (one byte if there are less than 257 processes,
-*                                                                          two bytes if there are less than 65 537 processes, ...)
-*/
+/// \brief Saves this SetupBlockForest to the file 'filename'.
+///
+/// For a description of the file format see BlockForestFile.h \see BlockForestFile.h
 //**********************************************************************************************************************
 
 void SetupBlockForest::saveToFile( const char* const filename ) const {
@@ -1675,7 +1628,7 @@ void SetupBlockForest::saveToFile( const char* const filename ) const {
    // HEADER
 
    uint_t offset = 0;
-   std::vector< uint8_t > buffer( 6 * sizeof( real_t ) + 6 + 12 + 3 * 4 + 3 + 1 + 1 + 1 + 1 + 4 );
+   std::vector< uint8_t > buffer( internal::FILE_HEADER_SIZE );
 
    // domain AABB
 
diff --git a/tests/blockforest/CMakeLists.txt b/tests/blockforest/CMakeLists.txt
index 0e590e8aa6eeb2f1a25af888d5793bb278ab1e2b..cdadb523d771c14d37cd032a8bd67e79b2131e4f 100644
--- a/tests/blockforest/CMakeLists.txt
+++ b/tests/blockforest/CMakeLists.txt
@@ -13,6 +13,17 @@ waLBerla_execute_test( NAME SetupBlockForestTest LABELS longrun CONFIGURATIONS R
 waLBerla_compile_test( FILES BlockForestTest.cpp )
 waLBerla_execute_test( NAME BlockForestTest PROCESSES 4 )
 
+waLBerla_compile_test( NAME   SaveLoad FILES SaveLoadTest.cpp DEPENDS core blockforest  )
+waLBerla_execute_test( NAME   SaveLoad01 COMMAND $<TARGET_FILE:SaveLoad> PROCESSES 1 )
+waLBerla_execute_test( NAME   SaveLoad02 COMMAND $<TARGET_FILE:SaveLoad> PROCESSES 2 )
+waLBerla_execute_test( NAME   SaveLoad04 COMMAND $<TARGET_FILE:SaveLoad> PROCESSES 4 )
+waLBerla_execute_test( NAME   SaveLoad08 COMMAND $<TARGET_FILE:SaveLoad> PROCESSES 8 )
+if( WALBERLA_BUILD_WITH_MPI )
+set_property( TEST SaveLoad02 PROPERTY DEPENDS SaveLoad01 ) # serialize the test runs to avoid I/O conflicts when running ctest with -jN
+set_property( TEST SaveLoad04 PROPERTY DEPENDS SaveLoad02 ) # serialize the test runs to avoid I/O conflicts when running ctest with -jN
+set_property( TEST SaveLoad08 PROPERTY DEPENDS SaveLoad04 ) # serialize the test runs to avoid I/O conflicts when running ctest with -jN
+endif()
+
 waLBerla_compile_test( FILES StructuredBlockForestTest.cpp )
 waLBerla_execute_test( NAME StructuredBlockForestTest )
 
@@ -36,4 +47,4 @@ waLBerla_execute_test( NAME GhostLayerCommTest8 COMMAND $<TARGET_FILE:GhostLayer
 waLBerla_compile_test( FILES communication/DirectionBasedReduceCommTest.cpp DEPENDS field timeloop )
 waLBerla_execute_test( NAME DirectionBasedReduceCommTest1 COMMAND $<TARGET_FILE:DirectionBasedReduceCommTest> )
 waLBerla_execute_test( NAME DirectionBasedReduceCommTest3 COMMAND $<TARGET_FILE:DirectionBasedReduceCommTest> PROCESSES 3 )
-waLBerla_execute_test( NAME DirectionBasedReduceCommTest8 COMMAND $<TARGET_FILE:DirectionBasedReduceCommTest> PROCESSES 8 )
\ No newline at end of file
+waLBerla_execute_test( NAME DirectionBasedReduceCommTest8 COMMAND $<TARGET_FILE:DirectionBasedReduceCommTest> PROCESSES 8 )
diff --git a/tests/blockforest/SaveLoadTest.cpp b/tests/blockforest/SaveLoadTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..11ab5d06b78ee4ad4e860056f77b9e79d7b5a701
--- /dev/null
+++ b/tests/blockforest/SaveLoadTest.cpp
@@ -0,0 +1,122 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SaveLoadTest.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+
+#include "blockforest/all.h"
+#include "core/all.h"
+#include "core/math/IntegerFactorization.h"
+#include "domain_decomposition/all.h"
+
+#include "core/debug/TestSubsystem.h"
+
+using namespace walberla;
+using namespace walberla::blockforest;
+
+/// Round trip check for BlockForest file I/O: saves a uniform block grid to "SerializeDeserialize.sbf" using
+/// 'ioMode', loads the file into a new BlockForest ('broadcast' is passed to its constructor), and compares both.
+void blockForestSaveLoadTest(const BlockForest::FileIOMode ioMode, const bool broadcast)
+{
+   WALBERLA_LOG_DEVEL_ON_ROOT("*** DUMPING ***");
+
+   // split the number of processes into three factors; they form the process grid passed below
+   const auto procs = math::getFactors3D(uint_c( MPIManager::instance()->numProcesses() ));
+
+   auto forestDump = createUniformBlockGrid( math::AABB(0,0,0,60,60,60),   // domain
+                                             2,2,2,                        // number of blocks
+                                             1,1,1,                        // cells
+                                             procs[0],procs[1],procs[2] ); // number of processes
+   forestDump->getBlockForest().saveToFile("SerializeDeserialize.sbf", ioMode);
+
+   std::vector< walberla::uint64_t > dump;
+   for (auto savedIt = forestDump->begin(); savedIt != forestDump->end(); ++savedIt)
+   {
+      WALBERLA_LOG_DEVEL("DUMPING BLOCK (" << savedIt->getId() << ") " << savedIt->getAABB() );
+      dump.push_back( savedIt->getId().getID() );
+   }
+
+   WALBERLA_MPI_SECTION() {WALBERLA_MPI_BARRIER();}
+
+   WALBERLA_LOG_DEVEL_ON_ROOT("*** CHECKING ***");
+
+   shared_ptr< BlockForest > forestCheck( new BlockForest( uint_c( MPIManager::instance()->rank() ), "SerializeDeserialize.sbf", broadcast ) );
+
+   std::vector< walberla::uint64_t > check;
+   for (auto loadedIt = forestCheck->begin(); loadedIt != forestCheck->end(); ++loadedIt)
+   {
+      WALBERLA_LOG_DEVEL("CHECKING BLOCK (" << loadedIt->getId() << ") " << loadedIt->getAABB() );
+      check.push_back( loadedIt->getId().getID() );
+   }
+
+   // properties of the forest as a whole
+   WALBERLA_CHECK_EQUAL(forestDump->getBlockIdBytes(), forestCheck->getBlockIdBytes());
+   WALBERLA_CHECK_EQUAL(forestDump->getDepth(), forestCheck->getDepth());
+   WALBERLA_CHECK_EQUAL(forestDump->getDomain(), forestCheck->getDomain());
+   WALBERLA_CHECK_EQUAL(forestDump->getNumberOfBlocks(), forestCheck->getNumberOfBlocks());
+
+   // the collected block IDs are compared as sorted sequences, so the iteration order does not matter
+   std::sort(dump.begin(), dump.end());
+   std::sort(check.begin(), check.end());
+
+   WALBERLA_CHECK_EQUAL( dump.size(), check.size() );
+   for (size_t i = 0; i != dump.size(); ++i)
+   {
+      WALBERLA_CHECK_EQUAL(dump[i], check[i]);
+   }
+}
+
+/// Test driver: runs blockForestSaveLoadTest for all three file I/O modes, first with 'broadcast' enabled
+/// and then with it disabled. Between two consecutive runs, an MPI barrier is issued and the MPIManager
+/// is reset (both only inside a WALBERLA_MPI_SECTION).
+/// \return EXIT_SUCCESS
+int main( int argc, char ** argv )
+{
+   walberla::debug::enterTestMode();
+
+   WALBERLA_MPI_SECTION()
+   {
+      walberla::MPIManager::instance()->initializeMPI( &argc, &argv );
+   }
+
+   // run order: every I/O mode with broadcast == true, afterwards every I/O mode with broadcast == false
+   const BlockForest::FileIOMode ioModes[] = { BlockForest::MPI_PARALLEL,
+                                               BlockForest::MASTER_SLAVE,
+                                               BlockForest::SERIALIZED_DISTRIBUTED };
+   const bool broadcastFlags[] = { true, false };
+
+   const size_t numModes = sizeof( ioModes ) / sizeof( ioModes[0] );
+   const size_t numFlags = sizeof( broadcastFlags ) / sizeof( broadcastFlags[0] );
+
+   for( size_t run = 0; run < numFlags * numModes; ++run )
+   {
+      if( run != 0 )
+      {
+         // separates two consecutive runs; nothing of this happens after the final run
+         WALBERLA_MPI_SECTION() {WALBERLA_MPI_BARRIER();}
+         WALBERLA_MPI_SECTION() {walberla::MPIManager::instance()->resetMPI();}
+      }
+
+      const BlockForest::FileIOMode ioMode = ioModes[ run % numModes ];
+      const bool broadcast = broadcastFlags[ run / numModes ];
+
+      blockForestSaveLoadTest( ioMode, broadcast );
+   }
+
+   return EXIT_SUCCESS;
+}