MPI development of boundaries for contact search and data communication; fix a memory leak

This commit is contained in:
Hamidreza Norouzi 2024-04-27 08:44:35 -07:00
parent 1321e6340e
commit 94fcc3d01b
27 changed files with 3379 additions and 158 deletions

View File

@ -0,0 +1,106 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __gatherMaster_hpp__
#define __gatherMaster_hpp__
#include <numeric>
#include "procCommunication.hpp"
#include "stdVectorHelper.hpp"
namespace pFlow::MPI
{
template<typename T>
class gatherMaster
:
public procCommunication
{
protected:
std::vector<T> buffer_;
public:
gatherMaster(const localProcessors& procs)
:
procCommunication(procs)
{}
span<T> getData()
{
if(this->localMaster())
return span<T>( buffer_.data(), buffer_.size());
else
return span<T>(nullptr, 0);
}
std::vector<T> moveData()
{
return std::move(buffer_);
}
bool gatherData(span<T> data)
{
int thisN = data.size();
procVector<int> numElems(this->processors(), true);
procVector<int> displ(this->processors(), true);
if( !this->collectAllToMaster(thisN, numElems) )
{
fatalErrorInFunction<<
"error in collecting number of elements from processors"<<endl;
return false;
}
auto totalN = std::accumulate(
numElems.begin(),
numElems.end(),
static_cast<int>(0));
buffer_.resize(totalN);
std::exclusive_scan(
numElems.begin(),
numElems.end(),
displ.begin(),
0);
auto bufferSpan = span<T>(this->buffer_.data(),this->buffer_.size() );
return CheckMPI(
Gatherv(
data,
bufferSpan,
numElems.getSpan(),
displ.getSpan(),
this->localMasterNo(),
this->localCommunicator()),
false);
}
};
}
#endif
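
A minimal usage sketch, not part of this commit: it assumes MPI is already initialized and that pFlowProcessors() (declared in pFlowProcessors.hpp) returns the local processor group, as done elsewhere in this commit.
#include "gatherMaster.hpp"
#include "pFlowProcessors.hpp"
bool gatherToMaster(pFlow::span<pFlow::real> localValues)
{
    // every rank contributes its local values
    pFlow::MPI::gatherMaster<pFlow::real> gather(pFlow::pFlowProcessors());
    if(!gather.gatherData(localValues)) return false;
    // the concatenated buffer is non-empty only on the local master
    auto all = gather.moveData();
    return true;
}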

View File

@ -0,0 +1,427 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __mpiCommunication_H__
#define __mpiCommunication_H__
#include "mpiTypes.hpp"
#include "types.hpp"
#include "span.hpp"
namespace pFlow::MPI
{
extern DataType realx3Type__;
extern DataType realx4Type__;
extern DataType int32x3Type__;
template<typename T>
auto constexpr Type()
{
return MPI_BYTE;
}
template<typename T>
auto constexpr sFactor()
{
return sizeof(T);
}
template<>
auto constexpr Type<char>()
{
return MPI_CHAR;
}
template<>
auto constexpr sFactor<char>()
{
return 1;
}
template<>
auto constexpr Type<short>()
{
return MPI_SHORT;
}
template<>
auto constexpr sFactor<short>()
{
return 1;
}
template<>
auto constexpr Type<unsigned short>()
{
return MPI_UNSIGNED_SHORT;
}
template<>
auto constexpr sFactor<unsigned short>()
{
return 1;
}
template<>
auto constexpr Type<int>()
{
return MPI_INT;
}
template<>
auto constexpr sFactor<int>()
{
return 1;
}
template<>
auto constexpr Type<unsigned int>()
{
return MPI_UNSIGNED;
}
template<>
auto constexpr sFactor<unsigned int>()
{
return 1;
}
template<>
auto constexpr Type<long>()
{
return MPI_LONG;
}
template<>
auto constexpr sFactor<long>()
{
return 1;
}
template<>
auto constexpr Type<unsigned long>()
{
return MPI_UNSIGNED_LONG;
}
template<>
auto constexpr sFactor<unsigned long>()
{
return 1;
}
template<>
auto constexpr Type<float>()
{
return MPI_FLOAT;
}
template<>
auto constexpr sFactor<float>()
{
return 1;
}
template<>
auto constexpr Type<double>()
{
return MPI_DOUBLE;
}
template<>
auto constexpr sFactor<double>()
{
return 1;
}
template<>
inline
auto Type<realx3>()
{
return realx3Type__;
}
template<>
auto constexpr sFactor<realx3>()
{
return 1;
}
template<>
inline
auto Type<realx4>()
{
return realx4Type__;
}
template<>
auto constexpr sFactor<realx4>()
{
return 1;
}
template<>
inline
auto Type<int32x3>()
{
return int32x3Type__;
}
template<>
auto constexpr sFactor<int32x3>()
{
return 1;
}
/*inline
auto createByteSequence(int sizeOfElement)
{
DataType newType;
MPI_Type_contiguous(sizeOfElement, MPI_CHAR, &newType);
MPI_Type_commit(&newType);
return newType;
}*/
inline
auto TypeCommit(DataType* type)
{
return MPI_Type_commit(type);
}
inline
auto TypeFree(DataType* type)
{
return MPI_Type_free(type);
}
template<typename T>
inline auto getCount(Status* status, int& count)
{
int lCount;
auto res = MPI_Get_count(status, Type<T>(), &lCount);
count = lCount/sFactor<T>();
return res;
}
template<typename T>
inline int convertIndex(const int& ind)
{
return ind*sFactor<T>();
}
template<typename T>
inline auto send(span<T> data, int dest, int tag, Comm comm)
{
return MPI_Send(
data.data(),
sFactor<T>()*data.size(),
Type<T>(),
dest,
tag,
comm);
}
template<typename T>
inline auto Isend(span<T> data, int dest, int tag, Comm comm, Request* req)
{
return MPI_Isend(
data.data(),
sFactor<T>()*data.size(),
Type<T>(),
dest,
tag,
comm,
req);
}
template<typename T>
inline auto Isend(const T& data, int dest, int tag, Comm comm, Request* req)
{
return MPI_Isend(
&data,
sFactor<T>(),
Type<T>(),
dest,
tag,
comm,
req);
}
template<typename T>
inline auto recv(span<T> data, int source, int tag, Comm comm, Status *status)
{
return MPI_Recv(
data.data(),
sFactor<T>()*data.size(),
Type<T>(),
source,
tag,
comm,
status);
}
template<typename T>
inline auto Irecv(T& data, int source, int tag, Comm comm, Request* req)
{
return MPI_Irecv(
&data,
sFactor<T>(),
Type<T>(),
source,
tag,
comm,
req);
}
template<typename T>
inline auto Irecv(span<T> data, int source, int tag, Comm comm, Request* req)
{
return MPI_Irecv(
data.data(),
sFactor<T>()*data.size(),
Type<T>(),
source,
tag,
comm,
req);
}
template<typename T>
inline auto scan(T sData, T& rData, Comm comm, Operation op = SumOp)
{
return MPI_Scan(&sData, &rData, sFactor<T>()*1, Type<T>(), op , comm );
}
// gathering one scalar data to root processor
template<typename T>
inline auto gather(T sendData, span<T>& recvData, int root, Comm comm)
{
return MPI_Gather(
&sendData,
sFactor<T>()*1,
Type<T>(),
recvData.data(),
sFactor<T>()*1,
Type<T>(),
root,
comm);
}
template<typename T>
inline auto allGather(T sendData, span<T>& recvData, Comm comm)
{
return MPI_Allgather(
&sendData,
sFactor<T>()*1,
Type<T>(),
recvData.data(),
sFactor<T>()*1,
Type<T>(),
comm);
}
template<typename T>
inline auto scatter(span<T> sendData, T& recvData, int root, Comm comm)
{
return MPI_Scatter(
sendData.data(),
sFactor<T>()*1,
Type<T>(),
&recvData,
sFactor<T>()*1,
Type<T>(),
root,
comm);
}
template<typename T>
inline auto Bcast(T& sendData, int root, Comm comm)
{
return MPI_Bcast(
&sendData, sFactor<T>()*1, Type<T>(), root, comm);
}
template<typename T>
bool typeCreateIndexedBlock(
span<int32> index,
DataType &newType)
{
auto res = MPI_Type_create_indexed_block(
index.size(),
sFactor<T>(),
index.data(),
Type<T>(),
&newType);
if(res == Success)
{
TypeCommit(&newType);
}
else
{
return false;
}
return true;
}
template<typename T>
inline auto Gatherv
(
span<T> sendData,
span<T>& recvData,
span<int> recvCounts,
span<int> displs,
int root,
Comm comm)
{
return MPI_Gatherv(
sendData.data(),
sendData.size()*sFactor<T>(),
Type<T>(),
recvData.data(),
recvCounts.data(),
displs.data(),
Type<T>(),
root,
comm
);
}
inline auto Wait(Request* request, Status* status)
{
return MPI_Wait(request, status);
}
inline auto typeFree(DataType& type)
{
return MPI_Type_free(&type);
}
}
#endif //__mpiCommunication_H__
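
A small sketch, not part of the commit, of how the Type<T>()/sFactor<T>() pair drives the wrappers: known primitives map to their MPI datatype with a factor of 1, while unknown types fall back to MPI_BYTE with sFactor = sizeof(T). It assumes two ranks in CommWorld and an initialized MPI environment.
#include "mpiCommunication.hpp"
void exchange(pFlow::span<pFlow::real> buf, int rank)
{
    using namespace pFlow::MPI;
    Status st;
    if(rank == 0)
        send(buf, 1, 0, CommWorld);       // count = sFactor<real>()*buf.size(), datatype = Type<real>()
    else if(rank == 1)
        recv(buf, 0, 0, CommWorld, &st);
}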

View File

@ -0,0 +1,69 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __mpiTypes_H__
#define __mpiTypes_H__
#include <mpi.h>
namespace pFlow::MPI
{
// types
using Comm = MPI_Comm;
using Group = MPI_Group;
using Status = MPI_Status;
using Offset = MPI_Offset;
using Request = MPI_Request;
using Operation = MPI_Op;
using Information = MPI_Info;
using DataType = MPI_Datatype;
inline Comm CommWorld = MPI_COMM_WORLD;
// all nulls
inline auto ProcNull = MPI_PROC_NULL;
inline auto InfoNull = MPI_INFO_NULL;
inline auto RequestNull = MPI_REQUEST_NULL;
inline auto StatusIgnore = MPI_STATUS_IGNORE;
inline auto StatusesIgnore = MPI_STATUSES_IGNORE;
inline auto FileNull = MPI_FILE_NULL;
inline Comm CommNull = MPI_COMM_NULL;
inline auto TypeNull = MPI_DATATYPE_NULL;
// errors
inline const auto Success = MPI_SUCCESS;
inline const auto ErrOp = MPI_ERR_OP;
inline const auto SumOp = MPI_SUM;
inline const size_t MaxNoProcessors = 2048;
}
#endif //__mpiTypes_H__

View File

@ -0,0 +1,30 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#include "procCommunication.hpp"
pFlow::MPI::procCommunication::procCommunication
(
const localProcessors& proc
)
:
processors_(proc)
{}

View File

@ -0,0 +1,178 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __procCommunication_hpp__
#define __procCommunication_hpp__
#include "procVector.hpp"
#include "localProcessors.hpp"
#include "mpiCommunication.hpp"
namespace pFlow::MPI
{
class procCommunication
{
protected:
const localProcessors& processors_;
public:
procCommunication(const localProcessors& proc);
~procCommunication()=default;
/// @brief Return the local processors object
inline
const auto& processors()const
{
return processors_;
}
/// @brief Tell if this processor is the master processor in the local
/// communicator
/// @return true if this processor is the master
inline
bool localMaster()const
{
return processors_.localMaster();
}
inline
auto localSize()const
{
return processors_.localSize();
}
inline
auto localRank()const
{
return processors_.localRank();
}
inline
auto localCommunicator()const
{
return processors_.localCommunicator();
}
/// @brief return the master number in the local communicator
auto localMasterNo()const
{
return processors_.localMasterNo();
}
/// Send a single value from the master to all processors, including the master itself (local communicator)
template<typename T>
std::pair<T,bool> distributeMasterToAll(const T& val)
{
T retVal = val;
auto res = CheckMPI(
Bcast(retVal, localMasterNo(),localCommunicator() ),
false);
return {retVal, res};
}
/// @brief Send a single value to all processors including the master (in the local communicator)
/// @param val value to be sent
/// @param recvVal received value
/// @return true if successful, false on failure
template<typename T>
bool distributeMasterToAll(const T& val, T& recvVal)
{
recvVal = val;
return CheckMPI(
Bcast(recvVal, localMasterNo(), localCommunicator()),
false);
}
/// @brief Distribute the values in the vector (whose size equals the number of
/// processors in the local communicator), sending one value to each processor
template<typename T>
std::pair<T,bool> distributeMasterToAll(const procVector<T>& vals)
{
T val;
auto vec = vals.getSpan();
auto res = CheckMPI(
scatter(vec, val, localMasterNo(), localCommunicator()),
false);
return {val, res};
}
/// @brief Each processor in the local communicator calls this function with a value
/// and the values are distributed among all processors
template<typename T>
std::pair<procVector<T>, bool> collectAllToAll(const T& val)
{
procVector<T> allVec(processors_);
auto vec = allVec.getSpan();
auto res = CheckMPI(
allGather(val, vec, localCommunicator()),
false);
return {allVec, res};
}
/// @brief Each processor in the local communicator calls this function with a value
/// and the values are distributed among all processors
template<typename T>
bool collectAllToAll(const T& val, procVector<T>& allVec)
{
auto vec = allVec.getSpan();
return CheckMPI(
allGather(val, vec, localCommunicator()),
false);
}
/// @brief Each processor in the local communicator calls this function with a value
/// and all values are collected in the master processor
template<typename T>
std::pair<procVector<T>,bool> collectAllToMaster(const T& val)
{
// only on master processor
procVector<T> masterVec(processors_, true);
auto masterSpan = masterVec.getSpan();
auto res = CheckMPI(
gather(val,masterSpan, localMasterNo(), localCommunicator()),
false);
return {masterVec, res};
}
template<typename T>
bool collectAllToMaster(const T& val, procVector<T>& masterVec)
{
// only on master processor
auto [vec, res] = collectAllToMaster(val);
masterVec = vec;
return res;
}
}; //procCommunication
} // pFlow::MPI
#endif //__procCommunication_hpp__
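
A usage sketch, not part of the commit, assuming pFlowProcessors() gives the local processor group: each rank reports a local count to the master, and the master's result is broadcast back to every rank.
#include <numeric>
#include "procCommunication.hpp"
#include "pFlowProcessors.hpp"
void countAndBroadcast(pFlow::uint32 nLocal)
{
    pFlow::MPI::procCommunication comm(pFlow::pFlowProcessors());
    // the collected vector holds valid contents only on the local master
    auto [allCounts, ok] = comm.collectAllToMaster(nLocal);
    if(!ok) return;
    pFlow::uint32 total = 0;
    if(comm.localMaster())
        total = std::accumulate(allCounts.begin(), allCounts.end(), 0u);
    // broadcast the master's value to every rank in the local communicator
    auto [totalOnAll, ok2] = comm.distributeMasterToAll(total);
}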

View File

@ -0,0 +1,199 @@
#ifndef __procVector_hpp__
#define __procVector_hpp__
// from PhasicFlow
#include "localProcessors.hpp"
#include "span.hpp"
#include "streams.hpp"
#include "IOPattern.hpp"
#include "mpiTypes.hpp"
namespace pFlow::MPI
{
template<typename T>
class procVector
:
public std::vector<T>
{
public:
using ProcVectorType = procVector<T>;
using VectorType = std::vector<T>;
protected:
int rank_ = 0;
bool isMaster_ = false;
using VectorType::reserve;
using VectorType::resize;
using VectorType::assign;
using VectorType::clear;
using VectorType::erase;
public:
procVector(
const localProcessors& procs,
bool onlyMaster = false)
:
rank_(procs.localRank()),
isMaster_(procs.localMaster())
{
if( onlyMaster && !isMaster_ ) return;
this->reserve(procs.localSize());
this->resize(procs.localSize());
}
procVector(
const T& val,
const localProcessors& procs,
bool onlyMaster = false)
:
procVector(procs, onlyMaster)
{
std::fill(this->begin(), this->end(), val);
}
procVector(const T& val, const procVector& src)
{
this->reserve(src.size());
this->resize(src.size());
std::fill(this->begin(), this->end(), val);
}
procVector(const localProcessors& procs, const VectorType& src)
:
procVector(procs)
{
if(src.size()!= this->size())
{
fatalErrorInFunction<<
"Size of std::vector and procVector does not match in construction"<<endl;
fatalExit;
}
this->assign(src.begin(), src.end());
}
procVector(const procVector&) = default;
procVector(procVector&&) = default;
procVector& operator=(const procVector&) = default;
procVector& operator=(procVector&&) = default;
procVector& operator=(const VectorType& src)
{
if(src.size() != this->size())
{
fatalErrorInFunction<<
"Size of std::vector and procVector does not match in copy assignment"<<endl;
fatalExit;
}
static_cast<VectorType&>(*this).operator=(src);
return *this;
}
procVector& operator=(VectorType&& src)
{
if(src.size() != this->size())
{
fatalErrorInFunction<<
"Size of std::vector and procVector does not match in move assignment"
<<endl;
fatalExit;
}
static_cast<VectorType&>(*this).operator=(std::move(src));
return *this;
}
procVector(const localProcessors& procs, VectorType&& src)
:
VectorType(std::move(src))
{
if(this->size()!= static_cast<size_t>(procs.localSize()))
{
fatalErrorInFunction<<
"Size of std::vector and procVector does not match in move"<<endl;
fatalExit;
}
isMaster_ = procs.localMaster();
rank_ = procs.localRank();
}
~procVector()=default;
inline
auto& thisValue()
{
return VectorType::operator[](rank_);
}
inline
const auto& thisValue()const
{
return VectorType::operator[](rank_);
}
inline
auto size()const
{
return VectorType::size();
}
inline
auto rank()const
{
return rank_;
}
inline
auto getSpan()
{
return span<T>(this->data(), this->size());
}
inline
auto getSpan()const
{
return span<T>(const_cast<T*>(this->data()), this->size());
}
bool write(
iOstream& os,
const IOPattern& iop ) const
{
return writeStdVector(os, *this, iop);
}
};
template<typename T>
inline iOstream& operator << (iOstream& os, const procVector<T>& ovec )
{
if( !ovec.write(os, IOPattern::AllProcessorsDifferent) )
{
ioErrorInFile(os.name(), os.lineNumber());
fatalExit;
}
return os;
}
}
#endif
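
A brief sketch, not part of the commit: a procVector holds exactly one slot per processor of the local communicator, and getSpan() exposes it to the MPI wrappers.
#include "procVector.hpp"
#include "pFlowProcessors.hpp"
void procVectorExample()
{
    pFlow::MPI::procVector<int> counts(0, pFlow::pFlowProcessors());
    counts.thisValue() = 42;        // the slot owned by this rank
    auto s = counts.getSpan();      // contiguous view for the gather/scatter wrappers
}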

View File

@ -0,0 +1,158 @@
template<typename T>
pFlow::MPI::scatteredMasterDistribute<T>::scatteredMasterDistribute
(
const localProcessors& procs
)
:
procCommunication(procs),
indexedMap_(TypeNull, procs, true)
{
}
template<typename T>
bool pFlow::MPI::scatteredMasterDistribute<T>::setDataMaps
(
procVector<span<uint32>>& maps
)
{
if(this->localMaster())
{
if(maps.size() != this->localSize() )
{
fatalErrorInFunction<<"size mismatch";
return false;
}
std::vector<int32> index;
freeIndexedMap();
for(auto proc = 0; proc< maps.size(); proc++)
{
auto m = maps[proc];
index.resize(m.size());
for(auto i=0; i<index.size(); i++ )
{
index[i] = m[i];
}
DataType dt;
if(! typeCreateIndexedBlock<T>( makeSpan(index), dt))
{
fatalErrorInFunction;
return false;
}
else
{
indexedMap_[proc] = dt;
}
}
}
return true;
}
template<typename T>
bool pFlow::MPI::scatteredMasterDistribute<T>::setDataMaps
(
procVector<span<int32>>& maps
)
{
if(this->localMaster())
{
if(maps.size() != this->localSize() )
{
fatalErrorInFunction<<"size mismatch";
return false;
}
freeIndexedMap();
for(auto proc = 0; proc< maps.size(); proc++)
{
DataType dt;
if( !typeCreateIndexedBlock<T>(maps[proc], dt) )
{
fatalErrorInFunction;
return false;
}
else
{
indexedMap_[proc] = dt;
}
}
}
return true;
}
template<typename T>
void pFlow::MPI::scatteredMasterDistribute<T>::freeIndexedMap()
{
for(auto i=0; i<indexedMap_.size(); i++)
{
if(indexedMap_[i]!= TypeNull)
{
TypeFree(&indexedMap_[i]);
indexedMap_[i] = TypeNull;
}
}
}
template<typename T>
bool pFlow::MPI::scatteredMasterDistribute<T>::distribute
(
span<T>& sendBuff,
span<T>& recvb
)
{
procVector<Request> requests(processors(), true);
procVector<Status> statuses(processors(), true);
if(this->localMaster())
{
bool res = true;
for(int32 i = indexedMap_.size()-1; i>=0; i--)
{
res = res&&CheckMPI(
MPI_Issend(
sendBuff.data(),
1,
indexedMap_[i],
i,
0,
localCommunicator(),
&requests[i]),
false);
}
if(!res)return false;
}
Status stat;
bool success = CheckMPI(
MPI_Recv(
recvb.data(),
recvb.size()*sFactor<T>(),
Type<T>(),
0,
0,
localCommunicator(),
&stat),
false);
if(this->localMaster())
{
CheckMPI(
MPI_Waitall(requests.size(), requests.data(), statuses.data()),
false
);
}
return success;
}

View File

@ -0,0 +1,67 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __scatteredMasterDistribute_hpp__
#define __scatteredMasterDistribute_hpp__
#include "mpiCommunication.hpp"
#include "procCommunication.hpp"
#include "procVector.hpp"
#include "stdVectorHelper.hpp"
#include "streams.hpp"
namespace pFlow::MPI
{
template<typename T>
class scatteredMasterDistribute : public procCommunication
{
protected:
procVector<DataType> indexedMap_;
void freeIndexedMap();
public:
scatteredMasterDistribute(const localProcessors& procs);
~scatteredMasterDistribute()
{
freeIndexedMap();
}
scatteredMasterDistribute(const scatteredMasterDistribute&) = delete;
scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) =
delete;
bool setDataMaps(procVector<span<uint32>>& maps);
bool setDataMaps(procVector<span<int32>>& maps);
bool distribute(span<T>& sendBuff, span<T>& recvb);
};
} // pFlow::MPI
#include "scatteredMasterDistribute.cpp"
#endif //__scatteredMasterDistribute_hpp__
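
A usage sketch, not part of the commit: the master builds one indexed datatype per destination rank from export maps (for example the partitioner's allExportLists()) and scatters its send buffer; every rank, master included, receives its own portion. It assumes pFlowProcessors() provides the local processor group.
#include "scatteredMasterDistribute.hpp"
#include "pFlowProcessors.hpp"
bool scatterPositions(
    pFlow::MPI::procVector<pFlow::span<pFlow::uint32>>& maps,
    pFlow::span<pFlow::realx3>& src,
    pFlow::span<pFlow::realx3>& dst)
{
    pFlow::MPI::scatteredMasterDistribute<pFlow::realx3> dist(pFlow::pFlowProcessors());
    if(!dist.setDataMaps(maps)) return false;   // maps are only inspected on the master
    return dist.distribute(src, dst);           // dst is filled on every rank
}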

View File

@ -0,0 +1,166 @@
#include "scatteredMasterDistributeChar.hpp"
pFlow::MPI::scatteredMasterDistribute<char>::scatteredMasterDistribute
(
size_t sizeOfElement,
const localProcessors& procs
)
:
procCommunication(procs),
indexedMap_(TypeNull, procs, true),
sizeOfElement_(sizeOfElement)
{}
bool pFlow::MPI::scatteredMasterDistribute<char>::setDataMaps
(
procVector<span<uint32>>& maps
)
{
if(this->localMaster())
{
if(maps.size() != this->localSize() )
{
fatalErrorInFunction<<"size mismatch";
return false;
}
freeIndexedMap();
std::vector<MPI_Aint> index;
for(auto proc = 0; proc< maps.size(); proc++)
{
auto m = maps[proc];
index.resize(m.size());
for(auto i=0; i<index.size(); i++ )
{
index[i] = m[i]*sizeOfElement_;
}
DataType dt;
MPI_Type_create_hindexed_block(
m.size(),
sizeOfElement_,
index.data(),
MPI_BYTE,
&dt);
MPI_Type_commit(&dt);
indexedMap_[proc] = dt;
}
}
return true;
}
bool pFlow::MPI::scatteredMasterDistribute<char>::setDataMaps
(
procVector<span<int32>>& maps
)
{
if(this->localMaster())
{
if(maps.size() != this->localSize() )
{
fatalErrorInFunction<<"size mismatch";
return false;
}
std::vector<MPI_Aint> index;
freeIndexedMap();
for(auto proc = 0; proc< maps.size(); proc++)
{
auto m = maps[proc];
index.resize(m.size());
for(auto i=0; i<index.size(); i++ )
{
index[i] = m[i]*sizeOfElement_;
}
DataType dt;
MPI_Type_create_hindexed_block(
index.size(),
sizeOfElement_,
index.data(),
MPI_CHAR,
&dt);
MPI_Type_commit(&dt);
indexedMap_[proc] = dt;
}
}
return true;
}
void pFlow::MPI::scatteredMasterDistribute<char>::freeIndexedMap()
{
for(auto i=0; i<indexedMap_.size(); i++)
{
if(indexedMap_[i]!= TypeNull)
{
TypeFree(&indexedMap_[i]);
indexedMap_[i] = TypeNull;
}
}
}
bool pFlow::MPI::scatteredMasterDistribute<char>::distribute
(
span<char>& sendBuff,
span<char>& recvb
)
{
procVector<Request> requests(processors(), true);
procVector<Status> statuses(processors(), true);
if(this->localMaster())
{
bool res = true;
for(int32 i = indexedMap_.size()-1; i>=0; i--)
{
res = res&&CheckMPI(
MPI_Issend(
sendBuff.data(),
1,
indexedMap_[i],
i,
0,
localCommunicator(),
&requests[i]),
false);
}
if(!res)return false;
}
Status stat;
bool success = CheckMPI(
MPI_Recv(
recvb.data(),
recvb.size(),
MPI_CHAR,
0,
0,
localCommunicator(),
&stat),
true);
if(this->localMaster())
{
CheckMPI(
MPI_Waitall(requests.size(), requests.data(), statuses.data()),
false
);
}
return success;
}

View File

@ -0,0 +1,66 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __scatteredMasterDistributeChar_hpp__
#define __scatteredMasterDistributeChar_hpp__
#include "scatteredMasterDistribute.hpp"
namespace pFlow::MPI
{
template<>
class scatteredMasterDistribute<char> : public procCommunication
{
protected:
procVector<DataType> indexedMap_;
size_t sizeOfElement_;
void freeIndexedMap();
public:
scatteredMasterDistribute(
size_t sizeOfElement,
const localProcessors& procs
);
~scatteredMasterDistribute()
{
freeIndexedMap();
}
scatteredMasterDistribute(const scatteredMasterDistribute&) = delete;
scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) =
delete;
bool setDataMaps(procVector<span<uint32>>& maps);
bool setDataMaps(procVector<span<int32>>& maps);
bool distribute(span<char>& sendBuff, span<char>& recvb);
};
} // pFlow::MPI
#endif //__scatteredMasterDistributeChar_hpp__
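
A sketch, not part of the commit: the char specialization moves raw blocks of sizeOfElement bytes each, which is how initialTransferBlockData(span<char>, ...) uses it later in this commit. pFlowProcessors() is assumed to give the local processor group.
#include "scatteredMasterDistributeChar.hpp"
#include "pFlowProcessors.hpp"
bool scatterBlocks(
    pFlow::MPI::procVector<pFlow::span<pFlow::uint32>>& maps,
    pFlow::span<char>& src,
    pFlow::span<char>& dst,
    size_t elementSize)
{
    pFlow::MPI::scatteredMasterDistribute<char> dist(elementSize, pFlow::pFlowProcessors());
    return dist.setDataMaps(maps) && dist.distribute(src, dst);
}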

View File

@ -0,0 +1,52 @@
template<typename T>
bool pFlow::MPI::dataIOMPI<T>::gatherData(span<T> data )
{
if(this->ioPattern_.isAllProcessorsDifferent())
{
this->bufferSpan_ = data;
return true;
}
if( this->ioPattern_.isMasterProcessorDistribute())
{
auto gatherT = pFlow::MPI::gatherMaster<T>(pFlowProcessors());
if(!gatherT.gatherData(data))
{
fatalErrorInFunction<<"Error in gathering data to master"<<endl;
return false;
}
this->buffer_ = gatherT.moveData();
this->bufferSpan_ = span<T>(this->buffer_.data(),this->buffer_.size() );
return true;
}
if( this->ioPattern_.isMasterProcessorOnly() || this->ioPattern_.isAllProcessorSimilar() )
{
if( this->ioPattern_.isMaster() )
{
this->bufferSpan_ = data;
return true;
}
else
{
this->bufferSpan_ = span<T>(nullptr, 0);
return true;
}
}
return false;
}
template<typename T>
pFlow::MPI::dataIOMPI<T>::dataIOMPI(const IOPattern& iop)
:
dataIO<T>(iop)
{}

View File

@ -0,0 +1,58 @@
#ifndef __dataIOMPI_hpp__
#define __dataIOMPI_hpp__
#include "dataIO.hpp"
#include "pFlowProcessors.hpp"
#include "gatherMaster.hpp"
namespace pFlow::MPI
{
template<typename T>
class dataIOMPI
:
public dataIO<T>
{
public:
using DataIOType = dataIO<T>;
using DataIOMPIType = dataIOMPI<T>;
protected:
bool gatherData(span<T> data ) override;
public:
TypeInfoTemplate111("dataIO",T,"MPI");
explicit dataIOMPI(const IOPattern& iop);
dataIOMPI(const dataIOMPI&) = default;
dataIOMPI(dataIOMPI&&) = default;
dataIOMPI& operator=(const dataIOMPI&) = default;
dataIOMPI& operator=(dataIOMPI&&) = default;
~dataIOMPI() = default;
add_vCtor
(
DataIOType,
DataIOMPIType,
IOPattern
);
}; //dataIOMPI
} //namespace pFlow::MPI
#include "dataIOMPI.cpp"
#endif //__dataIOMPI_hpp__

View File

@ -0,0 +1,27 @@
#include "types.hpp"
#include "dataIOMPI.hpp"
template class pFlow::MPI::dataIOMPI<pFlow::uint8>;
template class pFlow::MPI::dataIOMPI<pFlow::int8>;
template class pFlow::MPI::dataIOMPI<pFlow::int32>;
template class pFlow::MPI::dataIOMPI<pFlow::int64>;
template class pFlow::MPI::dataIOMPI<pFlow::uint32>;
template class pFlow::MPI::dataIOMPI<pFlow::uint32x3>;
template class pFlow::MPI::dataIOMPI<pFlow::uint64>;
template class pFlow::MPI::dataIOMPI<pFlow::size_t>;
template class pFlow::MPI::dataIOMPI<pFlow::real>;
template class pFlow::MPI::dataIOMPI<pFlow::realx3>;
template class pFlow::MPI::dataIOMPI<pFlow::realx4>;
template class pFlow::MPI::dataIOMPI<pFlow::word>;

View File

@ -24,15 +24,16 @@ Licence:
#include "scatteredMasterDistribute.hpp"
#include "scatteredMasterDistributeChar.hpp"
pFlow::MPISimulationDomain::MPISimulationDomain(systemControl& control)
pFlow::MPI::MPISimulationDomain::MPISimulationDomain(systemControl& control)
:
simulationDomain(control),
communication_(pFlowProcessors()),
subDomains_(pFlowProcessors()),
domainPartition_( makeUnique<rcb1DPartitioning>(subDict("decomposition"), globalBox_))
subDomainsAll_(pFlowProcessors()),
numPointsAll_(pFlowProcessors()),
domainPartitioning_( makeUnique<rcb1DPartitioning>(subDict("decomposition"), globalBox()))
{}
bool pFlow::MPISimulationDomain::createBoundaryDicts()
bool pFlow::MPI::MPISimulationDomain::createBoundaryDicts()
{
auto& boundaries = this->subDict("boundaries");
@ -60,48 +61,94 @@ bool pFlow::MPISimulationDomain::createBoundaryDicts()
"in dictionary "<< boundaries.globalName()<<endl;
return false;
}
if( initialThisDomainActive() )
if( thisDomainActive_ )
{
if( neighbors[i] == -1 )
{
bDict.add("mirrorProcessorNo", processors::globalRank());
bDict.add("neighborProcessorNo", processors::globalRank());
}
else
{
bDict.add("mirrorProcessorNo", neighbors[i]);
bDict.add("neighborProcessorNo", neighbors[i]);
bDict.addOrReplace("type", "processor");
}
warningInFunction<<"replace the method initialThisDomainActive()"<<endl;
}
else
{
bDict.add("mirrorProcessorNo", processors::globalRank());
bDict.add("neighborProcessorNo", processors::globalRank());
bDict.addOrReplace("type", "none");
warningInFunction<<"None: replace the method initialThisDomainActive()"<<endl;
}
if( bDict.getVal<word>("type") == "periodic")
{
fatalErrorInFunction<<
"periodic is not implemented "<<endl;
fatalExit;
}
}
return true;
}
bool pFlow::MPISimulationDomain::setThisDomain()
bool pFlow::MPI::MPISimulationDomain::setThisDomain()
{
thisDomain_ = domain(domainPartition_->localBox());
if(!communication_.collectAllToAll(thisDomain_, subDomains_))
thisDomain_ = domain(domainPartitioning_->localBox());
uint32 thisNumPoints = initialNumberInThis();
if(!communication_.collectAllToAll(thisNumPoints, numPointsAll_))
{
fatalErrorInFunction<<
"Failed to distribute number of points."<<endl;
return false;
}
uint32 allNumPoints = std::accumulate(numPointsAll_.begin(), numPointsAll_.end(), 0u);
if( thisNumPoints != 0u )
{
thisDomainActive_ = true;
}
else
{
if(communication_.localMaster()&& allNumPoints == 0u)
thisDomainActive_ = true;
else
thisDomainActive_ = false;
}
if( thisDomainActive_ )
{
bool allInactive = true;
for(int32 i=0; i<communication_.localSize(); i++ )
{
if(i == communication_.localRank() )continue;
if(numPointsAll_[i]!=0)
{
allInactive = false;
break;
}
}
if(allInactive)
{
thisDomain_ = domain(globalBox());
}
}
if(!communication_.collectAllToAll(thisDomain_, subDomainsAll_))
{
fatalErrorInFunction<< "Failed to distributed domains"<<endl;
return false;
}
return true;
}
std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
std::vector<int> pFlow::MPI::MPISimulationDomain::findPlaneNeighbors() const
{
std::vector<int> neighbors(sizeOfBoundaries(), -2);
domain gDomain(globalBox_);
domain gDomain(globalBox());
// left
if( thisDomain_.left().parallelTouch( gDomain.left() ) )
@ -109,12 +156,12 @@ std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
neighbors[0] = -1;
}
for(int i=0; i<sizeOfBoundaries(); i++)
for(int i=0; i<subDomainsAll_.size(); i++)
{
if(i == subDomains_.rank())continue;
if(i == subDomainsAll_.rank())continue;
if( thisDomain_.left().parallelTouch(
subDomains_[i].right()) )
subDomainsAll_[i].right()) )
{
neighbors[0] = i;
break;
@ -127,13 +174,13 @@ std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
neighbors[1] = -1;
}
for(int i=0; i<sizeOfBoundaries(); i++)
for(int i=0; i<subDomainsAll_.size(); i++)
{
if(i == subDomains_.rank())continue;
if(i == subDomainsAll_.rank())continue;
if( thisDomain_.right().parallelTouch(
subDomains_[i].left()) )
subDomainsAll_[i].left()) )
{
neighbors[1] = i;
break;
@ -146,12 +193,12 @@ std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
neighbors[2] = -1;
}
for(int i=0; i<sizeOfBoundaries(); i++)
for(int i=0; i<subDomainsAll_.size(); i++)
{
if(i == subDomains_.rank())continue;
if(i == subDomainsAll_.rank())continue;
if( thisDomain_.bottom().parallelTouch(
subDomains_[i].top()) )
subDomainsAll_[i].top()) )
{
neighbors[2] = i;
break;
@ -164,12 +211,12 @@ std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
neighbors[3] = -1;
}
for(int i=0; i<sizeOfBoundaries(); i++)
for(int i=0; i<subDomainsAll_.size(); i++)
{
if(i == subDomains_.rank())continue;
if(i == subDomainsAll_.rank())continue;
if( thisDomain_.top().parallelTouch(
subDomains_[i].bottom()) )
subDomainsAll_[i].bottom()) )
{
neighbors[3] = i;
break;
@ -182,12 +229,12 @@ std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
neighbors[4] = -1;
}
for(int i=0; i<sizeOfBoundaries(); i++)
for(int i=0; i<subDomainsAll_.size(); i++)
{
if(i == subDomains_.rank())continue;
if(i == subDomainsAll_.rank())continue;
if( thisDomain_.rear().parallelTouch(
subDomains_[i].front()) )
subDomainsAll_[i].front()) )
{
neighbors[4] = i;
break;
@ -200,12 +247,12 @@ std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
neighbors[5] = -1;
}
for(int i=0; i<sizeOfBoundaries(); i++)
for(int i=0; i<subDomainsAll_.size(); i++)
{
if(i == subDomains_.rank())continue;
if(i == subDomainsAll_.rank())continue;
if( thisDomain_.front().parallelTouch(
subDomains_[i].rear()) )
subDomainsAll_[i].rear()) )
{
neighbors[5] = i;
break;
@ -215,39 +262,37 @@ std::vector<int> pFlow::MPISimulationDomain::findPlaneNeighbors() const
}
const pFlow::dictionary &
pFlow::MPISimulationDomain::thisBoundaryDict() const
pFlow::MPI::MPISimulationDomain::thisBoundaryDict() const
{
return this->subDict("MPIBoundaries");
}
bool pFlow::MPISimulationDomain::initialUpdateDomains(span<realx3> pointPos)
bool pFlow::MPI::MPISimulationDomain::initialUpdateDomains(span<realx3> pointPos)
{
pFlagTypeHost flags(pointPos.size(), 0 , pointPos.size());
initialNumPoints_ = pointPos.size();
if( !domainPartition_->partition(pointPos, flags) )
if( !domainPartitioning_->partition(pointPos, flags) )
{
fatalErrorInFunction<<
"Point partitioning failed."<<endl;
return false;
}
if(!setThisDomain()) return false;
if(!createBoundaryDicts()) return false;
return true;
}
pFlow::uint32 pFlow::MPISimulationDomain::initialNumberInThis() const
pFlow::uint32 pFlow::MPI::MPISimulationDomain::initialNumberInThis() const
{
uint32 numImport = domainPartition_->numberImportThisProc();
uint32 numExport = domainPartition_->numberExportThisProc();
uint32 numImport = domainPartitioning_->numberImportThisProc();
uint32 numExport = domainPartitioning_->numberExportThisProc();
return max(initialNumPoints_+ numImport - numExport, 0u);
}
bool pFlow::MPISimulationDomain::initialThisDomainActive() const
{
return initialNumberInThis()>0;
}
bool pFlow::MPISimulationDomain::initialTransferBlockData
bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData
(
span<char> src,
span<char> dst,
@ -256,7 +301,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
{
MPI::scatteredMasterDistribute<char> dataDist(sizeOfElement, pFlowProcessors());
auto lists = domainPartition_->allExportLists();
auto lists = domainPartitioning_->allExportLists();
if(!dataDist.setDataMaps( lists ))
{
@ -273,7 +318,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
return true;
}
bool pFlow::MPISimulationDomain::initialTransferBlockData
bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData
(
span<realx3> src,
span<realx3> dst
@ -282,8 +327,8 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
MPI::scatteredMasterDistribute<realx3>
dataDist(pFlowProcessors());
auto lists = domainPartition_->allExportLists();
auto lists = domainPartitioning_->allExportLists();
if(!dataDist.setDataMaps( lists ))
{
fatalErrorInFunction;
@ -300,7 +345,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
return true;
}
bool pFlow::MPISimulationDomain::initialTransferBlockData
bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData
(
span<real> src,
span<real> dst
@ -309,7 +354,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
MPI::scatteredMasterDistribute<real>
dataDist(pFlowProcessors());
auto lists = domainPartition_->allExportLists();
auto lists = domainPartitioning_->allExportLists();
if(!dataDist.setDataMaps( lists ))
{
@ -327,7 +372,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
return true;
}
bool pFlow::MPISimulationDomain::initialTransferBlockData
bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData
(
span<uint32> src,
span<uint32> dst
@ -336,7 +381,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
MPI::scatteredMasterDistribute<uint32>
dataDist(pFlowProcessors());
auto lists = domainPartition_->allExportLists();
auto lists = domainPartitioning_->allExportLists();
if(!dataDist.setDataMaps( lists ))
{
@ -354,7 +399,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
return true;
}
bool pFlow::MPISimulationDomain::initialTransferBlockData
bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData
(
span<int32> src,
span<int32> dst
@ -363,7 +408,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
MPI::scatteredMasterDistribute<int32>
dataDist(pFlowProcessors());
auto lists = domainPartition_->allExportLists();
auto lists = domainPartitioning_->allExportLists();
if(!dataDist.setDataMaps( lists ))
{
@ -381,35 +426,25 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData
return true;
}
/*bool pFlow::MPISimulationDomain::updateDomains(
span<realx3> pointPos,
pFlagTypeHost flags)
{
if( !domainPartition_->partition(pointPos, flags) )
{
return false;
}
if(!setThisDomain()) return false;
if(!createBoundaryDicts()) return false;
return true;
}*/
pFlow::uint32 pFlow::MPISimulationDomain::numberToBeImported() const
pFlow::uint32 pFlow::MPI::MPISimulationDomain::numberToBeImported() const
{
return domainPartition_->numberImportThisProc();
return domainPartitioning_->numberImportThisProc();
}
pFlow::uint32 pFlow::MPISimulationDomain::numberToBeExported() const
pFlow::uint32 pFlow::MPI::MPISimulationDomain::numberToBeExported() const
{
return domainPartition_->numberExportThisProc();
return domainPartitioning_->numberExportThisProc();
}
bool pFlow::MPISimulationDomain::requiresDataTransfer() const
bool
pFlow::MPI::MPISimulationDomain::domainActive() const
{
notImplementedFunction;
return false;
return thisDomainActive_;
}
const pFlow::domain&
pFlow::MPI::MPISimulationDomain::thisDomain() const
{
return thisDomain_;
}
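
For reference, the activity rule implemented in setThisDomain() above can be restated as a small predicate (a sketch, not part of the commit):
#include "types.hpp"
bool domainIsActive(bool localMaster, pFlow::uint32 nThisProc, pFlow::uint32 nAllProcs)
{
    if(nThisProc != 0u) return true;          // any rank that owns points is active
    return localMaster && nAllProcs == 0u;    // in an empty simulation only the master stays active
}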

View File

@ -2,17 +2,17 @@
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@ -20,100 +20,99 @@ Licence:
#ifndef __MPISimulationDomain_hpp__
#define __MPISimulationDomain_hpp__
#include "simulationDomain.hpp"
#include "partitioning.hpp"
#include "procVector.hpp"
#include "procCommunication.hpp"
#include "procVector.hpp"
#include "simulationDomain.hpp"
namespace pFlow
namespace pFlow::MPI
{
class MPISimulationDomain
:
public simulationDomain
class MPISimulationDomain : public simulationDomain
{
protected:
MPI::procCommunication communication_;
private:
MPI::procVector<domain> subDomains_;
/// a processor communicator for the simulation domain
procCommunication communication_;
uniquePtr<partitioning> domainPartition_ = nullptr;
/// sub-domain (thisDomain_ for all processors)
procVector<domain> subDomainsAll_;
uint32 initialNumPoints_ = 0;
/// number of points in all processors
procVector<uint32> numPointsAll_;
bool createBoundaryDicts() override;
/// partitioning object
uniquePtr<partitioning> domainPartitioning_ = nullptr;
bool setThisDomain() override;
/// the actual limits of the simulation domain in this processor
domain thisDomain_;
std::vector<int>
findPlaneNeighbors()const;
uint32 initialNumPoints_ = 0;
bool thisDomainActive_ = false;
bool createBoundaryDicts() final;
bool setThisDomain() final;
std::vector<int> findPlaneNeighbors() const;
public:
TypeInfo("simulationDomain<MPI>");
TypeInfo("simulationDomain<MPI>");
MPISimulationDomain(systemControl& control);
explicit MPISimulationDomain(systemControl& control);
virtual
~MPISimulationDomain()=default;
~MPISimulationDomain() final = default;
add_vCtor
(
simulationDomain,
MPISimulationDomain,
systemControl
);
add_vCtor
(
simulationDomain,
MPISimulationDomain,
systemControl
);
const dictionary& thisBoundaryDict()const override;
/// @brief
/// @param pointPos
/// @return
bool initialUpdateDomains(span<realx3> pointPos)override;
const dictionary& thisBoundaryDict() const final;
/// @brief
/// @return
uint32 initialNumberInThis()const override;
/// @brief
/// @param pointPos
/// @return
bool initialUpdateDomains(span<realx3> pointPos) final;
bool initialThisDomainActive()const override;
/// @brief
/// @return
uint32 initialNumberInThis() const final;
bool initialTransferBlockData(
span<char> src,
span<char> dst,
size_t sizeOfElement)const override;
bool initialTransferBlockData(
span<realx3> src,
span<realx3> dst) const override;
bool initialTransferBlockData(
span<real> src,
span<real> dst) const override;
bool initialTransferBlockData(
span<uint32> src,
span<uint32> dst) const override;
bool initialTransferBlockData(
span<int32> src,
span<int32> dst) const override;
bool initialTransferBlockData(
span<char> src,
span<char> dst,
size_t sizeOfElement
) const final;
/*bool updateDomains(
span<realx3> pointPos,
pFlagTypeHost flags) override;*/
bool initialTransferBlockData(span<realx3> src, span<realx3> dst)
const final;
bool initialTransferBlockData(span<real> src, span<real> dst)
const final;
uint32 numberToBeImported()const override;
uint32 numberToBeExported()const override;
bool requiresDataTransfer() const override;
bool initialTransferBlockData(span<uint32> src, span<uint32> dst)
const final;
bool initialTransferBlockData(span<int32> src, span<int32> dst)
const final;
uint32 numberToBeImported() const final;
uint32 numberToBeExported() const final;
/// @brief Is this domain active?
/// Active means there are particles in it and the
/// boundaries and other entities of the simulation domain are valid
bool domainActive() const final;
const domain& thisDomain()const final;
};
}
} // namespace pFlow::MPI
#endif
#endif //__MPISimulationDomain_hpp__

View File

@ -0,0 +1,113 @@
#include "partitioning.hpp"
#include "error.hpp"
#include "streams.hpp"
void pFlow::partitioning::freeZoltan()
{
if(validPointers_)
{
Zoltan::LB_Free_Part(&importGlobalGids_, &importLocalGids_,
&importProcs_, &importToPart_);
Zoltan::LB_Free_Part(&exportGlobalGids_, &exportLocalGids_,
&exportProcs_, &exportToPart_);
validPointers_ = false;
}
zoltan_.release();
}
pFlow::partitioning::partitioning
(
const dictionary& dict,
const box& globalBox
)
:
globalBox_(globalBox)
{
if(!zoltanInitialized__)
{
auto rc = Zoltan_Initialize
(
processors::argc(),
processors::argv(),
&version_
);
if (rc != ZOLTAN_OK)
{
fatalErrorInFunction<<"Cannot initialize zoltan"<<endl;
fatalExit;
}
zoltanInitialized__ = true;
}
// Creates Zoltan object
zoltan_ = std::make_unique<Zoltan>(pFlowProcessors().localCommunicator());
zoltan_->Set_Param("DEBUG_LEVEL", "0");
zoltan_->Set_Param("LB_METHOD", "RCB");
zoltan_->Set_Param("NUM_GID_ENTRIES", "1");
zoltan_->Set_Param("NUM_LID_ENTRIES", "1");
zoltan_->Set_Param("OBJ_WEIGHT_DIM", "0");
zoltan_->Set_Param("RETURN_LISTS", "ALL");
}
bool pFlow::partitioning::partition(span<realx3> points, pFlagTypeHost flags)
{
pointCollection pointCollctn{points, flags};
return partition(pointCollctn);
}
int GetObjectSize
(
void *data,
int num_gid_entries,
int num_lid_entries,
ZOLTAN_ID_PTR global_id,
ZOLTAN_ID_PTR local_id,
int *ierr
)
{
*ierr = ZOLTAN_OK;
pFlow::uint32 s = *(static_cast<pFlow::uint32*>(data));
return static_cast<int>(s);
}
void PackObject
(
void *data,
int num_gid_entries,
int num_lid_entries,
ZOLTAN_ID_PTR global_id,
ZOLTAN_ID_PTR local_id,
int dest,
int size,
char *buf,
int *ierr
)
{
}
bool pFlow::partitioning::migrateData(span<char> src, span<char> dst, uint32 elementSize)
{
dataCollection data{src, dst, elementSize};
zoltan_->Set_Obj_Size_Fn(GetObjectSize, &elementSize);
return false;
}
pFlow::partitioning::~partitioning()
{
freeZoltan();
}
void pFlow::partitioning::printBox()const
{
pOutput<< "localBox:" << localBox_<<endl;
}

View File

@ -0,0 +1,168 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __partitioning_hpp__
#define __partitioning_hpp__
#include "zoltan_cpp.h"
#include "pFlowProcessors.hpp"
#include "virtualConstructor.hpp"
#include "box.hpp"
#include "span.hpp"
#include "pointFlag.hpp"
#include "procVector.hpp"
namespace pFlow
{
struct pointCollection
{
span<realx3> points_;
pFlagTypeHost pFlag_;
uint32 numActivePoints()const
{
return pFlag_.numActive();
}
};
struct dataCollection
{
span<char> srcData_;
span<char> dstData_;
uint32 elementSize_;
};
class partitioning
{
protected:
float version_ = 0.0;
std::unique_ptr<Zoltan> zoltan_ = nullptr;
bool validPointers_ = false;
box globalBox_;
box localBox_;
int32 changes_, numImport_, numExport_;
id_t *importGlobalGids_, *importLocalGids_, *exportGlobalGids_, *exportLocalGids_;
int32 *importProcs_, *importToPart_, *exportProcs_, *exportToPart_;
uint32 numBeforePartition_ = 0 ;
static inline bool zoltanInitialized__ = false;
void freeZoltan();
virtual
bool partition(pointCollection& points) = 0;
public:
partitioning(
const dictionary& dict,
const box& globalBox);
virtual
~partitioning();
create_vCtor(
partitioning,
dictionary,
(
const dictionary& dict,
const box& globalBox
),
(dict, globalBox));
bool partition(
span<realx3> points,
pFlagTypeHost flags);
bool migrateData(span<char> src, span<char> dst, uint32 elementSize);
inline
auto localBox()const
{
return localBox_;
}
inline
const auto& globalBox()const
{
return globalBox_;
}
inline
bool partitionsChanged()const
{
return changes_ == 1;
}
uint32 numberImportThisProc()const
{
return numImport_;
}
uint32 numberExportThisProc()const
{
return numExport_;
}
virtual
span<int32> exportList(int procNo)const = 0;
virtual
pFlow::MPI::procVector<span<int32>> allExportLists()const=0;
void printBox()const;
};
}
#endif //__partitioning_hpp__
/*static
int getNumberOfPoints(void *data, int32 *ierr);
static
void getPointList(
void *data,
int32 sizeGID,
int32 sizeLID,
id_t* globalID,
id_t* localID,
int32 wgt_dim,
float *obj_wgts,
int32 *ierr);*/

View File

@ -0,0 +1,330 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#include "zoltan_cpp.h"
#include "error.hpp"
#include "processors.hpp"
#include "rcb1DPartitioning.hpp"
bool pFlow::rcb1DPartitioning::partition(pointCollection &points)
{
zoltan_->Set_Param("RCB_OUTPUT_LEVEL", "0");
zoltan_->Set_Param("RCB_RECTILINEAR_BLOCKS", "1");
zoltan_->Set_Param("KEEP_CUTS", "1");
zoltan_->Set_Param("REDUCE_DIMENSIONS", "1");
zoltan_->Set_Param("RCB_RECOMPUTE_BOX", "1");
zoltan_->Set_Param("AVERAGE_CUTS", "0");
zoltan_->Set_Param("MIGRATE_ONLY_PROC_CHANGES", "0");
zoltan_->Set_Num_Obj_Fn(rcb1DPartitioning::getNumberOfPoints, &points);
zoltan_->Set_Obj_List_Fn(rcb1DPartitioning::getPointList, &points);
zoltan_->Set_Num_Geom_Fn(rcb1DPartitioning::getNumGeometry, &points);
switch (direction_)
{
case Direction::X:
zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_x, &points);
break;
case Direction::Y:
zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_y, &points);
break;
case Direction::Z:
zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_z, &points);
break;
}
int numGidEntries_, numLidEntries_;
int rc = zoltan_->LB_Partition(changes_, numGidEntries_, numLidEntries_,
numImport_, importGlobalGids_, importLocalGids_, importProcs_, importToPart_,
numExport_, exportGlobalGids_, exportLocalGids_, exportProcs_, exportToPart_);
if (rc != ZOLTAN_OK)
{
fatalErrorInFunction<< "Zoltan faild to perform partitioning."<<endl;
return false;
}
for(auto& ids:exportIds_)
{
ids.clear();
}
std::vector<int32> thisProc(points.numActivePoints(),-1);
for(auto i =0; i<numExport_; i++)
{
exportIds_[exportProcs_[i]].push_back(exportGlobalGids_[i]);
thisProc[exportGlobalGids_[i]] = exportGlobalGids_[i];
}
for(int i=0; i<thisProc.size(); i++)
{
if(thisProc[i]==-1)
exportIds_[0].push_back(i);
}
validPointers_ = true;
int nDim;
double x0;
double y0;
double z0;
double x1;
double y1;
double z1;
zoltan_->RCB_Box
(
processors::globalRank(),
nDim,
x0, y0, z0,
x1, y1, z1
);
localBox_ = globalBox_;
if(equal(x0, x1))
{
x0 = x0 - 0.00001;
x1 = x1 + 0.00001;
}
switch (direction_)
{
case Direction::X :
localBox_.minPoint().x_ = x0;
localBox_.maxPoint().x_ = x1;
break;
case Direction::Y :
localBox_.minPoint().y_ = x0;
localBox_.maxPoint().y_ = x1;
break;
case Direction::Z :
localBox_.minPoint().z_ = x0;
localBox_.maxPoint().z_ = x1;
break;
}
localBox_.minPoint() = max(localBox_.minPoint(), globalBox_.minPoint());
localBox_.maxPoint() = min(localBox_.maxPoint(), globalBox_.maxPoint());
return true;
}
pFlow::rcb1DPartitioning::rcb1DPartitioning
(
const dictionary &dict,
const box &globalBox
)
:
partitioning(dict, globalBox),
exportIds_(pFlowProcessors())
{
word directionName = dict.getVal<word>("direction");
if(toUpper(directionName)== "X")
{
direction_ = Direction::X;
dirVector_ ={1.0, 0.0, 0.0};
}
else if( toUpper(directionName) == "Y")
{
direction_ = Direction::Y;
dirVector_ ={0.0, 1.0, 0.0};
}
else if( toUpper(directionName) == "Z")
{
direction_ = Direction::Z;
dirVector_ ={0.0, 0.0, 1.0};
}
else
{
fatalErrorInFunction<< "wrong direction in dictionary "<<
dict.globalName()<<". Directions should be one of x, y, or z."<<endl;
fatalError;
}
}
int pFlow::rcb1DPartitioning::getNumGeometry(void *data, int *ierr)
{
*ierr = ZOLTAN_OK;
return 1;
}
int pFlow::rcb1DPartitioning::getNumberOfPoints(void *data, int *ierr)
{
auto *obj = static_cast<pointCollection *>(data);
*ierr = ZOLTAN_OK;
return obj->numActivePoints();
}
void pFlow::rcb1DPartitioning::getPointList
(
void *data,
int sizeGID,
int sizeLID,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int wgt_dim,
float *obj_wgts,
int *ierr
)
{
auto* obj = static_cast<pointCollection *>(data);
*ierr = ZOLTAN_OK;
auto activeRange = obj->pFlag_.activeRange();
uint32 n = 0;
for (auto i=activeRange.start(); i<activeRange.end(); i++)
{
if( obj->pFlag_.isActive(i) )
{
globalID[n] = i;
localID[n] = n;
n++;
}
}
}
void pFlow::rcb1DPartitioning::getGeometryList_x
(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr
)
{
auto* obj = static_cast<pointCollection *>(data);
if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1))
{
*ierr = ZOLTAN_FATAL;
return;
}
auto activeRange = obj->pFlag_.activeRange();
uint32 n = 0;
for (auto i=activeRange.start(); i<activeRange.end(); i++)
{
if( obj->pFlag_.isActive(i) )
{
geom_vec[n] = obj->points_[i].x_;
n++;
}
}
*ierr = ZOLTAN_OK;
return;
}
void pFlow::rcb1DPartitioning::getGeometryList_y
(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr
)
{
auto* obj = static_cast<pointCollection *>(data);
if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1))
{
*ierr = ZOLTAN_FATAL;
return;
}
auto activeRange = obj->pFlag_.activeRange();
uint32 n = 0;
for (auto i=activeRange.start(); i<activeRange.end(); i++)
{
if( obj->pFlag_.isActive(i) )
{
geom_vec[n] = obj->points_[i].y_;
n++;
}
}
*ierr = ZOLTAN_OK;
return;
}
void pFlow::rcb1DPartitioning::getGeometryList_z
(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr
)
{
auto* obj = static_cast<pointCollection *>(data);
if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1))
{
*ierr = ZOLTAN_FATAL;
return;
}
auto activeRange = obj->pFlag_.activeRange();
uint32 n = 0;
for (auto i=activeRange.start(); i<activeRange.end(); i++)
{
if( obj->pFlag_.isActive(i) )
{
geom_vec[n] = obj->points_[i].z_;
n++;
}
}
*ierr = ZOLTAN_OK;
return;
}

View File

@ -0,0 +1,240 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __rcb1DPartitioning_hpp__
#define __rcb1DPartitioning_hpp__
#include "partitioning.hpp"
#include "procVector.hpp"
namespace pFlow
{
class rcb1DPartitioning
:
public partitioning
{
public:
enum Direction
{
X = 0,
Y = 1,
Z = 2
};
protected:
/// Direction of partitioning
Direction direction_ = Direction::X;
realx3 dirVector_ = {1.0, 0.0, 0.0};
word directionName_ = "x";
MPI::procVector<std::vector<int>> exportIds_;
bool partition(pointCollection& points) override;
public:
rcb1DPartitioning(
const dictionary& dict,
const box& globalBox);
~rcb1DPartitioning() override=default;
span<int32> exportList(int procNo)const override
{
return span<int32>(
const_cast<int32*>(exportIds_[procNo].data()),
exportIds_[procNo].size());
}
pFlow::MPI::procVector<span<int32>> allExportLists()const override
{
pFlow::MPI::procVector<span<int32>> allList(pFlowProcessors());
for(int i=0; i<allList.size(); i++)
allList[i]= exportList(i);
return allList;
}
static
int getNumGeometry(void *data, int *ierr);
static
int getNumberOfPoints(void *data, int *ierr);
static
void getPointList
(
void *data,
int sizeGID,
int sizeLID,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int wgt_dim,
float *obj_wgts,
int *ierr
);
static
void getGeometryList_x(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr);
static
void getGeometryList_y(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr);
static
void getGeometryList_z(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr);
};
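// A minimal sketch (assumed usage, for illustration only) of consuming the
// export lists once partitioning has been performed:
//
//     auto lists = partitioner.allExportLists();
//     for(int proc = 0; proc < lists.size(); proc++)
//     {
//         auto ids = lists[proc];
//         for(uint32 k = 0; k < ids.size(); k++)
//         {
//             // the point with local index ids[k] is to be moved to
//             // processor proc
//         }
//     }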
/*class RCB_y_partitioning
:
public partitioning
{
public:
RCB_y_partitioning(int argc, char *argv[], pointCollection& collection, const box& gBox)
:
partitioning(argc, argv, collection, gBox)
{}
virtual
~RCB_y_partitioning()=default;
bool partition() override;
static
void getGeometryList(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr)
{
auto* obj = static_cast<pointCollection *>(data);
if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1))
{
*ierr = ZOLTAN_FATAL;
return;
}
*ierr = ZOLTAN_OK;
for (int i=0; i < num_obj ; i++)
{
geom_vec[i] = obj->pointList()[i].y_;
}
return;
}
static
int getNumGeometry(void *data, int *ierr)
{
*ierr = ZOLTAN_OK;
return 1;
}
};
class RCB_x_partitioning
:
public partitioning
{
public:
RCB_x_partitioning(int argc, char *argv[], pointCollection& collection, const box& gBox)
:
partitioning(argc, argv, collection, gBox)
{}
virtual
~RCB_x_partitioning()=default;
bool partition() override;
static
void getGeometryList(
void *data,
int sizeGID,
int sizeLID,
int num_obj,
ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID,
int num_dim,
double *geom_vec,
int *ierr);
static
int getNumGeometry(void *data, int *ierr);
};*/
} // pFlow
#endif //__rcb1DPartitioning_hpp__

View File

@ -26,9 +26,6 @@ Licence:
#include "types.hpp"
#include "span.hpp"
#ifdef pFlow_Build_MPI
namespace pFlow::MPI
{
@ -375,9 +372,6 @@ inline auto typeFree(DataType& type)
}
#endif //pFlow_Build_MPI
#endif //__mpiCommunication_H__

View File

@ -0,0 +1,110 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
template<class T, class MemorySpace>
void
pFlow::MPI::processorBoundaryField<T, MemorySpace>::checkDataRecieved() const
{
if (!dataRecieved_)
{
//uint32 nRecv = reciever_.waitComplete();
dataRecieved_ = true;
/*if (nRecv != this->neighborProcSize())
{
fatalErrorInFunction;
fatalExit;
}*/
}
}
template<class T, class MemorySpace>
bool
pFlow::MPI::processorBoundaryField<T, MemorySpace>::updateBoundary(
int step,
DataDirection direction
)
{
/*if (step == 1)
{
// Isend
if (direction == DataDirection::TwoWay ||
( this->isBoundaryMaster() && direction == DataDirection::MasterToSlave) ||
(!this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster))
{
sender_.sendData(pFlowProcessors(), this->thisField());
dataRecieved_ = false;
}
}
else if (step == 2)
{
// Irecv
if (direction == DataDirection::TwoWay ||
(!this->isBoundaryMaster() && direction == DataDirection::MasterToSlave) ||
( this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster))
{
reciever_.recieveData(pFlowProcessors(), this->neighborProcSize());
dataRecieved_ = false;
}
}
else
{
fatalErrorInFunction << "Invalid step number " << step << endl;
return false;
}*/
return true;
}
template<class T, class MemorySpace>
pFlow::MPI::processorBoundaryField<T, MemorySpace>::processorBoundaryField(
const boundaryBase& boundary,
const pointStructure& pStruct,
InternalFieldType& internal
)
: BoundaryFieldType(boundary, pStruct, internal),
sender_(
groupNames("sendBufferField", boundary.name()),
boundary.neighborProcessorNo(),
boundary.thisBoundaryIndex()
),
reciever_(
groupNames("neighborProcField", boundary.name()),
boundary.neighborProcessorNo(),
boundary.mirrorBoundaryIndex()
)
{
}
template<class T, class MemorySpace>
typename pFlow::MPI::processorBoundaryField<T, MemorySpace>::ProcVectorType&
pFlow::MPI::processorBoundaryField<T, MemorySpace>::neighborProcField()
{
checkDataRecieved();
return reciever_.buffer();
}
template<class T, class MemorySpace>
const typename pFlow::MPI::processorBoundaryField<T, MemorySpace>::
ProcVectorType&
pFlow::MPI::processorBoundaryField<T, MemorySpace>::neighborProcField() const
{
checkDataRecieved();
return reciever_.buffer();
}
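// A minimal sketch (assumed usage, not taken from the calling code) of how
// the accessors above are meant to be used once both steps of
// updateBoundary() have been carried out:
//
//     auto& neighborField = procBoundaryField.neighborProcField();
//     // checkDataRecieved() is the hook that is meant to wait for the
//     // receive to complete before the buffer is handed out, so
//     // neighborField then mirrors the field values of the boundary
//     // points owned by the neighbor processor.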

View File

@ -0,0 +1,113 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __processorBoundaryField_hpp__
#define __processorBoundaryField_hpp__
#include "boundaryField.hpp"
#include "dataSender.hpp"
#include "dataReciever.hpp"
namespace pFlow::MPI
{
template< class T, class MemorySpace = void>
class processorBoundaryField
:
public boundaryField<T, MemorySpace>
{
public:
using processorBoundaryFieldType = processorBoundaryField<T, MemorySpace>;
using BoundaryFieldType = boundaryField<T, MemorySpace>;
using InternalFieldType = typename BoundaryFieldType::InternalFieldType;
using memory_space = typename BoundaryFieldType::memory_space;
using execution_space = typename BoundaryFieldType::execution_space;
using FieldAccessType = typename BoundaryFieldType::FieldAccessType;
using ProcVectorType = typename BoundaryFieldType::ProcVectorType;
private:
dataSender<T, MemorySpace> sender_;
mutable dataReciever<T, MemorySpace> reciever_;
mutable bool dataRecieved_ = true;
void checkDataRecieved()const;
bool updateBoundary(int step, DataDirection direction);
public:
TypeInfoTemplate211("boundaryField","processor", T, memory_space::name());
processorBoundaryField(
const boundaryBase& boundary,
const pointStructure& pStruct,
InternalFieldType& internal);
~processorBoundaryField()override = default;
add_vCtor
(
BoundaryFieldType,
processorBoundaryFieldType,
boundaryBase
);
ProcVectorType& neighborProcField() override;
const ProcVectorType& neighborProcField()const override;
bool hearChanges
(
real t,
real dt,
uint32 iter,
const message& msg,
const anyList& varList
) override
{
BoundaryFieldType::hearChanges(t,dt,iter, msg,varList);
if(msg.equivalentTo(message::BNDR_DELETE))
{
// do nothing;
}
return true;
}
};
}
#include "processorBoundaryField.cpp"
#endif //__processorBoundaryField_hpp__

View File

@ -0,0 +1,24 @@
//#include "Field.hpp"
#include "processorBoundaryField.hpp"
template class pFlow::MPI::processorBoundaryField<pFlow::uint8>;
template class pFlow::MPI::processorBoundaryField<pFlow::uint8, pFlow::HostSpace>;
template class pFlow::MPI::processorBoundaryField<pFlow::uint32>;
template class pFlow::MPI::processorBoundaryField<pFlow::uint32, pFlow::HostSpace>;
template class pFlow::MPI::processorBoundaryField<pFlow::uint64>;
template class pFlow::MPI::processorBoundaryField<pFlow::uint64, pFlow::HostSpace>;
template class pFlow::MPI::processorBoundaryField<pFlow::real>;
template class pFlow::MPI::processorBoundaryField<pFlow::real, pFlow::HostSpace>;
template class pFlow::MPI::processorBoundaryField<pFlow::realx3>;
template class pFlow::MPI::processorBoundaryField<pFlow::realx3, pFlow::HostSpace>;
template class pFlow::MPI::processorBoundaryField<pFlow::realx4>;
template class pFlow::MPI::processorBoundaryField<pFlow::realx4, pFlow::HostSpace>;

View File

@ -0,0 +1,148 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#include "boundaryProcessor.hpp"
#include "dictionary.hpp"
#include "mpiCommunication.hpp"
void
pFlow::MPI::boundaryProcessor::checkSize() const
{
if (!sizeObtained_)
{
//MPI_Wait(&sizeRequest_, StatusIgnore);
sizeObtained_ = true;
}
}
void
pFlow::MPI::boundaryProcessor::checkDataRecieved() const
{
if (!dataRecieved_)
{
//uint32 nRecv = reciever_.waitComplete();
dataRecieved_ = true;
/*if (nRecv != neighborProcSize())
{
fatalErrorInFunction;
fatalExit;
}*/
}
}
pFlow::MPI::boundaryProcessor::boundaryProcessor(
const dictionary& dict,
const plane& bplane,
internalPoints& internal,
boundaryList& bndrs,
uint32 thisIndex
)
: boundaryBase(dict, bplane, internal, bndrs, thisIndex),
sender_(
groupNames("sendBuffer", name()),
neighborProcessorNo(),
thisBoundaryIndex()
),
reciever_(
groupNames("neighborProcPoints", name()),
neighborProcessorNo(),
mirrorBoundaryIndex()
)
{
}
bool
pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt)
{
thisNumPoints_ = size();
auto req = MPI_REQUEST_NULL;
// note: this send request is neither waited on nor freed here, so the
// handle remains pending; MPI_Wait or MPI_Request_free would release it
// once the send completes
MPI_Isend(
&thisNumPoints_,
1,
MPI_UNSIGNED,
neighborProcessorNo(),
thisBoundaryIndex(),
pFlowProcessors().localCommunicator(),
&req);
MPI_Recv(
&neighborProcNumPoints_,
1,
MPI_UNSIGNED,
neighborProcessorNo(),
mirrorBoundaryIndex(),
pFlowProcessors().localCommunicator(),
MPI_STATUS_IGNORE
);
sizeObtained_ = false;
return true;
}
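// A minimal alternative sketch (an assumption, not the committed approach)
// for the size exchange above that leaves no pending request behind:
//
//     MPI_Sendrecv(
//         &thisNumPoints_,          1, MPI_UNSIGNED,
//         neighborProcessorNo(),    thisBoundaryIndex(),
//         &neighborProcNumPoints_,  1, MPI_UNSIGNED,
//         neighborProcessorNo(),    mirrorBoundaryIndex(),
//         pFlowProcessors().localCommunicator(),
//         MPI_STATUS_IGNORE);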
pFlow::uint32
pFlow::MPI::boundaryProcessor::neighborProcSize() const
{
checkSize();
return neighborProcNumPoints_;
}
pFlow::realx3Vector_D&
pFlow::MPI::boundaryProcessor::neighborProcPoints()
{
checkDataRecieved();
return reciever_.buffer();
}
const pFlow::realx3Vector_D&
pFlow::MPI::boundaryProcessor::neighborProcPoints() const
{
checkDataRecieved();
return reciever_.buffer();
}
bool
pFlow::MPI::boundaryProcessor::updataBoundary(int step)
{
if (step == 1)
{
sender_.sendData(pFlowProcessors(), thisPoints());
dataRecieved_ = false;
}
else if (step == 2)
{
reciever_.recieveData(pFlowProcessors(), neighborProcSize());
dataRecieved_ = false;
}
return true;
}
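// A minimal sketch (assumed; the real driver is boundaryList) of how the
// two-step exchange above is expected to be driven across all boundaries:
//
//     for(auto& bndry : boundaries) bndry.updataBoundary(1);  // post sends
//     for(auto& bndry : boundaries) bndry.updataBoundary(2);  // post receives
//
// Separating the send and receive phases lets every processor post its send
// before any processor starts waiting for incoming data.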
bool
pFlow::MPI::boundaryProcessor::iterate(uint32 iterNum, real t, real dt)
{
return true;
}
bool
pFlow::MPI::boundaryProcessor::afterIteration(uint32 iterNum, real t, real dt)
{
return true;
}

View File

@ -0,0 +1,116 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
email: hamid.r.norouzi AT gmail.com
------------------------------------------------------------------------------
Licence:
This file is part of phasicFlow code. It is a free software for simulating
granular and multiphase flows. You can redistribute it and/or modify it under
the terms of GNU General Public License v3 or any other later versions.
phasicFlow is distributed to help others in their research in the field of
granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-----------------------------------------------------------------------------*/
#ifndef __boundaryProcessor_hpp__
#define __boundaryProcessor_hpp__
#include "boundaryBase.hpp"
#include "mpiTypes.hpp"
#include "dataSender.hpp"
#include "dataReciever.hpp"
namespace pFlow::MPI
{
class boundaryProcessor
:
public boundaryBase
{
private:
uint32 neighborProcNumPoints_ = 0;
uint32 thisNumPoints_ = 0;
realx3Vector_D neighborProcPoints_;
mutable Request sizeRequest_;
mutable Request sSizeRequest_;
int req_=0;
mutable bool sizeObtained_ = true;
mutable dataSender<realx3> sender_;
mutable dataReciever<realx3> reciever_;
mutable bool dataRecieved_ = true;
void checkSize()const;
void checkDataRecieved()const;
/// @brief Update processor boundary data for this processor
/// @param step The update step, which is either 1 or 2
/// @return true if successful
/// @details This method is called twice by boundaryList so that
/// processor boundaries can exchange data in two steps.
/// The first step posts a buffered non-blocking send and the
/// second step posts a non-blocking receive to obtain the data.
bool updataBoundary(int step)override;
public:
TypeInfo("boundary<processor>");
boundaryProcessor(
const dictionary& dict,
const plane& bplane,
internalPoints& internal,
boundaryList& bndrs,
uint32 thisIndex
);
~boundaryProcessor() override = default;
add_vCtor
(
boundaryBase,
boundaryProcessor,
dictionary
);
bool beforeIteration(uint32 iterNum, real t, real dt) override;
bool iterate(uint32 iterNum, real t, real dt) override;
bool afterIteration(uint32 iterNum, real t, real dt) override;
/// @brief Return the number of points in the neighbor processor boundary.
/// This is overridden from boundaryBase.
uint32 neighborProcSize() const override;
/// @brief Return a reference to point positions in the neighbor
/// processor boundary.
realx3Vector_D& neighborProcPoints() override;
/// @brief Return a const reference to point positions in the
/// neighbor processor boundary.
const realx3Vector_D& neighborProcPoints() const override;
};
} // namespace pFlow::MPI
#endif //__boundaryProcessor_hpp__

View File

@ -0,0 +1,108 @@
#ifndef __dataReciever_hpp__
#define __dataReciever_hpp__
#include "span.hpp"
#include "localProcessors.hpp"
#include "mpiCommunication.hpp"
namespace pFlow::MPI
{
template<typename T, typename MemorySpace=void>
class dataReciever
{
public:
using BufferVectorType = VectorSingle<T, MemorySpace>;
using BufferVectorTypeHost = VectorSingle<T, HostSpace>;
using memory_space = typename BufferVectorType::memory_space;
using execution_space = typename BufferVectorType::execution_space;
private:
BufferVectorType buffer_;
std::vector<T> buffer0_;
int fromProc_;
int tag_;
Request recvRequest_;
public:
dataReciever(const word& name, int from, int tag)
:
buffer_(name),
fromProc_(from),
tag_(tag)
{}
~dataReciever()=default;
void recieveData(
const localProcessors& processors,
uint32 numToRecv
)
{
buffer0_.clear();
buffer0_.resize(numToRecv);
MPI_Status status;
/*CheckMPI(recv(
buffer_.getSpan(),
fromProc_,
tag_,
processors.localCommunicator(),
&status), true);*/
// note: realx3Type__ is used directly here, so this template currently
// assumes T is realx3; the data also lands in the host buffer0_, while
// buffer() still exposes the device-side buffer_
MPI_Recv(
buffer0_.data(),
buffer0_.size(),
realx3Type__,
fromProc_,
tag_,
processors.localCommunicator(),
&status
);
int c;
getCount<realx3>(&status, c);
pOutput<<"Number of data recieved "<<c<<endl;
}
auto& buffer()
{
return buffer_;
}
const auto& buffer()const
{
return buffer_;
}
uint32 waitComplete()
{
/*Status status;
CheckMPI(MPI_Wait(&recvRequest_, &status), true);
int count;
CheckMPI(getCount<T>(&status, count), true);
return static_cast<uint32>(count);*/
return buffer_.size();
}
};
}
#endif //__dataReciever_hpp__

View File

@ -0,0 +1,120 @@
#ifndef __dataSender_hpp__
#define __dataSender_hpp__
#include "VectorSingles.hpp"
#include "localProcessors.hpp"
#include "mpiCommunication.hpp"
namespace pFlow::MPI
{
template<typename T, typename MemorySpace=void>
class dataSender
{
public:
using BufferVectorType = VectorSingle<T, MemorySpace>;
using BufferVectorTypeHost = VectorSingle<T, HostSpace>;
using memory_space = typename BufferVectorType::memory_space;
using execution_space = typename BufferVectorType::execution_space;
private:
// the device buffer is currently disabled; a host std::vector is
// used instead of BufferVectorType
//BufferVectorType buffer_;
std::vector<T> buffer_;
int toProc_;
int tag_;
Request sendRequest_ = RequestNull;
public:
dataSender(const word& name, int toProc, int tag)
:
toProc_(toProc),
tag_(tag)
{}
~dataSender()=default;
void sendData(
const localProcessors& processors,
const scatteredFieldAccess<T, memory_space>& scatterField
)
{
using RPolicy = Kokkos::RangePolicy<
DefaultExecutionSpace,
Kokkos::Schedule<Kokkos::Static>,
Kokkos::IndexType<pFlow::uint32>>;
uint32 n = scatterField.size();
// clear the buffer first so that resize() does not copy old elements
// when the capacity has to grow
buffer_.clear();
buffer_.resize(n);
auto* buffView = buffer_.data();
Kokkos::parallel_for(
"dataSender::sendData",
RPolicy(0,n),
LAMBDA_HD(uint32 i)
{
buffView[i] = scatterField[i];
}
);
Kokkos::fence();
auto req = MPI_REQUEST_NULL;
// note: realx3Type__ is used directly here, so this template currently
// assumes T is realx3; the request handle is not stored or waited on
MPI_Isend(
buffer_.data(),
buffer_.size(),
realx3Type__,
toProc_,
tag_,
processors.localCommunicator(),
&req);
/*CheckMPI(send(
buffer_.getSpan(),
toProc_,
tag_,
processors.localCommunicator(),
MPI_STATUS_IGNORE), true);*/
}
/*auto& buffer()
{
return buffer_;
}
const auto& buffer()const
{
return buffer_;
}*/
bool sendComplete()
{
return true;
/*int test;
MPI_Test(&sendRequest_, &test, StatusIgnore);
if(test)
return true;
else
return false;*/
}
};
}
#endif //__dataSender_hpp__
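// A minimal sketch (assumed usage, not taken from the calling code) of how a
// dataSender / dataReciever pair is wired across two matching processor
// boundaries:
//
//     dataSender<realx3>   sender  ("sendBuffer", neighborRank, thisBoundaryIndex);
//     dataReciever<realx3> reciever("recvBuffer", neighborRank, mirrorBoundaryIndex);
//
//     sender.sendData(pFlowProcessors(), thisPoints());        // pack + MPI_Isend
//     reciever.recieveData(pFlowProcessors(), numExpected);    // blocking MPI_Recv
//
// The tag used by the sender on one side must equal the tag used by the
// receiver on the other side, which is why thisBoundaryIndex and
// mirrorBoundaryIndex are swapped between the two ends.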