Commit 38091310 authored by PIACIBELLO Cyrille

Fix for the error occurring in M2M, still bugged

parent eb759b06
@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FFMMALGORITHMTHREADPROC_HPP
@@ -62,1006 +62,1041 @@
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmThreadProc : public FAbstractAlgorithm {
// Can be deleted
// const static int MaxSizePerCell = CellClass::GetSize();
const FMpi::FComm& comm; //< MPI comm
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
typename OctreeClass::Iterator* iterArray; //Will be used to store pointers to cells/leafs to work with
typename OctreeClass::Iterator* iterArrayComm; //Will be used to store pointers to cells/leafs to send/rcv
int numberOfLeafs; //< To store the size at the previous level
const int MaxThreads; //< The max number of threads allowed by OpenMP
const int nbProcess; //< Number of processes
const int idProcess; //< Id of the current process
const int OctreeHeight; //< Height of the tree
/** An interval is the Morton index interval
* that a proc uses (it holds data in this interval)
*/
struct Interval{
MortonIndex min;
MortonIndex max;
};
/** My interval */
Interval*const intervals;
/** All process intervals */
Interval*const workingIntervalsPerLevel;
/** Get an interval from proc id and level */
Interval& getWorkingInterval( int level, int proc){
return workingIntervalsPerLevel[OctreeHeight * proc + level];
}
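// Note: workingIntervalsPerLevel is a flattened [nbProcess][OctreeHeight] array,
// so the interval of proc p at level l lives at index OctreeHeight * p + l.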
public:
/** Get current proc interval at level */
Interval& getWorkingInterval( int level){
return getWorkingInterval(level, idProcess);
}
/** Does the current proc have some work at this level? */
bool hasWorkAtLevel( int level){
return idProcess == 0 || (getWorkingInterval(level, idProcess - 1).max) < (getWorkingInterval(level, idProcess).max);
}
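// Note: two consecutive procs can share a cell at a coarse level; the lower rank
// is responsible for it, so a proc "has work" only when its interval at this level
// extends strictly beyond its left neighbour's.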
/** The constructor needs the octree and the kernels used for computation
* @param inTree the octree to work on
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels)
: tree(inTree) , kernels(nullptr), comm(inComm), iterArray(nullptr),iterArrayComm(nullptr),numberOfLeafs(0),
MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()),
OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]),
workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()])
{
FAssertLF(tree, "tree cannot be null");
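// One kernel clone per OpenMP thread: kernels may carry internal state, so each
// thread gets a private copy and the passes below need no locking.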
this->kernels = new KernelClass*[MaxThreads];
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass(*inKernels);
}
FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n");
FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n");
}
/** Default destructor */
virtual ~FFmmAlgorithmThreadProc(){
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
printf("Delete %d\n",idxThread);
delete this->kernels[idxThread];
}
delete [] this->kernels;
delete [] intervals;
delete [] workingIntervalsPerLevel;
}
/**
* To execute the fmm algorithm
* Call this function to run the complete algorithm
*/
void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
FTRACE( FTrace::FFunction functionTrace( __FUNCTION__, "Fmm" , __FILE__ , __LINE__ ) );
// Count leaf
this->numberOfLeafs = 0;
{
FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
Interval myLastInterval;
{//Building the leaf interval
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
myLastInterval.min = octreeIterator.getCurrentGlobalIndex();
do{
++this->numberOfLeafs;
} while(octreeIterator.moveRight());
myLastInterval.max = octreeIterator.getCurrentGlobalIndex();
}
iterArray = new typename OctreeClass::Iterator[numberOfLeafs];
iterArrayComm = new typename OctreeClass::Iterator[numberOfLeafs];
FAssertLF(iterArray, "iterArray bad alloc");
FAssertLF(iterArrayComm, "iterArrayComm bad alloc");
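// Note: sizing both arrays with numberOfLeafs is enough for every level, since a
// proc never traverses more cells at a level than it has leaves below them.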
// We get the min/max indexes from each procs
FMpi::MpiAssert( MPI_Allgather( &myLastInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, comm.getComm()), __LINE__ );
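// After this call intervals[r] holds the leaf interval of proc r for every rank,
// as MPI_Allgather stores each contribution in rank order.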
Interval*const myIntervals = new Interval[OctreeHeight];
myIntervals[OctreeHeight - 1] = myLastInterval;
//Building the intervals for all the cells for at each level using bitshifting
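// (a Morton index packs 3 bits, one per axis, per octree level, so shifting a
// child's index right by 3 gives its parent's index at the level above)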
for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){
myIntervals[idxLevel].min = myIntervals[idxLevel+1].min >> 3;
myIntervals[idxLevel].max = myIntervals[idxLevel+1].max >> 3;
}
if(idProcess != 0){
//We test for each level if process on left (idProcess-1) own cell I thought I owned
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
MortonIndex currentLimit = intervals[idProcess-1].max >> 3;
for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 ; --idxLevel){
while(octreeIterator.getCurrentGlobalIndex() <= currentLimit){
if( !octreeIterator.moveRight() ) break;
}
myIntervals[idxLevel].min = octreeIterator.getCurrentGlobalIndex();
octreeIterator.moveUp();
currentLimit >>= 3;
}
}
printf("Proc::%d From leaf %lld to leaf %lld\n",idProcess,myLastInterval.min,myLastInterval.max);
// We get the min/max indexes from each procs
FMpi::MpiAssert( MPI_Allgather( myIntervals, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE,
workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ );
//Print for each proc the working interval at each level
// for(int idL = 0 ; idL < OctreeHeight ; ++idL){
// printf("Proc::%d From cell %lld to cell %lld\n",
// idProcess,getWorkingInterval(idL).min,getWorkingInterval(idL).max);
// }
delete[] myIntervals;
}
// run;
if(operationsToProceed & FFmmP2M) bottomPass();
if(operationsToProceed & FFmmM2M) upwardPass();
if(operationsToProceed & FFmmM2L) transferPassOld();
if(operationsToProceed & FFmmL2L) downardPass();
if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPassOld();
// delete array
delete [] iterArray;
delete [] iterArrayComm;
iterArray = nullptr;
iterArrayComm = nullptr;
}
private:
/////////////////////////////////////////////////////////////////////////////
// P2M
/////////////////////////////////////////////////////////////////////////////
/** P2M Bottom Pass */
void bottomPass(){
FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) );
FLOG(FTic counterTime);
FLOG(FTic computationCounter);
typename OctreeClass::Iterator octreeIterator(tree);
// Iterate on leafs
octreeIterator.gotoBottomLeft();
int leafs = 0;
do{
iterArray[leafs++] = octreeIterator;
} while(octreeIterator.moveRight());
FLOG(computationCounter.tic());
#pragma omp parallel
{
KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
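// Each thread uses its own kernel clone (built in the constructor), so the P2M
// calls below are race-free without locks.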
#pragma omp for nowait
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
}
}
FLOG(computationCounter.tac());
FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = " << counterTime.tacAndElapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
}
/////////////////////////////////////////////////////////////////////////////
// Upward
/////////////////////////////////////////////////////////////////////////////
/** M2M */
void upwardPassOld(){
const int MaxSizePerCell = CellClass::GetSize();
FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
FLOG(FTic counterTime);
FLOG(FTic computationCounter);
FLOG(FTic prepareCounter);
FLOG(FTic waitCounter);
// Start from leaf level - 1
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
// This variable is the proc responsible
// for the shared cells
int sendToProc = idProcess;
// There are a maximum of 8-1 sends and 8-1 receptions
MPI_Request requests[14];
MPI_Status status[14];
// Maximum data per message is 7 cells (a shared parent has at most 7 children to
// send, since at least one of its children belongs to the other proc):
FMpiBufferWriter sendBuffer(comm.getComm(),7*MaxSizePerCell);
const int recvBufferOffset = (8 * MaxSizePerCell + 1);
FMpiBufferReader recvBuffer(comm.getComm(), nbProcess*recvBufferOffset);
CellClass recvBufferCells[8];
int firstProcThatSend = idProcess + 1;
// for each levels
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
// No more work for me
if(idProcess != 0
&& getWorkingInterval((idxLevel+1), idProcess).max <= getWorkingInterval((idxLevel+1), idProcess - 1).max){
break;
}
// copy cells to work with
int numberOfCells = 0;
// for each cells
do{
iterArray[numberOfCells++] = octreeIterator;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;
// We may need to send something
int iterRequests = 0;
int cellsToSend = -1;
while(iterArray[cellsToSend+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).min){
++cellsToSend;
}
FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
FLOG(prepareCounter.tic());
if(idProcess != 0
&& (getWorkingInterval((idxLevel+1), idProcess).min >>3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).max >>3)){
char state = 0;
sendBuffer.write(state);
const CellClass* const* const child = iterArray[cellsToSend].getCurrentChild();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if( child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).min <= child[idxChild]->getMortonIndex() ){
child[idxChild]->serializeUp(sendBuffer);
state = char(state | (0x1 << idxChild));
}
}
sendBuffer.writeAt(0,state);
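// Message layout: byte 0 is a bitmask telling the receiver which of the 8 children
// follow in the buffer; writeAt(0,state) patches it in once they are serialized.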
while( sendToProc && iterArray[cellsToSend].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel , sendToProc - 1).max){
--sendToProc;
}
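// The children of the shared parent are sent to the proc responsible for it, i.e.
// the lowest rank whose interval at this level still contains the parent's index.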
MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, sendToProc,
FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
}
// We may need to receive something
bool hasToReceive = false;
int endProcThatSend = firstProcThatSend;
if(idProcess != nbProcess - 1){ // if I'm the last one (idProcess == nbProcess-1), I shall not receive anything in a M2M
while(firstProcThatSend < nbProcess
&& (getWorkingInterval((idxLevel+1), firstProcThatSend).max) <= (getWorkingInterval((idxLevel+1), idProcess).max)){
// Second condition: skip procs whose max Morton index at the child level is not beyond my own max
++firstProcThatSend;
}
if(firstProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1), firstProcThatSend).min >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).max>>3) ){
endProcThatSend = firstProcThatSend;
while( endProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1) ,endProcThatSend).min >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).max>>3)){
++endProcThatSend;
}
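// [firstProcThatSend, endProcThatSend) now brackets the ranks whose cells at the
// level below are children of my last parent cell, i.e. the procs to receive from.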
if(firstProcThatSend != endProcThatSend){
hasToReceive = true;
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_PACKED,
idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
}
}
}
}
// copy cells to work with
int numberOfCells = 0;
// for each cells
do{
iterArray[numberOfCells++] = octreeIterator;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;
FLOG(prepareCounter.tac());
FTRACE( regionTrace.end() );
// We may need to send something
int iterRequests = 0;
int cellsToSend = -1;
// Compute
const int endIndex = (hasToReceive?numberOfCells-1:numberOfCells);
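// When remote children are expected, the last (shared) cell is skipped here and
// handled after the MPI_Waitall below, once the received children are merged in.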
FLOG(computationCounter.tic());
#pragma omp parallel
{
KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
#pragma omp for nowait
for( int idxCell = cellsToSend + 1 ; idxCell < endIndex ; ++idxCell){
myThreadkernels.M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
// for(int k=0 ; k< 8 ; ++k){
// if(iterArray[idxCell].getCurrentChild()[k]){
// FILE * fd = fopen("ResM2MNearOld","a+");
// fprintf(fd,"%lld\t% lld\t %d\n",iterArray[idxCell].getCurrentCell()->getMortonIndex(),iterArray[idxCell].getCurrentChild()[k]->getMortonIndex(),idxLevel);
// fclose(fd);
// }
//}
}
}
FLOG(computationCounter.tac());
// Are we sending or waiting anything?
if(iterRequests){
FLOG(waitCounter.tic());
MPI_Waitall( iterRequests, requests, status);
FLOG(waitCounter.tac());
// we were receiving data
if( hasToReceive ){
CellClass* currentChild[8];
memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
// retrieve data and merge my children with the children from others
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc){
recvBuffer.seek(idxProc * recvBufferOffset);
int state = int(recvBuffer.getValue<char>());
int position = 0;
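// Walk the state bitmask: each set bit marks a child slot that the sending proc
// serialized into the buffer.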
while( state && position < 8){
while(!(state & 0x1)){
state >>= 1;
++position;
}