Commit 1f69b258 authored by Matthias Messner's avatar Matthias Messner
parents 57ea27fd d788edc1
......@@ -150,7 +150,8 @@ void ValidateFMMAlgo(OctreeClass* const tree){
octreeIterator.gotoBottomLeft();
do{
if(octreeIterator.getCurrentCell()->getDataUp() != octreeIterator.getCurrentListSrc()->getSize() ){
std::cout << "Problem P2M : " << (octreeIterator.getCurrentCell()->getDataUp() - octreeIterator.getCurrentListSrc()->getSize()) << "\n";
std::cout << "Problem P2M : " << octreeIterator.getCurrentCell()->getDataUp() <<
" (should be " << octreeIterator.getCurrentListSrc()->getSize() << ")\n";
}
NbPart += octreeIterator.getCurrentListSrc()->getSize();
} while(octreeIterator.moveRight());
......@@ -194,7 +195,7 @@ void ValidateFMMAlgo(OctreeClass* const tree){
// there is a problem
if( (!isUsingTsm && iter.data().getDataDown() != NbPart - 1) ||
(isUsingTsm && iter.data().getDataDown() != NbPart) ){
std::cout << "Problem L2P + P2P : " << iter.data().getDataDown() << "\n";
std::cout << "Problem L2P + P2P : " << iter.data().getDataDown() << "(" << octreeIterator.getCurrentGlobalIndex() << ")\n";
}
iter.gotoNext();
}
......
......@@ -43,8 +43,8 @@ struct StarHandle : public FNoCopyable, public FNoAssignement {
/** Release the handle */
~StarHandle(){
if( handle != ((void *)0) ){
//starpu_data_unregister(handle);
if( handle != starpu_data_handle_t(0) ){
starpu_data_unregister(handle);
}
}
......@@ -69,7 +69,7 @@ struct StarHandle : public FNoCopyable, public FNoAssignement {
/** Release data */
void unregisterData(){
if( handle != ((void *)0) ){
//starpu_data_unregister(handle);
starpu_data_unregister(handle);
memset(&handle, 0, sizeof(starpu_data_handle_t));
}
}
......@@ -259,6 +259,7 @@ class FFmmAlgorithmStarpu : protected FAssertable{
KernelClass* const kernels; //< The kernels
const int OctreeHeight;
const bool putNameInTask;
//////////////////////////////////////////////////////////////////
// Codelets
......@@ -280,7 +281,7 @@ class FFmmAlgorithmStarpu : protected FAssertable{
starpu_perfmodel l2p_model;
// Init the codelet
void initCodelets(const bool putNameInTask){
void initCodelets(){
memset(&p2p_model, 0, sizeof(p2p_model));
p2p_model.type = STARPU_HISTORY_BASED;
p2p_model.symbol = "P2P";
......@@ -341,6 +342,8 @@ class FFmmAlgorithmStarpu : protected FAssertable{
l2p_cl.where = STARPU_CPU;
l2p_cl.cpu_funcs[0] = l2p_cpu;
l2p_cl.nbuffers = 2;
l2p_cl.modes[0] = STARPU_R;
l2p_cl.modes[1] = STARPU_RW;
if(putNameInTask) l2p_cl.model = &l2p_model;
// M2M & L2L
......@@ -400,6 +403,29 @@ class FFmmAlgorithmStarpu : protected FAssertable{
}
// Unregister the StarPU data handles attached to the leaves and to the cells
// of the octree, walking the tree from the leaf level upwards.
void releaseHandles(){
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// Copy of the left-most position, used to climb one level without
// having to walk back to the left
typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
// Release the leaf handles
do{
octreeIterator.getCurrentLeaf()->getSrc()->handle.unregisterData();
// Only release the targets when they are a distinct container from the
// sources, so the same handle is not unregistered twice
if(octreeIterator.getCurrentLeaf()->getSrc() != octreeIterator.getCurrentLeaf()->getTargets()){
octreeIterator.getCurrentLeaf()->getTargets()->handle.unregisterData();
}
} while(octreeIterator.moveRight());
octreeIterator = avoidGotoLeftIterator;
// Release the cell handles (up and down) for levels OctreeHeight-1 down to 2
for(int idxLevel = OctreeHeight - 1 ; idxLevel > 1 ; --idxLevel ){
do{
octreeIterator.getCurrentCell()->handleUp.unregisterData();
octreeIterator.getCurrentCell()->handleDown.unregisterData();
} while(octreeIterator.moveRight());
// Go to the left-most cell of the upper level
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;
}
}
//////////////////////////////////////////////////////////////////
......@@ -433,20 +459,33 @@ public:
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmStarpu(OctreeClass* const inTree, KernelClass* const inKernels, const bool putNameInTask = false)
: tree(inTree) , kernels(inKernels), OctreeHeight(tree->getHeight()) {
FFmmAlgorithmStarpu(OctreeClass* const inTree, KernelClass* const inKernels, const bool inPutNameInTask = false)
: tree(inTree) , kernels(inKernels), OctreeHeight(tree->getHeight()), putNameInTask(inPutNameInTask) {
FDEBUG(FDebug::Controller << "FFmmAlgorithmStarpu\n");
}
/** Default destructor */
virtual ~FFmmAlgorithmStarpu(){
}
/** Run starpu */
void initStarpu(const int nbThreads = -1){
starpu_conf setup;
starpu_conf_init(&setup);
setup.ncpus = nbThreads;
// Run starpu
starpu_init(NULL);
starpu_init(&setup);
FDEBUG(FDebug::Controller << "Init starpu, there are " << starpu_worker_get_count() << " workers\n");
// Init
initCodelets(putNameInTask);
initCodelets();
initHandles();
initKernels();
}
/** Default destructor */
virtual ~FFmmAlgorithmStarpu(){
/** Release starpu */
void releaseStarpu(){
// Release stuff
releaseCodelets();
releaseHandles();
......
// ===================================================================================
// Logiciel initial: ScalFmm Version 0.5
// Co-auteurs : Olivier Coulaud, Bérenger Bramas.
// Propriétaires : INRIA.
// Copyright © 2011-2012, diffusé sous les termes et conditions d’une licence propriétaire.
// Initial software: ScalFmm Version 0.5
// Co-authors: Olivier Coulaud, Bérenger Bramas.
// Owners: INRIA.
// Copyright © 2011-2012, spread under the terms and conditions of a proprietary license.
// ===================================================================================
#ifndef FFMMALGORITHMSTARPUGROUP_HPP
#define FFMMALGORITHMSTARPUGROUP_HPP
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Utils/FMemUtils.hpp"
#include "../Containers/FOctree.hpp"
#include "../Containers/FBoolArray.hpp"
#include "../Extensions/FExtendCoordinate.hpp"
#include "../Extensions/FExtendMortonIndex.hpp"
#include <starpu.h>
/*
TODO:
split multipole/local
*/
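/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmStarpuGroup
* @brief FMM algorithm that copies the octree into fixed-size groups of
* cells and leaves and describes the FMM operators as StarPU codelets.
* Please read the license
*/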
template<class OctreeClass, class ParticleClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmStarpuGroup : protected FAssertable{
/////////////////////////////////////////////////////////////
// Utils classes
/////////////////////////////////////////////////////////////
// Element type of a group's leavesArray: a ContainerClass plus the Morton
// index and the coordinate provided by the two extension base classes.
struct MortonContainer : public FExtendMortonIndex, public FExtendCoordinate {
ContainerClass container;
};
/** Describes one piece of data that a cell/leaf needs
 * in order to finish its computation
 */
struct TransferProperties{
    // Index, in the destination group, of the element that needs the data
    int indexWhoNeedsData;
    // Slot of the computation array the data has to be put in
    int positionInComputationArray;
    // Position the data has to be read from
    int positionInDataArray;

    /** Every field defaults to 0 when its argument is omitted */
    explicit TransferProperties(const int inIndex = 0, const int inPosition = 0, const int inDataPos = 0){
        indexWhoNeedsData          = inIndex;
        positionInComputationArray = inPosition;
        positionInDataArray        = inDataPos;
    }
};
/** The transfer buffer holds many properties
 * it has enough information to create a copy task and
 * a process task
 */
struct TransferBuffer {
    /** Start with no destination and a zero copy offset, so that no field
     * is ever read uninitialized
     */
    TransferBuffer() : groupDestination(0), indexToStarCopying(0) {
    }
    // The group that needs the data
    int groupDestination;
    // Positions, in the original group, of the elements to transfer
    FVector<int> originalIndexPosition;
    // Transfer properties
    FVector<TransferProperties> compuationProperties;
    // Where the data will be copied
    int indexToStarCopying;
};
/** A group contains several cells
* and some properties
*/
struct Group {
Group() : cellArray(0), needOther(0), leavesArray(0), transferBufferCell(0),
nbCellToReceive(0), transferBufferLeaf(0), nbLeafToReceive(0) {
handleCellArrayUp = 0;
handleCellArrayDown = 0;
handleLeafArray = 0;
handleLeafArrayRead = 0;
handleTransferCell = 0;
handleTransferLeaf = 0;
}
~Group(){
delete[] cellArray;
delete[] needOther;
delete[] leavesArray;
for(int idx = 0 ; idx < dataToSend.getSize() ; ++idx){
delete dataToSend[idx];
}
delete[] transferBufferCell;
delete[] transferBufferLeaf;
if( handleCellArrayUp != starpu_data_handle_t(0)) starpu_data_unregister(handleCellArrayUp);
if( handleCellArrayDown != starpu_data_handle_t(0)) starpu_data_unregister(handleCellArrayDown);
if( handleLeafArray != starpu_data_handle_t(0)) starpu_data_unregister(handleLeafArray);
if( handleLeafArrayRead != starpu_data_handle_t(0)) starpu_data_unregister(handleLeafArrayRead);
if( handleTransferCell != starpu_data_handle_t(0)) starpu_data_unregister(handleTransferCell);
if( handleTransferLeaf != starpu_data_handle_t(0)) starpu_data_unregister(handleTransferLeaf);
}
// Morton index the group start at
MortonIndex beginIndex;
// Morton index the group end at
MortonIndex endIndex;
// Number of elements in the group, usually GroupSize
int nbElements;
// The data of the group
CellClass* FRestrict cellArray;
bool* needOther;
// Or the leaves data
MortonContainer* FRestrict leavesArray;
// Information needed to compute parent child operations
int indexOfStartInLowerGroups;
FVector<Group*> lowerGroups;
// Information needed in case of transfering data needed
FVector<TransferBuffer*> dataToSend;
// memory to copy before compute remotly
CellClass* FRestrict transferBufferCell;
int nbCellToReceive;
// memory to copy before compute remotly
MortonContainer* FRestrict transferBufferLeaf;
int nbLeafToReceive;
// Starpu data
starpu_data_handle_t handleCellArrayUp;
starpu_data_handle_t handleCellArrayDown;
starpu_data_handle_t handleLeafArray;
starpu_data_handle_t handleLeafArrayRead;
starpu_data_handle_t handleTransferCell;
starpu_data_handle_t handleTransferLeaf;
};
//////////////////////////////////////////////////////////////////
// Init Kernels
//////////////////////////////////////////////////////////////////
// Allocate the kernel table (one slot per StarPU worker): each CPU worker
// gets its own copy of the kernel, every other slot is left null
void initKernels(){
    const unsigned int nbWorkers = starpu_worker_get_count();
    globalKernels = new KernelClass*[nbWorkers];
    for(unsigned int idxWorker = 0 ; idxWorker < nbWorkers ; ++idxWorker){
        if( starpu_worker_get_type(idxWorker) == STARPU_CPU_WORKER ){
            globalKernels[idxWorker] = new KernelClass(*kernel);
        }
        else{
            globalKernels[idxWorker] = 0;
        }
    }
}
// Destroy the kernel of every worker (null slots are harmless to delete),
// then the table itself
void releaseKernels(){
    const unsigned int nbWorkers = starpu_worker_get_count();
    unsigned int idxWorker = 0;
    while( idxWorker < nbWorkers ){
        delete globalKernels[idxWorker];
        ++idxWorker;
    }
    delete[] globalKernels;
}
/////////////////////////////////////////////////////////////
// Attributes
/////////////////////////////////////////////////////////////
OctreeClass* const tree; //< The octree to work on
const int OctreeHeight; //< Height of the tree
const int BlockSize; //< Size of the block
// One array of groups per level, indexed by level; allocated with
// OctreeHeight + 1 entries by the constructor
Group**const blockedTree; //< Current block tree
int*const blockedPerLevel; //< Number of block per level
KernelClass* const kernel; //< The kernel
const bool useStarpuPerfModel;//< to know if perf model has to be used
// Number of M2M / L2L codelet variants: the variant at index i is
// configured with i + 2 buffers by initCodelet
static const int MaxChild = 9;
// StarPU codelets, one per operator (filled in by initCodelet)
starpu_codelet p2m_cl;
starpu_codelet p2p_cl;
starpu_codelet p2p_restore_cl;
starpu_codelet m2m_cl[MaxChild];
starpu_codelet m2l_cl;
starpu_codelet m2l_other_cl;
starpu_codelet m2l_copy_cl;
starpu_codelet l2l_cl[MaxChild];
starpu_codelet l2p_cl;
// StarPU performance models, attached to the codelets only when
// useStarpuPerfModel is true
starpu_perfmodel p2p_model;
starpu_perfmodel p2p_restore_model;
starpu_perfmodel p2m_model;
starpu_perfmodel m2m_model;
starpu_perfmodel m2l_model;
starpu_perfmodel m2l_other_model;
starpu_perfmodel m2l_copy_model;
starpu_perfmodel l2l_model;
starpu_perfmodel l2p_model;
void initCodelet(){
// init perf model
memset(&p2p_model, 0, sizeof(p2p_model));
p2p_model.type = STARPU_HISTORY_BASED;
p2p_model.symbol = "P2P";
memset(&p2p_restore_model, 0, sizeof(p2p_restore_model));
p2p_restore_model.type = STARPU_HISTORY_BASED;
p2p_restore_model.symbol = "P2P Restore";
memset(&p2m_model, 0, sizeof(p2m_model));
p2m_model.type = STARPU_HISTORY_BASED;
p2m_model.symbol = "P2M";
memset(&m2l_model, 0, sizeof(m2l_model));
m2l_model.type = STARPU_HISTORY_BASED;
m2l_model.symbol = "M2L";
memset(&m2l_other_model, 0, sizeof(m2l_other_model));
m2l_other_model.type = STARPU_HISTORY_BASED;
m2l_other_model.symbol = "M2L Other";
memset(&m2l_copy_model, 0, sizeof(m2l_model));
m2l_copy_model.type = STARPU_HISTORY_BASED;
m2l_copy_model.symbol = "M2L Copy";
memset(&l2p_model, 0, sizeof(l2p_model));
l2p_model.type = STARPU_HISTORY_BASED;
l2p_model.symbol = "L2P";
memset(&l2l_model, 0, sizeof(l2l_model));
l2l_model.type = STARPU_HISTORY_BASED;
l2l_model.symbol = "L2L";
memset(&m2m_model, 0, sizeof(m2m_model));
m2m_model.type = STARPU_HISTORY_BASED;
m2m_model.symbol = "M2M";
// P2M
memset(&p2m_cl, 0, sizeof(p2m_cl));
p2m_cl.where = STARPU_CPU;
p2m_cl.cpu_funcs[0] = p2m_cpu;
p2m_cl.nbuffers = 2;
p2m_cl.modes[0] = STARPU_W;
p2m_cl.modes[1] = STARPU_R;
if(useStarpuPerfModel) p2m_cl.model = &p2m_model;
// P2P
memset(&p2p_cl, 0, sizeof(starpu_codelet) );
p2p_cl.where = STARPU_CPU;
p2p_cl.cpu_funcs[0] = p2p_cpu;
p2p_cl.nbuffers = 2;
p2p_cl.modes[0] = STARPU_RW;
p2p_cl.modes[1] = STARPU_RW;
if( useStarpuPerfModel ) p2p_cl.model = &p2p_model;
// P2P restore
memset(&p2p_restore_cl, 0, sizeof(starpu_codelet) );
p2p_restore_cl.where = STARPU_CPU;
p2p_restore_cl.cpu_funcs[0] = p2p_restore_cpu;
p2p_restore_cl.nbuffers = 2;
p2p_restore_cl.modes[0] = STARPU_RW;
p2p_restore_cl.modes[1] = STARPU_R;
if( useStarpuPerfModel ) p2p_restore_cl.model = &p2p_restore_model;
// L2P
memset(&l2p_cl, 0, sizeof(l2p_cl));
l2p_cl.where = STARPU_CPU;
l2p_cl.cpu_funcs[0] = l2p_cpu;
l2p_cl.nbuffers = 2;
l2p_cl.modes[0] = STARPU_R;
l2p_cl.modes[1] = STARPU_RW;
if(useStarpuPerfModel) l2p_cl.model = &l2p_model;
// M2L
memset(&m2l_cl, 0, sizeof(starpu_codelet) );
m2l_cl.where = STARPU_CPU;
m2l_cl.cpu_funcs[0] = m2l_cpu;
m2l_cl.nbuffers = 2;
m2l_cl.modes[0] = STARPU_RW;
m2l_cl.modes[1] = STARPU_R;
if( useStarpuPerfModel ) m2l_cl.model = &m2l_model;
// M2L other
memset(&m2l_other_cl, 0, sizeof(starpu_codelet) );
m2l_other_cl.where = STARPU_CPU;
m2l_other_cl.cpu_funcs[0] = m2l_other_cpu;
m2l_other_cl.nbuffers = 2;
m2l_other_cl.modes[0] = STARPU_RW;
m2l_other_cl.modes[1] = STARPU_R;
if( useStarpuPerfModel ) m2l_other_cl.model = &m2l_other_model;
// M2L copy
memset(&m2l_copy_cl, 0, sizeof(starpu_codelet) );
m2l_copy_cl.where = STARPU_CPU;
m2l_copy_cl.cpu_funcs[0] = m2l_copy_cpu;
m2l_copy_cl.nbuffers = 2;
m2l_copy_cl.modes[0] = STARPU_RW;
m2l_copy_cl.modes[1] = STARPU_R;
if( useStarpuPerfModel ) m2l_copy_cl.model = &m2l_copy_model;
// M2M & L2L
memset(m2m_cl, 0, sizeof(starpu_codelet) * MaxChild);
memset(l2l_cl, 0, sizeof(starpu_codelet) * MaxChild);
for( int idxChild = 0 ; idxChild < MaxChild ; ++idxChild){
m2m_cl[idxChild].where = STARPU_CPU;
m2m_cl[idxChild].cpu_funcs[0] = m2m_cpu;
m2m_cl[idxChild].nbuffers = idxChild + 2;
m2m_cl[idxChild].modes[0] = STARPU_W;
if( useStarpuPerfModel) m2m_cl[idxChild].model = &m2m_model;
l2l_cl[idxChild].where = STARPU_CPU;
l2l_cl[idxChild].cpu_funcs[0] = l2l_cpu;
l2l_cl[idxChild].nbuffers = idxChild + 2;
l2l_cl[idxChild].modes[0] = STARPU_R;
if( useStarpuPerfModel) l2l_cl[idxChild].model = &l2l_model;
for( int idxMode = 0 ; idxMode <= idxChild ; ++idxMode){
m2m_cl[idxChild].modes[idxMode+1] = STARPU_R;
l2l_cl[idxChild].modes[idxMode+1] = STARPU_RW;
}
}
}
public:
/** The constructor needs the octree and the kernel used for computation
 * @param inTree the octree to work on
 * @param inKernel the kernel to call
 * @param inBlockedSize number of elements per group, must be positive
 * @param inUseStarpuPerfModel true to attach the StarPU performance models to the codelets
 * An assert is launched if one of the pointer arguments is null
 * or if the block size is not positive
 */
FFmmAlgorithmStarpuGroup(OctreeClass* const inTree, KernelClass* const inKernel,
                         const int inBlockedSize = 250, const bool inUseStarpuPerfModel = false)
    : tree(inTree),
      // A null tree must not be dereferenced here, otherwise the assert
      // in the body is never reached
      OctreeHeight(inTree ? inTree->getHeight() : 0),
      BlockSize(inBlockedSize),
      blockedTree(new Group*[OctreeHeight + 1]) ,
      blockedPerLevel(new int[OctreeHeight + 1]),
      kernel(inKernel),
      useStarpuPerfModel(inUseStarpuPerfModel) {
    fassert(tree, "tree cannot be null", __LINE__, __FILE__);
    fassert(kernel, "kernel cannot be null", __LINE__, __FILE__);
    // The number of groups per level is computed by dividing by BlockSize
    fassert(BlockSize > 0, "block size must be positive", __LINE__, __FILE__);

    // No group exists until buildGroups is called
    memset(blockedTree, 0, sizeof(Group*) * (OctreeHeight + 1));
    memset(blockedPerLevel, 0, (OctreeHeight + 1) * sizeof(int));

    FDEBUG(FDebug::Controller << "FFmmAlgorithmStarpuGroup (Block size = " << BlockSize <<")\n");
}
/** Destructor: frees the two per-level tables allocated by the constructor.
 * The groups referenced by blockedTree are not deleted here
 * (NOTE(review): presumably released by another method before destruction - confirm).
 */
virtual ~FFmmAlgorithmStarpuGroup(){
    delete[] blockedPerLevel;
    delete[] blockedTree;
}
/////////////////////////////////////////////////////////////
// Tree to group functions
/////////////////////////////////////////////////////////////
/**
*/
void buildGroups(const int nbThreads = -1){
FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
starpu_conf setup;
starpu_conf_init(&setup);
setup.ncpus = nbThreads;
// Run starpu
starpu_init(&setup);
FDEBUG(FDebug::Controller << "Start starpu runtime, Nb Workers = " << starpu_worker_get_count() << "\n");
// create codelet
initCodelet();
// create kernel for all thread
initKernels();
// Count leaf to allocate and big array
typename OctreeClass::Iterator* iterArray = 0;
{
int leafsNumber = 0;
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
do{
++leafsNumber;
} while(octreeIterator.moveRight());
iterArray = new typename OctreeClass::Iterator[leafsNumber];
fassert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
}
FDEBUG( FDebug::Controller << "\tCopy the tree\n"; );
// Then we start creating the block
{
typename OctreeClass::Iterator octreeIterator(tree);
typename OctreeClass::Iterator avoidGotLeftIterator(octreeIterator);
for(int idxLevel = 1; idxLevel < OctreeHeight; ++idxLevel){
// put every thing in the array
int counterAtLevel = 0;
do{
iterArray[counterAtLevel++] = octreeIterator;
} while(octreeIterator.moveRight());
avoidGotLeftIterator.moveDown();
octreeIterator = avoidGotLeftIterator;
// find the number of groups
const int NbGroups = (counterAtLevel + BlockSize - 1) / BlockSize;
FDEBUG( FDebug::Controller << "\t\tAt level " << idxLevel << " there are " << NbGroups << " groups\n"; );
blockedPerLevel[idxLevel] = NbGroups;
blockedTree[idxLevel] = new Group[NbGroups];
// copy data to group
int copyIndex = 0;
for( int idxGroup = 0 ; idxGroup < NbGroups ; ++idxGroup ){
const int cellsInThisGroup = FMath::Min(BlockSize, counterAtLevel-copyIndex);
blockedTree[idxLevel][idxGroup].nbElements = cellsInThisGroup;
blockedTree[idxLevel][idxGroup].cellArray = new CellClass[cellsInThisGroup];
blockedTree[idxLevel][idxGroup].needOther = new bool[cellsInThisGroup];
// starpu
starpu_vector_data_register(&blockedTree[idxLevel][idxGroup].handleCellArrayUp, 0,
(uintptr_t)blockedTree[idxLevel][idxGroup].cellArray,
blockedTree[idxLevel][idxGroup].nbElements, sizeof(CellClass));
starpu_vector_data_register(&blockedTree[idxLevel][idxGroup].handleCellArrayDown, 0,
(uintptr_t)blockedTree[idxLevel][idxGroup].cellArray,
blockedTree[idxLevel][idxGroup].nbElements, sizeof(CellClass));
for(int idxCell = 0 ; idxCell < cellsInThisGroup ; ++idxCell, ++copyIndex){
blockedTree[idxLevel][idxGroup].cellArray[idxCell].setMortonIndex( iterArray[copyIndex].getCurrentGlobalIndex() );
blockedTree[idxLevel][idxGroup].cellArray[idxCell].setCoordinate( iterArray[copyIndex].getCurrentGlobalCoordinate() );
blockedTree[idxLevel][idxGroup].needOther[idxCell] = false;
blockedTree[idxLevel][idxGroup].cellArray[idxCell].intialCopy( iterArray[copyIndex].getCurrentCell() );
}
blockedTree[idxLevel][idxGroup].beginIndex = blockedTree[idxLevel][idxGroup].cellArray[0].getMortonIndex();
blockedTree[idxLevel][idxGroup].endIndex = blockedTree[idxLevel][idxGroup].cellArray[cellsInThisGroup-1].getMortonIndex();
}
}
// leaf level will have the same groups has head cell level
const int NbGroups = blockedPerLevel[OctreeHeight-1];
blockedPerLevel[OctreeHeight] = NbGroups;
blockedTree[OctreeHeight] = new Group[NbGroups];
int copyIndex = 0;
for( int idxGroup = 0 ; idxGroup < NbGroups ; ++idxGroup ){
blockedTree[OctreeHeight][idxGroup].nbElements = blockedTree[OctreeHeight-1][idxGroup].nbElements;
blockedTree[OctreeHeight][idxGroup].beginIndex = blockedTree[OctreeHeight-1][idxGroup].beginIndex;
blockedTree[OctreeHeight][idxGroup].endIndex = blockedTree[OctreeHeight-1][idxGroup].endIndex;
const int NbLeaves = blockedTree[OctreeHeight][idxGroup].nbElements;
blockedTree[OctreeHeight][idxGroup].leavesArray = new MortonContainer[NbLeaves];
// starpu
starpu_vector_data_register(&blockedTree[OctreeHeight][idxGroup].handleLeafArray, 0,
(uintptr_t)blockedTree[OctreeHeight][idxGroup].leavesArray,
NbLeaves, sizeof(MortonContainer));
starpu_vector_data_register(&blockedTree[OctreeHeight][idxGroup].handleLeafArrayRead, 0,
(uintptr_t)blockedTree[OctreeHeight][idxGroup].leavesArray,
NbLeaves, sizeof(MortonContainer));
for(int idxLeaf = 0 ; idxLeaf < NbLeaves ; ++idxLeaf, ++copyIndex){
blockedTree[OctreeHeight][idxGroup].leavesArray[idxLeaf].container = *iterArray[copyIndex].getCurrentListSrc();
blockedTree[OctreeHeight][idxGroup].leavesArray[idxLeaf].setMortonIndex(iterArray[copyIndex].getCurrentGlobalIndex());
blockedTree[OctreeHeight][idxGroup].leavesArray[idxLeaf].setCoordinate(iterArray[copyIndex].getCurrentGlobalCoordinate());
}
}
}
delete[] iterArray;
iterArray = 0;
FDEBUG( FDebug::Controller << "\tPrepare child parent relations\n"; );
// All block has been created, now find the parent-child relations
{
for(int idxLevel = 1; idxLevel < OctreeHeight - 1; ++idxLevel){
int currentLowerGroup = 0;
FDEBUG( FReal totalDependencies = 0 );
// find the number of groups
const int NbGroups = blockedPerLevel[idxLevel];
for( int idxGroup = 0 ; idxGroup < NbGroups ; ++idxGroup ){
// copy current group
blockedTree[idxLevel][idxGroup].lowerGroups.push( &blockedTree[idxLevel+1][currentLowerGroup] );
{
int startIndex = 0;
while( (blockedTree[idxLevel+1][currentLowerGroup].cellArray[startIndex].getMortonIndex()>>3) != blockedTree[idxLevel][idxGroup].beginIndex){
++startIndex;
}
blockedTree[idxLevel][idxGroup].indexOfStartInLowerGroups = startIndex;
}
if((blockedTree[idxLevel+1][currentLowerGroup].endIndex>>3) <= blockedTree[idxLevel][idxGroup].endIndex){
++currentLowerGroup;
}
// copy until too much on the right