From 957f825549a20973dddec200b771fcbef4eb9eb5 Mon Sep 17 00:00:00 2001
From: Berenger Bramas <bbramas@mpcdf.mpg.de>
Date: Tue, 11 Oct 2016 16:45:20 +0200
Subject: [PATCH] debug for implicit extract

---
 .../FGroupTaskStarpuImplicitAlgorithm.hpp     | 87 ++++++++++++++-----
 1 file changed, 67 insertions(+), 20 deletions(-)

diff --git a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp
index cb57a9a20..3b17b0ffa 100644
--- a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp
+++ b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp
@@ -186,6 +186,7 @@ protected:
 
     starpu_codelet p2p_extract;
     starpu_codelet p2p_insert;
+    starpu_codelet p2p_insert_bis;
 
     struct CellExtractedHandles{
         starpu_data_handle_t all;
@@ -851,6 +852,14 @@ protected:
         p2p_insert.cpu_funcs[0] = ThisClass::InsertP2P;
         p2p_insert.where |= STARPU_CPU;
 
+        memset(&p2p_insert_bis, 0, sizeof(p2p_insert_bis));
+        p2p_insert_bis.nbuffers = 2;
+        p2p_insert_bis.modes[0] = STARPU_R;
+        p2p_insert_bis.modes[1] = STARPU_RW;
+        p2p_insert_bis.name = "p2p_insert_bis";
+        p2p_insert_bis.cpu_funcs[0] = ThisClass::InsertP2PBis;
+        p2p_insert_bis.where |= STARPU_CPU;
+
         memset(&cell_extract_up, 0, sizeof(cell_extract_up));
         cell_extract_up.nbuffers = 3;
         cell_extract_up.modes[0] = STARPU_R;
@@ -893,6 +902,24 @@ protected:
                                 STARPU_VECTOR_GET_NX(buffers[0]));
     }
 
+    static void InsertP2PBis(void *buffers[], void *cl_arg){
+        ParticleExtractedHandles* interactionBufferPtr;
+        const unsigned char* dataPtr;
+        size_t datasize;
+        starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &dataPtr, &datasize);
+
+        memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), dataPtr, datasize);
+
+        ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]),
+                                      STARPU_VECTOR_GET_NX(buffers[1]),
+                                      nullptr);
+
+
+        containers.restoreData(interactionBufferPtr->leavesToExtract,
+                               (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
+                                STARPU_VECTOR_GET_NX(buffers[0]));
+    }
+
     static void ExtractP2P(void *buffers[], void *cl_arg){
         ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
                                       STARPU_VECTOR_GET_NX(buffers[0]),
@@ -1670,25 +1697,33 @@ protected:
                                     duplicateB.sizeOther = tree->getCellGroup(idxLevel,interactionid)->getMultipoleBufferSizeInByte();
                                     if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){
                                         // Reuse block but just to perform the send
-                                        duplicateB.dataSymb = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer());
-                                        duplicateB.dataOther = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer());
-                                    }
-                                    else{
-                                        duplicateB.dataSymb = nullptr;
-                                        duplicateB.dataOther = nullptr;
+                                        duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);// = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer());
+                                        duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);// = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer());
                                     }
+                                    duplicateB.dataSymb = nullptr;
+                                    duplicateB.dataOther = nullptr;
+
                                     registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb);
                                     where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
                                     starpu_variable_data_register(&duplicateB.symb, where,
-                                                                  (uintptr_t)duplicateB.dataSymb, duplicateB.sizeSymb);
+                                                                  (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb);
                                     starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
                                     starpu_variable_data_register(&duplicateB.other, where,
-                                                                  (uintptr_t)duplicateB.dataOther, duplicateB.sizeOther);
+                                                                  (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther);
                                     starpu_mpi_data_register(duplicateB.other, tag++, registeringNode);
 
+                                    const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer());
+                                    size_t size1 = duplicateB.sizeSymb;
+                                    const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer());
+                                    size_t size2 = duplicateB.sizeOther;
+
                                     starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                           &cell_insert_up,
+                                                           &cell_insert_up_bis,
                                                            STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
+                                                           STARPU_VALUE, &ptr1, sizeof(ptr1),
+                                                           STARPU_VALUE, &size1, sizeof(size1),
+                                                           STARPU_VALUE, &ptr2, sizeof(ptr2),
+                                                           STARPU_VALUE, &size2, sizeof(size2),
                                    #ifdef SCALFMM_STARPU_USE_PRIO
                                                            STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
                                    #endif
@@ -2101,14 +2136,14 @@ protected:
 
                         interactionBuffer.size = tree->getParticleGroup(interactionid)->getExtractBufferSize(interactionBuffer.leavesToExtract);
                         // I allocate only if I will use it to extract
-                        if(starpu_mpi_data_get_rank(particleHandles[interactionid].down) == mpi_rank){
+                        if(starpu_mpi_data_get_rank(particleHandles[interactionid].symb) == mpi_rank){
                             interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
                         }
                         else{
                             interactionBuffer.data.reset(nullptr);
                         }
 
-                        int registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].down);
+                        int registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].symb);
                         int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
                         starpu_variable_data_register(&interactionBuffer.symb, where,
                                                       (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
@@ -2129,22 +2164,28 @@ protected:
                         duplicatedParticlesBuffer.emplace_back();
                         DuplicatedParticlesHandle& duplicateB = duplicatedParticlesBuffer.back();
                         duplicateB.size = tree->getParticleGroup(interactionid)->getBufferSizeInByte();
-                        if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == mpi_rank){
+                        if(starpu_mpi_data_get_rank(particleHandles[idxGroup].symb) == mpi_rank){
                             // Reuse block but just to perform the send
-                            duplicateB.data = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer());
+                            duplicateB.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateB.size);// = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer());
                         }
                         else{
                             duplicateB.data = nullptr;
                         }
-                        registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].down);
+
+                        registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].symb);
                         where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
                         starpu_variable_data_register(&duplicateB.symb, where,
                                                       (uintptr_t)duplicateB.data, duplicateB.size);
                         starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
 
+                        const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer());
+                        size_t sizeData = duplicateB.size;
+
                         starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_insert,
+                                               &p2p_insert_bis,
                                                STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
+                                               STARPU_VALUE, &dataPtr, sizeof(dataPtr),
+                                               STARPU_VALUE, &sizeData, sizeof(sizeData),
                        #ifdef SCALFMM_STARPU_USE_PRIO
                                                STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
@@ -2238,20 +2279,26 @@ protected:
                         duplicateA.size = tree->getParticleGroup(idxGroup)->getBufferSizeInByte();
                         if(starpu_mpi_data_get_rank(particleHandles[interactionid].down) == mpi_rank){
                             // Reuse block but just to perform the send
-                            duplicateA.data = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer());
-                        }
-                        else{
-                            duplicateA.data = nullptr;
+                            duplicateA.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateA.size);// = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer());
                         }
+						else{
+	                        duplicateA.data = nullptr;
+						}
+
                         registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].down);
                         where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
                         starpu_variable_data_register(&duplicateA.symb, where,
                                                       (uintptr_t)duplicateA.data, duplicateA.size);
                         starpu_mpi_data_register(duplicateA.symb, tag++, registeringNode);
 
+                        const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer());
+                        size_t sizeData = duplicateA.size;
+
                         starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_insert,
+                                               &p2p_insert_bis,
                                                STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
+                                               STARPU_VALUE, &dataPtr, sizeof(dataPtr),
+                                               STARPU_VALUE, &sizeData, sizeof(sizeData),
                        #ifdef SCALFMM_STARPU_USE_PRIO
                                                STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
-- 
GitLab