solverstack / ScalFMM / Commits / b095fe94

Commit b095fe94, authored 10 years ago by BRAMAS Berenger
Parent: 76859146

    Update debug and clean MPI core algo

No related branches, tags or merge requests found.

Showing 2 changed files, with 174 additions and 971 deletions:
  Src/Core/FFmmAlgorithmThreadProc.hpp   161 additions, 958 deletions
  Src/Utils/FMpi.hpp                      13 additions,  13 deletions
Src/Core/FFmmAlgorithmThreadProc.hpp  (+161 −958)

@@ -99,6 +99,25 @@ class FFmmAlgorithmThreadProc : public FAbstractAlgorithm {
         return workingIntervalsPerLevel[OctreeHeight * proc + level];
     }
 
+    const Interval& getWorkingInterval(int level, int proc) const {
+        return workingIntervalsPerLevel[OctreeHeight * proc + level];
+    }
+
+    /** To know if a proc has work at a given level (if it hold cells and was responsible of them) */
+    bool procHasWorkAtLevel(const int idxLevel, const int idxProc) const {
+        return getWorkingInterval(idxLevel, idxProc).leftIndex <= getWorkingInterval(idxLevel, idxProc).rightIndex;
+    }
+
+    /** Return true if the idxProc left cell at idxLevel+1 has the same parent as us for our right cell */
+    bool procCoversMyRightBorderCell(const int idxLevel, const int idxProc) const {
+        return (getWorkingInterval((idxLevel+1), idProcess).rightIndex >> 3) == (getWorkingInterval((idxLevel+1), idxProc).leftIndex >> 3);
+    }
+
+    /** Return true if the idxProc right cell at idxLevel+1 has the same parent as us for our left cell */
+    bool procCoversMyLeftBorderCell(const int idxLevel, const int idxProc) const {
+        return (getWorkingInterval((idxLevel+1), idxProc).rightIndex >> 3) == (getWorkingInterval((idxLevel+1), idProcess).leftIndex >> 3);
+    }
+
 public:
     /** Get current proc interval at level */
     Interval& getWorkingInterval(int level){
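Aside on the added predicates: they all reduce to comparisons on Morton indices — an interval is non-empty when leftIndex <= rightIndex, and two cells at level idxLevel+1 share a parent exactly when their Morton indices agree after shifting away the 3 low bits that encode the child octant. A minimal stand-alone sketch of that parent test follows; the MortonIndex alias and the example indices are assumptions chosen only to make it runnable, not values taken from the repository.

    #include <cstdint>
    #include <cassert>

    using MortonIndex = std::int64_t;   // assumption: a 64-bit Morton index

    // Two cells of the same level share a parent iff their Morton indices
    // are equal once the 3 low bits (the child octant) are dropped.
    bool sameParent(const MortonIndex a, const MortonIndex b){
        return (a >> 3) == (b >> 3);
    }

    int main(){
        // Children 0..7 of parent 5 have indices 40..47 (5*8 + octant).
        assert(sameParent(40, 47));     // both are children of parent 5
        assert(!sameParent(47, 48));    // 48 already belongs to parent 6
        return 0;
    }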
@@ -135,7 +154,6 @@ public:
     /** Default destructor */
     virtual ~FFmmAlgorithmThreadProc(){
         for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
-            printf("Delete %d\n", idxThread);
             delete this->kernels[idxThread];
         }
         delete [] this->kernels;

@@ -230,7 +248,7 @@ public:
         if(operationsToProceed & FFmmM2M) upwardPass();
-        if(operationsToProceed & FFmmM2L) transferPassOld();
+        if(operationsToProceed & FFmmM2L) transferPass();
         if(operationsToProceed & FFmmL2L) downardPass();

@@ -266,10 +284,10 @@ private:
         } while(octreeIterator.moveRight());
         FLOG(computationCounter.tic());
 #pragma omp parallel
         {
             KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
 #pragma omp for nowait
             for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
                 myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
             }
@@ -283,199 +301,6 @@ private:
     // Upward
     /////////////////////////////////////////////////////////////////////////////
 
-    /** M2M */
-    void upwardPassOld(){
-        const int MaxSizePerCell = CellClass::GetSize();
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-        FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
-        FLOG(FTic counterTime);
-        FLOG(FTic computationCounter);
-        FLOG(FTic prepareCounter);
-        FLOG(FTic waitCounter);
-        // Start from leal level - 1
-        typename OctreeClass::Iterator octreeIterator(tree);
-        octreeIterator.gotoBottomLeft();
-        octreeIterator.moveUp();
-        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-        // This variable is the proc responsible
-        // of the shared cells
-        int currentProcIdToSendTo = idProcess;
-        // There are a maximum of 8-1 sends and 8-1 receptions
-        MPI_Request requests[14];
-        MPI_Status status[14];
-        // Maximum data per message is:
-        FMpiBufferWriter sendBuffer(comm.getComm(), 7*MaxSizePerCell);
-        const int recvBufferOffset = (8 * MaxSizePerCell + 1);
-        FMpiBufferReader recvBuffer(comm.getComm(), nbProcess*recvBufferOffset);
-        CellClass recvBufferCells[8];
-        int firstProcThatSend = idProcess + 1;
-        // for each levels
-        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
-            // No more work for me
-            if(idProcess != 0 && getWorkingInterval((idxLevel+1), idProcess).rightIndex <= getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex){
-                break;
-            }
-            // copy cells to work with
-            int numberOfCells = 0;
-            // for each cells
-            do{
-                iterArray[numberOfCells++] = octreeIterator;
-            } while(octreeIterator.moveRight());
-            avoidGotoLeftIterator.moveUp();
-            octreeIterator = avoidGotoLeftIterator;
-            // We may need to send something
-            int iterRequests = 0;
-            int cellsToSend = -1;
-            while(iterArray[cellsToSend+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
-                ++cellsToSend;
-            }
-            FTRACE( FTrace::FRegion regionTrace("Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
-            FLOG(prepareCounter.tic());
-            if(idProcess != 0 && (getWorkingInterval((idxLevel+1), idProcess).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex >> 3)){
-                char state = 0;
-                sendBuffer.write(state);
-                const CellClass* const* const child = iterArray[cellsToSend].getCurrentChild();
-                for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
-                    if(child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).leftIndex <= child[idxChild]->getMortonIndex() ){
-                        child[idxChild]->serializeUp(sendBuffer);
-                        state = char(state | (0x1 << idxChild));
-                    }
-                }
-                sendBuffer.writeAt(0, state);
-                while( currentProcIdToSendTo && iterArray[cellsToSend].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel, currentProcIdToSendTo - 1).rightIndex){
-                    --currentProcIdToSendTo;
-                }
-                MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, currentProcIdToSendTo, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
-            }
-            // We may need to receive something
-            bool hasToReceive = false;
-            int endProcThatSend = firstProcThatSend;
-            if(idProcess != nbProcess - 1){ // if I'm the last one (idProcess == nbProcess-1), I shall not receive anything in a M2M
-                while(firstProcThatSend < nbProcess
-                        && (getWorkingInterval((idxLevel+1), firstProcThatSend).rightIndex) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex)){
-                    // Second condition :: while firstProcThatSend rightIndex morton index is < to myself rightIndex interval
-                    ++firstProcThatSend;
-                }
-                if(firstProcThatSend < nbProcess
-                        && (getWorkingInterval((idxLevel+1), firstProcThatSend).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex >> 3) ){
-                    endProcThatSend = firstProcThatSend;
-                    while( endProcThatSend < nbProcess
-                            && (getWorkingInterval((idxLevel+1), endProcThatSend).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex >> 3)){
-                        ++endProcThatSend;
-                    }
-                    if(firstProcThatSend != endProcThatSend){
-                        hasToReceive = true;
-                        for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
-                            MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_PACKED, idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
-                        }
-                    }
-                }
-            }
-            FLOG(prepareCounter.tac());
-            FTRACE( regionTrace.end() );
-            // Compute
-            const int endIndex = (hasToReceive ? numberOfCells - 1 : numberOfCells);
-            FLOG(computationCounter.tic());
-#pragma omp parallel
-            {
-                KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
-#pragma omp for nowait
-                for( int idxCell = cellsToSend + 1 ; idxCell < endIndex ; ++idxCell){
-                    myThreadkernels.M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
-                    // for(int k=0 ; k< 8 ; ++k){
-                    //   if(iterArray[idxCell].getCurrentChild()[k]){
-                    //     FILE * fd = fopen("ResM2MNearOld","a+");
-                    //     fprintf(fd,"%lld\t% lld\t %d\n",iterArray[idxCell].getCurrentCell()->getMortonIndex(),iterArray[idxCell].getCurrentChild()[k]->getMortonIndex(),idxLevel);
-                    //     fclose(fd);
-                    //   }
-                    //}
-                }
-            }
-            FLOG(computationCounter.tac());
-            // Are we sending or waiting anything?
-            if(iterRequests){
-                FLOG(waitCounter.tic());
-                MPI_Waitall( iterRequests, requests, status);
-                FLOG(waitCounter.tac());
-                // we were receiving data
-                if( hasToReceive ){
-                    CellClass* currentChild[8];
-                    memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
-                    // retreive data and merge my child and the child from others
-                    for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc){
-                        recvBuffer.seek(idxProc * recvBufferOffset);
-                        int state = int(recvBuffer.getValue<char>());
-                        int position = 0;
-                        while( state && position < 8){
-                            while(!(state & 0x1)){
-                                state >>= 1;
-                                ++position;
-                            }
-                            FAssertLF(!currentChild[position], "Already has a cell here");
-                            recvBufferCells[position].deserializeUp(recvBuffer);
-                            currentChild[position] = (CellClass*) &recvBufferCells[position];
-                            state >>= 1;
-                            ++position;
-                        }
-                    }
-                    // Finally compute
-                    FLOG(computationCounter.tic());
-                    (*kernels[0]).M2M( iterArray[numberOfCells - 1].getCurrentCell() , currentChild, idxLevel);
-                    FLOG(computationCounter.tac());
-                    firstProcThatSend = endProcThatSend - 1;
-                }
-            }
-            sendBuffer.reset();
-            recvBuffer.seek(0);
-        }
-        FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << " s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.cumulated() << " s\n" );
-    }
-
     /** M2M */
     void upwardPass(){
         const int MaxSizePerCell = CellClass::GetSize();
@@ -493,16 +318,16 @@ private:
         typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
 
         // The proc to send the shared cells to (will move to left)
-        int currentProcIdToSendTo = idProcess;
-        // There are a maximum of 8-1 sends and 8-1 receptions
-        MPI_Request requests[14];
-        MPI_Status status[14];
+        int currentProcIdToSendTo = (idProcess - 1);
+        // There are a maximum of 1 sends and 8-1 receptions
+        MPI_Request requests[8];
+        MPI_Status status[8];
         // Maximum data per message is:
-        FMpiBufferWriter sendBuffer(comm.getComm(), 7*MaxSizePerCell);
-        const int recvBufferOffset = (8 * MaxSizePerCell + 1);
-        FMpiBufferReader recvBuffer(comm.getComm(), nbProcess*recvBufferOffset);
+        FMpiBufferWriter sendBuffer(comm.getComm(), 7*MaxSizePerCell + 1);
+        const int recvBufferOffset = (7 * MaxSizePerCell + 1);
+        FMpiBufferReader recvBuffer(comm.getComm(), 7*recvBufferOffset);
         CellClass recvBufferCells[8];
 
         // The first proc that send to me a cell
@@ -511,9 +336,9 @@ private:
         for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
             // Does my cells are covered by my neighbors working interval
-            const bool noMoreWorkForMe = (idProcess != 0 && getWorkingInterval((idxLevel+1), idProcess).rightIndex <= getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex);
+            const bool noMoreWorkForMe = (idProcess != 0 && !procHasWorkAtLevel(idxLevel+1, idProcess));
             if(noMoreWorkForMe){
-                FAssertLF(getWorkingInterval(idxLevel, idProcess).rightIndex < getWorkingInterval(idxLevel, idProcess).leftIndex);
+                FAssertLF(procHasWorkAtLevel(idxLevel, idProcess) == false);
                 break;
             }
@@ -527,34 +352,27 @@ private:
             int iterMpiRequests = 0; // The iterator for send/recv requests
-            int nbCellsToSend = 0;   // The number of cells to send
+            int nbCellsToSkip = 0;   // The number of cells to send
             // Skip all the cells that are out of my working interval
-            while(iterArray[nbCellsToSend].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
-                ++nbCellsToSend;
+            while(nbCellsToSkip < totalNbCellsAtLevel && iterArray[nbCellsToSkip].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
+                ++nbCellsToSkip;
             }
 
             int nbCellsForThreads = totalNbCellsAtLevel; // totalNbCellsAtLevel or totalNbCellsAtLevel-1
             bool hasToReceive = false;
-            if(idProcess != nbProcess-1){
+            if(idProcess != nbProcess-1 && procHasWorkAtLevel(idxLevel , idProcess)){
                 // Find the first proc that may send to me
-                while(firstProcThatSend < nbProcess
-                        && (getWorkingInterval((idxLevel+1), firstProcThatSend).rightIndex) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex)){
+                while(firstProcThatSend < nbProcess && !procHasWorkAtLevel(idxLevel+1, firstProcThatSend)){
                     firstProcThatSend += 1;
                 }
                 // Do we have to receive?
-                if(firstProcThatSend < nbProcess
-                        && (getWorkingInterval((idxLevel+1), firstProcThatSend).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1) , idProcess).rightIndex >> 3) ){
+                if(firstProcThatSend < nbProcess && procHasWorkAtLevel(idxLevel+1, firstProcThatSend) && procCoversMyRightBorderCell(idxLevel, firstProcThatSend)){
                     hasToReceive = true;
                     // Threads do not compute the last cell, we will do it once data are received
                     nbCellsForThreads -= 1;
                 }
             }
-            // TODO REMOVE
-            //if((iterArray[0].getCurrentGlobalIndex() == getWorkingInterval(idxLevel, idProcess-1).rightIndex) && numberOfCells==1){
-            //    nbCellsToSend++;
-            //}
-            // END REMOVE
 
             FLOG(parallelCounter.tic());
 #pragma omp parallel
             {
@@ -564,95 +382,92 @@ private:
 #pragma omp single nowait
                 {
                     FLOG(singleCounter.tic());
-                    const bool firstCellAreOutOfMyInterval = idProcess != 0 && (getWorkingInterval((idxLevel+1), idProcess).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex >> 3);
-                    if(firstCellAreOutOfMyInterval){
-                        FAssertLF(nbCellsToSend != 0);
+                    if(idProcess != 0){
+                        //Post Send
+                        while(currentProcIdToSendTo && !procHasWorkAtLevel(idxLevel, currentProcIdToSendTo) ){
+                            --currentProcIdToSendTo;
+                        }
+                        if(procHasWorkAtLevel(idxLevel, currentProcIdToSendTo) && procCoversMyLeftBorderCell(idxLevel, currentProcIdToSendTo)){
+                            FAssertLF(nbCellsToSkip != 0);
                         char packageFlags = 0;
                         sendBuffer.write(packageFlags);
                         // Only the cell the most on the right out of my working interval should be taken in
-                        // consideration (at pos nbCellsToSend-1) other (x < nbCellsToSend-1) have already been sent
-                        const CellClass* const* const child = iterArray[nbCellsToSend-1].getCurrentChild();
+                        // consideration (at pos nbCellsToSkip-1) other (x < nbCellsToSkip-1) have already been sent
+                        const CellClass* const* const child = iterArray[nbCellsToSkip-1].getCurrentChild();
                         for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
                             // Check if child exists and it was part of my working interval
                             if( child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).leftIndex <= child[idxChild]->getMortonIndex() ){
                                 // Add the cell to the buffer
                                 child[idxChild]->serializeUp(sendBuffer);
                                 packageFlags = char(packageFlags | (0x1 << idxChild));
                             }
                         }
                         // Add the flag as first value
                         sendBuffer.writeAt(0, packageFlags);
-                        // Find the proc to send to
-                        while( currentProcIdToSendTo && iterArray[nbCellsToSend-1].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel, currentProcIdToSendTo - 1).rightIndex){
-                            --currentProcIdToSendTo;
-                        }
                         // Post the message
-                        MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, currentProcIdToSendTo, FMpi::TagFmmM2M, comm.getComm(), &requests[iterMpiRequests++]);
+                        MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, currentProcIdToSendTo, FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]);
+                        }
                     }
 
                     //Post receive, Datas needed in several parts of the section
                     int nbProcThatSendToMe = 0;
                     if(hasToReceive){ // if I'm the last one (idProcess == nbProcess-1), I shall not receive anything in a M2M
                         //Test : if the firstProcThatSend father minimal value in interval is lesser than mine
-                        int endProcThatSend = firstProcThatSend;
+                        int idProcSource = firstProcThatSend;
                         // Find the last proc that should send to me
-                        while( endProcThatSend < nbProcess &&
-                                (getWorkingInterval((idxLevel+1), endProcThatSend).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex >> 3)){
-                            ++endProcThatSend;
-                        }
-                        // If there is at least one proc that send me something
-                        if(firstProcThatSend != endProcThatSend){
-                            nbProcThatSendToMe = endProcThatSend - firstProcThatSend;
-                            FAssertLF(nbProcThatSendToMe <= 7);
-                            // Post a recv for each of them
-                            for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
-                                MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_PACKED, idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterMpiRequests++]);
+                        while( idProcSource < nbProcess
+                                && (!procHasWorkAtLevel(idxLevel+1, idProcSource) || procCoversMyRightBorderCell(idxLevel, idProcSource)) ){
+                            if(procHasWorkAtLevel(idxLevel+1, idProcSource) && procCoversMyRightBorderCell(idxLevel, idProcSource)){
+                                MPI_Irecv(&recvBuffer.data()[nbProcThatSendToMe * recvBufferOffset], recvBufferOffset, MPI_PACKED, idProcSource, FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]);
+                                nbProcThatSendToMe += 1;
+                                FAssertLF(nbProcThatSendToMe <= 7);
                             }
+                            ++idProcSource;
                         }
                     }
 
                     //Wait For the comms, and do the work
                     // Are we sending or waiting anything?
                     if(iterMpiRequests){
+                        FAssertLF(iterMpiRequests <= 8);
                         MPI_Waitall(iterMpiRequests, requests, status);
-                        // we were receiving data
                     }
                     if(hasToReceive){
+                        FAssertLF(iterMpiRequests != 0);
                         CellClass* currentChild[8];
                         memcpy(currentChild, iterArray[totalNbCellsAtLevel - 1].getCurrentChild(), 8 * sizeof(CellClass*));
                         // Retreive data and merge my child and the child from others
-                        for(int idxProc = firstProcThatSend ; idxProc < firstProcThatSend + nbProcThatSendToMe ; ++idxProc){
+                        for(int idxProc = 0 ; idxProc < nbProcThatSendToMe ; ++idxProc){
                             recvBuffer.seek(idxProc * recvBufferOffset);
                             int state = int(recvBuffer.getValue<char>());
                             int position = 0;
                             while( state && position < 8){
                                 while(!(state & 0x1)){
                                     state >>= 1;
                                     ++position;
                                 }
                                 FAssertLF(!currentChild[position], "Already has a cell here");
                                 recvBufferCells[position].deserializeUp(recvBuffer);
                                 currentChild[position] = (CellClass*) &recvBufferCells[position];
                                 state >>= 1;
                                 ++position;
                             }
                         }
                         // Finally compute
                         (*kernels[threadNumber]).M2M( iterArray[totalNbCellsAtLevel - 1].getCurrentCell() , currentChild, idxLevel);
                         firstProcThatSend += nbProcThatSendToMe - 1;
                     }
+                    // Reset buffer
                     sendBuffer.reset();
                     recvBuffer.seek(0);
                     FLOG(singleCounter.tac());
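For readers new to this send/receive handshake: the sender packs at most seven children behind a one-byte bitmask (packageFlags), and the receiver walks that mask bit by bit to know which recvBufferCells slots were filled. Below is a small self-contained sketch of the same pack/unpack pattern; it uses plain arrays instead of FMpiBufferWriter/FMpiBufferReader, an assumption made only to keep the example runnable on its own.

    #include <cstdio>

    int main(){
        // Pack: mark which of the 8 child slots are present (example: slots 0, 3 and 6).
        bool childPresent[8] = {true, false, false, true, false, false, true, false};
        char packageFlags = 0;
        for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
            if(childPresent[idxChild]){
                packageFlags = char(packageFlags | (0x1 << idxChild));
            }
        }

        // Unpack: walk the mask exactly like the M2M receive loop does.
        int state = int(packageFlags);
        int position = 0;
        while(state && position < 8){
            while(!(state & 0x1)){ state >>= 1; ++position; }
            std::printf("child %d was serialized\n", position);
            state >>= 1;
            ++position;
        }
        return 0;
    }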
@@ -660,7 +475,7 @@ private:
                 // All threads proceed the M2M
 #pragma omp for nowait
-                for(int idxCell = nbCellsToSend ; idxCell < nbCellsForThreads ; ++idxCell){
+                for(int idxCell = nbCellsToSkip ; idxCell < nbCellsForThreads ; ++idxCell){
                     myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
                 }
             } //End of parallel section
@@ -680,7 +495,7 @@ private:
     /////////////////////////////////////////////////////////////////////////////
 
-    void transferPassOld(){
+    void transferPass(){
         const int MaxSizePerCell = CellClass::GetSize();
 
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
@@ -718,11 +533,9 @@ private:
         typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
 
         // for each levels
         for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-            if(idProcess != 0
-                    && getWorkingInterval(idxLevel, idProcess).rightIndex <= getWorkingInterval(idxLevel, idProcess - 1).rightIndex){
+            if(!procHasWorkAtLevel(idxLevel, idProcess)){
                 avoidGotoLeftIterator.moveDown();
                 octreeIterator = avoidGotoLeftIterator;
                 continue;
             }
@@ -742,7 +555,6 @@ private:
             leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells);
             // Which cell potentialy needs other data and in the same time
             // are potentialy needed by other
             MortonIndex neighborsIndexes[189];

@@ -780,9 +592,7 @@ private:
                 if(needOther){
                     leafsNeedOther[idxLevel]->set(idxCell, true);
                 }
             }
         }
         FLOG(prepareCounter.tac());
@@ -801,7 +611,6 @@ private:
         FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_INT, comm.getComm()),  __LINE__ );
         FLOG(gatherCounter.tac());
 
         //////////////////////////////////////////////////////////////////
         // Send and receive for real
         //////////////////////////////////////////////////////////////////
@@ -863,12 +672,9 @@ private:
             // Now we can compute all the data
             // for each levels
             for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-                if(idProcess != 0
-                        && getWorkingInterval(idxLevel, idProcess).rightIndex <= getWorkingInterval(idxLevel, idProcess - 1).rightIndex){
+                if(!procHasWorkAtLevel(idxLevel, idProcess)){
                     avoidGotoLeftIterator.moveDown();
                     octreeIterator = avoidGotoLeftIterator;
                     continue;
                 }
@@ -918,12 +724,9 @@ private:
         // compute the second time
         // for each levels
         for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-            if(idProcess != 0
-                    && getWorkingInterval(idxLevel, idProcess).rightIndex <= getWorkingInterval(idxLevel, idProcess - 1).rightIndex){
+            if(!procHasWorkAtLevel(idxLevel, idProcess)){
                 avoidGotoLeftIterator.moveDown();
                 octreeIterator = avoidGotoLeftIterator;
                 continue;
             }
@@ -995,10 +798,6 @@ private:
                         }
                     }
                 }
-                // FILE * fd = fopen("res0","a+");
-                // fprintf(fd,"Q %d\t %d\t %d\t %d\n", idProcess,idxLevel,idxCell,counter);
-                // fclose(fd);
                 // need to compute
                 if(counter){
                     myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
@@ -1037,410 +836,11 @@ private:
     }
 
-    /** M2L */
-    void transferPass(){
-        const int MaxSizePerCell = CellClass::GetSize();
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-        FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
-        FLOG(FTic counterTime);
-        FLOG(FTic computationCounter);
-        FLOG(FTic singleCounter);
-        FLOG(FTic gatherCounter);
-        FLOG(FTic m2lSelf);
-        FLOG(FTic m2lFar);
-        FLOG(FTic sendCounter);
-
-        //////////////////////////////////////////////////////////////////
-        // First know what to send to who
-        //////////////////////////////////////////////////////////////////
-
-        // pointer to send
-        FVector<typename OctreeClass::Iterator> toSend[nbProcess * OctreeHeight];
-        // index
-        int*const indexToSend = new int[nbProcess * OctreeHeight];
-        memset(indexToSend, 0, sizeof(int) * nbProcess * OctreeHeight);
-        // To know which one has need someone
-        FBoolArray** const leafsNeedOther = new FBoolArray*[OctreeHeight];
-        memset(leafsNeedOther, 0, sizeof(FBoolArray*) * OctreeHeight);
-
-        //Variables needed in multiple omp sections
-        int iterRequest;
-        MPI_Request * requests;
-        MPI_Status * status;
-        int* globalReceiveMap;
-        FMpiBufferWriter** sendBuffer;
-        FMpiBufferReader** recvBuffer;
-
-        // #pragma omp parallel /*default(none)*/ shared(m2lSelf,globalReceiveMap,requests,sendBuffer,recvBuffer,iterRequest,status,singleCounter,toSend,gatherCounter,sendCounter)
-#pragma omp parallel /*default(none)*/ shared(globalReceiveMap,requests,sendBuffer,recvBuffer,iterRequest,status,toSend) FLOG(shared(m2lSelf,singleCounter,gatherCounter,sendCounter))
-        {
-            int threadIdx = omp_get_thread_num();
-            typename OctreeClass::Iterator octreeIterator(tree);
-            octreeIterator.gotoBottomLeft();
-            typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-
-#pragma omp single //nowait
-            {
-                FTRACE( FTrace::FRegion regionTrace("Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
-                FLOG(singleCounter.tic());
-                typename OctreeClass::Iterator octreeIteratorComm(tree);
-                octreeIteratorComm.moveDown();
-                typename OctreeClass::Iterator avoidGotoLeftIteratorComm(octreeIteratorComm);
-
-                // To know if a leaf has been already sent to a proc
-                bool*const alreadySent = new bool[nbProcess];
-                memset(alreadySent, 0, sizeof(bool) * nbProcess);
-
-                // for each levels
-                for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-                    //If I'm not the process 0 AND my rightIndex idx is lesser than left one's
-                    if(idProcess != 0 && getWorkingInterval(idxLevel, idProcess).rightIndex <= getWorkingInterval(idxLevel, idProcess - 1).rightIndex){
-                        avoidGotoLeftIteratorComm.moveDown();
-                        octreeIteratorComm = avoidGotoLeftIteratorComm;
-                        continue;
-                    }
-                    int numberOfCells = 0;
-                    while(octreeIteratorComm.getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
-                        octreeIteratorComm.moveRight();
-                    }
-                    // for each cells
-                    do{
-                        iterArrayComm[numberOfCells] = octreeIteratorComm;
-                        ++numberOfCells;
-                    } while(octreeIteratorComm.moveRight());
-                    avoidGotoLeftIteratorComm.moveDown();
-                    octreeIteratorComm = avoidGotoLeftIteratorComm;
-                    //printf("Single :: I'm %d, numberOfCells : %d my iterArrayComm %p \n",omp_get_thread_num(),numberOfCells,iterArrayComm);
-                    leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells);
-
-                    // Which cell potentialy needs other data and in the same time
-                    // are potentialy needed by other
-                    MortonIndex neighborsIndexes[189];
-                    for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                        // Find the M2L neigbors of a cell
-                        const int counter = iterArrayComm[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndexes);
-                        memset(alreadySent, false, sizeof(bool) * nbProcess);
-                        bool needOther = false;
-                        // Test each negibors to know which one do not belong to us
-                        for(int idxNeigh = 0 ; idxNeigh < counter ; ++idxNeigh){
-                            if(neighborsIndexes[idxNeigh] < getWorkingInterval(idxLevel, idProcess).leftIndex
-                                    || (getWorkingInterval(idxLevel, idProcess).rightIndex) < neighborsIndexes[idxNeigh]){
-                                int procToReceive = idProcess;
-                                while( 0 != procToReceive && neighborsIndexes[idxNeigh] < getWorkingInterval(idxLevel, procToReceive).leftIndex ){
-                                    --procToReceive;
-                                }
-                                while( procToReceive != nbProcess - 1 && (getWorkingInterval(idxLevel, procToReceive).rightIndex) < neighborsIndexes[idxNeigh]){
-                                    ++procToReceive;
-                                }
-                                // Maybe already sent to that proc?
-                                if( !alreadySent[procToReceive]
-                                        && getWorkingInterval(idxLevel, procToReceive).leftIndex <= neighborsIndexes[idxNeigh]
-                                        && neighborsIndexes[idxNeigh] <= getWorkingInterval(idxLevel, procToReceive).rightIndex){
-                                    alreadySent[procToReceive] = true;
-                                    needOther = true;
-                                    toSend[idxLevel * nbProcess + procToReceive].push(iterArray[idxCell]);
-                                    ++indexToSend[idxLevel * nbProcess + procToReceive];
-                                }
-                            }
-                        }
-                        if(needOther){
-                            leafsNeedOther[idxLevel]->set(idxCell, true);
-                        }
-                    }
-                }
-                delete[] alreadySent;
-
-                //////////////////////////////////////////////////////////////////
-                // Gather this information
-                //////////////////////////////////////////////////////////////////
-
-                FLOG(gatherCounter.tic());
-                // All process say to each others
-                // what the will send to who
-                globalReceiveMap = new int[nbProcess * nbProcess * OctreeHeight];
-                memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess * OctreeHeight);
-                FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_INT, comm.getComm()),  __LINE__ );
-                FLOG(gatherCounter.tac());
-
-                // if(idProcess == 0)
-                //   {
-                //     FILE * fd = fopen("M2LMap","a+");
-                //     for(int k =0; k < nbProcess*nbProcess*OctreeHeight ; ++k){
-                //       fprintf(fd,"%d\n",globalReceiveMap[k]);
-                //     }
-                //     fclose(fd);
-                //   }
-
-                //////////////////////////////////////////////////////////////////
-                // Send and receive for real
-                //////////////////////////////////////////////////////////////////
-
-                FLOG(sendCounter.tic());
-                // Then they can send and receive (because they know what they will receive)
-                // To send in asynchrone way
-                requests = new MPI_Request[2 * nbProcess * OctreeHeight];
-                status = new MPI_Status[2 * nbProcess * OctreeHeight];
-                iterRequest = 0;
-
-                const int SizeOfCellToSend = int(sizeof(MortonIndex) + sizeof(int) + MaxSizePerCell);
-
-                sendBuffer = new FMpiBufferWriter*[nbProcess * OctreeHeight];
-                memset(sendBuffer, 0, sizeof(FMpiBufferWriter*) * nbProcess * OctreeHeight);
-
-                recvBuffer = new FMpiBufferReader*[nbProcess * OctreeHeight];
-                memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess * OctreeHeight);
-
-                for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-                    for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                        const int toSendAtProcAtLevel = indexToSend[idxLevel * nbProcess + idxProc];
-                        if(toSendAtProcAtLevel != 0){
-                            sendBuffer[idxLevel * nbProcess + idxProc] = new FMpiBufferWriter(comm.getComm(), toSendAtProcAtLevel * SizeOfCellToSend);
-                            for(int idxLeaf = 0 ; idxLeaf < toSendAtProcAtLevel ; ++idxLeaf){
-                                const MortonIndex cellIndex = toSend[idxLevel * nbProcess + idxProc][idxLeaf].getCurrentGlobalIndex();
-                                sendBuffer[idxLevel * nbProcess + idxProc]->write(cellIndex);
-                                toSend[idxLevel * nbProcess + idxProc][idxLeaf].getCurrentCell()->serializeUp(*sendBuffer[idxLevel * nbProcess + idxProc]);
-                            }
-                            FMpi::MpiAssert( MPI_Isend( sendBuffer[idxLevel * nbProcess + idxProc]->data(), sendBuffer[idxLevel * nbProcess + idxProc]->getSize(), MPI_PACKED, idxProc, FMpi::TagLast + idxLevel, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
-                        }
-                        const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
-                        if(toReceiveFromProcAtLevel){
-                            recvBuffer[idxLevel * nbProcess + idxProc] = new FMpiBufferReader(comm.getComm(), toReceiveFromProcAtLevel * SizeOfCellToSend);
-                            FMpi::MpiAssert( MPI_Irecv(recvBuffer[idxLevel * nbProcess + idxProc]->data(), recvBuffer[idxLevel * nbProcess + idxProc]->getCapacity(), MPI_PACKED, idxProc, FMpi::TagLast + idxLevel, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
-                        }
-                    }
-                }
-                FLOG(sendCounter.tac());
-
-                {
-                    //////////////////////////////////////////////////////////////////
-                    // Wait received data and compute
-                    //////////////////////////////////////////////////////////////////
-                    // Wait to receive every things (and send every things)
-                    MPI_Waitall(iterRequest, requests, status);
-                }
-                FLOG(singleCounter.tac());
-            } //End of Single section
-
-            //////////////////////////////////////////////////////////////////
-            // Do M2L SELF
-            //////////////////////////////////////////////////////////////////
-
-            FLOG(m2lSelf.tic());
-            {
-                FTRACE( FTrace::FRegion regionTrace("Compute", __FUNCTION__ , __FILE__ , __LINE__) );
-                // Now we can compute all the data
-                // for each levels
-                for(int idxLevel = OctreeHeight - 1 ; idxLevel > 1 ; --idxLevel ){
-                    if(idProcess != 0 && getWorkingInterval(idxLevel, idProcess).rightIndex <= getWorkingInterval(idxLevel, idProcess - 1).rightIndex){
-                        avoidGotoLeftIterator.moveUp();
-                        octreeIterator = avoidGotoLeftIterator;
-                        continue;
-                    }
-                    int numberOfCells = 0;
-                    // for each cells
-                    //TODO see if it can be moved somewhere else
-#pragma omp single copyprivate(numberOfCells,octreeIterator,avoidGotoLeftIterator)
-                    {
-                        while(octreeIterator.getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
-                            octreeIterator.moveRight();
-                        }
-                        do{
-                            iterArray[numberOfCells] = octreeIterator;
-                            ++numberOfCells;
-                        } while(octreeIterator.moveRight());
-                        avoidGotoLeftIterator.moveUp();
-                        octreeIterator = avoidGotoLeftIterator;
-                    } //End of single section
-                    FLOG(computationCounter.tic());
-                    {
-                        KernelClass * const myThreadkernels = kernels[threadIdx];
-                        const CellClass* neighbors[343];
-                        //Change kind
-#pragma omp for schedule(static,1) //nowait
-                        for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                            const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel);
-                            if(counter){
-                                myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
-                            }
-                        }
-                        myThreadkernels->finishedLevelM2L(idxLevel);
-                    }
-                    FLOG(computationCounter.tac());
-                }
-            }
-            FLOG(m2lSelf.tac());
-        } //End of parallel section
-        //Synch between threads at implict barrier
-
-        FTRACE( FTrace::FRegion regionTrace("Compute Received data", __FUNCTION__ , __FILE__ , __LINE__) );
-        typename OctreeClass::Iterator octreeIterator(tree);
-        octreeIterator.moveDown();
-        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-        // compute the second time
-        // for each levels
-        for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-            if(idProcess != 0 && getWorkingInterval(idxLevel, idProcess).rightIndex <= getWorkingInterval(idxLevel, idProcess - 1).rightIndex){
-                avoidGotoLeftIterator.moveDown();
-                octreeIterator = avoidGotoLeftIterator;
-                continue;
-            }
-            // put the received data into a temporary tree
-            FLightOctree<CellClass> tempTree;
-            for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
-                for(int idxCell = 0 ; idxCell < toReceiveFromProcAtLevel ; ++idxCell){
-                    const MortonIndex cellIndex = recvBuffer[idxLevel * nbProcess + idxProc]->FMpiBufferReader::getValue<MortonIndex>();
-                    CellClass* const newCell = new CellClass;
-                    newCell->setMortonIndex(cellIndex);
-                    newCell->deserializeUp(*recvBuffer[idxLevel * nbProcess + idxProc]);
-                    tempTree.insertCell(cellIndex, idxLevel, newCell);
-                }
-            }
-            // take cells from our octree only if they are
-            // linked to received data
-            int numberOfCells = 0;
-            int realCellId = 0;
-            while(octreeIterator.getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
-                octreeIterator.moveRight();
-            }
-            // for each cells
-            do{
-                // copy cells that need data from others
-                if(leafsNeedOther[idxLevel]->get(realCellId++)){
-                    iterArray[numberOfCells++] = octreeIterator;
-                }
-            } while(octreeIterator.moveRight());
-            avoidGotoLeftIterator.moveDown();
-            octreeIterator = avoidGotoLeftIterator;
-            printf("Proc :: %d Lvl %d, numberOfCells : %d that need other datas\n", idProcess, idxLevel, numberOfCells);
-            delete leafsNeedOther[idxLevel];
-            leafsNeedOther[idxLevel] = nullptr;
-            // Compute this cells
-            FLOG(computationCounter.tic());
-            FLOG(m2lFar.tic());
-#pragma omp parallel
-            {
-                KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
-                MortonIndex neighborsIndex[189];
-                int neighborsPosition[189];
-                const CellClass* neighbors[343];
-                printf("Proc %d, idThread %d, idxLevel %d, number : %d\n", idProcess, omp_get_thread_num(), idxLevel, numberOfCells);
-#pragma omp for schedule(dynamic) //nowait
-                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                    // compute indexes
-                    memset(neighbors, 0, 343 * sizeof(CellClass*));
-                    memset(neighborsPosition, 0, 189 * sizeof(int));
-                    memset(neighborsIndex, 0, 189 * sizeof(MortonIndex));
-                    const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition);
-                    int counter = 0;
-                    // does we receive this index from someone?
-                    for(int idxNeig = 0 ; idxNeig < counterNeighbors ; ++idxNeig){
-                        if(neighborsIndex[idxNeig] < (getWorkingInterval(idxLevel, idProcess).leftIndex)
-                                || (getWorkingInterval(idxLevel, idProcess).rightIndex) < neighborsIndex[idxNeig]){
-                            CellClass*const otherCell = tempTree.getCell(neighborsIndex[idxNeig], idxLevel);
-                            if(otherCell){
-                                //otherCell->setMortonIndex(neighborsIndex[idxNeig]);
-                                neighbors[ neighborsPosition[idxNeig] ] = otherCell;
-                                ++counter;
-                            }
-                        }
-                    }
-                    // FILE * fd = fopen("res1","a+");
-                    // fprintf(fd,"Q %d\t %d\t %d\t %d\n", idProcess,idxLevel,idxCell,counter);
-                    // fclose(fd);
-                    // need to compute
-                    if(counter){
-                        myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
-                    }
-                }
-                myThreadkernels->finishedLevelM2L(idxLevel);
-            } //End of parallel section
-            FLOG(computationCounter.tac());
-            FLOG(m2lFar.tac());
-        }
-
-        for(int idxComm = 0 ; idxComm < nbProcess * OctreeHeight ; ++idxComm){
-            delete sendBuffer[idxComm];
-            delete recvBuffer[idxComm];
-        }
-        for(int idxComm = 0 ; idxComm < OctreeHeight ; ++idxComm){
-            delete leafsNeedOther[idxComm];
-        }
-        delete[] sendBuffer;
-        delete[] recvBuffer;
-        delete[] indexToSend;
-        delete[] leafsNeedOther;
-        delete[] globalReceiveMap;
-        delete[] requests;
-        delete[] status;
-
-        FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << " s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Single : " << singleCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t M2L Self : " << m2lSelf.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t M2L Far : " << m2lFar.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t M2L Gather : " << gatherCounter.elapsed() << " s\n" );
-        FLOG( FLog::Controller << "\t\t M2L Send : " << sendCounter.elapsed() << " s\n" );
-    }
-
     //////////////////////////////////////////////////////////////////
     // ---------------- L2L ---------------
     //////////////////////////////////////////////////////////////////
 
-    void downardPassOld(){ // second L2L
+    void downardPass(){ // second L2L
         const int MaxSizePerCell = CellClass::GetSize();
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
         FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); );
@@ -1454,263 +854,105 @@ private:
         octreeIterator.moveDown();
         typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
 
-        MPI_Request*const requests = new MPI_Request[nbProcess];
-        MPI_Status*const status = new MPI_Status[nbProcess];
+        MPI_Request*const requests = new MPI_Request[8];   // Max 1 receive and 7 send (but 7 times the same data)
+        MPI_Status*const status = new MPI_Status[8];
 
         const int heightMinusOne = OctreeHeight - 1;
 
         FMpiBufferWriter sendBuffer(comm.getComm(), MaxSizePerCell);
         FMpiBufferReader recvBuffer(comm.getComm(), MaxSizePerCell);
 
+        int righestProcToSendTo = nbProcess - 1;
+
         // for each levels exepted leaf level
         for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
-            if(idProcess != 0
-                    && getWorkingInterval((idxLevel+1), idProcess).rightIndex <= getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex){
+            // If nothing to do in the next level skip the current one
+            if(idProcess != 0 && !procHasWorkAtLevel(idxLevel+1, idProcess) ){
                 avoidGotoLeftIterator.moveDown();
                 octreeIterator = avoidGotoLeftIterator;
                 continue;
             }
 
             // copy cells to work with
-            int numberOfCells = 0;
+            int totalNbCellsAtLevel = 0;
             // for each cells
             do{
-                iterArray[numberOfCells++] = octreeIterator;
+                iterArray[totalNbCellsAtLevel++] = octreeIterator;
             } while(octreeIterator.moveRight());
             avoidGotoLeftIterator.moveDown();
             octreeIterator = avoidGotoLeftIterator;
 
-            int firstCellWork = -1;
-            while(iterArray[firstCellWork+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
-                ++firstCellWork;
-            }
-
-            bool needToRecv = false;
-            int iterRequests = 0;
-            FLOG(prepareCounter.tic());
-            // do we need to receive one or zeros cell
-            if(idProcess != 0
-                    && (getWorkingInterval((idxLevel + 1), idProcess).leftIndex >> 3 ) <= (getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex >> 3 ) ){
-                needToRecv = true;
-                MPI_Irecv( recvBuffer.data(), recvBuffer.getCapacity(), MPI_PACKED, MPI_ANY_SOURCE, FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
-            }
-            if(idProcess != nbProcess - 1){
-                int firstProcThatRecv = idProcess + 1;
-                while( firstProcThatRecv < nbProcess &&
-                        getWorkingInterval((idxLevel + 1), firstProcThatRecv).rightIndex <= getWorkingInterval((idxLevel+1), idProcess).rightIndex){
-                    ++firstProcThatRecv;
-                }
-                int endProcThatRecv = firstProcThatRecv;
-                while( endProcThatRecv < nbProcess &&
-                        (getWorkingInterval((idxLevel + 1), endProcThatRecv).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex >> 3) ){
-                    ++endProcThatRecv;
-                }
-                if(firstProcThatRecv != endProcThatRecv){
-                    iterArray[numberOfCells - 1].getCurrentCell()->serializeDown(sendBuffer);
-                    for(int idxProc = firstProcThatRecv ; idxProc < endProcThatRecv ; ++idxProc ){
-                        MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, idxProc, FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
-                    }
-                }
-            }
-            FLOG(prepareCounter.tac());
-            FLOG(computationCounter.tic());
-#pragma omp parallel
-            {
-                KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
-#pragma omp for nowait
-                for(int idxCell = firstCellWork + 1 ; idxCell < numberOfCells ; ++idxCell){
-                    myThreadkernels.L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
-                }
-            }
-            FLOG(computationCounter.tac());
-            // are we sending or receiving?
-            if(iterRequests){
-                // process
-                FLOG(waitCounter.tic());
-                MPI_Waitall( iterRequests, requests, status);
-                FLOG(waitCounter.tac());
-                if(needToRecv){
-                    // Need to compute
-                    FLOG(computationCounter.tic());
-                    iterArray[firstCellWork].getCurrentCell()->deserializeDown(recvBuffer);
-                    kernels[0]->L2L( iterArray[firstCellWork].getCurrentCell() , iterArray[firstCellWork].getCurrentChild(), idxLevel);
-                    FLOG(computationCounter.tac());
-                }
-            }
-            sendBuffer.reset();
-            recvBuffer.seek(0);
-        }
-        delete[] requests;
-        delete[] status;
-        FLOG( FLog::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << " s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.cumulated() << " s\n" );
-    }
-
-    void downardPass(){ // second L2L
-        const int MaxSizePerCell = CellClass::GetSize();
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-        FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); );
-        FLOG(FTic counterTime);
-        FLOG(FTic computationCounter);
-        FLOG(FTic prepareCounter);
-        FLOG(FTic waitCounter);
-        // Start from leal level - 1
-        typename OctreeClass::Iterator octreeIterator(tree);
-        octreeIterator.moveDown();
-        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-        MPI_Request*const requests = new MPI_Request[nbProcess];
-        MPI_Status*const status = new MPI_Status[nbProcess];
-        const int heightMinusOne = OctreeHeight - 1;
-        FMpiBufferWriter sendBuffer(comm.getComm(), MaxSizePerCell);
-        FMpiBufferReader recvBuffer(comm.getComm(), MaxSizePerCell);
-        // for each levels exepted leaf level
-        for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
-            if(idProcess != 0
-                    && getWorkingInterval((idxLevel+1), idProcess).rightIndex <= getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex){
-                avoidGotoLeftIterator.moveDown();
-                octreeIterator = avoidGotoLeftIterator;
-                continue;
-            }
-            // copy cells to work with
-            int numberOfCells = 0;
-            // for each cells
-            do{
-                iterArray[numberOfCells++] = octreeIterator;
-            } while(octreeIterator.moveRight());
-            avoidGotoLeftIterator.moveDown();
-            octreeIterator = avoidGotoLeftIterator;
-            int firstCellWork = -1;
-            while(iterArray[firstCellWork+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
-                ++firstCellWork;
-            }
-            if(iterArray[firstCellWork+1].getCurrentGlobalIndex() == getWorkingInterval(idxLevel, idProcess).leftIndex && numberOfCells == 1){
-                printf("You Mouchard \n");
-                firstCellWork++;
-            }
+            int nbCellsToSkip = 0;
+            while(nbCellsToSkip < totalNbCellsAtLevel && iterArray[nbCellsToSkip].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).leftIndex){
+                nbCellsToSkip += 1;
+            }
+
+            bool hasToReceive = false;
+            int iterRequests = 0;
+            int idxProcToReceive = idProcess - 1;
+            if(idProcess != 0 && nbCellsToSkip){
+                while(idxProcToReceive && !procHasWorkAtLevel(idxLevel, idxProcToReceive) ){
+                    idxProcToReceive -= 1;
+                }
+                if(procHasWorkAtLevel(idxLevel, idxProcToReceive) && procCoversMyLeftBorderCell(idxLevel, idxProcToReceive)){
+                    hasToReceive = true;
+                }
+            }
 
 #pragma omp parallel
             {
                 int threadNumber = omp_get_thread_num();
                 KernelClass* myThreadkernels = (kernels[threadNumber]);
-#pragma omp single
+#pragma omp single nowait
                 {
-                    bool needToRecv = false;
-                    int iterRequests = 0;
                     FLOG(prepareCounter.tic());
-                    // do we need to receive one or zeros cell
-                    if(idProcess != 0
-                            && (getWorkingInterval((idxLevel + 1), idProcess).leftIndex >> 3 ) <= (getWorkingInterval((idxLevel+1), idProcess - 1).rightIndex >> 3 ) ){
-                        needToRecv = true;
-                        MPI_Irecv( recvBuffer.data(), recvBuffer.getCapacity(), MPI_PACKED, MPI_ANY_SOURCE, FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
+                    int iterRequests = 0;
+                    if(hasToReceive){
+                        FMpi::MpiAssert( MPI_Irecv( recvBuffer.data(), recvBuffer.getCapacity(), MPI_PACKED, idxProcToReceive /*MPI_ANY_SOURCE*/,
+                                                    FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__ );
                     }
-                    if(idProcess != nbProcess - 1){
-                        int firstProcThatRecv = idProcess + 1;
-                        while( firstProcThatRecv < nbProcess &&
-                                getWorkingInterval((idxLevel + 1), firstProcThatRecv).rightIndex <= getWorkingInterval((idxLevel+1), idProcess).rightIndex){
-                            ++firstProcThatRecv;
-                        }
-                        int endProcThatRecv = firstProcThatRecv;
-                        while( endProcThatRecv < nbProcess &&
-                                (getWorkingInterval((idxLevel + 1), endProcThatRecv).leftIndex >> 3) <= (getWorkingInterval((idxLevel+1), idProcess).rightIndex >> 3) ){
-                            ++endProcThatRecv;
-                        }
-                        if(firstProcThatRecv != endProcThatRecv){
-                            iterArray[numberOfCells - 1].getCurrentCell()->serializeDown(sendBuffer);
-                            for(int idxProc = firstProcThatRecv ; idxProc < endProcThatRecv ; ++idxProc ){
-                                MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, idxProc, FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
-                            }
+                    if(idProcess != nbProcess - 1 && idProcess < righestProcToSendTo){
+                        int idxProcSend = idProcess + 1;
+                        int nbMessageSent = 0;
+                        while(idxProcSend <= righestProcToSendTo && (!procHasWorkAtLevel(idxLevel+1, idxProcSend) || procCoversMyRightBorderCell(idxLevel, idxProcSend)) ){
+                            if(procHasWorkAtLevel(idxLevel+1, idxProcSend)){
+                                if(nbMessageSent == 0){
+                                    iterArray[totalNbCellsAtLevel-1].getCurrentCell()->serializeDown(sendBuffer);
+                                }
+                                FMpi::MpiAssert( MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, idxProcSend,
+                                                           FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__);
+                                nbMessageSent += 1;
+                                FAssertLF(nbMessageSent <= 7);
+                            }
+                            idxProcSend += 1;
                         }
+                        righestProcToSendTo = idxProcSend;
                     }
                     // are we sending or receiving?
                     if(iterRequests){
                         // process
                         FLOG(waitCounter.tic());
-                        MPI_Waitall( iterRequests, requests, status);
+                        FAssertLF(iterRequests <= 8);
+                        FMpi::MpiAssert(MPI_Waitall( iterRequests, requests, status), __LINE__);
                         FLOG(waitCounter.tac());
-                        if(needToRecv){
-                            // if(idProcess == 53){
-                            //   fprintf(stderr,"You found One : %d, nbofCells %d Intervals : %d{%lld,%lld} %d{%lld,%lld} Left : %d{%lld,%lld} %d{%lld,%lld} %lld\n",
-                            //      idProcess,numberOfCells,
-                            //      idxLevel, getWorkingInterval(idxLevel).leftIndex,
-                            //      getWorkingInterval(idxLevel).rightIndex,
-                            //      idxLevel+1, getWorkingInterval(idxLevel+1).leftIndex,
-                            //      getWorkingInterval(idxLevel+1).rightIndex,
-                            //      idxLevel, getWorkingInterval(idxLevel,idProcess-1).leftIndex,
-                            //      getWorkingInterval(idxLevel,idProcess-1).rightIndex,
-                            //      idxLevel+1,getWorkingInterval(idxLevel+1,idProcess-1).leftIndex,
-                            //      getWorkingInterval(idxLevel+1,idProcess-1).rightIndex,iterArray[0].getCurrentGlobalIndex());
-                            // }
-                            // Need to compute
-                            FLOG(computationCounter.tic());
-                            iterArray[firstCellWork].getCurrentCell()->deserializeDown(recvBuffer);
-                            kernels[threadNumber]->L2L( iterArray[firstCellWork].getCurrentCell() , iterArray[firstCellWork].getCurrentChild(), idxLevel);
-                            FLOG(computationCounter.tac());
-                        }
                     }
+                    if(hasToReceive){
+                        FAssertLF(iterRequests != 0);
+                        iterArray[nbCellsToSkip-1].getCurrentCell()->deserializeDown(recvBuffer);
+                        kernels[threadNumber]->L2L( iterArray[nbCellsToSkip-1].getCurrentCell() , iterArray[nbCellsToSkip-1].getCurrentChild(), idxLevel);
+                    }
+                    FLOG(prepareCounter.tac());
                 } //End Of Single Section
 
+#pragma omp single nowait
+                //#pragma omp parallel
+                {
                     FLOG(computationCounter.tic());
+                }
 #pragma omp for nowait
-                for(int idxCell = firstCellWork + 1 ; idxCell < numberOfCells ; ++idxCell){
+                for(int idxCell = nbCellsToSkip ; idxCell < totalNbCellsAtLevel ; ++idxCell){
                     myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
                 }
             } //End of parallel section
             FLOG(computationCounter.tac());
 
             sendBuffer.reset();
@@ -1852,21 +1094,6 @@ private:
         FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, comm.getComm()),  __LINE__ );
         FLOG(gatherCounter.tac());
-        {//TODO : remove
-            //Print the globalReceiveMap for Process 0
-            // if(idProcess == 0)
-            //   {
-            //     printf("\n Proc 0 :: \n");
-            //     for(int u = 0 ; u < nbProcess ; ++u){
-            //       for(int v = 0 ; v < nbProcess ; ++v){
-            //         printf("\t %d",globalReceiveMap[u*nbProcess+v]);
-            //       }
-            //       printf("\n");
-            //     }
-            //   }
-        }
         //Prepare receive
         for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
             if(globalReceiveMap[idxProc * nbProcess + idProcess]){ //if idxProc has sth for me.
@@ -2288,23 +1515,6 @@ private:
         FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, comm.getComm()),  __LINE__ );
         FLOG(gatherCounter.tac());
-        {//TODO : remove
-            //Print the globalReceiveMap for Process 0
-            if(idProcess == 0)
-            {
-                printf("\n Proc 0 :: \n");
-                for(int u = 0 ; u < nbProcess ; ++u){
-                    for(int v = 0 ; v < nbProcess ; ++v){
-                        printf("\t %d",globalReceiveMap[u*nbProcess+v]);
-                    }
-                    printf("\n");
-                }
-            }
-        }
         //Prepare receive
         for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
             if(globalReceiveMap[idxProc * nbProcess + idProcess]){ //if idxProc has sth for me.
@@ -2391,14 +1601,7 @@ private:
             for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
                 const int endAtThisShape = shapeLeaf[idxShape] + previous;
-                // //#pragma omp single
-                // {
-                //   char file[5];
-                //   sprintf(file,"re%d",idProcess);
-                //   FILE * fd = fopen(file,"a+");
-                //   fprintf(fd,"Proc %d \t, Color : %d, from %d to %d, total %d \n",idProcess,idxShape,previous,endAtThisShape,endAtThisShape-previous);
-                //   fclose(fd);
-                // }
 #pragma omp for schedule(auto)
                 for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){
                     LeafData& currentIter = leafsDataArray[idxLeafs];
Src/Utils/FMpi.hpp  (+13 −13)

@@ -58,27 +58,27 @@ public:
     ////////////////////////////////////////////////////////
 
     enum FMpiTag {
         // FMpiTreeBuilder
-        TagExchangeIndexs,
-        TagSplittedLeaf,
-        TagExchangeNbLeafs,
-        TagSandSettling,
+        TagExchangeIndexs = 100,
+        TagSplittedLeaf = 200,
+        TagExchangeNbLeafs = 300,
+        TagSandSettling = 400,
 
         // FQuickSort
-        TagQuickSort,
+        TagQuickSort = 500,
 
         // FMM
-        TagFmmM2M,
-        TagFmmL2L,
-        TagFmmP2P,
+        TagFmmM2M = 1000,
+        TagFmmL2L = 2000,
+        TagFmmP2P = 3000,
 
         // Bitonic,
-        TagBitonicMin,
-        TagBitonicMax,
-        TagBitonicMinMess,
-        TagBitonicMaxMess,
+        TagBitonicMin = 4000,
+        TagBitonicMax = 5000,
+        TagBitonicMinMess = 6000,
+        TagBitonicMaxMess = 7000,
 
         // Last defined tag
-        TagLast,
+        TagLast = 8000,
     };
 
     ////////////////////////////////////////////////////////
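Note on this change: giving each FMpiTag member an explicit, widely spaced value matters because the reworked algorithm above now posts messages with FMpi::TagFmmM2M + idxLevel and FMpi::TagFmmL2L + idxLevel, so each octree level gets its own MPI tag and traffic from different levels cannot be matched against each other. A minimal sketch of that tag arithmetic follows; the tag values are copied from the enum above, while the tree height is an arbitrary example value, not one taken from the repository.

    #include <cstdio>

    // Values copied from the FMpiTag enum in this commit (other tags omitted).
    enum FMpiTag {
        TagFmmM2M = 1000,
        TagFmmL2L = 2000,
        TagFmmP2P = 3000,
    };

    int main(){
        const int OctreeHeight = 7;   // example height, assumption for the sketch
        for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel){
            // Each level posts its M2M/L2L traffic on a distinct tag, and the
            // 1000-wide spacing keeps the per-level ranges from overlapping.
            std::printf("level %d -> M2M tag %d, L2L tag %d\n",
                        idxLevel, TagFmmM2M + idxLevel, TagFmmL2L + idxLevel);
        }
        return 0;
    }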