Commit 14beec81 authored by EYRAUD-DUBOIS Lionel

New indep scheduler, Indep 3/2 based on work from Grenoble

parent 454ec507
...@@ -297,6 +297,12 @@ resources. All of these only work with two types of resources.
dual approximation, heteroprio-based greedy algorithm. Inspired from [Scheduling Data Flow Program in XKaapi: A
New Affinity Based Algorithm for Heterogeneous Architectures](https://hal.inria.fr/hal-01081629v1), with only the
second part of the schedule.
+ `dp3demi`
dual approximation, dynamic-programming-based algorithm. Based
on APPROX-3/2 from
[Scheduling Independent Moldable Tasks on Multi-Cores with GPUs](https://hal.inria.fr/hal-01516752),
but restricted to the non-moldable case. It should also appear as a
more generic (2q+1)/(2q) approximation in IJFCS.
+ `accel`
Accel algorithm from [Scheduling Independent Tasks on Multi-cores with GPU Accelerators](https://hal.inria.fr/hal-01081625),
Section 4.
...
#ifndef INDEPDP3DEMI_H
#define INDEPDP3DEMI_H

#include "IndepDualGeneric.h"
#include "instance.h"
#include <vector>

class IndepDP3Demi : public IndepDualGeneric {
protected:
    double tryGuess(Instance &, std::vector<int> taskSet, std::vector<double>& loads,
                    double target, IndepResult & result, bool getResult);
    double discretizationConstant = 3.0;
public:
    IndepDP3Demi(const AlgOptions& opt);
};

#endif
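IndepDP3Demi only implements the inner feasibility test tryGuess(); the dual-approximation driver lives in IndepDualGeneric, which is not part of this commit. As a rough illustration of how such a driver typically works (a minimal sketch under assumptions: the names dualApproximationSearch, feasible and precision are illustrative and do not come from this repository), the makespan guess is refined by binary search and the smallest guess accepted by the feasibility test is kept:

#include <functional>

// Sketch only: binary-search the makespan guess; feasible(guess) stands in for a
// call such as tryGuess(..., guess, ...) not reporting infeasibility.
double dualApproximationSearch(const std::function<bool(double)>& feasible,
                               double lower, double upper, double precision) {
    // lower and upper are assumed to bracket the optimal makespan.
    while (upper - lower > precision) {
        double guess = (lower + upper) / 2;
        if (feasible(guess))
            upper = guess;   // guess achievable: try to lower the makespan
        else
            lower = guess;   // guess too small: no valid assignment exists
    }
    return upper;            // smallest guess found feasible
}

In a dual-approximation scheme, every accepted guess yields a schedule of makespan at most 3/2 times that guess, so the search converges to a 3/2-approximation up to the chosen precision; the actual acceptance test in IndepDualGeneric presumably compares the CPU load returned by tryGuess() against target * nbCPU.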
...@@ -6,6 +6,7 @@ set(SCHED_SRC
IndepBalanced.cpp
IndepDualGeneric.cpp ../include/IndepDualGeneric.h
IndepDP2.cpp
IndepDP3Demi.cpp
IndepImreh.cpp
IndepAccel.cpp
IndepBased.cpp
...
...@@ -2,6 +2,7 @@
#include "GreedyAlgorithm.h"
#include "util.h"
#include "IndepDP2.h"
#include "IndepDP3Demi.h"
#include "IndepAccel.h"
#include "IndepDualHP.h"
#include "IndepImreh.h"
...@@ -28,6 +29,8 @@ IndepBased::IndepBased(const AlgOptions& options) : GreedyAlgorithm(options) {
    string indepName = options.asString("indep", "dualhp");
    if(indepName == "dp2")
        indep = new IndepDP2(options);
    if(indepName == "dp3demi")
        indep = new IndepDP3Demi(options);
    if(indepName == "dualhp")
        indep = new IndepDualHP(options);
    if(indepName == "accel") {
...
#include "IndepDP3Demi.h"
#include <iostream>
#include <limits>
#include <new>
#include <algorithm>
#include <cmath>
#include "util.h"
using namespace std;
IndepDP3Demi::IndepDP3Demi(const AlgOptions& opt): IndepDualGeneric(opt) {
    discretizationConstant = opt.asDouble("disc", discretizationConstant);
}
// Returns the minimum total CPU load achievable for the makespan guess 'target',
// taking existing loads into account; returns -1 if the guess is infeasible.
double IndepDP3Demi::tryGuess(Instance& instance, std::vector<int> taskSet, vector<double>& loads,
                              double target, IndepResult &result, bool getResult) {
    // Core recurrence: CPUload(i, g) := smallest total load on the CPUs when the
    // first i tasks are assigned, with at most g load on the GPUs.
    // The GPU load needs to be discretized; the paper uses a ratio of lambda / (3n).
    // For each task i of taskSet and every g:
    //   CPUload(i, g) = min( CPUload(i-1, g - T^G_i), CPUload(i-1, g) + T^C_i )
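    // In the code below the state is refined to CPUload(i, l, mu, nu), where l is the
    // discretized GPU load, mu counts tasks with CPU time > target/2 placed on the CPUs
    // (at most maxMu) and nu counts tasks with GPU time > target/2 placed on the GPUs
    // (at most maxNu). Bounding these "long" tasks by the number of machines is what the
    // APPROX-3/2 scheme uses to turn the dual-approximation guess into a 3/2 guarantee.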
    int nbCPU = instance.nbWorkers[0];
    int nbGPU = instance.nbWorkers[1];

    double existingCPUload = loads[0];
    double existingGPUload = loads[1];

    double maxGPUload = target * nbGPU - existingGPUload;
    if(maxGPUload < 0) maxGPUload = 1;
    double ratio = target / (discretizationConstant * taskSet.size());

    vector<int> discreteGPUtimings(instance.nbTaskTypes);
    for(int i = 0; i < instance.nbTaskTypes; i++)
        discreteGPUtimings[i] = ceil(instance.execTimes[1][i] / ratio);

    int nbJobsWithLargeCPUTime = 0;
    int nbJobsWithLargeGPUTime = 0;
    for(int & t: taskSet) {
        int taskType = instance.taskTypes[t];
        if(instance.execTimes[0][taskType] > (target / 2))
            ++ nbJobsWithLargeCPUTime;
        if(instance.execTimes[1][taskType] > (target / 2))
            ++ nbJobsWithLargeGPUTime;
    }

    const int N = ceil(maxGPUload / ratio);
    const int maxMu = min(nbCPU, nbJobsWithLargeCPUTime);
    const int maxNu = min(nbGPU, nbJobsWithLargeGPUTime);
    const int stateSpaceSize = (N+1) * (maxMu + 1) * (maxNu + 1);
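    // State space: (N+1) GPU-load levels times (maxMu+1) times (maxNu+1) counters.
    // With ratio = target / (disc * n), N is at most about disc * n * nbGPU, so the DP
    // below runs in O(n * stateSpaceSize) time; memory is one layer of stateSpaceSize
    // doubles, or n+1 layers when the assignment has to be reconstructed (getResult).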
    int length = getResult ? taskSet.size() + 1 : 1;
    double** CPUload = new double*[length];
    if(verbosity >= 7) {
        cerr << "DP3demi: N=" << N << " maxMu= " << maxMu << " maxNu = " << maxNu << endl;
        cerr << "DP3demi: allocating " << length << " space= " << stateSpaceSize << ", total= " << length * stateSpaceSize << endl;
    }
    CPUload[0] = new double[length * stateSpaceSize];
    for(int i = 1; i < length; i++)
        CPUload[i] = CPUload[i-1] + stateSpaceSize;
// Macro arguments are parenthesized so that expressions like (nu - nuOffset) index correctly.
#define getTabValue(tab, l, m, k) ((tab)[(l) + (N+1)*(m) + (N+1)*(maxMu+1)*(k)])
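// The table is stored flat, one double per (l, mu, nu) state:
// index = l + (N+1)*mu + (N+1)*(maxMu+1)*nu, i.e. l varies fastest and nu slowest.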
    if(verbosity >= 7)
        cout << "IndepDP3Demi: maxGLoad = " << maxGPUload << ", ratio = " << ratio << " N= " << N << " gR: " << getResult << " mL " << target << endl;

    int index = 0;
    for(int i = 0; i < stateSpaceSize; ++i)
        CPUload[index][i] = 0;
    for(int t : taskSet) {
        const int taskType = instance.taskTypes[t];
        const int nextIndex = getResult ? index+1 : index;
        const double exec0 = instance.execTimes[0][taskType];
        const double exec1 = instance.execTimes[1][taskType];
        const int discreteGPUtime = discreteGPUtimings[taskType];
        // "Long" tasks (time > target/2) consume one unit of the mu / nu budgets,
        // matching the nbJobsWithLargeCPUTime / nbJobsWithLargeGPUTime counts above.
        const int muOffset = exec0 > (target / 2) ? 1 : 0;
        const int nuOffset = exec1 > (target / 2) ? 1 : 0;
        if(exec0 > target && exec1 > target) {
            // Problem is not feasible: task t cannot be placed on any resource.
            delete[] CPUload[0];
            delete[] CPUload;
            return -1;
        }
        if((exec0 <= target) && (exec1 <= target)) { // Task t can be placed on both resources
            for(int mu = maxMu; mu >= muOffset; --mu) {
                for(int nu = maxNu; nu >= nuOffset; --nu) {
                    for(int l = N; l >= discreteGPUtime; --l) {
                        getTabValue(CPUload[nextIndex], l, mu, nu) = min(getTabValue(CPUload[index], l, mu-muOffset, nu) + exec0,
                                                                         getTabValue(CPUload[index], l - discreteGPUtime, mu, nu-nuOffset));
                    }
                    for(int l = discreteGPUtime - 1; l >= 0; --l) {
                        // Not enough GPU-load budget: the task has to go on a CPU.
                        getTabValue(CPUload[nextIndex], l, mu, nu) = getTabValue(CPUload[index], l, mu-muOffset, nu) + exec0;
                    }
                }
                if(nuOffset) {
                    for(int l = N; l >= 0; --l) {
                        // No budget left for long GPU tasks: the task has to go on a CPU.
                        getTabValue(CPUload[nextIndex], l, mu, 0) = getTabValue(CPUload[index], l, mu-muOffset, 0) + exec0;
                    }
                }
            }
            if(muOffset) {
                for(int nu = maxNu; nu >= nuOffset; --nu) {
                    for(int l = N; l >= discreteGPUtime; --l) {
                        // No budget left for long CPU tasks: the task has to go on a GPU.
                        getTabValue(CPUload[nextIndex], l, 0, nu) = getTabValue(CPUload[index], l - discreteGPUtime, 0, nu-nuOffset);
                    }
                    for(int l = discreteGPUtime - 1; l >= 0; --l) {
                        getTabValue(CPUload[nextIndex], l, 0, nu) = std::numeric_limits<double>::infinity();
                    }
                }
                if(nuOffset) {
                    for(int l = N; l >= 0; --l) {
                        getTabValue(CPUload[nextIndex], l, 0, 0) = std::numeric_limits<double>::infinity();
                    }
                }
            }
        } else if ((exec0 <= target) && (exec1 > target)) { // Task t can only be placed on CPUs
            for(int mu = maxMu; mu >= muOffset; --mu) {
                for(int nu = maxNu; nu >= 0; --nu) {
                    for(int l = N; l >= 0; --l) {
                        getTabValue(CPUload[nextIndex], l, mu, nu) = getTabValue(CPUload[index], l, mu-muOffset, nu) + exec0;
                    }
                }
            }
            if(muOffset) {
                for(int nu = maxNu; nu >= 0; --nu) {
                    for(int l = N; l >= 0; --l) {
                        getTabValue(CPUload[nextIndex], l, 0, nu) = std::numeric_limits<double>::infinity();
                    }
                }
            }
        } else /* ((exec0 > target) && (exec1 <= target)) */ { // Task t can only be placed on GPUs
            for(int mu = maxMu; mu >= 0; --mu) {
                for(int nu = maxNu; nu >= nuOffset; --nu) {
                    for(int l = N; l >= discreteGPUtime; --l) {
                        getTabValue(CPUload[nextIndex], l, mu, nu) = getTabValue(CPUload[index], l - discreteGPUtime, mu, nu-nuOffset);
                    }
                    for(int l = discreteGPUtime - 1; l >= 0; l--) {
                        getTabValue(CPUload[nextIndex], l, mu, nu) = std::numeric_limits<double>::infinity();
                    }
                }
                if(nuOffset) {
                    for(int l = N; l >= 0; l--) {
                        getTabValue(CPUload[nextIndex], l, mu, 0) = std::numeric_limits<double>::infinity();
                    }
                }
            }
        }
        index = nextIndex;
    }
    double value = getTabValue(CPUload[index], N, maxMu, maxNu);
    if(value == std::numeric_limits<double>::infinity()) {
        // Problem not feasible.
        delete[] CPUload[0];
        delete[] CPUload;
        return -1;
    }
    if(verbosity >= 7)
        cerr << "DP3demi: final value is " << value << endl;

    int gLoad = N;
    int mu = maxMu;
    int nu = maxNu;
    if(getResult) {
        // Walk the DP layers backwards to recover which resource each task was assigned to.
        result[0].clear();
        result[1].clear();
        for(; index > 0; index--) {
            const int taskType = instance.taskTypes[taskSet[index-1]];
            const double exec0 = instance.execTimes[0][taskType];
            const double exec1 = instance.execTimes[1][taskType];
            const int discreteGPUtime = discreteGPUtimings[taskType];
            const int muOffset = exec0 > (target / 2) ? 1 : 0;
            const int nuOffset = exec1 > (target / 2) ? 1 : 0;
            // The task went on a CPU only if this was allowed (small enough, budget left)
            // and the stored value matches the CPU transition; otherwise it went on a GPU.
            if(exec0 <= target && mu >= muOffset &&
               getTabValue(CPUload[index], gLoad, mu, nu) == getTabValue(CPUload[index-1], gLoad, mu-muOffset, nu) + exec0) {
                mu -= muOffset;
                result[0].push_back(taskSet[index-1]);
            }
            else {
                gLoad -= discreteGPUtime;
                nu -= nuOffset;
                result[1].push_back(taskSet[index-1]);
            }
        }
    }
    delete[] CPUload[0];
    delete[] CPUload;
    return value + existingCPUload;
}