Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ce61fe7f authored by Quentin Guilloteau
Browse files

scripts ok g5k

parent b3526f37
Branches
No related tags found
No related merge requests found
...@@ -26,8 +26,10 @@ in { ...@@ -26,8 +26,10 @@ in {
cd ${nfsMountPoint} cd ${nfsMountPoint}
cp ${iorConfig} ${iorConfigPerCluster} cp ${iorConfig} ${iorConfigPerCluster}
NB_TASKS=$1 NB_TASKS=$1
BLOCK_SIZE=$2
sed -ri "s/(numTasks)=\w+/\1=$NB_TASKS/g" ${iorConfigPerCluster} sed -ri "s/(numTasks)=\w+/\1=$NB_TASKS/g" ${iorConfigPerCluster}
sed -ri "s/(blockSize)=\w+/\1=$BLOCK_SIZE/g" ${iorConfigPerCluster}
''; '';
start_ior_nodes = start_ior_nodes =
...@@ -39,9 +41,10 @@ in { ...@@ -39,9 +41,10 @@ in {
NB_SLOTS_PER_NODE=$(($TOTAL_NB_NODES / $NB_NODES)) NB_SLOTS_PER_NODE=$(($TOTAL_NB_NODES / $NB_NODES))
cat /etc/hosts | grep node | head -n $NB_NODES | awk -v nb_slots="$NB_SLOTS_PER_NODE" '{ print $2 " slots=" nb_slots;}' > my_hosts cat /etc/hosts | grep client | head -n $NB_NODES | awk -v nb_slots="$NB_SLOTS_PER_NODE" '{ print $2 " slots=" nb_slots;}' > my_hosts
mpirun --allow-run-as-root --oversubscribe -mca btl self,vader -np $TOTAL_NB_NODES --hostfile my_hosts ior -f ${iorConfigPerCluster} # mpirun --allow-run-as-root --oversubscribe -mca btl self,vader -np $TOTAL_NB_NODES --hostfile my_hosts ior -f ${iorConfigPerCluster}
mpirun --allow-run-as-root --oversubscribe --mca pml ^ucx --mca mtl ^psm2,ofi --mca btl ^ofi,openib -np $TOTAL_NB_NODES --hostfile my_hosts ior -f ${iorConfigPerCluster}
''; '';
gen_config_orangefs = gen_config_orangefs =
......
...@@ -16,6 +16,7 @@ class MyEngine(Engine): ...@@ -16,6 +16,7 @@ class MyEngine(Engine):
parser.add_argument('--nxc_build_file', help='Path to the NXC deploy file') parser.add_argument('--nxc_build_file', help='Path to the NXC deploy file')
parser.add_argument('--nb_cpu_nodes', help='Number of compute nodes') parser.add_argument('--nb_cpu_nodes', help='Number of compute nodes')
parser.add_argument('--nb_io_nodes', help='Number of io nodes') parser.add_argument('--nb_io_nodes', help='Number of io nodes')
parser.add_argument('--block_size', help='File size to write')
parser.add_argument('--walltime', help='walltime in hours') parser.add_argument('--walltime', help='walltime in hours')
parser.add_argument('--result_dir', help='where to store results') parser.add_argument('--result_dir', help='where to store results')
parser.add_argument('--flavour', help='Flavour') parser.add_argument('--flavour', help='Flavour')
...@@ -26,23 +27,25 @@ class MyEngine(Engine): ...@@ -26,23 +27,25 @@ class MyEngine(Engine):
self.nb_io_nodes = -1 self.nb_io_nodes = -1
self.nb_nodes = -1 self.nb_nodes = -1
self.flavour = None self.flavour = None
self.block_size = None
def init(self): def init(self):
self.nb_cpu_nodes = int(self.args.nb_cpu_nodes) if self.args.nb_cpu_nodes else 2 self.nb_cpu_nodes = int(self.args.nb_cpu_nodes) if self.args.nb_cpu_nodes else 2
self.nb_io_nodes = int(self.args.nb_io_nodes) if self.args.nb_io_nodes else 1 self.nb_io_nodes = int(self.args.nb_io_nodes) if self.args.nb_io_nodes else 1
self.nb_nodes = self.nb_cpu_nodes + self.nb_io_nodes self.nb_nodes = self.nb_cpu_nodes + self.nb_io_nodes
self.block_size = self.args.block_size if self.args.block_size else "1G"
walltime_hours = float(self.args.walltime) if self.args.walltime else 1 walltime_hours = float(self.args.walltime) if self.args.walltime else 1
nxc_build_file = self.args.nxc_build_file nxc_build_file = self.args.nxc_build_file
self.flavour = self.args.flavour if self.args.flavour else "g5k-image" self.flavour = self.args.flavour if self.args.flavour else "g5k-image"
site = "grenoble" site = "nancy"
cluster = "dahu" cluster = "gros"
oar_job = reserve_nodes(self.nb_nodes, site, cluster, "deploy" if self.flavour == "g5k-image" else "allow_classic_ssh", walltime=walltime_hours*60*60) oar_job = reserve_nodes(self.nb_nodes, site, cluster, "deploy" if self.flavour == "g5k-image" else "allow_classic_ssh", walltime=walltime_hours*60*60)
self.oar_job_id, site = oar_job[0] self.oar_job_id, site = oar_job[0]
roles_quantities = {"serverfs": self.nb_io_nodes, "node": self.nb_cpu_nodes} roles_quantities = {"serverfs": self.nb_io_nodes, "client": self.nb_cpu_nodes}
self.nodes = get_oar_job_nodes_nxc( self.nodes = get_oar_job_nodes_nxc(
self.oar_job_id, self.oar_job_id,
...@@ -55,7 +58,7 @@ class MyEngine(Engine): ...@@ -55,7 +58,7 @@ class MyEngine(Engine):
def run(self): def run(self):
result_dir = self.args.result_dir if self.args.result_dir else os.getcwd() result_dir = self.args.result_dir if self.args.result_dir else os.getcwd()
zip_archive_name = f"{result_dir}/results_ior_{self.nb_nodes}_nodes_{self.flavour}" zip_archive_name = f"{result_dir}/results_ior_{self.nb_cpu_nodes}_cpu_nodes_{self.nb_io_nodes}_io_nodes_{self.block_size}_block_size_{self.flavour}"
outfile = self.args.outfile[:-4] if self.args.outfile else zip_archive_name outfile = self.args.outfile[:-4] if self.args.outfile else zip_archive_name
folder_name = f"{result_dir}/expe_orangefs_{self.flavour}_{self.nb_nodes}" folder_name = f"{result_dir}/expe_orangefs_{self.flavour}_{self.nb_nodes}"
...@@ -63,24 +66,24 @@ class MyEngine(Engine): ...@@ -63,24 +66,24 @@ class MyEngine(Engine):
logger.info("Starting OrangeFS on the IO nodes") logger.info("Starting OrangeFS on the IO nodes")
io_nodes = " ".join(f"serverfs{i + 1}" for i in range(self.nb_io_nodes)) io_nodes = " ".join(f"serverfs{i + 1}" for i in range(self.nb_io_nodes))
orangefs_config_remote = Remote(f"start_orangefs {io_nodes}", self.nodes["serverfs"]) orangefs_config_remote = Remote(f"start_orangefs {io_nodes}", self.nodes["serverfs"], connection_params={'users': 'root'})
orangefs_config_remote.run() orangefs_config_remote.run()
logger.info("Starting OrangeFS on the clients") logger.info("Starting OrangeFS on the clients")
orangefs_config_client_remote = Remote("orangefs_mount serverfs1", self.nodes["node"]) orangefs_config_client_remote = Remote("orangefs_mount serverfs1", self.nodes["client"], connection_params={'user': 'root'})
orangefs_config_client_remote.run() orangefs_config_client_remote.run()
logger.info("Generating IOR config") logger.info("Generating IOR config")
run_ior_config_remote = Remote(f"generate_ior_config {self.nb_cpu_nodes}", self.nodes["node"][0], connection_params={'user': 'root'}) run_ior_config_remote = Remote(f"generate_ior_config {self.nb_cpu_nodes} {self.block_size}", self.nodes["client"][0], connection_params={'user': 'root'})
run_ior_config_remote.run() run_ior_config_remote.run()
for nb_node in range(self.nb_nodes - 1, 0, -1): for nb_node in range(self.nb_cpu_nodes, 0, -1):
is_ok = False is_ok = False
while not is_ok: while not is_ok:
# Run IOR # Run IOR
logger.info(f"Starting IOR with {nb_node} nodes") logger.info(f"Starting IOR with {nb_node} nodes")
run_ior_remote = Remote(f"start_ior_nodes {nb_node} {self.nb_cpu_nodes}", self.nodes["node"][0], connection_params={'user': 'root'}) run_ior_remote = Remote(f"start_ior_nodes {nb_node} {self.nb_cpu_nodes}", self.nodes["client"][0], connection_params={'user': 'root'})
run_ior_remote.run() run_ior_remote.run()
is_ok = run_ior_remote.ok is_ok = run_ior_remote.ok
...@@ -91,7 +94,7 @@ class MyEngine(Engine): ...@@ -91,7 +94,7 @@ class MyEngine(Engine):
# Get the result file back # Get the result file back
logger.info(f"Retreving the result file for IOR with {nb_node}") logger.info(f"Retreving the result file for IOR with {nb_node}")
get_file_command = f"cp /orangefs/results_ior.json {folder_name}/results_ior_total_{self.nb_nodes}_active_{nb_node}_{self.flavour}.json" get_file_command = f"cp /orangefs/results_ior.json {folder_name}/results_ior_total_{self.nb_nodes}_active_{nb_node}_{self.flavour}.json"
get_file_remote = Remote(get_file_command, self.nodes["node"][0], connection_params={'user': 'root'}) get_file_remote = Remote(get_file_command, self.nodes["client"][0], connection_params={'user': 'root'})
get_file_remote.run() get_file_remote.run()
is_ok = get_file_remote.ok is_ok = get_file_remote.ok
logger.info(f"Result file for IOR with {nb_node} retrieved") logger.info(f"Result file for IOR with {nb_node} retrieved")
...@@ -101,7 +104,7 @@ class MyEngine(Engine): ...@@ -101,7 +104,7 @@ class MyEngine(Engine):
remove_folder(folder_name) remove_folder(folder_name)
logger.info(f"Giving back the resources") logger.info(f"Giving back the resources")
oardel([(self.oar_job_id, "grenoble")]) oardel([(self.oar_job_id, "nancy")])
def reserve_nodes(nb_nodes, site, cluster, job_type, walltime=3600): def reserve_nodes(nb_nodes, site, cluster, job_type, walltime=3600):
jobs = oarsub([(OarSubmission("{{cluster='{}'}}/nodes={}".format(cluster, nb_nodes), walltime, job_type=[job_type]), site)]) jobs = oarsub([(OarSubmission("{{cluster='{}'}}/nodes={}".format(cluster, nb_nodes), walltime, job_type=[job_type]), site)])
......
...@@ -40,11 +40,11 @@ IOR START ...@@ -40,11 +40,11 @@ IOR START
repetitions=5 repetitions=5
numTasks=${builtins.toString numTasks} numTasks=${builtins.toString numTasks}
segmentCount=4 segmentCount=1
blockSize=128M blockSize=128M
transferSize=4M transferSize=1M
summaryFile=/data/results_ior.json summaryFile=/orangefs/results_ior.json
summaryFormat=JSON summaryFormat=JSON
RUN RUN
writeFile=0 writeFile=0
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment