Reviewer notes (free text placed before the first file header of the patch).

What this patch changes:
  * nxc/my_scripts.nix: the script that rewrites numTasks in the per-cluster
    IOR config now also reads a block size from $2 and rewrites blockSize.
    The script that launches mpirun builds my_hosts from the "client" entries
    of /etc/hosts (previously "node") and uses different MCA options.
  * nxc/script.py: adds --block_size (default "1G"), reserves on nancy/gros
    instead of grenoble/dahu, renames the "node" role to "client", passes the
    block size to generate_ior_config, iterates from nb_cpu_nodes down to 1,
    and adds connection_params to the two OrangeFS Remote calls.
  * nxc/script_ior.nix: segmentCount 4 -> 1, transferSize 4M -> 1M, and the
    summary file moves to /orangefs/results_ior.json.

NOTE(review): the serverfs Remote call now uses connection_params={'user': 'root'}
like every other Remote call in this patch; it previously read 'users'.
NOTE(review): oardel hard-codes "nancy" separately from the site variable set
in init(); the two must be kept in sync by hand.
NOTE(review): line breaks, blank context lines and indentation were restored
from a whitespace-collapsed copy of this patch. Hunk line counts match the
headers, but confirm the context against the target files before applying.

diff --git a/nxc/my_scripts.nix b/nxc/my_scripts.nix
index e1c10f84f35981f82b9a95e0236b27a6b20f5941..bb50b694e7d32198c0d9e977b19545795ca614fb 100644
--- a/nxc/my_scripts.nix
+++ b/nxc/my_scripts.nix
@@ -26,8 +26,10 @@ in {
       cd ${nfsMountPoint}
       cp ${iorConfig} ${iorConfigPerCluster}
       NB_TASKS=$1
+      BLOCK_SIZE=$2
 
       sed -ri "s/(numTasks)=\w+/\1=$NB_TASKS/g" ${iorConfigPerCluster}
+      sed -ri "s/(blockSize)=\w+/\1=$BLOCK_SIZE/g" ${iorConfigPerCluster}
     '';
 
   start_ior_nodes =
@@ -39,9 +41,10 @@ in {
 
       NB_SLOTS_PER_NODE=$(($TOTAL_NB_NODES / $NB_NODES))
 
-      cat /etc/hosts | grep node | head -n $NB_NODES | awk -v nb_slots="$NB_SLOTS_PER_NODE" '{ print $2 " slots=" nb_slots;}' > my_hosts
+      cat /etc/hosts | grep client | head -n $NB_NODES | awk -v nb_slots="$NB_SLOTS_PER_NODE" '{ print $2 " slots=" nb_slots;}' > my_hosts
 
-      mpirun --allow-run-as-root --oversubscribe -mca btl self,vader -np $TOTAL_NB_NODES --hostfile my_hosts ior -f ${iorConfigPerCluster}
+      # mpirun --allow-run-as-root --oversubscribe -mca btl self,vader -np $TOTAL_NB_NODES --hostfile my_hosts ior -f ${iorConfigPerCluster}
+      mpirun --allow-run-as-root --oversubscribe --mca pml ^ucx --mca mtl ^psm2,ofi --mca btl ^ofi,openib -np $TOTAL_NB_NODES --hostfile my_hosts ior -f ${iorConfigPerCluster}
     '';
 
   gen_config_orangefs =
diff --git a/nxc/script.py b/nxc/script.py
index 8d0a37228cfffc66613623382aebcd073ff7d389..d5f8cedba87dae8b8be4151f3c3fc65a16a90400 100644
--- a/nxc/script.py
+++ b/nxc/script.py
@@ -16,6 +16,7 @@ class MyEngine(Engine):
         parser.add_argument('--nxc_build_file', help='Path to the NXC deploy file')
         parser.add_argument('--nb_cpu_nodes', help='Number of compute nodes')
         parser.add_argument('--nb_io_nodes', help='Number of io nodes')
+        parser.add_argument('--block_size', help='File size to write')
         parser.add_argument('--walltime', help='walltime in hours')
         parser.add_argument('--result_dir', help='where to store results')
         parser.add_argument('--flavour', help='Flavour')
@@ -26,23 +27,25 @@ class MyEngine(Engine):
         self.nb_io_nodes = -1
         self.nb_nodes = -1
         self.flavour = None
+        self.block_size = None
 
     def init(self):
         self.nb_cpu_nodes = int(self.args.nb_cpu_nodes) if self.args.nb_cpu_nodes else 2
         self.nb_io_nodes = int(self.args.nb_io_nodes) if self.args.nb_io_nodes else 1
         self.nb_nodes = self.nb_cpu_nodes + self.nb_io_nodes
+        self.block_size = self.args.block_size if self.args.block_size else "1G"
 
         walltime_hours = float(self.args.walltime) if self.args.walltime else 1
         nxc_build_file = self.args.nxc_build_file
         self.flavour = self.args.flavour if self.args.flavour else "g5k-image"
 
-        site = "grenoble"
-        cluster = "dahu"
+        site = "nancy"
+        cluster = "gros"
 
         oar_job = reserve_nodes(self.nb_nodes, site, cluster, "deploy" if self.flavour == "g5k-image" else "allow_classic_ssh", walltime=walltime_hours*60*60)
         self.oar_job_id, site = oar_job[0]
 
-        roles_quantities = {"serverfs": self.nb_io_nodes, "node": self.nb_cpu_nodes}
+        roles_quantities = {"serverfs": self.nb_io_nodes, "client": self.nb_cpu_nodes}
 
         self.nodes = get_oar_job_nodes_nxc(
             self.oar_job_id,
@@ -55,7 +58,7 @@ class MyEngine(Engine):
 
     def run(self):
         result_dir = self.args.result_dir if self.args.result_dir else os.getcwd()
-        zip_archive_name = f"{result_dir}/results_ior_{self.nb_nodes}_nodes_{self.flavour}"
+        zip_archive_name = f"{result_dir}/results_ior_{self.nb_cpu_nodes}_cpu_nodes_{self.nb_io_nodes}_io_nodes_{self.block_size}_block_size_{self.flavour}"
         outfile = self.args.outfile[:-4] if self.args.outfile else zip_archive_name
 
         folder_name = f"{result_dir}/expe_orangefs_{self.flavour}_{self.nb_nodes}"
@@ -63,24 +66,24 @@ class MyEngine(Engine):
 
         logger.info("Starting OrangeFS on the IO nodes")
         io_nodes = " ".join(f"serverfs{i + 1}" for i in range(self.nb_io_nodes))
-        orangefs_config_remote = Remote(f"start_orangefs {io_nodes}", self.nodes["serverfs"])
+        orangefs_config_remote = Remote(f"start_orangefs {io_nodes}", self.nodes["serverfs"], connection_params={'user': 'root'})
         orangefs_config_remote.run()
 
         logger.info("Starting OrangeFS on the clients")
-        orangefs_config_client_remote = Remote("orangefs_mount serverfs1", self.nodes["node"])
+        orangefs_config_client_remote = Remote("orangefs_mount serverfs1", self.nodes["client"], connection_params={'user': 'root'})
         orangefs_config_client_remote.run()
 
         logger.info("Generating IOR config")
-        run_ior_config_remote = Remote(f"generate_ior_config {self.nb_cpu_nodes}", self.nodes["node"][0], connection_params={'user': 'root'})
+        run_ior_config_remote = Remote(f"generate_ior_config {self.nb_cpu_nodes} {self.block_size}", self.nodes["client"][0], connection_params={'user': 'root'})
         run_ior_config_remote.run()
 
-        for nb_node in range(self.nb_nodes - 1, 0, -1):
+        for nb_node in range(self.nb_cpu_nodes, 0, -1):
             is_ok = False
             while not is_ok:
 
                 # Run IOR
                 logger.info(f"Starting IOR with {nb_node} nodes")
-                run_ior_remote = Remote(f"start_ior_nodes {nb_node} {self.nb_cpu_nodes}", self.nodes["node"][0], connection_params={'user': 'root'})
+                run_ior_remote = Remote(f"start_ior_nodes {nb_node} {self.nb_cpu_nodes}", self.nodes["client"][0], connection_params={'user': 'root'})
                 run_ior_remote.run()
                 is_ok = run_ior_remote.ok
 
@@ -91,7 +94,7 @@ class MyEngine(Engine):
                 # Get the result file back
                 logger.info(f"Retreving the result file for IOR with {nb_node}")
                 get_file_command = f"cp /orangefs/results_ior.json {folder_name}/results_ior_total_{self.nb_nodes}_active_{nb_node}_{self.flavour}.json"
-                get_file_remote = Remote(get_file_command, self.nodes["node"][0], connection_params={'user': 'root'})
+                get_file_remote = Remote(get_file_command, self.nodes["client"][0], connection_params={'user': 'root'})
                 get_file_remote.run()
                 is_ok = get_file_remote.ok
                 logger.info(f"Result file for IOR with {nb_node} retrieved")
@@ -101,7 +104,7 @@ class MyEngine(Engine):
         remove_folder(folder_name)
 
         logger.info(f"Giving back the resources")
-        oardel([(self.oar_job_id, "grenoble")])
+        oardel([(self.oar_job_id, "nancy")])
 
 def reserve_nodes(nb_nodes, site, cluster, job_type, walltime=3600):
     jobs = oarsub([(OarSubmission("{{cluster='{}'}}/nodes={}".format(cluster, nb_nodes), walltime, job_type=[job_type]), site)])
diff --git a/nxc/script_ior.nix b/nxc/script_ior.nix
index 590c3040d8d9c0c85dd51714097fed22b68e2990..04df93b790a528e07929233765199bda07bd05a4 100644
--- a/nxc/script_ior.nix
+++ b/nxc/script_ior.nix
@@ -40,11 +40,11 @@ IOR START
 
     repetitions=5
     numTasks=${builtins.toString numTasks}
-    segmentCount=4
+    segmentCount=1
     blockSize=128M
-    transferSize=4M
+    transferSize=1M
 
-    summaryFile=/data/results_ior.json
+    summaryFile=/orangefs/results_ior.json
     summaryFormat=JSON
 RUN
     writeFile=0