diff --git a/data/grid5000/sites/sophia/clusters/esterel32/esterel32.json b/data/grid5000/sites/sophia/clusters/esterel32/esterel32.json index ecf5c10f8cb203193d4c5fb73934074a4207abeb..45cb1039c79397d3f1d1f74892a355cd81f1ecca 100644 --- a/data/grid5000/sites/sophia/clusters/esterel32/esterel32.json +++ b/data/grid5000/sites/sophia/clusters/esterel32/esterel32.json @@ -53,8 +53,8 @@ ], "model": "Dell PowerEdge T640", "nodes_count": 1, - "nodes_description": "1 CPU Unknown vendor, 72 cores/CPU, 0GB RAM, 0GB SSD, 1 x 10Gb Ethernet", - "priority": 197001, + "nodes_description": "2 CPUs Intel Xeon Gold 6238R, 28 cores/CPU, 4 GPUs Quadro RTX 8000, 768GB RAM, 446GB HDD, 7151GB HDD, 1 x 1Gb Ethernet, 1 x 40Gb InfiniBand", + "priority": 197101, "queues": [ "admin", "testing" diff --git a/data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json b/data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json index e7120831b562f0dbf87ab3f48bc285efbde0cc01..cda3161beb6949a97b7c2d9b01339460e2d2cae9 100644 --- a/data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json +++ b/data/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.json @@ -1,26 +1,97 @@ { "architecture": { - "cpu_core_numbering": "contiguous", - "nb_cores": 72, - "nb_procs": 1, - "nb_threads": 72, + "cpu_core_numbering": "round-robin", + "nb_cores": 56, + "nb_procs": 2, + "nb_threads": 112, "platform_type": "x86_64" }, "bios": { - "release_date": "01/01/2000", - "vendor": "Unknown", - "version": 1 + "release_date": "01/09/2025", + "vendor": "Dell Inc.", + "version": "2.23.0" }, - "bmc_version": "v1", + "bmc_version": "7.00.00.174", "chassis": { "manufactured_at": "1970-01-01", - "manufacturer": "Unknown", - "name": "Unknown", + "manufacturer": "Dell Inc.", + "name": "PowerEdge T640", + "serial": "5XXGW53", "warranty_end": "1970-01-01" }, "exotic": false, + "gpu_devices": { + "nvidia0": { + "compute_capability": "7.5", + "cores": 4608, + "cpu_affinity": 0, + "device": "/dev/nvidia0", + "memory": 48318382080, + "microarchitecture": "Turing", + "model": "Quadro RTX 8000", + "performance": { + "fp-16": 32620000000000, + "fp-32": 16310000000000, + "fp-64": 510000000000 + }, + "power_default_limit": "250.00 W", + "vbios_version": "90.02.4E.00.03", + "vendor": "Nvidia" + }, + "nvidia1": { + "compute_capability": "7.5", + "cores": 4608, + "cpu_affinity": 0, + "device": "/dev/nvidia1", + "memory": 48318382080, + "microarchitecture": "Turing", + "model": "Quadro RTX 8000", + "performance": { + "fp-16": 32620000000000, + "fp-32": 16310000000000, + "fp-64": 510000000000 + }, + "power_default_limit": "250.00 W", + "vbios_version": "90.02.4E.00.03", + "vendor": "Nvidia" + }, + "nvidia2": { + "compute_capability": "7.5", + "cores": 4608, + "cpu_affinity": 1, + "device": "/dev/nvidia2", + "memory": 48318382080, + "microarchitecture": "Turing", + "model": "Quadro RTX 8000", + "performance": { + "fp-16": 32620000000000, + "fp-32": 16310000000000, + "fp-64": 510000000000 + }, + "power_default_limit": "250.00 W", + "vbios_version": "90.02.4E.00.03", + "vendor": "Nvidia" + }, + "nvidia3": { + "compute_capability": "7.5", + "cores": 4608, + "cpu_affinity": 1, + "device": "/dev/nvidia3", + "memory": 48318382080, + "microarchitecture": "Turing", + "model": "Quadro RTX 8000", + "performance": { + "fp-16": 32620000000000, + "fp-32": 16310000000000, + "fp-64": 510000000000 + }, + "power_default_limit": "250.00 W", + "vbios_version": "90.02.4E.00.03", + "vendor": "Nvidia" + } + }, "main_memory": { - "ram_size": 8 + "ram_size": 824633720832 }, "management_tools": { "bmc_vendor_tool": "racadm", @@ -30,29 +101,143 @@ }, "memory_devices": [ { - "device": "dimm_proc 1 dimm 1", - "size": 8, + "device": "dimm_a1", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_a2", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_a3", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_a4", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_a5", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_a6", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_b1", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_b2", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_b3", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_b4", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_b5", + "size": 68719476736, + "technology": "dram" + }, + { + "device": "dimm_b6", + "size": 68719476736, "technology": "dram" } ], "network_adapters": [ { "device": "eth0", - "driver": "mlx_core", + "driver": "bnxt_en", "enabled": true, + "firmware_version": "218.0.219.13/pkg 21.85.21.92", "interface": "Ethernet", "ip": "172.16.131.45", "ip6": "2001:660:4406:800:4::2d", "kavlan": false, "mac": "f4:02:70:ed:2a:74", "management": false, + "model": "BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller", "mountable": true, "mounted": true, "name": "enp1s0f0np0", "network_address": "esterel32-1.sophia.grid5000.fr", - "rate": 10000000000, + "rate": 1000000000, + "sriov": false, + "sriov_totalvfs": 0, "switch": "sw-2", - "switch_port": "1/1/18" + "switch_port": "1/1/18", + "vendor": "Broadcom Inc. and subsidiaries" + }, + { + "device": "eth1", + "driver": "bnxt_en", + "enabled": false, + "firmware_version": "218.0.219.13/pkg 21.85.21.92", + "interface": "Ethernet", + "kavlan": false, + "mac": "f4:02:70:ed:2a:75", + "management": false, + "model": "BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller", + "mountable": false, + "mounted": false, + "name": "eno2np1", + "sriov": false, + "sriov_totalvfs": 0, + "vendor": "Broadcom Inc. and subsidiaries" + }, + { + "device": "eth2", + "driver": "mlx4_en", + "enabled": false, + "firmware_version": "2.9.1000", + "interface": "Ethernet", + "kavlan": false, + "mac": "00:02:c9:4f:f9:15", + "management": false, + "model": "MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter IC with PCIe 2.0 x8 5.0GT/s Interface", + "mountable": false, + "mounted": false, + "name": "enp137s0d1", + "sriov": false, + "sriov_totalvfs": 0, + "vendor": "Mellanox Technologies" + }, + { + "device": "ib0", + "driver": "mlx4_core", + "enabled": true, + "firmware_version": "2.9.1000", + "guid": "0x0002c903004ff915", + "interface": "InfiniBand", + "ip": "172.18.131.45", + "kavlan": false, + "mac": "00:02:c9:03:00:4f:f9:15", + "management": false, + "model": "MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter IC with PCIe 2.0 x8 5.0GT/s Interface", + "mountable": true, + "mounted": true, + "name": "ib0", + "netmask": "255.255.240.0", + "network_address": "esterel32-1-ib0.sophia.grid5000.fr", + "rate": 40000000000, + "vendor": "Mellanox Technologies" }, { "device": "bmc", @@ -69,32 +254,32 @@ ], "nodeset": "esterel32", "operating_system": { - "cstate_driver": "unknown", - "cstate_governor": "unknown", + "cstate_driver": "intel_idle", + "cstate_governor": "menu", "ht_enabled": true, - "pstate_driver": "unknwon", - "pstate_governor": "unknown", + "pstate_driver": "intel_pstate", + "pstate_governor": "performance", "turboboost_enabled": true }, "performance": { - "core_flops": 128, - "node_flops": 9216 + "core_flops": 70400000000, + "node_flops": 3942400000000 }, "processor": { "cache_l1": null, - "cache_l1d": 8, - "cache_l1i": 8, - "cache_l2": 8, - "cache_l3": 8, - "clock_speed": 8, + "cache_l1d": 32768, + "cache_l1i": 32768, + "cache_l2": 1048576, + "cache_l3": 40370176, + "clock_speed": 2200000000, "ht_capable": true, "instruction_set": "x86-64", - "microarchitecture": "Haswell", - "microcode": "0xd000001", - "model": "Unknown", - "other_description": "description", - "vendor": "vendor", - "version": "vendor" + "microarchitecture": "Cascade Lake-SP", + "microcode": "0x5003801", + "model": "Intel Xeon", + "other_description": "Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz", + "vendor": "Intel", + "version": "Gold 6238R" }, "redfish": true, "software": { @@ -104,14 +289,26 @@ }, "storage_devices": [ { - "by_id": "", + "by_id": "/dev/disk/by-id/wwn-0x62cea7f0578207002f76eb1f130760be", "by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0", + "firmware_version": 4.3, "id": "disk0", - "interface": "SAS", - "model": "unknown", - "size": 8, - "storage": "SSD", - "vendor": "Unknown" + "interface": "RAID", + "model": "PERC H730P Adp", + "size": 479559942144, + "storage": "HDD", + "vendor": "Dell" + }, + { + "by_id": "/dev/disk/by-id/wwn-0x62cea7f05782070027de23a04219a934", + "by_path": "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0", + "firmware_version": 4.3, + "id": "disk1", + "interface": "RAID", + "model": "PERC H730P Adp", + "size": 7678864654336, + "storage": "HDD", + "vendor": "Dell" } ], "supported_job_types": { @@ -121,7 +318,8 @@ "queues": [ "admin", "testing" - ] + ], + "virtual": "ivt" }, "type": "node", "uid": "esterel32-1" diff --git a/input/grid5000/ipv4.yaml b/input/grid5000/ipv4.yaml index b6415415135943b606d0c4b6e7fb9f1d64a38f00..be8c95110ea5b43b302cf87142d25eeb49964e88 100644 --- a/input/grid5000/ipv4.yaml +++ b/input/grid5000/ipv4.yaml @@ -195,6 +195,7 @@ ipv4: sophia esterel30 eth0 0 0 3 36 sophia esterel31 eth0 0 0 3 40 sophia esterel32 eth0 0 0 3 44 + sophia esterel32 ib0 0 0 3 44 sophia esterel33 eth0 0 0 3 45 sophia esterel34 eth0 0 0 3 46 sophia esterel35 eth0 0 0 3 47 diff --git a/input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml b/input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml index ce546bad73ba1a37dc79f0cc5a1678fffd6b9027..7a8f378e8370da43d53bafd8e75a8517fa1f0d96 100644 --- a/input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml +++ b/input/grid5000/sites/sophia/clusters/esterel32/esterel32.yaml @@ -9,16 +9,13 @@ queues: - testing nodes: esterel32-1: - chassis: - manufactured_at: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. - warranty_end: 1970-01-01 # TODO: if not Dell vendor, put date. if Dell, use rake gen:dell-product-data after g5k-checks import, and remove this line. supported_job_types: deploy: true besteffort: true max_walltime: 0 processor: - microarchitecture: Haswell # TODO: replace with microarch name. - clock_speed: 8 # TODO: Replace with clock speed. + microarchitecture: Cascade Lake-SP + clock_speed: 2200000000 network_adapters: bmc: interface: Ethernet @@ -29,11 +26,26 @@ nodes: enabled: true mountable: true mounted: true + eth1: + enabled: false + mountable: false + mounted: false + eth2: + enabled: false + mountable: false + mounted: false + ib0: + mounted: true + enabled: true + mountable: true + netmask: 255.255.240.0 storage_devices: - disk0: # This field will have to be renamed later. + pci-0000:19:00.0-scsi-0:2:0:0: id: disk0 - interface: SAS - by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0" + interface: RAID + pci-0000:19:00.0-scsi-0:2:1:0: + id: disk1 + interface: RAID software: standard-environment: debian11-x64-std management_tools: diff --git a/input/grid5000/sites/sophia/clusters/esterel32/nodes.yaml.erb b/input/grid5000/sites/sophia/clusters/esterel32/nodes.yaml.erb deleted file mode 100644 index bafdb5caa43209adfebb6285220a66c40c060960..0000000000000000000000000000000000000000 --- a/input/grid5000/sites/sophia/clusters/esterel32/nodes.yaml.erb +++ /dev/null @@ -1,80 +0,0 @@ -<% - # File generated by 'rake mass:create SRC=doc/sophia-clusters-mass-create-new-final.csv - # If changes are needed, it might be better to edit the source data and regenerate using 'rake mass:create' - - cluster_name = "esterel32" - nodes_number = 1 # Size of the cluster (number of nodes) - # MAC addresses declaration - mac_eth0_list = %w( -f4:02:70:ed:2a:74 - ) - mac_bmc_list = %w( -f4:02:70:ed:2a:84 - ) -%> ---- -nodes: -<% (1..nodes_number).each { |i| %> - <%= cluster_name %>-<%= i %>: - architecture: - nb_procs: 1 # Fake data, will be replaced by g5k-checks - nb_cores: 72 # Fake data, will be replaced by g5k-checks - nb_threads: 72 # Fake data, will be replaced by g5k-checks - platform_type: x86_64 # Fake data, will be replaced by g5k-checks - cpu_core_numbering: contiguous # Fake data, will be replaced by g5k-checks - bios: - release_date: 01/01/2000 # Fake date, will be replaced by g5k-checks - vendor: Unknown # Fake vendor, will be replaced by g5k-checks - version: 1 # Fake version, will be replaced by g5k-checks - bmc_version: v1 # Fake version, will be replaced by g5k-checks - chassis: - manufacturer: Unknown # Fake manufacturer, will be replaced by g5k-checks - name: Unknown # Fake name, will be replaced by g5k-checks - main_memory: - ram_size: 8 # Fake size, will be replaced by g5k-checks - memory_devices: - dimm: - size: 8 # Fake size, will be replaced by g5k-checks - technology: dram # Common memory technology, will be replaced by g5k-checks - processor: - model: Unknown # Fake model name, will be replaced by g5k-checks - other_description: description # Fake description, will be replaced by g5k-checks - vendor: vendor # Fake vendor, will be replaced by g5k-checks - version: vendor # Fake version, will be replaced by g5k-checks - cache_l1d: 8 # Fake cache, will be replaced by g5k-checks - cache_l1i: 8 # Fake cache, will be replaced by g5k-checks - cache_l2: 8 # Fake cache, will be replaced by g5k-checks - cache_l3: 8 # Fake cache, will be replaced by g5k-checks - instruction_set: x86-64 # Common instruction set, will be replaced by g5k-checks - microcode: "0xd000001" # Fake microcode, will be replaced by g5k-checks - ht_capable: true # Default ht capable value, will be replaced by g5k-checks - main_memory: - ram_size: 8 # Fake ram size, will be replaced by g5k-checks - memory_devices: - dimm_proc 1 dimm 1: # Fake dimm name, will be replaced by g5k-checks - size: 8 # Fake dimm size, will be replaced by g5k-checks - technology: dram # Default dimm technology, will be replaced by g5k-checks - operating_system: - cstate_driver: unknown # Fake driver, will be replaced by g5k-checks - cstate_governor: unknown # Fake governor, will be replaced by g5k-checks - ht_enabled: true # common value for hyper threading, will be replaced by g5k-checks - pstate_driver: unknwon # Fake driver, will be replaced by g5k-checks - pstate_governor: unknown # Fake driver, will be replaced by g5k-checks - turboboost_enabled: true # Default value for turboboost, will be replaced by g5k-checks - network_adapters: - bmc: - management: true - mac: <%= mac_bmc_list[i - 1] %> - eth0: - interface: Ethernet - management: false - driver: mlx_core # Fake data, will be replaced by g5k-checks - name: enp1s0f0np0 # Fake data, will be replaced by g5k-checks - rate: 10000000000 # Fake data, will be replaced by g5k-checks - mac: <%= mac_eth0_list[i - 1] %> - storage_devices: - disk0: - storage: SSD # Fake data, will be replaced by g5k-checks - model: unknown # Fake data, will be replaced by g5k-checks - size: 8 # Fake data, will be replaced by g5k-checks -<% } %> diff --git a/input/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.yaml b/input/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b40c53d7f8d01ce88041ccfd9edc3b8bbf1a1f77 --- /dev/null +++ b/input/grid5000/sites/sophia/clusters/esterel32/nodes/esterel32-1.yaml @@ -0,0 +1,180 @@ +# Generated by g5k-checks (g5k-checks -m api) +--- +esterel32-1: + architecture: + cpu_core_numbering: round-robin + nb_cores: 56 + nb_procs: 2 + nb_threads: 112 + platform_type: x86_64 + bios: + release_date: 01/09/2025 + vendor: Dell Inc. + version: 2.23.0 + bmc_version: 7.00.00.174 + chassis: + manufacturer: Dell Inc. + name: PowerEdge T640 + serial: 5XXGW53 + gpu_devices: + nvidia0: + cpu_affinity: 0 + device: "/dev/nvidia0" + memory: 48318382080 + model: Quadro RTX 8000 + power_default_limit: 250.00 W + vbios_version: 90.02.4E.00.03 + vendor: Nvidia + nvidia1: + cpu_affinity: 0 + device: "/dev/nvidia1" + memory: 48318382080 + model: Quadro RTX 8000 + power_default_limit: 250.00 W + vbios_version: 90.02.4E.00.03 + vendor: Nvidia + nvidia2: + cpu_affinity: 1 + device: "/dev/nvidia2" + memory: 48318382080 + model: Quadro RTX 8000 + power_default_limit: 250.00 W + vbios_version: 90.02.4E.00.03 + vendor: Nvidia + nvidia3: + cpu_affinity: 1 + device: "/dev/nvidia3" + memory: 48318382080 + model: Quadro RTX 8000 + power_default_limit: 250.00 W + vbios_version: 90.02.4E.00.03 + vendor: Nvidia + main_memory: + ram_size: 824633720832 + memory_devices: + dimm_a1: + size: 68719476736 + technology: dram + dimm_a2: + size: 68719476736 + technology: dram + dimm_a3: + size: 68719476736 + technology: dram + dimm_a4: + size: 68719476736 + technology: dram + dimm_a5: + size: 68719476736 + technology: dram + dimm_a6: + size: 68719476736 + technology: dram + dimm_b1: + size: 68719476736 + technology: dram + dimm_b2: + size: 68719476736 + technology: dram + dimm_b3: + size: 68719476736 + technology: dram + dimm_b4: + size: 68719476736 + technology: dram + dimm_b5: + size: 68719476736 + technology: dram + dimm_b6: + size: 68719476736 + technology: dram + network_adapters: + bmc: + ip: 172.17.131.45 + mac: f4:02:70:ed:2a:84 + management: true + eth0: + driver: bnxt_en + firmware_version: 218.0.219.13/pkg 21.85.21.92 + interface: Ethernet + ip: 172.16.131.45 + mac: f4:02:70:ed:2a:74 + management: false + model: BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller + name: enp1s0f0np0 + rate: 1000000000 + sriov: false + sriov_totalvfs: 0 + vendor: Broadcom Inc. and subsidiaries + eth1: + driver: bnxt_en + firmware_version: 218.0.219.13/pkg 21.85.21.92 + interface: Ethernet + mac: f4:02:70:ed:2a:75 + management: false + model: BCM57416 NetXtreme-E Dual-Media 10G RDMA Ethernet Controller + name: eno2np1 + sriov: false + sriov_totalvfs: 0 + vendor: Broadcom Inc. and subsidiaries + eth2: + driver: mlx4_en + firmware_version: 2.9.1000 + interface: Ethernet + mac: 00:02:c9:4f:f9:15 + management: false + model: MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter + IC with PCIe 2.0 x8 5.0GT/s Interface + name: enp137s0d1 + sriov: false + sriov_totalvfs: 0 + vendor: Mellanox Technologies + ib0: + driver: mlx4_core + firmware_version: 2.9.1000 + guid: '0x0002c903004ff915' + interface: InfiniBand + ip: 172.18.131.45 + mac: 00:02:c9:03:00:4f:f9:15 + management: false + model: MT25408A0-FCC-QI ConnectX, Dual Port 40Gb/s InfiniBand / 10GigE Adapter + IC with PCIe 2.0 x8 5.0GT/s Interface + name: ib0 + rate: 40000000000 + vendor: Mellanox Technologies + operating_system: + cstate_driver: intel_idle + cstate_governor: menu + ht_enabled: true + pstate_driver: intel_pstate + pstate_governor: performance + turboboost_enabled: true + processor: + cache_l1d: 32768 + cache_l1i: 32768 + cache_l2: 1048576 + cache_l3: 40370176 + ht_capable: true + instruction_set: x86-64 + microcode: '0x5003801' + model: Intel Xeon + other_description: Intel(R) Xeon(R) Gold 6238R CPU @ 2.20GHz + vendor: Intel + version: Gold 6238R + storage_devices: + pci-0000:19:00.0-scsi-0:2:0:0: + by_id: "/dev/disk/by-id/wwn-0x62cea7f0578207002f76eb1f130760be" + by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:0:0" + firmware_version: 4.3 + model: PERC H730P Adp + size: 479559942144 + storage: HDD + pci-0000:19:00.0-scsi-0:2:1:0: + by_id: "/dev/disk/by-id/wwn-0x62cea7f05782070027de23a04219a934" + by_path: "/dev/disk/by-path/pci-0000:19:00.0-scsi-0:2:1:0" + firmware_version: 4.3 + model: PERC H730P Adp + size: 7678864654336 + storage: HDD + supported_job_types: + virtual: ivt