Commit 7d4149be authored by BERARD Benjamin's avatar BERARD Benjamin
Browse files

Installation du cluster avec GPU A100 à Nancy

parent 0c2adbfd
{
"created_at": "Wed, 13 Jan 2021 00:00:00 GMT",
"exotic": false,
"kavlan": false,
"metrics": [
],
"model": "Dell PowerEdge R7525",
"queues": [
"admin",
"testing"
],
"type": "cluster",
"uid": "grouille"
}
\ No newline at end of file
{
"architecture": {
"cpu_core_numbering": "contiguous",
"nb_cores": 64,
"nb_procs": 2,
"nb_threads": 128,
"platform_type": "x86_64"
},
"bios": {
"release_date": "07/27/2020",
"vendor": "Dell Inc.",
"version": "1.5.5"
},
"bmc_version": "4.30.30.30",
"chassis": {
"manufacturer": "Dell Inc.",
"name": "PowerEdge R7525",
"serial": "64WT393"
},
"exotic": false,
"gpu_devices": {
"nvidia0": {
"cpu_affinity": 0,
"device": "/dev/nvidia0",
"memory": 40537000000,
"model": "A100-PCIE-40GB",
"power_default_limit": "250.00 W",
"vbios_version": "92.00.25.00.08",
"vendor": "Nvidia"
},
"nvidia1": {
"cpu_affinity": 1,
"device": "/dev/nvidia1",
"memory": 40537000000,
"model": "A100-PCIE-40GB",
"power_default_limit": "250.00 W",
"vbios_version": "92.00.25.00.08",
"vendor": "Nvidia"
}
},
"main_memory": {
"ram_size": 137438953472
},
"monitoring": {
"wattmeter": "false"
},
"network_adapters": [
{
"device": "eth0",
"driver": "tg3",
"enabled": false,
"firmware_version": "FFV21.60.16 bc 5720-v1.39",
"interface": "Ethernet",
"kavlan": false,
"mac": "70:b5:e8:d0:b2:f8",
"management": false,
"model": "NetXtreme BCM5720 Gigabit Ethernet PCIe",
"mountable": false,
"mounted": false,
"name": "eno1",
"vendor": "Broadcom"
},
{
"device": "eth1",
"driver": "tg3",
"enabled": false,
"firmware_version": "FFV21.60.16 bc 5720-v1.39",
"interface": "Ethernet",
"kavlan": false,
"mac": "70:b5:e8:d0:b2:f9",
"management": false,
"model": "NetXtreme BCM5720 Gigabit Ethernet PCIe",
"mountable": false,
"mounted": false,
"name": "eno2",
"vendor": "Broadcom"
},
{
"device": "eth2",
"driver": "mlx5_core",
"enabled": true,
"firmware_version": "16.27.6106 (DEL0000000016)",
"interface": "Ethernet",
"ip": "172.16.79.1",
"ip6": "2001:660:4406:500:10:0::1",
"kavlan": false,
"mac": "04:3f:72:cd:c2:18",
"management": false,
"model": "MT27800 Family [ConnectX-5]",
"mountable": true,
"mounted": true,
"name": "eno33",
"network_address": "grouille-1.nancy.grid5000.fr",
"rate": 25000000000,
"switch": null,
"switch_port": null,
"vendor": "Mellanox Technologies"
},
{
"device": "eth3",
"driver": "mlx5_core",
"enabled": false,
"firmware_version": "16.27.6106 (DEL0000000016)",
"interface": "Ethernet",
"kavlan": false,
"mac": "04:3f:72:cd:c2:19",
"management": false,
"model": "MT27800 Family [ConnectX-5]",
"mountable": false,
"mounted": false,
"name": "eno34",
"vendor": "Mellanox Technologies"
},
{
"device": "bmc",
"enabled": true,
"interface": "Ethernet",
"ip": "172.17.79.1",
"kavlan": false,
"mac": "70:b5:e8:e7:3c:74",
"management": true,
"mountable": false,
"mounted": false,
"network_address": "grouille-1-bmc.nancy.grid5000.fr"
}
],
"operating_system": {
"cstate_driver": "acpi_idle",
"cstate_governor": "menu",
"ht_enabled": true,
"pstate_driver": "acpi-cpufreq",
"pstate_governor": "performance",
"turboboost_enabled": true
},
"performance": {
"core_flops": 16800000000,
"node_flops": 1075200000000
},
"processor": {
"cache_l1": null,
"cache_l1d": 32768,
"cache_l1i": 32768,
"cache_l2": 524288,
"cache_l3": 16777216,
"clock_speed": 2100000000,
"ht_capable": true,
"instruction_set": "x86-64",
"microarchitecture": "Zen",
"microcode": "0x8301038",
"model": "AMD EPYC",
"other_description": "AMD EPYC 7452 32-Core Processor",
"vendor": "AMD",
"version": 7452
},
"sensors": {
},
"software": {
"forced-deployment-timestamp": 202007300948,
"postinstall-version": "1.2021012800",
"standard-environment": "debian10-x64-std"
},
"storage_devices": [
{
"by_id": "/dev/disk/by-id/wwn-0x500a075129de046c",
"by_path": "/dev/disk/by-path/pci-0000:01:00.0-scsi-0:0:0:0",
"device": "sda",
"firmware_version": "D3DJ004",
"interface": "SAS",
"model": "MTFDDAK960TDT",
"size": 960197124096,
"storage": "SSD",
"vendor": "Micron"
},
{
"by_id": "/dev/disk/by-id/wwn-0x58ce38ee20f6cc95",
"by_path": "/dev/disk/by-path/pci-0000:01:00.0-scsi-0:0:1:0",
"device": "sdb",
"firmware_version": "B707",
"interface": "SATA",
"model": "KRM5XVUG1T92",
"size": 1920383410176,
"storage": "SSD",
"vendor": "Toshiba"
}
],
"supported_job_types": {
"besteffort": true,
"deploy": true,
"max_walltime": 0,
"queues": [
"admin",
"testing"
],
"virtual": "amd-v"
},
"type": "node",
"uid": "grouille-1"
}
\ No newline at end of file
{
"architecture": {
"cpu_core_numbering": "contiguous",
"nb_cores": 64,
"nb_procs": 2,
"nb_threads": 128,
"platform_type": "x86_64"
},
"bios": {
"release_date": "10/05/2020",
"vendor": "Dell Inc.",
"version": "1.7.3"
},
"bmc_version": "4.32.10.00",
"chassis": {
"manufacturer": "Dell Inc.",
"name": "PowerEdge R7525",
"serial": "54WT393"
},
"exotic": false,
"gpu_devices": {
"nvidia0": {
"cpu_affinity": 0,
"device": "/dev/nvidia0",
"memory": 40537000000,
"model": "A100-PCIE-40GB",
"power_default_limit": "250.00 W",
"vbios_version": "92.00.25.00.08",
"vendor": "Nvidia"
},
"nvidia1": {
"cpu_affinity": 1,
"device": "/dev/nvidia1",
"memory": 40537000000,
"model": "A100-PCIE-40GB",
"power_default_limit": "250.00 W",
"vbios_version": "92.00.25.00.08",
"vendor": "Nvidia"
}
},
"main_memory": {
"ram_size": 137438953472
},
"monitoring": {
"wattmeter": "false"
},
"network_adapters": [
{
"device": "eth0",
"driver": "tg3",
"enabled": false,
"firmware_version": "FFV21.60.16 bc 5720-v1.39",
"interface": "Ethernet",
"kavlan": false,
"mac": "70:b5:e8:d0:b1:9c",
"management": false,
"model": "NetXtreme BCM5720 Gigabit Ethernet PCIe",
"mountable": false,
"mounted": false,
"name": "eno1",
"vendor": "Broadcom"
},
{
"device": "eth1",
"driver": "tg3",
"enabled": false,
"firmware_version": "FFV21.60.16 bc 5720-v1.39",
"interface": "Ethernet",
"kavlan": false,
"mac": "70:b5:e8:d0:b1:9d",
"management": false,
"model": "NetXtreme BCM5720 Gigabit Ethernet PCIe",
"mountable": false,
"mounted": false,
"name": "eno2",
"vendor": "Broadcom"
},
{
"device": "eth2",
"driver": "mlx5_core",
"enabled": true,
"firmware_version": "16.27.6106 (DEL0000000016)",
"interface": "Ethernet",
"ip": "172.16.79.2",
"ip6": "2001:660:4406:500:10:0::2",
"kavlan": false,
"mac": "04:3f:72:da:4f:20",
"management": false,
"model": "MT27800 Family [ConnectX-5]",
"mountable": true,
"mounted": true,
"name": "eno33",
"network_address": "grouille-2.nancy.grid5000.fr",
"rate": 25000000000,
"switch": null,
"switch_port": null,
"vendor": "Mellanox Technologies"
},
{
"device": "eth3",
"driver": "mlx5_core",
"enabled": false,
"firmware_version": "16.27.6106 (DEL0000000016)",
"interface": "Ethernet",
"kavlan": false,
"mac": "04:3f:72:da:4f:21",
"management": false,
"model": "MT27800 Family [ConnectX-5]",
"mountable": false,
"mounted": false,
"name": "eno34",
"vendor": "Mellanox Technologies"
},
{
"device": "bmc",
"enabled": true,
"interface": "Ethernet",
"ip": "172.17.79.2",
"kavlan": false,
"mac": "70:b5:e8:e7:46:70",
"management": true,
"mountable": false,
"mounted": false,
"network_address": "grouille-2-bmc.nancy.grid5000.fr"
}
],
"operating_system": {
"cstate_driver": "acpi_idle",
"cstate_governor": "menu",
"ht_enabled": true,
"pstate_driver": "acpi-cpufreq",
"pstate_governor": "performance",
"turboboost_enabled": true
},
"performance": {
"core_flops": 16800000000,
"node_flops": 1075200000000
},
"processor": {
"cache_l1": null,
"cache_l1d": 32768,
"cache_l1i": 32768,
"cache_l2": 524288,
"cache_l3": 16777216,
"clock_speed": 2100000000,
"ht_capable": true,
"instruction_set": "x86-64",
"microarchitecture": "Zen",
"microcode": "0x830104d",
"model": "AMD EPYC",
"other_description": "AMD EPYC 7452 32-Core Processor",
"vendor": "AMD",
"version": 7452
},
"sensors": {
},
"software": {
"forced-deployment-timestamp": 202007300948,
"postinstall-version": "1.2021012800",
"standard-environment": "debian10-x64-std"
},
"storage_devices": [
{
"by_id": "/dev/disk/by-id/wwn-0x500a075129de049a",
"by_path": "/dev/disk/by-path/pci-0000:01:00.0-scsi-0:0:0:0",
"device": "sda",
"firmware_version": "D3DJ004",
"interface": "SAS",
"model": "MTFDDAK960TDT",
"size": 960197124096,
"storage": "SSD",
"vendor": "Micron"
},
{
"by_id": "/dev/disk/by-id/wwn-0x58ce38ee20f6cc9d",
"by_path": "/dev/disk/by-path/pci-0000:01:00.0-scsi-0:0:1:0",
"device": "sdb",
"firmware_version": "B707",
"interface": "SATA",
"model": "KRM5XVUG1T92",
"size": 1920383410176,
"storage": "SSD",
"vendor": "Toshiba"
}
],
"supported_job_types": {
"besteffort": true,
"deploy": true,
"max_walltime": 0,
"queues": [
"admin",
"testing"
],
"virtual": "amd-v"
},
"type": "node",
"uid": "grouille-2"
}
\ No newline at end of file
......@@ -50,6 +50,7 @@ disk_vendor_model_mapping:
- TOSHIBA MG04ACA4
- MG04SCA40ENY
- AL15SEB060NY
- KRM5XVUG1T92
Intel:
- INTEL SSDSC2BB30
- SSDSC2KG480G7R
......@@ -66,5 +67,6 @@ disk_vendor_model_mapping:
Micron:
- MTFDDAK480TDN
- MTFDDAK960TDN
- MTFDDAK960TDT
Unknown:
- unknown
......@@ -53,6 +53,7 @@ ipv4:
nancy grvingt eth0 0 0 12 0
nancy grue eth0 0 0 13 0
nancy grappe eth0 0 0 14 0
nancy grouille eth2 0 0 15 0
nantes econome eth0 0 0 0 0
nantes ecotype eth0 0 0 1 0
nantes ecotype eth1 0 0 2 0
......
model: Dell PowerEdge R7525
created_at: 2021-01-13
kavlan: false
exotic: false
queues:
- admin
- testing
nodes:
grouille-[1-2]:
supported_job_types:
deploy: true
besteffort: true
max_walltime: 0
processor:
microarchitecture: Zen
clock_speed: 2100000000
software:
standard-environment: debian10-x64-std
network_adapters:
bmc:
interface: Ethernet
enabled: true
mountable: false
mounted: false
eth0:
enabled: false
mountable: false
mounted: false
eth1:
enabled: false
mountable: false
mounted: false
eth2:
enabled: true
mountable: true
mounted: true
eth3:
enabled: false
mountable: false
mounted: false
storage_devices:
sda:
interface: SAS
storage: SSD
sdb:
interface: SATA
storage: SSD
# Generated by g5k-checks (g5k-checks -m api)
---
grouille-1:
architecture:
cpu_core_numbering: contiguous
nb_cores: 64
nb_procs: 2
nb_threads: 128
platform_type: x86_64
bios:
release_date: 07/27/2020
vendor: Dell Inc.
version: 1.5.5
bmc_version: 4.30.30.30
chassis:
manufacturer: Dell Inc.
name: PowerEdge R7525
serial: 64WT393
gpu_devices:
nvidia0:
cpu_affinity: 0
device: "/dev/nvidia0"
memory: 40537000000
model: A100-PCIE-40GB
power_default_limit: 250.00 W
vbios_version: 92.00.25.00.08
vendor: Nvidia
nvidia1:
cpu_affinity: 1
device: "/dev/nvidia1"
memory: 40537000000
model: A100-PCIE-40GB
power_default_limit: 250.00 W
vbios_version: 92.00.25.00.08
vendor: Nvidia
main_memory:
ram_size: 137438953472
network_adapters:
bmc:
ip: 172.17.79.1
mac: 70:b5:e8:e7:3c:74
management: true
eth0:
driver: tg3
firmware_version: FFV21.60.16 bc 5720-v1.39
interface: Ethernet
mac: 70:b5:e8:d0:b2:f8
management: false
model: NetXtreme BCM5720 Gigabit Ethernet PCIe
name: eno1
vendor: Broadcom
eth1:
driver: tg3
firmware_version: FFV21.60.16 bc 5720-v1.39
interface: Ethernet
mac: 70:b5:e8:d0:b2:f9
management: false
model: NetXtreme BCM5720 Gigabit Ethernet PCIe
name: eno2
vendor: Broadcom
eth2:
driver: mlx5_core
firmware_version: 16.27.6106 (DEL0000000016)
interface: Ethernet
ip: 172.16.79.1
mac: 04:3f:72:cd:c2:18
management: false
model: MT27800 Family [ConnectX-5]
name: eno33
rate: 25000000000
vendor: Mellanox Technologies
eth3:
driver: mlx5_core
firmware_version: 16.27.6106 (DEL0000000016)
interface: Ethernet
mac: 04:3f:72:cd:c2:19
management: false
model: MT27800 Family [ConnectX-5]
name: eno34
vendor: Mellanox Technologies
operating_system:
cstate_driver: acpi_idle
cstate_governor: menu
ht_enabled: true
pstate_driver: acpi-cpufreq
pstate_governor: performance
turboboost_enabled: true
processor:
cache_l1d: 32768
cache_l1i: 32768
cache_l2: 524288
cache_l3: 16777216
ht_capable: true
instruction_set: x86-64
microcode: '0x8301038'
model: AMD EPYC
other_description: AMD EPYC 7452 32-Core Processor
vendor: AMD
version: 7452