diff --git a/python/ironic-understack/ironic_understack/port_bios_name_hook.py b/python/ironic-understack/ironic_understack/port_bios_name_hook.py index d631befbe..2e969b538 100644 --- a/python/ironic-understack/ironic_understack/port_bios_name_hook.py +++ b/python/ironic-understack/ironic_understack/port_bios_name_hook.py @@ -1,8 +1,6 @@ from typing import ClassVar -from ironic.common import exception from ironic.drivers.modules.inspector.hooks import base -from ironic.objects.bios import BIOSSetting from oslo_log import log as logging from ironic_understack.ironic_wrapper import ironic_ports_for_node @@ -10,18 +8,17 @@ LOG = logging.getLogger(__name__) PXE_BIOS_NAME_PREFIXES = ["NIC.Integrated", "NIC.Slot"] -BIOS_SETTING_NAME = "HttpDev1Interface" class PortBiosNameHook(base.InspectionHook): """Set bios_name, pxe_enabled, local_link_connection and physical_network. Populates extra.bios_name and port name from inspection inventory, then - determines the PXE port from the BIOS HttpDev1Interface setting (populated - during enrolment). Falls back to a naming-convention heuristic if the - BIOS setting is unavailable. + determines PXE-enabled ports from node.extra["enrolled_pxe_ports"] + (populated during enrolment). If that data is unavailable, all + NIC.Integrated.* and NIC.Slot.* ports are treated as PXE-enabled. - The PXE port gets pxe_enabled=True plus placeholder physical_network and + PXE ports get pxe_enabled=True plus placeholder physical_network and local_link_connection values that neutron requires. """ @@ -37,7 +34,7 @@ def __call__(self, task, inventory, plugin_data): i["mac_address"].upper(): i["name"] for i in inspected_interfaces } - pxe_nic = _bios_pxe_nic(task) + pxe_nics = _enrolled_pxe_nics(task) for baremetal_port in ironic_ports_for_node(task.context, task.node.id): mac = baremetal_port.address.upper() @@ -46,15 +43,10 @@ def __call__(self, task, inventory, plugin_data): _set_port_extra(baremetal_port, mac, bios_name) _set_port_name(baremetal_port, mac, bios_name, task.node.name) - if pxe_nic: - is_pxe = bios_name is not None and ( - pxe_nic.startswith(bios_name) or bios_name.startswith(pxe_nic) - ) - else: - # Fallback: heuristic based on naming convention - is_pxe = bios_name == _pxe_interface_name( - inspected_interfaces, task.node.uuid - ) + is_pxe = bios_name is not None and any( + pxe_nic.startswith(bios_name) or bios_name.startswith(pxe_nic) + for pxe_nic in pxe_nics + ) if baremetal_port.pxe_enabled != is_pxe: LOG.info( @@ -72,45 +64,24 @@ def __call__(self, task, inventory, plugin_data): _set_port_local_link_connection(baremetal_port, mac) -def _bios_pxe_nic(task): - """Read the BIOS PXE NIC FQDD, or return None if unavailable.""" - try: - task.driver.bios.cache_bios_settings(task) - except Exception: - LOG.warning( - "Cannot cache BIOS settings for node %s, " - "falling back to naming heuristic for PXE port.", - task.node.uuid, - ) - return None - - try: - setting = BIOSSetting.get(task.context, task.node.id, BIOS_SETTING_NAME) - except exception.BIOSSettingNotFound: - LOG.warning( - "BIOS setting %s not found for node %s, " - "falling back to naming heuristic for PXE port.", - BIOS_SETTING_NAME, +def _enrolled_pxe_nics(task) -> list[str]: + """Read enrolled PXE NIC names from node.extra, or use broad prefixes.""" + enrolled_pxe_nics = task.node.extra.get("enrolled_pxe_ports") + if enrolled_pxe_nics: + LOG.info( + "Set node %s pxe flag on interfaces from extra.enrolled_pxe_ports %s", task.node.uuid, + enrolled_pxe_nics, ) - return None - - if not setting.value: + return enrolled_pxe_nics + else: LOG.warning( - "BIOS setting %s is empty for node %s, " - "falling back to naming heuristic for PXE port.", - BIOS_SETTING_NAME, + "Node %s extra.enrolled_pxe_ports is missing, " + "setting pxe flag on all interfaces starting %s.", task.node.uuid, + PXE_BIOS_NAME_PREFIXES, ) - return None - - LOG.info( - "Node %s BIOS %s = %s", - task.node.uuid, - BIOS_SETTING_NAME, - setting.value, - ) - return setting.value + return PXE_BIOS_NAME_PREFIXES def _set_port_extra(baremetal_port, mac, required_bios_name): @@ -165,18 +136,3 @@ def _set_port_local_link_connection(baremetal_port, mac): baremetal_port.local_link_connection, ) baremetal_port.save() - - -def _pxe_interface_name(inspected_interfaces: list[dict], node_uuid) -> str | None: - """Use a heuristic to determine our default interface for PXE.""" - names = sorted(i["name"] for i in inspected_interfaces) - for prefix in PXE_BIOS_NAME_PREFIXES: - for name in names: - if name.startswith(prefix): - return name - LOG.error( - "No PXE interface found for node %s. Expected to find an " - "interface whose bios_name starts with one of %s", - node_uuid, - PXE_BIOS_NAME_PREFIXES, - ) diff --git a/python/ironic-understack/ironic_understack/tests/test_port_bios_name_hook.py b/python/ironic-understack/ironic_understack/tests/test_port_bios_name_hook.py index edd6b772f..c5966dcfc 100644 --- a/python/ironic-understack/ironic_understack/tests/test_port_bios_name_hook.py +++ b/python/ironic-understack/ironic_understack/tests/test_port_bios_name_hook.py @@ -1,6 +1,5 @@ import logging -from ironic.common import exception from oslo_utils import uuidutils from ironic_understack.port_bios_name_hook import PortBiosNameHook @@ -14,32 +13,12 @@ } -def _make_task(mocker, bios_value=None, bios_error=None, cache_error=None): - mock_node = mocker.Mock(id=1234, uuid=uuidutils.generate_uuid()) +def _make_task(mocker, enrolled_pxe_ports=None): + mock_node = mocker.Mock(id=1234, uuid=uuidutils.generate_uuid(), extra={}) mock_node.name = "Dell-CR1MB0" - task = mocker.Mock(node=mock_node, context=mocker.Mock()) - - if cache_error: - task.driver.bios.cache_bios_settings.side_effect = cache_error - elif bios_error: - mocker.patch( - "ironic_understack.port_bios_name_hook.BIOSSetting.get", - side_effect=bios_error, - ) - elif bios_value is not None: - setting = mocker.Mock(value=bios_value) - mocker.patch( - "ironic_understack.port_bios_name_hook.BIOSSetting.get", - return_value=setting, - ) - else: - setting = mocker.Mock(value=None) - mocker.patch( - "ironic_understack.port_bios_name_hook.BIOSSetting.get", - return_value=setting, - ) - - return task + if enrolled_pxe_ports is not None: + mock_node.extra["enrolled_pxe_ports"] = enrolled_pxe_ports + return mocker.Mock(node=mock_node, context=mocker.Mock()) def _make_port(mocker, mac, **kwargs): @@ -57,10 +36,10 @@ def _make_port(mocker, mac, **kwargs): return port -def test_pxe_from_bios_setting(mocker, caplog): - """BIOS HttpDev1Interface determines pxe_enabled.""" +def test_pxe_from_enrolled_pxe_ports(mocker, caplog): + """node.extra.enrolled_pxe_ports determines pxe_enabled.""" caplog.set_level(logging.DEBUG) - task = _make_task(mocker, bios_value="NIC.Integrated.1-1-1") + task = _make_task(mocker, enrolled_pxe_ports=["NIC.Integrated.1-1-1"]) port1 = _make_port(mocker, "11:11:11:11:11:11") port2 = _make_port(mocker, "22:22:22:22:22:22") @@ -79,10 +58,13 @@ def test_pxe_from_bios_setting(mocker, caplog): assert port1.physical_network == "enrol" -def test_pxe_fallback_when_cache_fails(mocker, caplog): - """Falls back to heuristic when BIOS cache fails.""" +def test_pxe_from_enrolled_pxe_ports_enables_multiple_ports(mocker, caplog): + """Every port in enrolled_pxe_ports gets pxe_enabled=True.""" caplog.set_level(logging.DEBUG) - task = _make_task(mocker, cache_error=Exception("no BIOS")) + task = _make_task( + mocker, + enrolled_pxe_ports=["NIC.Integrated.1-1-1", "NIC.Integrated.1-2-1"], + ) port1 = _make_port(mocker, "11:11:11:11:11:11") port2 = _make_port(mocker, "22:22:22:22:22:22") @@ -94,19 +76,14 @@ def test_pxe_fallback_when_cache_fails(mocker, caplog): PortBiosNameHook().__call__(task, _INVENTORY, {}) - # Heuristic picks NIC.Integrated.1-1 (first sorted match) assert port1.pxe_enabled is True - assert port2.pxe_enabled is False - assert "falling back to naming heuristic" in caplog.text + assert port2.pxe_enabled is True -def test_pxe_fallback_when_setting_missing(mocker, caplog): - """Falls back to heuristic when BIOS setting not found.""" +def test_pxe_fallback_when_enrolled_pxe_ports_missing(mocker, caplog): + """Missing enrolled_pxe_ports enables all matching PXE prefix ports.""" caplog.set_level(logging.DEBUG) - task = _make_task( - mocker, - bios_error=exception.BIOSSettingNotFound(node="fake", name="HttpDev1Interface"), - ) + task = _make_task(mocker) port1 = _make_port(mocker, "11:11:11:11:11:11") port2 = _make_port(mocker, "22:22:22:22:22:22") @@ -119,14 +96,43 @@ def test_pxe_fallback_when_setting_missing(mocker, caplog): PortBiosNameHook().__call__(task, _INVENTORY, {}) assert port1.pxe_enabled is True - assert port2.pxe_enabled is False - assert "falling back to naming heuristic" in caplog.text + assert port2.pxe_enabled is True + assert "setting pxe flag on all interfaces starting" in caplog.text + + +def test_missing_enrolled_pxe_ports_enables_slot_ports_too(mocker, caplog): + """Missing enrolled_pxe_ports enables all supported PXE port prefixes.""" + caplog.set_level(logging.DEBUG) + task = _make_task(mocker) + inventory = { + "memory": {"physical_mb": 98304}, + "interfaces": [ + {"mac_address": "11:11:11:11:11:11", "name": "NIC.Integrated.1-1"}, + {"mac_address": "22:22:22:22:22:22", "name": "NIC.Slot.1-2"}, + {"mac_address": "33:33:33:33:33:33", "name": "eno1"}, + ], + } + + port1 = _make_port(mocker, "11:11:11:11:11:11") + port2 = _make_port(mocker, "22:22:22:22:22:22") + port3 = _make_port(mocker, "33:33:33:33:33:33") + + mocker.patch( + "ironic_understack.port_bios_name_hook.ironic_ports_for_node", + return_value=[port1, port2, port3], + ) + + PortBiosNameHook().__call__(task, inventory, {}) + + assert port1.pxe_enabled is True + assert port2.pxe_enabled is True + assert port3.pxe_enabled is False def test_retaining_physical_network(mocker, caplog): """Existing physical_network and local_link_connection are preserved.""" caplog.set_level(logging.DEBUG) - task = _make_task(mocker, bios_value="NIC.Integrated.1-1-1") + task = _make_task(mocker, enrolled_pxe_ports=["NIC.Integrated.1-1-1"]) port = _make_port( mocker, @@ -153,7 +159,7 @@ def test_retaining_physical_network(mocker, caplog): def test_clears_pxe_on_previously_enabled_port(mocker, caplog): """Port that was pxe_enabled but no longer matches gets cleared.""" caplog.set_level(logging.DEBUG) - task = _make_task(mocker, bios_value="NIC.Integrated.1-2-1") + task = _make_task(mocker, enrolled_pxe_ports=["NIC.Integrated.1-2-1"]) port1 = _make_port(mocker, "11:11:11:11:11:11", pxe_enabled=True) port2 = _make_port(mocker, "22:22:22:22:22:22") @@ -172,7 +178,7 @@ def test_clears_pxe_on_previously_enabled_port(mocker, caplog): def test_removing_bios_name(mocker, caplog): """Port with unknown MAC gets bios_name removed.""" caplog.set_level(logging.DEBUG) - task = _make_task(mocker, bios_value="NIC.Integrated.1-1-1") + task = _make_task(mocker, enrolled_pxe_ports=["NIC.Integrated.1-1-1"]) port = _make_port( mocker, diff --git a/python/understack-workflows/pyproject.toml b/python/understack-workflows/pyproject.toml index 55fed61e4..a7461fe52 100644 --- a/python/understack-workflows/pyproject.toml +++ b/python/understack-workflows/pyproject.toml @@ -30,7 +30,6 @@ dependencies = [ ] [project.scripts] -bios-settings = "understack_workflows.main.bios_settings:main" bmc-kube-password = "understack_workflows.main.bmc_display_password:main" bmc-password = "understack_workflows.main.print_bmc_password:main" enroll-server = "understack_workflows.main.enroll_server:main" diff --git a/python/understack-workflows/tests/test_bmc.py b/python/understack-workflows/tests/test_bmc.py index 122432d50..6f237808f 100644 --- a/python/understack-workflows/tests/test_bmc.py +++ b/python/understack-workflows/tests/test_bmc.py @@ -12,9 +12,15 @@ def mock_creds(mocker): return mock -def test_bmc_for_ip_address(mock_creds): +def test_bmc_for_ip_address(mock_creds, monkeypatch): + # The credential function above is overridden by the environment variable so + # make sure it is removed, so we are using the correct test credential and + # not a production one. + monkeypatch.delenv("BMC_MASTER", raising=False) assert os.getenv("BMC_MASTER") is None + bmc = bmc_for_ip_address("1.2.3.4") + assert bmc.ip_address == "1.2.3.4" assert bmc.url() == "https://1.2.3.4" assert bmc.username == "root" diff --git a/python/understack-workflows/tests/test_bmc_bios.py b/python/understack-workflows/tests/test_bmc_bios.py new file mode 100644 index 000000000..9dcd32c83 --- /dev/null +++ b/python/understack-workflows/tests/test_bmc_bios.py @@ -0,0 +1,66 @@ +from understack_workflows import bmc_bios + + +def test_update_dell_bios_settings_skips_patch_when_desired_values_are_pending(mocker): + bmc = mocker.Mock() + bmc.system_path = "/redfish/v1/Systems/System.Embedded.1" + bmc.redfish_request.side_effect = [ + { + "Attributes": { + "PxeDev1EnDis": "Disabled", + "PxeDev1Interface": "NIC.Slot.1-1", + "HttpDev1EnDis": "Disabled", + "HttpDev1Interface": "NIC.Slot.1-1", + "HttpDev1TlsMode": "TLS", + "TimeZone": "Local", + "OS-BMC.1.AdminState": "Enabled", + "IPMILan.1.Enable": "Enabled", + } + }, + {"Attributes": bmc_bios.required_bios_settings(["NIC.Embedded.1-1-1"])}, + ] + patch_bios_settings = mocker.patch.object(bmc_bios, "patch_bios_settings") + + result = bmc_bios.update_dell_bios_settings(bmc, ["NIC.Embedded.1-1-1"]) + + assert result == {} + patch_bios_settings.assert_not_called() + + +def test_update_dell_bios_settings_only_patches_settings_not_already_pending(mocker): + bmc = mocker.Mock() + bmc.system_path = "/redfish/v1/Systems/System.Embedded.1" + bmc.redfish_request.side_effect = [ + { + "Attributes": { + "PxeDev1EnDis": "Enabled", + "PxeDev1Interface": "NIC.Slot.1-1", + "HttpDev1EnDis": "Disabled", + "HttpDev1Interface": "NIC.Slot.1-1", + "HttpDev1TlsMode": "TLS", + "TimeZone": "UTC", + "OS-BMC.1.AdminState": "Enabled", + "IPMILan.1.Enable": "Disabled", + } + }, + { + "Attributes": { + "PxeDev1EnDis": "Enabled", + "PxeDev1Interface": "NIC.Embedded.1-1-1", + "TimeZone": "UTC", + "IPMILan.1.Enable": "Disabled", + } + }, + ] + patch_bios_settings = mocker.patch.object(bmc_bios, "patch_bios_settings") + + result = bmc_bios.update_dell_bios_settings(bmc, ["NIC.Embedded.1-1-1"]) + + assert result == { + "HttpDev1EnDis": "Enabled", + "HttpDev1Interface": "NIC.Embedded.1-1-1", + "HttpDev1TlsMode": "None", + "OS-BMC.1.AdminState": "Disabled", + "PxeDev1EnDis": "Disabled", + } + patch_bios_settings.assert_called_once_with(bmc, result) diff --git a/python/understack-workflows/tests/test_enroll_server.py b/python/understack-workflows/tests/test_enroll_server.py new file mode 100644 index 000000000..f3112bce3 --- /dev/null +++ b/python/understack-workflows/tests/test_enroll_server.py @@ -0,0 +1,443 @@ +from types import SimpleNamespace +from typing import cast +from unittest.mock import MagicMock +from unittest.mock import call + +from ironicclient.common.apiclient import exceptions as ironic_exceptions +from ironicclient.v1.node import Node + +from understack_workflows.bmc import RedfishRequestError +from understack_workflows.bmc_chassis_info import ChassisInfo +from understack_workflows.bmc_chassis_info import InterfaceInfo +from understack_workflows.main import enroll_server + + +def make_device_info( + *, + power_on: bool, + connected_mac: str | None = None, + serial_number: str = "ABC123", +) -> ChassisInfo: + return ChassisInfo( + manufacturer="Dell", + model_number="R760", + serial_number=serial_number, + bmc_ip_address="10.0.0.10", + bios_version="1.0.0", + power_on=power_on, + memory_gib=64, + cpu="Xeon", + interfaces=[ + InterfaceInfo("iDRAC", "bmc", "00:00:00:00:00:01"), + InterfaceInfo( + "NIC.Integrated.1-1", + "PXE NIC", + "00:00:00:00:00:02", + remote_switch_mac_address=connected_mac, + remote_switch_port_name="Ethernet1/1" if connected_mac else None, + ), + ], + ) + + +def make_bmc(mocker, fake_sushy=None): + bmc = mocker.MagicMock() + bmc.ip_address = "10.0.0.10" + bmc.username = "root" + bmc.password = "calvin" + bmc.url.return_value = "https://10.0.0.10" + bmc.get_system_path.return_value = "/redfish/v1/Systems/System.Embedded.1" + if fake_sushy is not None: + bmc.sushy.return_value = fake_sushy + return bmc + + +def make_raid_hardware(): + controller = SimpleNamespace( + identity="RAID.Integrated.1-1", + drives=[SimpleNamespace(identity="Disk1"), SimpleNamespace(identity="Disk2")], + ) + system = SimpleNamespace(storage=SimpleNamespace(get_members=lambda: [controller])) + return SimpleNamespace( + get_system_collection=lambda: SimpleNamespace(get_members=lambda: [system]) + ) + + +def make_ironic_client( + *, + node_name: str, + node_uuid: str = "node-123", + existing_node=None, + inspect_interfaces: list[str] | None = None, + traits: list[str] | None = None, + runbook_ids: dict[str, str] | None = None, +): + if inspect_interfaces is None: + inspect_interfaces = ["idrac-redfish", "idrac-redfish"] + if traits is None: + traits = [] + if runbook_ids is None: + runbook_ids = {} + + created_node = SimpleNamespace( + uuid=node_uuid, + provision_state="enroll", + driver="idrac", + inspect_interface="idrac-redfish", + ) + inspect_interface_iter = iter(inspect_interfaces) + fake_client = MagicMock() + fake_client.node.api.runbook.get.side_effect = lambda trait: SimpleNamespace( + uuid=runbook_ids[trait] + ) + fake_client.node.create.return_value = created_node + fake_client.node.get_traits.return_value = traits + + def get_node(ident, fields=None): + if ident == node_name and fields is None: + if existing_node is None: + raise ironic_exceptions.NotFound() + return existing_node + if ident == node_uuid and fields == ["driver"]: + return SimpleNamespace(driver="idrac") + if ident == node_uuid and fields == ["inspect_interface"]: + return SimpleNamespace(inspect_interface=next(inspect_interface_iter)) + raise AssertionError( + f"Unexpected node lookup ident={ident!r} fields={fields!r}" + ) + + fake_client.node.get.side_effect = get_node + return fake_client, created_node + + +def test_enrol_happy_path_uses_real_ironic_workflow(mocker): + initial_info = make_device_info(power_on=False, connected_mac=None) + powered_info = make_device_info( + power_on=True, + connected_mac="AA:BB:CC:DD:EE:FF", + ) + fake_bmc = make_bmc(mocker, fake_sushy=make_raid_hardware()) + fake_ironic, created_node = make_ironic_client( + node_name="Dell-ABC123", + inspect_interfaces=["idrac-redfish", "idrac-redfish"], + ) + + mocker.patch.object(enroll_server, "bmc_for_ip_address", return_value=fake_bmc) + mocker.patch.object( + enroll_server, + "chassis_info", + side_effect=[initial_info, powered_info], + ) + set_bmc_password = mocker.patch.object(enroll_server, "set_bmc_password") + update_dell_drac_settings = mocker.patch.object( + enroll_server, "update_dell_drac_settings" + ) + bmc_set_hostname = mocker.patch.object(enroll_server, "bmc_set_hostname") + update_dell_bios_settings = mocker.patch.object( + enroll_server, "update_dell_bios_settings" + ) + sleep = mocker.patch.object(enroll_server.time, "sleep") + mocker.patch( + "understack_workflows.ironic.client.get_ironic_client", + return_value=fake_ironic, + ) + + enroll_server.enrol( + ip_address="10.0.0.10", + firmware_update=False, + raid_configure=True, + pxe_switch_macs={"AA:BB:CC:DD:EE:FF"}, + old_password="old-password", + external_cmdb_id="cmdb-1", + ) + + set_bmc_password.assert_called_once_with( + ip_address="10.0.0.10", + new_password="calvin", + old_password="old-password", + ) + update_dell_drac_settings.assert_called_once_with(fake_bmc) + bmc_set_hostname.assert_called_once_with(fake_bmc, "None", "Dell-ABC123") + assert update_dell_bios_settings.call_count == 2 + update_dell_bios_settings.assert_any_call( + fake_bmc, + pxe_interfaces=["NIC.Integrated.1-1"], + ) + sleep.assert_called_once_with(120) + + fake_bmc.redfish_request.assert_called_once_with( + path="/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset", + payload={"ResetType": "On"}, + method="POST", + ) + fake_ironic.node.create.assert_called_once_with( + name="Dell-ABC123", + driver="idrac", + driver_info={ + "redfish_address": "https://10.0.0.10", + "redfish_verify_ca": False, + "redfish_username": "root", + "redfish_password": "calvin", + }, + boot_interface="http-ipxe", + inspect_interface="idrac-redfish", + extra={ + "external_cmdb_id": "cmdb-1", + "enrolled_pxe_ports": ["NIC.Integrated.1-1"], + }, + ) + fake_ironic.node.set_target_raid_config.assert_called_once_with( + created_node.uuid, + { + "logical_disks": [ + { + "controller": "RAID.Integrated.1-1", + "is_root_volume": True, + "physical_disks": ["Disk1", "Disk2"], + "raid_level": "1", + "size_gb": "MAX", + } + ] + }, + ) + fake_ironic.node.set_provision_state.assert_has_calls( + [ + call( + created_node.uuid, + "manage", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + created_node.uuid, + "clean", + cleansteps=[ + {"interface": "raid", "step": "delete_configuration"}, + {"interface": "raid", "step": "create_configuration"}, + ], + runbook=None, + disable_ramdisk=True, + ), + call( + created_node.uuid, + "inspect", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + created_node.uuid, + "inspect", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + created_node.uuid, + "provide", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + ] + ) + fake_ironic.node.update.assert_called_once() + patch = fake_ironic.node.update.call_args.args[1] + assert patch == [{"op": "add", "path": "/inspect_interface", "value": "agent"}] + + +def test_power_on_and_wait_retries_temporary_redfish_503(mocker): + initial_info = make_device_info(power_on=False, connected_mac=None) + powered_info = make_device_info( + power_on=True, + connected_mac="AA:BB:CC:DD:EE:FF", + ) + fake_bmc = make_bmc(mocker) + sleep = mocker.patch.object(enroll_server.time, "sleep") + mocker.patch.object( + enroll_server, + "chassis_info", + side_effect=[ + RedfishRequestError( + "BMC communications failure HTTP 503 Service Unavailable " + "Base.1.12.ServiceTemporarilyUnavailable" + ), + powered_info, + ], + ) + + result = enroll_server.power_on_and_wait(fake_bmc, initial_info) + + assert result is powered_info + fake_bmc.redfish_request.assert_called_once_with( + path="/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset", + payload={"ResetType": "On"}, + method="POST", + ) + assert sleep.call_args_list == [call(120), call(30)] + + +def test_enrol_existing_failed_node_recovers_and_updates(mocker): + device_info = make_device_info( + power_on=True, + connected_mac="AA:BB:CC:DD:EE:FF", + ) + existing_node = SimpleNamespace( + uuid="node-999", + provision_state="inspect failed", + driver="idrac", + inspect_interface="idrac-redfish", + ) + fake_bmc = make_bmc(mocker) + fake_ironic, _ = make_ironic_client( + node_name="Dell-ABC123", + node_uuid="node-999", + existing_node=existing_node, + inspect_interfaces=["idrac-redfish", "idrac-redfish"], + ) + + mocker.patch.object(enroll_server, "bmc_for_ip_address", return_value=fake_bmc) + mocker.patch.object(enroll_server, "chassis_info", return_value=device_info) + mocker.patch.object(enroll_server, "set_bmc_password") + mocker.patch.object(enroll_server, "update_dell_drac_settings") + mocker.patch.object(enroll_server, "bmc_set_hostname") + mocker.patch.object(enroll_server, "update_dell_bios_settings") + mocker.patch( + "understack_workflows.ironic.client.get_ironic_client", + return_value=fake_ironic, + ) + + enroll_server.enrol( + ip_address="10.0.0.10", + firmware_update=False, + raid_configure=False, + pxe_switch_macs={"AA:BB:CC:DD:EE:FF"}, + old_password=None, + external_cmdb_id="cmdb-1", + ) + + fake_ironic.node.create.assert_not_called() + fake_ironic.node.set_target_raid_config.assert_not_called() + fake_ironic.node.set_provision_state.assert_has_calls( + [ + call( + existing_node.uuid, + "manage", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + existing_node.uuid, + "inspect", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + existing_node.uuid, + "inspect", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + call( + existing_node.uuid, + "provide", + cleansteps=None, + runbook=None, + disable_ramdisk=None, + ), + ] + ) + update_patch = fake_ironic.node.update.call_args_list[0].args[1] + assert { + "op": "add", + "path": "/extra/enrolled_pxe_ports", + "value": ["NIC.Integrated.1-1"], + } in update_patch + + +def test_apply_firmware_updates_runs_traits_in_numeric_order(mocker): + fake_ironic, _ = make_ironic_client( + node_name="unused", + node_uuid="node-789", + traits=[ + "CUSTOM_FIRMWARE_UPDATE_20_BIOS", + "CUSTOM_FIRMWARE_UPDATE_3_NIC", + "CUSTOM_FIRMWARE_UPDATE_100_STORAGE", + "CUSTOM_UNRELATED", + ], + runbook_ids={ + "CUSTOM_FIRMWARE_UPDATE_3_NIC": "runbook-3", + "CUSTOM_FIRMWARE_UPDATE_20_BIOS": "runbook-20", + "CUSTOM_FIRMWARE_UPDATE_100_STORAGE": "runbook-100", + }, + ) + mocker.patch( + "understack_workflows.ironic.client.get_ironic_client", + return_value=fake_ironic, + ) + node = cast(Node, SimpleNamespace(uuid="node-789")) + + enroll_server.ironic_node.apply_firmware_updates(node) + + fake_ironic.node.api.runbook.get.assert_has_calls( + [ + call("CUSTOM_FIRMWARE_UPDATE_3_NIC"), + call("CUSTOM_FIRMWARE_UPDATE_20_BIOS"), + call("CUSTOM_FIRMWARE_UPDATE_100_STORAGE"), + ] + ) + fake_ironic.node.set_provision_state.assert_has_calls( + [ + call( + "node-789", + "clean", + cleansteps=None, + runbook="runbook-3", + disable_ramdisk=None, + ), + call( + "node-789", + "clean", + cleansteps=None, + runbook="runbook-20", + disable_ramdisk=None, + ), + call( + "node-789", + "clean", + cleansteps=None, + runbook="runbook-100", + disable_ramdisk=None, + ), + ] + ) + + +def test_guess_pxe_interface_unknown_name_avoids_bmc_interface(): + device_info = ChassisInfo( + manufacturer="Dell", + model_number="R760", + serial_number="ABC123", + bmc_ip_address="1.2.3.4", + bios_version="1.0.0", + power_on=False, + memory_gib=0, + cpu="cpu", + interfaces=[ + InterfaceInfo("iDRAC", "bmc", "00:00:00:00:00:01"), + InterfaceInfo( + "NIC.Custom.9-1", + "custom nic", + "00:00:00:00:00:02", + remote_switch_mac_address="AA:BB:CC:DD:EE:FF", + ), + ], + ) + + assert enroll_server.guess_pxe_interfaces(device_info, {"AA:BB:CC:DD:EE:FF"}) == [ + "NIC.Custom.9-1" + ] diff --git a/python/understack-workflows/tests/test_pxe_port_heuristic.py b/python/understack-workflows/tests/test_pxe_port_heuristic.py index fa2f06084..6ab3ec83c 100644 --- a/python/understack-workflows/tests/test_pxe_port_heuristic.py +++ b/python/understack-workflows/tests/test_pxe_port_heuristic.py @@ -1,6 +1,6 @@ from understack_workflows.bmc_chassis_info import ChassisInfo from understack_workflows.bmc_chassis_info import InterfaceInfo -from understack_workflows.pxe_port_heuristic import guess_pxe_interface +from understack_workflows.pxe_port_heuristic import guess_pxe_interfaces def test_integrated_is_best(): @@ -24,7 +24,12 @@ def test_integrated_is_best(): InterfaceInfo("NIC.Slot.1-2", x, x), ], ) - assert guess_pxe_interface(device_info) == "NIC.Integrated.1-1" + assert guess_pxe_interfaces(device_info) == [ + "NIC.Integrated.1-1", + "NIC.Slot.1-1", + "NIC.Integrated.1-2", + "NIC.Slot.1-2", + ] def test_slot_is_second_best(): @@ -48,7 +53,12 @@ def test_slot_is_second_best(): InterfaceInfo("NIC.Slot.2-1", x, x), ], ) - assert guess_pxe_interface(device_info) == "NIC.Slot.1-1" + assert guess_pxe_interfaces(device_info) == [ + "NIC.Slot.1-1", + "NIC.Slot.2-1", + "NIC.Slot.1-2", + "NIC.Slot.2-2", + ] def test_connected_is_better(): @@ -63,13 +73,88 @@ def test_connected_is_better(): memory_gib=0, cpu=x, interfaces=[ - InterfaceInfo("iDRAC", x, x, remote_switch_port_name=x), - InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_port_name=x), - InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_port_name=x), - InterfaceInfo("NIC.Integrated.1-1", x, x, remote_switch_port_name=None), - InterfaceInfo("NIC.Integrated.1-2", x, x, remote_switch_port_name=None), - InterfaceInfo("NIC.Slot.1-1", x, x, remote_switch_port_name=None), - InterfaceInfo("NIC.Slot.1-2", x, x, remote_switch_port_name=x), + InterfaceInfo("iDRAC", x, x, remote_switch_mac_address=x), + InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_mac_address=x), + InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_mac_address=x), + InterfaceInfo("NIC.Integrated.1-1", x, x, remote_switch_mac_address=None), + InterfaceInfo("NIC.Integrated.1-2", x, x, remote_switch_mac_address=None), + InterfaceInfo("NIC.Slot.1-1", x, x, remote_switch_mac_address=None), + InterfaceInfo("NIC.Slot.1-2", x, x, remote_switch_mac_address=x), ], ) - assert guess_pxe_interface(device_info) == "NIC.Slot.1-2" + assert guess_pxe_interfaces(device_info) == [ + "NIC.Slot.1-2", + "NIC.Integrated.1-1", + "NIC.Slot.1-1", + "NIC.Integrated.1-2", + ] + + +def test_connected_macs_picks_first(): + any_mac = "00:00:00:00:00:00" + pxe_mac = "11:22:33:44:55:66" + x = "test" + device_info = ChassisInfo( + manufacturer=x, + model_number=x, + serial_number=x, + bmc_ip_address=x, + bios_version=x, + power_on=False, + memory_gib=0, + cpu=x, + interfaces=[ + InterfaceInfo("iDRAC", x, x, remote_switch_mac_address=x), + InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_mac_address=None), + InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_mac_address=any_mac), + InterfaceInfo( + "NIC.Integrated.1-1", x, x, remote_switch_mac_address=any_mac + ), + InterfaceInfo( + "NIC.Integrated.1-2", x, x, remote_switch_mac_address=any_mac + ), + InterfaceInfo("NIC.Slot.1-1", x, x, remote_switch_mac_address=any_mac), + InterfaceInfo("NIC.Slot.1-2", x, x, remote_switch_mac_address=any_mac), + ], + ) + assert guess_pxe_interfaces(device_info, {pxe_mac}) == [ + "NIC.Integrated.1-1", + "NIC.Slot.1-1", + "NIC.Integrated.1-2", + "NIC.Slot.1-2", + ] + + +def test_connected_to_known_pxe_is_best(): + any_mac = "00:00:00:00:00:00" + pxe_mac = "11:22:33:44:55:66" + x = "test" + device_info = ChassisInfo( + manufacturer=x, + model_number=x, + serial_number=x, + bmc_ip_address=x, + bios_version=x, + power_on=False, + memory_gib=0, + cpu=x, + interfaces=[ + InterfaceInfo("iDRAC", x, x, remote_switch_mac_address=x), + InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_mac_address=None), + InterfaceInfo("NIC.Embedded.1-1", x, x, remote_switch_mac_address=any_mac), + InterfaceInfo( + "NIC.Integrated.1-1", x, x, remote_switch_mac_address=any_mac + ), + InterfaceInfo( + "NIC.Integrated.1-2", x, x, remote_switch_mac_address=any_mac + ), + InterfaceInfo("NIC.Slot.1-1", x, x, remote_switch_mac_address=pxe_mac), + InterfaceInfo("NIC.Slot.1-2", x, x, remote_switch_mac_address=pxe_mac), + ], + ) + assert guess_pxe_interfaces(device_info, {pxe_mac}) == [ + "NIC.Slot.1-1", + "NIC.Slot.1-2", + "NIC.Integrated.1-1", + "NIC.Integrated.1-2", + ] diff --git a/python/understack-workflows/understack_workflows/bmc_bios.py b/python/understack-workflows/understack_workflows/bmc_bios.py index e087daa92..f8af6cc63 100644 --- a/python/understack-workflows/understack_workflows/bmc_bios.py +++ b/python/understack-workflows/understack_workflows/bmc_bios.py @@ -6,13 +6,17 @@ logger = logging.getLogger(__name__) -def required_bios_settings(pxe_interface: str) -> dict: - """Return adjusted Bios settings map for BMC.""" - return { - "PxeDev1EnDis": "Enabled", - "PxeDev1Interface": pxe_interface, - "HttpDev1EnDis": "Enabled", - "HttpDev1Interface": pxe_interface, +def required_bios_settings(pxe_interfaces: list[str]) -> dict[str, str]: + """Return Bios settings map for BMC. + + Note that we set values like HttpDev8Interface for each of the + pxe_interfaces, however dell BIOS has a setting for "number of pxe + interfaces" which we currently leave at the default of 4, so we'll never + actually attempt pxe on anything but the first 4 interfaces. + """ + settings = { + # PXE is enabled by default on DELL, but we don't use it: + "PxeDev1EnDis": "Disabled", # at this time ironic conductor returns http URLs # when its serving data from its own http server "HttpDev1TlsMode": "None", @@ -28,8 +32,61 @@ def required_bios_settings(pxe_interface: str) -> dict: "IPMILan.1.Enable": "Disabled", } + for i in range(1, 8): + if len(pxe_interfaces) >= i: + settings[f"HttpDev{i}EnDis"] = "Enabled" + settings[f"HttpDev{i}Interface"] = pxe_interfaces[i - 1] + else: + settings[f"HttpDev{i}EnDis"] = "Disabled" + + return settings + + +def required_change_for_bios_setting( + key: str, + required_value: str, + current_settings: dict, + pending_settings: dict, +) -> str | None: + active_value = current_settings.get(key) + pending_value = pending_settings.get(key) + + if active_value is None: + logger.debug("X - BIOS has no %s setting", key) + return None -def update_dell_bios_settings(bmc: Bmc, pxe_interface: str) -> dict: + if pending_value == required_value: + logger.debug( + "[✓] %s currently %r but already pending update to %r", + key, + active_value, + required_value, + ) + return None + + if pending_value is not None: + logger.debug( + "⚠ - %s should be set to %r but with pending update to %r, updating", + key, + required_value, + pending_value, + ) + return required_value + + if active_value == required_value: + logger.debug("✓ - %s already set to %r", key, required_value) + return None + + logger.debug( + "→ - %s is currently %r, setting to %r", + key, + active_value, + required_value, + ) + return required_value + + +def update_dell_bios_settings(bmc: Bmc, pxe_interfaces: list[str]) -> dict: """Check and update BIOS settings to standard as required. Any changes take effect on next server reboot. @@ -37,16 +94,25 @@ def update_dell_bios_settings(bmc: Bmc, pxe_interface: str) -> dict: Returns the changes that were made """ current_settings = bmc.redfish_request(bmc.system_path + "/Bios")["Attributes"] - required_settings = required_bios_settings(pxe_interface) + pending_settings = bmc.redfish_request(bmc.system_path + "/Bios/Settings").get( + "Attributes", {} + ) + required_settings = required_bios_settings(pxe_interfaces) - required_changes = { - k: v - for k, v in required_settings.items() - if (k in current_settings and current_settings[k] != v) - } + logger.info("%s Checking BIOS settings", bmc) + required_changes = {} + for key, required_value in required_settings.items(): + required_change = required_change_for_bios_setting( + key, + required_value, + current_settings, + pending_settings, + ) + if required_change is None: + continue + required_changes[key] = required_change if required_changes: - logger.info("%s Updating BIOS settings: %s", bmc, required_changes) patch_bios_settings(bmc, required_changes) logger.info("%s BIOS settings will be updated on next server boot.", bmc) else: diff --git a/python/understack-workflows/understack_workflows/bmc_chassis_info.py b/python/understack-workflows/understack_workflows/bmc_chassis_info.py index 6f773aef2..0dd5e4da3 100644 --- a/python/understack-workflows/understack_workflows/bmc_chassis_info.py +++ b/python/understack-workflows/understack_workflows/bmc_chassis_info.py @@ -61,6 +61,26 @@ def neighbors(self) -> set: if interface.remote_switch_mac_address } + @property + def dump(self) -> list[str]: + return [ + f"{self.manufacturer} {self.model_number} serial {self.serial_number}", + f"BIOS VERSION {self.bios_version}", + f"BMC IP Address {self.bmc_ip_address}", + *self.dump_interfaces, + f"Power on {self.power_on}", + ] + + @property + def dump_interfaces(self) -> list[str]: + return [ + ( + f"NIC {i.mac_address} {i.name} " + f"[LLDP: {i.remote_switch_mac_address} {i.remote_switch_port_name}]" + ) + for i in self.interfaces + ] + def chassis_info(bmc: Bmc) -> ChassisInfo: """Query DRAC for basic system info via redfish. diff --git a/python/understack-workflows/understack_workflows/bmc_settings.py b/python/understack-workflows/understack_workflows/bmc_settings.py index cfb3330c1..cbe44aba6 100644 --- a/python/understack-workflows/understack_workflows/bmc_settings.py +++ b/python/understack-workflows/understack_workflows/bmc_settings.py @@ -48,7 +48,7 @@ def update_dell_drac_settings(bmc: Bmc) -> dict: elif key not in current_values: logger.warning("%s has no BMC attribute '%s'", bmc, key) elif current_values[key] == required_value: - logger.warning("%s: '%s' already set to '%s'", bmc, key, required_value) + logger.info("%s: '%s' already set to '%s'", bmc, key, required_value) else: required_changes[key] = VALUES_TO_POST[key] diff --git a/python/understack-workflows/understack_workflows/ironic/client.py b/python/understack-workflows/understack_workflows/ironic/client.py index 9f61efcf8..9a7d60a22 100644 --- a/python/understack-workflows/understack_workflows/ironic/client.py +++ b/python/understack-workflows/understack_workflows/ironic/client.py @@ -29,6 +29,38 @@ def get_node(self, node_ident: str, fields: list[str] | None = None) -> Node: def update_node(self, node_id, patch): return self.client.node.update(node_id, patch) + def set_node_provision_state( + self, + node_id: str, + target: str, + clean_steps: list[dict] | None = None, + runbook: str | None = None, + disable_ramdisk: bool | None = None, + ) -> None: + self.client.node.set_provision_state( + node_id, + target, + cleansteps=clean_steps, + runbook=runbook, + disable_ramdisk=disable_ramdisk, + ) + + def wait_for_node_provision_state( + self, node_id: str, expected_state: str, timeout: int = 1800 + ) -> None: + self.client.node.wait_for_provision_state( + node_id, expected_state, timeout=timeout + ) + + def set_node_target_raid_config(self, node_id: str, raid_config: dict) -> None: + self.client.node.set_target_raid_config(node_id, raid_config) + + def get_node_traits(self, node_id: str) -> list[str]: + return cast(list[str], self.client.node.get_traits(node_id)) + + def get_runbook(self, runbook_id: str): + return self.client.node.api.runbook.get(runbook_id) + def get_node_inventory(self, node_ident: str) -> dict: """Fetch node inventory data from Ironic API. diff --git a/python/understack-workflows/understack_workflows/ironic_node.py b/python/understack-workflows/understack_workflows/ironic_node.py index 2f7726455..cf1eb02be 100644 --- a/python/understack-workflows/understack_workflows/ironic_node.py +++ b/python/understack-workflows/understack_workflows/ironic_node.py @@ -1,39 +1,86 @@ +import json import logging +from collections.abc import Sequence import ironicclient.common.apiclient.exceptions from ironicclient.common.utils import args_array_to_patch +from ironicclient.v1.node import Node from understack_workflows.bmc import Bmc from understack_workflows.ironic.client import IronicClient -from understack_workflows.node_configuration import IronicNodeConfiguration -STATES_ALLOWING_UPDATES = ["enroll", "manageable"] +FIRMWARE_UPDATE_TRAIT_PREFIX = "CUSTOM_FIRMWARE_UPDATE_" +ENROLLABLE_STATES = {"clean failed", "inspect failed", "enroll", "manageable"} logger = logging.getLogger(__name__) def create_or_update( - bmc: Bmc, name: str, manufacturer: str, external_cmdb_id: str | None = None -) -> IronicNodeConfiguration: - """Note interfaces/ports are not synced here, that happens elsewhere.""" + bmc: Bmc, + name: str, + manufacturer: str, + external_cmdb_id: str | None = None, + enrolled_pxe_ports: list[str] | None = None, +) -> Node: + """Find-or-create Node by name, update attributes, set state to Manageable. + + If the node exists in a quiescent state like "clean failed" then we will set + it to "manageable" state before proceeding to update the node attributes. + + If the node exists in a "busy" state then we raise an exception. + + If the node does not already exist it is created and then transitioned from + enrol->manageable state. + + Note interfaces/ports are not synced here, that happens elsewhere. + """ client = IronicClient() driver, inspect_interface = _driver_for(manufacturer) try: - ironic_node = client.get_node(name) + node = client.get_node(name) logger.debug( - "Using existing baremetal node %s with name %s", ironic_node.uuid, name + "Baremetal node %s already exists with name %s in provision_state %s", + node.uuid, + name, + node.provision_state, ) + + if node.provision_state not in ENROLLABLE_STATES: + raise Exception( + f"Re-enrol cannot proceed unless node is in one of the states " + f"{sorted(ENROLLABLE_STATES)} but node {node.uuid} " + f"is in '{node.provision_state}'." + ) + + if node.provision_state != "manageable": + transition(node, target_state="manage", expected_state="manageable") + update_ironic_node( - client, bmc, ironic_node, name, driver, inspect_interface, external_cmdb_id + client, + bmc, + node, + name, + driver, + inspect_interface, + external_cmdb_id, + enrolled_pxe_ports, ) - # Return node as IronicNodeConfiguration (duck typing - Node has same attrs) - return ironic_node # type: ignore[return-value] except ironicclient.common.apiclient.exceptions.NotFound: logger.debug("Baremetal Node with name %s not found in Ironic, creating.", name) - return create_ironic_node( - client, bmc, name, driver, inspect_interface, external_cmdb_id + node = create_ironic_node( + client, + bmc, + name, + driver, + inspect_interface, + external_cmdb_id, + enrolled_pxe_ports, ) + # All newly-created nodes start out with "enrol" state: + transition(node, target_state="manage", expected_state="manageable") + + return node def update_ironic_node( @@ -44,15 +91,8 @@ def update_ironic_node( driver, inspect_interface, external_cmdb_id: str | None = None, + enrolled_pxe_ports: list[str] | None = None, ): - if ironic_node.provision_state not in STATES_ALLOWING_UPDATES: - logger.info( - "Baremetal node %s is in %s provision_state, so no updates are allowed", - ironic_node.uuid, - ironic_node.provision_state, - ) - return - updates = [ f"name={name}", f"driver={driver}", @@ -67,12 +107,15 @@ def update_ironic_node( # Update external_cmdb_id only when explicitly provided if external_cmdb_id: updates.append(f"extra/external_cmdb_id={external_cmdb_id}") + if enrolled_pxe_ports is not None: + payload = json.dumps(enrolled_pxe_ports) + updates.append(f"extra/enrolled_pxe_ports={payload}") patches = args_array_to_patch("add", updates) logger.info("Updating Ironic node %s patches=%s", ironic_node.uuid, patches) - response = client.update_node(ironic_node.uuid, patches) - logger.info("Ironic node %s Updated: response=%s", ironic_node.uuid, response) + client.update_node(ironic_node.uuid, patches) + logger.debug("Ironic node %s Updated.") def create_ironic_node( @@ -82,7 +125,8 @@ def create_ironic_node( driver: str, inspect_interface: str, external_cmdb_id: str | None = None, -) -> IronicNodeConfiguration: + enrolled_pxe_ports: list[str] | None = None, +) -> Node: node_data = { "name": name, "driver": driver, @@ -95,10 +139,17 @@ def create_ironic_node( "boot_interface": "http-ipxe", "inspect_interface": inspect_interface, } + if external_cmdb_id or enrolled_pxe_ports is not None: + node_data["extra"] = {} if external_cmdb_id: - node_data["extra"] = {"external_cmdb_id": external_cmdb_id} - # Return node as IronicNodeConfiguration (duck typing - Node has same attrs) - return client.create_node(node_data) # type: ignore[return-value] + node_data["extra"]["external_cmdb_id"] = external_cmdb_id + if enrolled_pxe_ports is not None: + node_data["extra"]["enrolled_pxe_ports"] = enrolled_pxe_ports + + if driver == "fake-hardware": + node_data["resource_class"] = "fakehw" + + return client.create_node(node_data) def _driver_for(manufacturer: str) -> tuple[str, str]: @@ -107,3 +158,102 @@ def _driver_for(manufacturer: str) -> tuple[str, str]: return ("idrac", "idrac-redfish") else: return ("redfish", "redfish") + + +def transition( + node: Node, + target_state: str, + expected_state: str | None = None, + clean_steps: list[dict] | None = None, + runbook: str | None = None, + disable_ramdisk: bool | None = None, +) -> None: + client = IronicClient() + + logger.info("%s requesting provision state %s", node.uuid, target_state) + client.set_node_provision_state( + node.uuid, + target_state, + clean_steps=clean_steps, + runbook=runbook, + disable_ramdisk=disable_ramdisk, + ) + if expected_state: + logger.info( + "Waiting for node %s to enter provision state %s", + node.uuid, + expected_state, + ) + client.wait_for_node_provision_state(node.uuid, expected_state, timeout=1800) + + +def patch(node: Node, updates: Sequence[str]) -> None: + if not updates: + return + logger.info("%s updating node %s", node.uuid, list(updates)) + IronicClient().update_node(node.uuid, args_array_to_patch("add", list(updates))) + + +def set_target_raid_config(node: Node, raid_config: dict) -> None: + IronicClient().set_node_target_raid_config(node.uuid, raid_config) + + +def inspect_out_of_band(node: Node): + refreshed_node = refresh(node, fields=["driver"]) + if refreshed_node.driver == "idrac": + inspect_interface = "idrac-redfish" + elif refreshed_node.driver == "redfish": + inspect_interface = "redfish" + else: + logger.warning("No out-of-band inspection for driver %s", refreshed_node.driver) + return + + logger.info("%s Performing out-of-band inspection", node.uuid) + inspect(node, inspect_interface) + + +def inspect(node: Node, inspect_interface: str = "agent"): + """Set the node inspect interface, inspect and wait for success. + + We raise an error if anything failed. If we return then the everything has + succeeded and the node has returned to the manageable state. + """ + refreshed_node = refresh(node, fields=["inspect_interface"]) + if refreshed_node.inspect_interface != inspect_interface: + patch(node, [f"inspect_interface={inspect_interface}"]) + + transition(node, target_state="inspect", expected_state="manageable") + + +def apply_firmware_updates(node: Node) -> None: + client = IronicClient() + traits = client.get_node_traits(node.uuid) + update_traits = sorted( + (trait for trait in traits if trait.startswith(FIRMWARE_UPDATE_TRAIT_PREFIX)), + key=firmware_trait_sort_key, + ) + if not update_traits: + logger.info("%s No firmware update traits found", node.uuid) + return + + for trait in update_traits: + runbook = client.get_runbook(trait) + logger.info("%s Running firmware update trait %s", node.uuid, trait) + transition( + node, + "clean", + expected_state="manageable", + runbook=runbook.uuid, + ) + + +def firmware_trait_sort_key(trait: str) -> tuple[int, str]: + parts = trait.split("_") + order = parts[3] if len(parts) > 3 else "" + if order.isdigit(): + return (int(order), trait) + return (10**9, trait) + + +def refresh(node: Node, fields: list[str] | None = None) -> Node: + return IronicClient().get_node(node.uuid, fields=fields) diff --git a/python/understack-workflows/understack_workflows/main/bios_settings.py b/python/understack-workflows/understack_workflows/main/bios_settings.py deleted file mode 100644 index b64a2021a..000000000 --- a/python/understack-workflows/understack_workflows/main/bios_settings.py +++ /dev/null @@ -1,61 +0,0 @@ -import argparse -import logging -import os - -from understack_workflows.bmc import bmc_for_ip_address -from understack_workflows.bmc_bios import update_dell_bios_settings -from understack_workflows.bmc_chassis_info import chassis_info -from understack_workflows.helpers import setup_logger -from understack_workflows.pxe_port_heuristic import guess_pxe_interface - -logger = logging.getLogger(__name__) - - -def main(): - """Update BIOS settings to undercloud standard for the given server. - - - Using BMC, configure our standard BIOS settings - - set PXE boot device - - set timezone to UTC - - set the hostname - - NOTE: take care with order of execution of these workflow steps: - - When asked to change BIOS settings, iDRAC enqueues a "job" that will execute - on next boot of the server. - - The assumption for this workflow is that this server will shortly be PXE - booting into the IPA image for cleaning or inspection. - - Therefore this workflow does not itself boot the server. - - Any subsequent iDRAC operations such as a code update or ironic validation - can clear our pending BIOS update job from the iDRAC job queue. Before we - perform any such operation, we should first do something that will cause a - reboot of the server. - """ - setup_logger() - args = argument_parser().parse_args() - - bmc_ip_address = args.bmc_ip_address - logger.info("%s starting for bmc_ip_address=%s", __file__, bmc_ip_address) - - bmc = bmc_for_ip_address(bmc_ip_address) - device_info = chassis_info(bmc) - pxe_interface = guess_pxe_interface(device_info) - logger.info("Selected %s as PXE interface", pxe_interface) - - update_dell_bios_settings(bmc, pxe_interface=pxe_interface) - - -def argument_parser(): - """Parse runtime arguments.""" - parser = argparse.ArgumentParser( - prog=os.path.basename(__file__), description="Update BIOS Settings" - ) - parser.add_argument("--bmc-ip-address", type=str, required=True, help="BMC IP") - return parser - - -if __name__ == "__main__": - main() diff --git a/python/understack-workflows/understack_workflows/main/enroll_server.py b/python/understack-workflows/understack_workflows/main/enroll_server.py index 1be24d8be..3df7b0362 100644 --- a/python/understack-workflows/understack_workflows/main/enroll_server.py +++ b/python/understack-workflows/understack_workflows/main/enroll_server.py @@ -3,39 +3,37 @@ import argparse import logging import os -from pprint import pformat +import time + +from ironicclient.v1.node import Node from understack_workflows import ironic_node from understack_workflows.bmc import Bmc +from understack_workflows.bmc import RedfishRequestError from understack_workflows.bmc import bmc_for_ip_address +from understack_workflows.bmc_bios import update_dell_bios_settings +from understack_workflows.bmc_chassis_info import ChassisInfo from understack_workflows.bmc_chassis_info import chassis_info from understack_workflows.bmc_credentials import set_bmc_password from understack_workflows.bmc_hostname import bmc_set_hostname from understack_workflows.bmc_settings import update_dell_drac_settings from understack_workflows.helpers import setup_logger +from understack_workflows.pxe_port_heuristic import guess_pxe_interfaces logger = logging.getLogger(__name__) +POST_POWER_ON_WAIT_SECONDS = 120 +POST_POWER_ON_RETRY_SECONDS = 30 +POST_POWER_ON_MAX_RETRIES = 8 + -def main(): +def main() -> None: """On-board new or Refresh existing baremetal node. - connect to the BMC using standard password, if that fails then use password supplied in --old-bmc-password option, or factory default - - ensure standard BMC password is set - - - if DHCP, set permanent IP address, netmask, default gw - - - TODO: create and install SSL certificate - - - TODO: set NTP Server IPs for DRAC - (NTP server IP addresses are different per region) - - - Using BMC, configure our standard BIOS settings - - set PXE boot device - - set timezone to UTC - - set the hostname + - ensure standard BMC password is set, and other basic BMC settings - from BMC, discover basic hardware info: - manufacturer, model number, serial number @@ -45,29 +43,104 @@ def main(): - MAC address - LLDP connections [{remote_mac, remote_interface_name}] + - Figure out which NICs to enable for PXE + + - Configure our standard BIOS settings including HTTP boot devices + - Find or create this baremetal node in Ironic - - set the name to "{manufacturer}-{servicetag}" - - set the driver as appropriate + - Set the name to "{manufacturer}-{servicetag}" + - Set the driver as appropriate for the manufacturer/model + - Configure RAID + - Transition through enrol->manage->inspect->cleaning->provide """ setup_logger() args = argument_parser().parse_args() - bmc_ip_address = args.bmc_ip_address - logger.info("%s starting for bmc_ip_address=%s", __file__, bmc_ip_address) + enrol( + ip_address=args.ip_address, + old_password=args.old_password, + firmware_update=args.firmware_update, + pxe_switch_macs=parse_maclist(str(args.pxe_switch_macs)), + raid_configure=args.raid_configure, + external_cmdb_id=args.external_cmdb_id, + ) + + +def parse_maclist(maclist: str) -> set[str]: + return {mac.strip().upper() for mac in maclist.split(",")} + + +def enrol( + ip_address: str, + firmware_update: bool, + raid_configure: bool, + pxe_switch_macs: set[str], + old_password: str | None, + external_cmdb_id: str | None = None, +) -> None: + logger.info("Starting enrol workflow for bmc_ip_address=%s", ip_address) + logger.info(" pxe_switch_macs=%s", pxe_switch_macs) + + if external_cmdb_id: + logger.info(" external_cmdb_id=%s", external_cmdb_id) + + bmc = bmc_for_ip_address(ip_address) + device_info = initialize_bmc(bmc, old_password) + + if insufficient_lldp_data(device_info): + device_info = power_on_and_wait(bmc, device_info) - bmc = bmc_for_ip_address(bmc_ip_address) + pxe_interfaces = guess_pxe_interfaces(device_info, pxe_switch_macs) + logger.info("Selected %s as PXE interfaces", pxe_interfaces) - device_id = enroll_server(bmc, args.old_bmc_password, args.external_cmdb_id) + node = ironic_node.create_or_update( + bmc=bmc, + name=device_name(device_info), + manufacturer=device_info.manufacturer, + external_cmdb_id=external_cmdb_id, + enrolled_pxe_ports=pxe_interfaces, + ) + + if raid_configure: + # Raid configuration runs a clean step which does a PXE boot. That + # can't work unless we first apply_bios_settings. + # TODO: why isn't skip_ramdisk working here? + apply_bios_settings(bmc, pxe_interfaces) + configure_raid(node, bmc) + else: + logger.info("%s RAID configuration was not requested", node.uuid) + + ironic_node.inspect_out_of_band(node) + + # Anecdotally, applying firmware updates can upset the next-boot HTTP, and + # potentially even upset the bios-settings configuration job in the iDRAC, + # so we do firmware first, which causes a reboot, and only then do we set + # the BIOS settings. + # + # Also, just maybe, the bios setting keys we are trying to set might not be + # available in the old version of the bios, in which case we need to boot + # the bios before redfish will allow us to set those settings. + if firmware_update: + ironic_node.apply_firmware_updates(node) - # argo workflows captures stdout as the results which we can use - # to return the device UUID - print(device_id) + # Applying BIOS settings on Dell servers requires a reboot which we achieve + # by initiating agent inspection. Agent inspection requires BIOS settings + # (to set boot device). Therefore these two actions must go hand-in-hand. + # + # Note that we may have already applied BIOS settings above. That is okay, + # it is idempotent. + apply_bios_settings(bmc, pxe_interfaces) + ironic_node.inspect(node) + # After successful inspection, our node is left in "manageable" state. All + # being well, the "provide" action will transition manageable -> cleaning -> + # available. + ironic_node.transition(node, target_state="provide", expected_state="available") + logger.info("Completed enrol workflow for bmc_ip_address=%s", ip_address) -def enroll_server( - bmc: Bmc, old_password: str | None, external_cmdb_id: str | None = None -) -> str: - """Enroll BMC to Undercloud Ironic.""" + +def initialize_bmc(bmc: Bmc, old_password: str | None) -> ChassisInfo: + """Discover and configure BMC with Undercloud standard settings.""" set_bmc_password( ip_address=bmc.ip_address, new_password=bmc.password, @@ -75,39 +148,180 @@ def enroll_server( ) device_info = chassis_info(bmc) - logger.info("Discovered %s", pformat(device_info)) - device_name = f"{device_info.manufacturer}-{device_info.serial_number}" + for line in device_info.dump: + logger.info("Discovered %s", line) - update_dell_drac_settings(bmc) + if device_info.manufacturer == "Dell": + update_dell_drac_settings(bmc) - bmc_set_hostname(bmc, device_info.bmc_hostname, device_name) + bmc_set_hostname(bmc, device_info.bmc_hostname, device_name(device_info)) + return device_info - node = ironic_node.create_or_update( - bmc=bmc, - name=device_name, - manufacturer=device_info.manufacturer, - external_cmdb_id=external_cmdb_id, + +def insufficient_lldp_data(device_info: ChassisInfo) -> bool: + """Whether the device_info is populated with switch connections. + + We normally get LLDP data for the BMC's own (out of band) interface but that + is not relevant to our investigation so we exclude known BMC interface + names. + + If the server has been powered on, we will often get LLDP data for the other + ports as well. + """ + for i in device_info.interfaces: + if ( + "DRAC" not in i.name.upper() + and "ILO" not in i.name.upper() + and i.remote_switch_mac_address + ): + return False + + return True + + +def power_on_and_wait(bmc: Bmc, device_info: ChassisInfo) -> ChassisInfo: + """If power is off, switch on and wait a minute.""" + if device_info.power_on: + logger.debug("Power is on") + return device_info + + logger.info("Power is off. Switching on and waiting for links to stabilize") + # TODO: figure out how to do this with sushy + bmc.redfish_request( + path=f"{bmc.get_system_path()}/Actions/ComputerSystem.Reset", + payload={"ResetType": "On"}, + method="POST", + ) + time.sleep(POST_POWER_ON_WAIT_SECONDS) + + for attempt in range(POST_POWER_ON_MAX_RETRIES + 1): + try: + return chassis_info(bmc) + except RedfishRequestError as exc: + if not _is_temporary_redfish_unavailable(exc): + raise + if attempt == POST_POWER_ON_MAX_RETRIES: + raise + logger.warning( + "BMC Redfish data is temporarily unavailable after power on for %s. " + "Retrying in %s seconds.", + bmc.ip_address, + POST_POWER_ON_RETRY_SECONDS, + ) + time.sleep(POST_POWER_ON_RETRY_SECONDS) + + raise AssertionError("unreachable") + + +def _is_temporary_redfish_unavailable(exc: RedfishRequestError) -> bool: + message = str(exc) + return "HTTP 503" in message or "ServiceTemporarilyUnavailable" in message + + +def device_name(device_info: ChassisInfo) -> str: + return f"{device_info.manufacturer}-{device_info.serial_number}" + + +def configure_raid(node: Node, bmc: Bmc): + raid_details = discover_controller_details(bmc) + if not raid_details: + logger.info("%s No RAID hardware found in node", node.uuid) + return + + logger.info("%s Applying RAID configuration", node.uuid) + raid_config = build_raid_config(**raid_details) + ironic_node.set_target_raid_config(node, raid_config) + ironic_node.transition( + node, + target_state="clean", + expected_state="manageable", + clean_steps=[ + {"interface": "raid", "step": "delete_configuration"}, + {"interface": "raid", "step": "create_configuration"}, + ], + disable_ramdisk=True, ) - logger.info("%s complete for %s", __file__, bmc.ip_address) - return node.uuid + +def discover_controller_details(bmc: Bmc) -> dict | None: + """Parse available RAID controller details for execution.""" + system_objects = bmc.sushy().get_system_collection().get_members() + system = system_objects[0] + for controller in system.storage.get_members(): + if "RAID" in controller.identity: + return { + "controller": controller.identity, + "physical_disks": [d.identity for d in controller.drives], + } + return None + + +def build_raid_config(controller: str, physical_disks: list[str]): + """Return a raid config supported by ironic for cleanup tasks.""" + if len(physical_disks) < 2: + raid_level = "0" + elif len(physical_disks) > 2: + raid_level = "5" + else: + raid_level = "1" + + result = { + "logical_disks": [ + { + "controller": controller, + "is_root_volume": True, + "physical_disks": physical_disks, + "raid_level": raid_level, + "size_gb": "MAX", + } + ] + } + return result + + +def apply_bios_settings(bmc: Bmc, pxe_interfaces: list[str]): + update_dell_bios_settings(bmc, pxe_interfaces=pxe_interfaces) + + +def parse_bool(value: str) -> bool: + return value.lower() == "true" def argument_parser(): - """Parse runtime arguments.""" parser = argparse.ArgumentParser( - prog=os.path.basename(__file__), description="Ingest Baremetal Node" + prog=os.path.basename(__file__), + description="Run the server enrol workflow", ) - parser.add_argument("--bmc-ip-address", type=str, required=True, help="BMC IP") + parser.add_argument("--ip-address", required=True, help="IP Address of BMC") parser.add_argument( - "--old-bmc-password", type=str, required=False, help="Old Password" + "--old-password", + required=False, + help="Old (current) BMC password", + ) + parser.add_argument( + "--firmware-update", + type=parse_bool, + default=False, + help="Run firmware update runbooks after inspection", + ) + parser.add_argument( + "--raid-configure", + type=parse_bool, + default=True, + help="Configure RAID before inspection", ) parser.add_argument( "--external-cmdb-id", - type=str, required=False, + default="", help="External CMDB ID for RXDB integration", ) + parser.add_argument( + "--pxe-switch-macs", + required=False, + default="", + help="Chassis MAC address of switches providing PXE network", + ) return parser diff --git a/python/understack-workflows/understack_workflows/pxe_port_heuristic.py b/python/understack-workflows/understack_workflows/pxe_port_heuristic.py index 5d0047d69..c1ab42081 100644 --- a/python/understack-workflows/understack_workflows/pxe_port_heuristic.py +++ b/python/understack-workflows/understack_workflows/pxe_port_heuristic.py @@ -1,55 +1,64 @@ +import logging + from understack_workflows.bmc_chassis_info import ChassisInfo from understack_workflows.bmc_chassis_info import InterfaceInfo +logger = logging.getLogger(__name__) + # We try not choose interface whose name contains any of these: DISQUALIFIED = ["DRAC", "ILO", "NIC.EMBEDDED"] -# A higher number is more likely to be PXE interface: -NIC_PREFERENCE = { - "NIC.Integrated.1-1-1": 100, - "NIC.Integrated.1-1": 99, - "NIC.Slot.1-1-1": 98, - "NIC.Slot.1-1": 97, - "NIC.Integrated.1-2-1": 96, - "NIC.Integrated.1-2": 95, - "NIC.Slot.1-2-1": 94, - "NIC.Slot.1-2": 93, - "NIC.Slot.1-3-1": 92, - "NIC.Slot.1-3": 91, - "NIC.Slot.2-1-1": 90, - "NIC.Slot.2-1": 89, - "NIC.Integrated.2-1-1": 88, - "NIC.Integrated.2-1": 87, - "NIC.Slot.2-2-1": 86, - "NIC.Slot.2-2": 85, - "NIC.Integrated.2-2-1": 84, - "NIC.Integrated.2-2": 83, - "NIC.Slot.3-1-1": 82, - "NIC.Slot.3-1": 81, - "NIC.Slot.3-2-1": 80, - "NIC.Slot.3-2": 79, -} - - -def guess_pxe_interface(device_info: ChassisInfo) -> str: - """Determine most probable PXE interface for BMC.""" - interface = max(device_info.interfaces, key=_pxe_preference) - return interface.name - - -def _pxe_preference(interface: InterfaceInfo) -> list: - """Relative likelihood that interface is used for PXE. - - Prefer names that are not disqualified. - - After that, prefer interfaces that have an LLDP neighbor. - - Finally, score the interface name according to the list above. - """ - is_eligible = not any(x in interface.name.upper() for x in DISQUALIFIED) - link_detected = interface.remote_switch_port_name is not None +def guess_pxe_interfaces( + device_info: ChassisInfo, pxe_switch_macs: set[str] | None = None +) -> list[str]: + """First 8 interface names, most probable PXE interfaces first.""" + if pxe_switch_macs is None: + pxe_switch_macs = set() + + candidate_interfaces = {i for i in device_info.interfaces if not disqualified(i)} + + names = [ + i.name + for i in sorted( + candidate_interfaces, + key=lambda i: likelihood(i, pxe_switch_macs), + ) + ] + + return names[0:8] + + +def disqualified(interface: InterfaceInfo) -> bool: + return any(x in interface.name.upper() for x in DISQUALIFIED) - name_preference = NIC_PREFERENCE.get(interface.name, 0) - return [is_eligible, link_detected, name_preference] +def likelihood(interface: InterfaceInfo, pxe_switch_macs: set[str]) -> list[bool | str]: + """A value that is sortable. Lower is better.""" + return [ + # python sort order: false is "better" than true + not connected_to_known_pxe_switch(interface, pxe_switch_macs), + not connected_to_any_switch(interface), + nic_port_number(interface), + interface.name, # tiebreaker + ] + + +def connected_to_known_pxe_switch(interface: InterfaceInfo, pxe_switch_macs) -> bool: + return interface.remote_switch_mac_address in pxe_switch_macs + + +def connected_to_any_switch(interface: InterfaceInfo) -> bool: + return interface.remote_switch_mac_address is not None + + +def nic_port_number(interface: InterfaceInfo) -> str: + """Preference of interface name like NIC.Slot.1-2-1. + + Prefer Lowest partition, then + prefer Lowest port, then + prefer Lowest slot number, then + prefer Integrated over Slot. + """ + reversed_name = interface.name[::-1] + return reversed_name diff --git a/workflows/argo-events/workflowtemplates/enroll-server.yaml b/workflows/argo-events/workflowtemplates/enroll-server.yaml index ea58a6d2c..0001ee46a 100644 --- a/workflows/argo-events/workflowtemplates/enroll-server.yaml +++ b/workflows/argo-events/workflowtemplates/enroll-server.yaml @@ -36,77 +36,19 @@ spec: steps: - - name: enroll-server template: enroll-server - - - name: server-enroll-state - template: openstack-state-cmd - arguments: - parameters: - - name: device_id - value: "{{steps.enroll-server.outputs.result}}" - - - name: manage-server - template: openstack-wait-cmd - arguments: - parameters: - - name: operation - value: "manage" - - name: device_id - value: "{{steps.enroll-server.outputs.result}}" - when: "{{steps.server-enroll-state.outputs.result}} == enroll" - - - name: get-raid-config - template: get-raid-config - when: "{{workflow.parameters.raid_configure}} == true" - - - name: openstack-set-baremetal-node-raid-config - template: openstack-set-baremetal-node-raid-config - arguments: - parameters: - - name: device_id - value: "{{steps.enroll-server.outputs.result}}" - - name: raid_config - value: "{{steps.get-raid-config.outputs.result}}" - when: "{{workflow.parameters.raid_configure}} == true" - - - name: server-manage-state - template: openstack-state-cmd - arguments: - parameters: - - name: device_id - value: "{{steps.enroll-server.outputs.result}}" - - - name: bios-settings - template: bios-settings - - - name: inspect-server - templateRef: - name: inspect-server - template: inspect-server - arguments: - parameters: - - name: node - value: "{{steps.enroll-server.outputs.result}}" - when: "{{steps.server-manage-state.outputs.result}} == manageable" - - - name: firmware-update - templateRef: - name: server-firmware-update - template: server-firmware-update - arguments: - parameters: - - name: node - value: "{{steps.enroll-server.outputs.result}}" - when: "{{workflow.parameters.firmware_update}} == true" - - - name: avail-server - template: openstack-wait-cmd - arguments: - parameters: - - name: operation - value: "provide" - - name: device_id - value: "{{steps.enroll-server.outputs.result}}" - when: "{{steps.server-manage-state.outputs.result}} == manageable" - name: enroll-server container: image: ghcr.io/rackerlabs/understack/ironic-nautobot-client:latest command: - enroll-server args: - - --bmc-ip-address + - --ip-address - "{{workflow.parameters.ip_address}}" - - --old-bmc-password + - --firmware-update + - "{{workflow.parameters.firmware_update}}" + - --raid-configure + - "{{workflow.parameters.raid_configure}}" + - --old-password - "{{workflow.parameters.old_password}}" - --external-cmdb-id - "{{workflow.parameters.external_cmdb_id}}" @@ -118,157 +60,14 @@ spec: name: bmc-master readOnly: true envFrom: - - configMapRef: - name: cluster-metadata - env: - - name: WF_NS - value: "{{workflow.namespace}}" - - name: WF_NAME - value: "{{workflow.name}}" - - name: WF_UID - value: "{{workflow.uid}}" - volumes: - - name: bmc-master - secret: - secretName: bmc-master - - name: baremetal-manage - secret: - secretName: baremetal-manage - items: - - key: clouds.yaml - path: clouds.yaml - - name: bios-settings - container: - image: ghcr.io/rackerlabs/understack/ironic-nautobot-client:latest - command: - - bios-settings - args: - - --bmc-ip-address - - "{{workflow.parameters.ip_address}}" - volumeMounts: - - mountPath: /etc/openstack - name: baremetal-manage - readOnly: true - - mountPath: /etc/bmc_master/ - name: bmc-master - readOnly: true - envFrom: - - configMapRef: - name: cluster-metadata - env: - - name: WF_NS - value: "{{workflow.namespace}}" - - name: WF_NAME - value: "{{workflow.name}}" - - name: WF_UID - value: "{{workflow.uid}}" - volumes: - - name: bmc-master - secret: - secretName: bmc-master - - name: openstack-wait-cmd - inputs: - parameters: - - name: operation - - name: device_id - container: - image: ghcr.io/rackerlabs/understack/openstack-client:2025.2 - command: - - openstack - args: - - baremetal - - node - - "{{inputs.parameters.operation}}" - - --wait - - "0" - - "{{inputs.parameters.device_id}}" + - configMapRef: + name: cluster-metadata env: + - name: WF_NS + value: "{{workflow.namespace}}" + - name: WF_NAME + value: "{{workflow.name}}" + - name: WF_UID + value: "{{workflow.uid}}" - name: OS_CLOUD value: understack - volumeMounts: - - mountPath: /etc/openstack - name: baremetal-manage - - name: get-raid-config - container: - image: ghcr.io/rackerlabs/understack/ironic-nautobot-client:latest - command: - - get-raid-devices - args: - - --ip-address - - "{{workflow.parameters.ip_address}}" - volumeMounts: - - mountPath: /etc/openstack - name: baremetal-manage - readOnly: true - - mountPath: /etc/bmc_master/ - name: bmc-master - readOnly: true - env: - - name: WF_NS - value: "{{workflow.namespace}}" - - name: WF_NAME - value: "{{workflow.name}}" - - name: WF_UID - value: "{{workflow.uid}}" - - name: openstack-set-baremetal-node-raid-config - inputs: - parameters: - - name: device_id - - name: raid_config - # https://rackerlabs.github.io/understack/user-guide/openstack-ironic/#setting-baremetal-node-flavor - script: - image: ghcr.io/rackerlabs/understack/openstack-client:2025.2 - command: [sh] - source: | - echo "setting RAID config for node: {{inputs.parameters.device_id}}" - # create the raid-config.json file. I find this easier to read - # than passing a big json string on command line - cat <<'EOF' >> raid-config.json - {{inputs.parameters.raid_config}} - EOF - # create the initial clean steps which will create a raid config - cat <<'EOF' >> raid-clean-steps.json - [ - { - "interface": "raid", - "step": "delete_configuration" - }, - { - "interface": "raid", - "step": "create_configuration" - } - ] - EOF - # apply the target raid config to the node - openstack baremetal node set {{inputs.parameters.device_id}} --target-raid-config raid-config.json - # create the raid config - openstack baremetal node clean --wait 0 --clean-steps raid-clean-steps.json --disable-ramdisk {{inputs.parameters.device_id}} - env: - - name: OS_CLOUD - value: understack - volumeMounts: - - mountPath: /etc/openstack - name: baremetal-manage - - name: openstack-state-cmd - inputs: - parameters: - - name: device_id - container: - image: ghcr.io/rackerlabs/understack/openstack-client:2025.2 - command: - - openstack - args: - - baremetal - - node - - show - - "-f" - - value - - "-c" - - provision_state - - "{{inputs.parameters.device_id}}" - env: - - name: OS_CLOUD - value: understack - volumeMounts: - - mountPath: /etc/openstack - name: baremetal-manage