diff --git a/nodescraper/interfaces/task.py b/nodescraper/interfaces/task.py index effd3029..16d1a70b 100644 --- a/nodescraper/interfaces/task.py +++ b/nodescraper/interfaces/task.py @@ -107,6 +107,8 @@ def _build_event( data = {"task_name": self.__class__.__name__, "task_type": self.TASK_TYPE} else: + # Copy to avoid mutating the caller's dict + data = copy.copy(data) data["task_name"] = self.__class__.__name__ data["task_type"] = self.TASK_TYPE diff --git a/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py b/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py index 88506f6e..82a82f91 100644 --- a/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py +++ b/nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py @@ -26,7 +26,7 @@ from typing import Optional from nodescraper.base import InBandDataCollector -from nodescraper.connection.inband.inband import CommandArtifact +from nodescraper.connection.inband.inband import CommandArtifact, TextFileArtifact from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily from nodescraper.models import TaskResult @@ -38,9 +38,10 @@ class DeviceEnumerationCollector(InBandDataCollector[DeviceEnumerationDataModel, DATA_MODEL = DeviceEnumerationDataModel - CMD_CPU_COUNT_LINUX = "lscpu | grep Socket | awk '{ print $2 }'" CMD_GPU_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'VGA\\|Display\\|3D' | wc -l" CMD_VF_COUNT_LINUX = "lspci -d {vendorid_ep}: | grep -i 'Virtual Function' | wc -l" + CMD_LSCPU_LINUX = "lscpu" + CMD_LSHW_LINUX = "lshw" CMD_CPU_COUNT_WINDOWS = ( 'powershell -Command "(Get-WmiObject -Class Win32_Processor | Measure-Object).Count"' @@ -61,9 +62,8 @@ def _warning( description=description, data={ "command": command.command, - "stdout": command.stdout, - "stderr": command.stderr, "exit_code": command.exit_code, + "stderr": command.stderr, }, priority=EventPriority.WARNING, ) @@ -75,8 +75,7 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio On Windows, use WMI and hyper-v cmdlets """ if self.system_info.os_family == OSFamily.LINUX: - # Count CPU sockets - cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_LINUX) + lscpu_res = self._run_sut_cmd(self.CMD_LSCPU_LINUX, log_artifact=False) # Count all AMD GPUs vendor_id = format(self.system_info.vendorid_ep, "x") @@ -86,6 +85,9 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio # Count AMD Virtual Functions vf_count_res = self._run_sut_cmd(self.CMD_VF_COUNT_LINUX.format(vendorid_ep=vendor_id)) + + # Collect lshw output + lshw_res = self._run_sut_cmd(self.CMD_LSHW_LINUX, sudo=True, log_artifact=False) else: cpu_count_res = self._run_sut_cmd(self.CMD_CPU_COUNT_WINDOWS) gpu_count_res = self._run_sut_cmd(self.CMD_GPU_COUNT_WINDOWS) @@ -93,10 +95,32 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio device_enum = DeviceEnumerationDataModel() - if cpu_count_res.exit_code == 0: - device_enum.cpu_count = int(cpu_count_res.stdout) + if self.system_info.os_family == OSFamily.LINUX: + if lscpu_res.exit_code == 0 and lscpu_res.stdout: + # Extract socket count from lscpu output + for line in lscpu_res.stdout.splitlines(): + if line.startswith("Socket(s):"): + try: + device_enum.cpu_count = int(line.split(":")[1].strip()) + break + except (ValueError, IndexError): + self._warning( + description="Cannot parse CPU count from lscpu output", + command=lscpu_res, + ) + device_enum.lscpu_output = lscpu_res.stdout + self._log_event( + category=EventCategory.PLATFORM, + description="Collected lscpu output", + priority=EventPriority.INFO, + ) + else: + self._warning(description="Cannot collect lscpu output", command=lscpu_res) else: - self._warning(description="Cannot determine CPU count", command=cpu_count_res) + if cpu_count_res.exit_code == 0: + device_enum.cpu_count = int(cpu_count_res.stdout) + else: + self._warning(description="Cannot determine CPU count", command=cpu_count_res) if gpu_count_res.exit_code == 0: device_enum.gpu_count = int(gpu_count_res.stdout) @@ -112,14 +136,33 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[DeviceEnumeratio category=EventCategory.SW_DRIVER, ) + # Collect lshw output on Linux + if self.system_info.os_family == OSFamily.LINUX: + if lshw_res.exit_code == 0 and lshw_res.stdout: + device_enum.lshw_output = lshw_res.stdout + self.result.artifacts.append( + TextFileArtifact(filename="lshw.txt", contents=lshw_res.stdout) + ) + self._log_event( + category=EventCategory.PLATFORM, + description="Collected lshw output", + priority=EventPriority.INFO, + ) + else: + self._warning(description="Cannot collect lshw output", command=lshw_res) + if device_enum.cpu_count or device_enum.gpu_count or device_enum.vf_count: + log_data = device_enum.model_dump( + exclude_none=True, + exclude={"lscpu_output", "lshw_output", "task_name", "task_type", "parent"}, + ) self._log_event( category=EventCategory.PLATFORM, description=f"Counted {device_enum.cpu_count} CPUs, {device_enum.gpu_count} GPUs, {device_enum.vf_count} VFs", - data=device_enum.model_dump(exclude_none=True), + data=log_data, priority=EventPriority.INFO, ) - self.result.message = f"Device Enumeration: {device_enum.model_dump(exclude_none=True)}" + self.result.message = f"Device Enumeration: {log_data}" self.result.status = ExecutionStatus.OK return self.result, device_enum else: diff --git a/nodescraper/plugins/inband/device_enumeration/deviceenumdata.py b/nodescraper/plugins/inband/device_enumeration/deviceenumdata.py index 74939209..bef13492 100644 --- a/nodescraper/plugins/inband/device_enumeration/deviceenumdata.py +++ b/nodescraper/plugins/inband/device_enumeration/deviceenumdata.py @@ -32,3 +32,5 @@ class DeviceEnumerationDataModel(DataModel): cpu_count: Optional[int] = None gpu_count: Optional[int] = None vf_count: Optional[int] = None + lscpu_output: Optional[str] = None + lshw_output: Optional[str] = None diff --git a/test/unit/plugin/test_device_enumeration_collector.py b/test/unit/plugin/test_device_enumeration_collector.py index a5a1ef30..795611a6 100644 --- a/test/unit/plugin/test_device_enumeration_collector.py +++ b/test/unit/plugin/test_device_enumeration_collector.py @@ -51,13 +51,16 @@ def test_collect_linux(system_info, device_enumeration_collector): """Test linux typical output""" system_info.os_family = OSFamily.LINUX + lscpu_output = "Architecture: x86_64\nCPU(s): 64\nSocket(s): 2" + lshw_output = "*-cpu\n product: AMD EPYC 1234 64-Core Processor" + device_enumeration_collector._run_sut_cmd = MagicMock( side_effect=[ MagicMock( exit_code=0, - stdout="2", + stdout=lscpu_output, stderr="", - command="lscpu | grep Socket | awk '{ print $2 }'", + command="lscpu", ), MagicMock( exit_code=0, @@ -71,12 +74,24 @@ def test_collect_linux(system_info, device_enumeration_collector): stderr="", command="lspci -d 1002: | grep -i 'Virtual Function' | wc -l", ), + MagicMock( + exit_code=0, + stdout=lshw_output, + stderr="", + command="lshw", + ), ] ) result, data = device_enumeration_collector.collect_data() assert result.status == ExecutionStatus.OK - assert data == DeviceEnumerationDataModel(cpu_count=2, gpu_count=8, vf_count=0) + assert data == DeviceEnumerationDataModel( + cpu_count=2, gpu_count=8, vf_count=0, lscpu_output=lscpu_output, lshw_output=lshw_output + ) + assert ( + len([a for a in result.artifacts if hasattr(a, "filename") and a.filename == "lshw.txt"]) + == 1 + ) def test_collect_windows(system_info, device_enumeration_collector): @@ -119,9 +134,9 @@ def test_collect_error(system_info, device_enumeration_collector): side_effect=[ MagicMock( exit_code=1, - stdout="some output", + stdout="", stderr="command failed", - command="lscpu | grep Socket | awk '{ print $2 }'", + command="lscpu", ), MagicMock( exit_code=1, @@ -135,6 +150,12 @@ def test_collect_error(system_info, device_enumeration_collector): stderr="command failed", command="lspci -d 1002: | grep -i 'Virtual Function' | wc -l", ), + MagicMock( + exit_code=1, + stdout="", + stderr="command failed", + command="lshw", + ), ] )