From daa5e5fed77874c771dd4847011f6d98755d5cf4 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Wed, 10 Dec 2025 13:22:59 -0600 Subject: [PATCH] added numactl -H call + datamodel update for lsmem output parsing --- .../plugins/inband/memory/memory_collector.py | 216 ++++++++++++++-- .../plugins/inband/memory/memorydata.py | 58 ++++- test/unit/plugin/test_memory_collector.py | 236 +++++++++++++++--- 3 files changed, 451 insertions(+), 59 deletions(-) diff --git a/nodescraper/plugins/inband/memory/memory_collector.py b/nodescraper/plugins/inband/memory/memory_collector.py index 0ca25605..43dd39ad 100644 --- a/nodescraper/plugins/inband/memory/memory_collector.py +++ b/nodescraper/plugins/inband/memory/memory_collector.py @@ -30,7 +30,15 @@ from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily from nodescraper.models import TaskResult -from .memorydata import MemoryDataModel +from .memorydata import ( + LsmemData, + MemoryBlock, + MemoryDataModel, + MemorySummary, + NumaDistance, + NumaNode, + NumaTopology, +) class MemoryCollector(InBandDataCollector[MemoryDataModel, None]): @@ -42,7 +50,8 @@ class MemoryCollector(InBandDataCollector[MemoryDataModel, None]): "wmic OS get FreePhysicalMemory /Value; wmic ComputerSystem get TotalPhysicalMemory /Value" ) CMD = "free -b" - CMD_LSMEM = "/usr/bin/lsmem" + CMD_LSMEM = "lsmem" + CMD_NUMACTL = "numactl -H" def collect_data(self, args=None) -> tuple[TaskResult, Optional[MemoryDataModel]]: """ @@ -84,12 +93,23 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[MemoryDataModel] lsmem_cmd = self._run_sut_cmd(self.CMD_LSMEM) if lsmem_cmd.exit_code == 0: lsmem_data = self._parse_lsmem_output(lsmem_cmd.stdout) - self._log_event( - category=EventCategory.OS, - description="lsmem output collected", - data=lsmem_data, - priority=EventPriority.INFO, - ) + if lsmem_data: + self._log_event( + category=EventCategory.OS, + description="lsmem output collected", + data={ + "memory_blocks": len(lsmem_data.memory_blocks), + "total_online_memory": lsmem_data.summary.total_online_memory, + }, + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.OS, + description="Failed to parse lsmem output", + priority=EventPriority.WARNING, + console_log=False, + ) else: self._log_event( category=EventCategory.OS, @@ -103,9 +123,48 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[MemoryDataModel] console_log=False, ) + # Collect NUMA topology information + numa_topology = None + if self.system_info.os_family != OSFamily.WINDOWS: + numactl_cmd = self._run_sut_cmd(self.CMD_NUMACTL) + if numactl_cmd.exit_code == 0: + numa_topology = self._parse_numactl_hardware(numactl_cmd.stdout) + if numa_topology: + self._log_event( + category=EventCategory.MEMORY, + description="NUMA topology collected", + data={ + "available_nodes": numa_topology.available_nodes, + "node_count": len(numa_topology.nodes), + }, + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.MEMORY, + description="Failed to parse numactl output", + priority=EventPriority.WARNING, + console_log=False, + ) + else: + self._log_event( + category=EventCategory.MEMORY, + description="Error running numactl command", + data={ + "command": numactl_cmd.command, + "exit_code": numactl_cmd.exit_code, + "stderr": numactl_cmd.stderr, + }, + priority=EventPriority.WARNING, + console_log=False, + ) + if mem_free and mem_total: mem_data = MemoryDataModel( - mem_free=mem_free, mem_total=mem_total, lsmem_output=lsmem_data + mem_free=mem_free, + mem_total=mem_total, + lsmem_data=lsmem_data, + numa_topology=numa_topology, ) self._log_event( category=EventCategory.OS, @@ -122,19 +181,19 @@ def collect_data(self, args=None) -> tuple[TaskResult, Optional[MemoryDataModel] return self.result, mem_data - def _parse_lsmem_output(self, output: str) -> dict: + def _parse_lsmem_output(self, output: str): """ - Parse lsmem command output into a structured dictionary. + Parse lsmem command output into a structured LsmemData object. Args: output: Raw stdout from lsmem command Returns: - dict: Parsed lsmem data with memory blocks and summary information + LsmemData: Parsed lsmem data with memory blocks and summary information """ lines = output.strip().split("\n") memory_blocks = [] - summary = {} + summary_dict = {} for line in lines: line = line.strip() @@ -146,21 +205,126 @@ def _parse_lsmem_output(self, output: str) -> dict: parts = line.split() if len(parts) >= 4: memory_blocks.append( - { - "range": parts[0], - "size": parts[1], - "state": parts[2], - "removable": parts[3] if len(parts) > 3 else None, - "block": parts[4] if len(parts) > 4 else None, - } + MemoryBlock( + range=parts[0], + size=parts[1], + state=parts[2], + removable=parts[3] if len(parts) > 3 else None, + block=parts[4] if len(parts) > 4 else None, + ) ) # Parse summary lines elif ":" in line: key, value = line.split(":", 1) - summary[key.strip().lower().replace(" ", "_")] = value.strip() + summary_dict[key.strip().lower().replace(" ", "_")] = value.strip() + + summary = MemorySummary( + memory_block_size=summary_dict.get("memory_block_size"), + total_online_memory=summary_dict.get("total_online_memory"), + total_offline_memory=summary_dict.get("total_offline_memory"), + ) + + if not memory_blocks: + return None + + return LsmemData(memory_blocks=memory_blocks, summary=summary) + + def _parse_numactl_hardware(self, output: str): + """ + Parse 'numactl -H' output into NumaTopology structure. + + Args: + output: Raw stdout from numactl -H command + + Returns: + NumaTopology object or None if parsing fails + """ + lines = output.strip().split("\n") + available_nodes = [] + nodes = [] + distances = [] + distance_matrix = {} + + current_section = None + + for line in lines: + line = line.strip() + if not line: + continue + + # Parse available nodes line + if line.startswith("available:"): + match = re.search(r"available:\s*(\d+)\s+nodes?\s*\(([^)]+)\)", line) + if match: + node_range = match.group(2) + if "-" in node_range: + start, end = node_range.split("-") + available_nodes = list(range(int(start), int(end) + 1)) + else: + available_nodes = [int(x.strip()) for x in node_range.split()] + + # Parse node CPU line + elif line.startswith("node") and "cpus:" in line: + match = re.search(r"node\s+(\d+)\s+cpus:\s*(.+)", line) + if match: + node_id = int(match.group(1)) + cpu_list_str = match.group(2).strip() + if cpu_list_str: + cpus = [int(x) for x in cpu_list_str.split()] + else: + cpus = [] + nodes.append(NumaNode(node_id=node_id, cpus=cpus)) + + # Parse node memory size + elif line.startswith("node") and "size:" in line: + match = re.search(r"node\s+(\d+)\s+size:\s*(\d+)\s*MB", line) + if match: + node_id = int(match.group(1)) + size_mb = int(match.group(2)) + # Find existing node and update + for node in nodes: + if node.node_id == node_id: + node.memory_size_mb = size_mb + break + + # Parse node free memory + elif line.startswith("node") and "free:" in line: + match = re.search(r"node\s+(\d+)\s+free:\s*(\d+)\s*MB", line) + if match: + node_id = int(match.group(1)) + free_mb = int(match.group(2)) + # Find existing node and update + for node in nodes: + if node.node_id == node_id: + node.memory_free_mb = free_mb + break + + # Parse distance matrix + elif line.startswith("node distances:"): + current_section = "distances" + + elif current_section == "distances": + if line.startswith("node") and ":" not in line: + continue + elif ":" in line: + parts = line.split(":") + if len(parts) == 2: + from_node = int(parts[0].strip()) + dist_values = [int(x) for x in parts[1].split()] + + distance_matrix[from_node] = {} + for to_node, dist in enumerate(dist_values): + distance_matrix[from_node][to_node] = dist + distances.append( + NumaDistance(from_node=from_node, to_node=to_node, distance=dist) + ) + + if not nodes: + return None - return { - "raw_output": output, - "memory_blocks": memory_blocks, - "summary": summary, - } + return NumaTopology( + available_nodes=available_nodes if available_nodes else [n.node_id for n in nodes], + nodes=nodes, + distances=distances, + distance_matrix=distance_matrix if distance_matrix else None, + ) diff --git a/nodescraper/plugins/inband/memory/memorydata.py b/nodescraper/plugins/inband/memory/memorydata.py index 4d0142e9..2687beaf 100644 --- a/nodescraper/plugins/inband/memory/memorydata.py +++ b/nodescraper/plugins/inband/memory/memorydata.py @@ -25,10 +25,66 @@ ############################################################################### from typing import Optional +from pydantic import BaseModel + from nodescraper.models import DataModel +class MemoryBlock(BaseModel): + """Memory block information from lsmem""" + + range: str + size: str + state: str + removable: Optional[str] = None + block: Optional[str] = None + + +class MemorySummary(BaseModel): + """Summary information from lsmem""" + + memory_block_size: Optional[str] = None + total_online_memory: Optional[str] = None + total_offline_memory: Optional[str] = None + + +class LsmemData(BaseModel): + """Complete lsmem output data""" + + memory_blocks: list[MemoryBlock] + summary: MemorySummary + + +class NumaNode(BaseModel): + """NUMA node information""" + + node_id: int + cpus: list[int] + memory_size_mb: Optional[int] = None + memory_free_mb: Optional[int] = None + + +class NumaDistance(BaseModel): + """Distance between two NUMA nodes""" + + from_node: int + to_node: int + distance: int + + +class NumaTopology(BaseModel): + """Complete NUMA topology from 'numactl --hardware'""" + + available_nodes: list[int] + nodes: list[NumaNode] + distances: list[NumaDistance] + distance_matrix: Optional[dict[int, dict[int, int]]] = None + + class MemoryDataModel(DataModel): + """Memory data model""" + mem_free: str mem_total: str - lsmem_output: Optional[dict] = None + lsmem_data: Optional[LsmemData] = None + numa_topology: Optional[NumaTopology] = None diff --git a/test/unit/plugin/test_memory_collector.py b/test/unit/plugin/test_memory_collector.py index 973243fe..dfdc53d7 100644 --- a/test/unit/plugin/test_memory_collector.py +++ b/test/unit/plugin/test_memory_collector.py @@ -68,7 +68,26 @@ def mock_run_command(command, **kwargs): "Total offline memory: 0B\n" ), stderr="", - command="/usr/bin/lsmem", + command="lsmem", + ) + elif "numactl" in command: + return CommandArtifact( + exit_code=0, + stdout=( + "available: 2 nodes (0-1)\n" + "node 0 cpus: 0 1 2 3 4 5 6 7\n" + "node 0 size: 32768 MB\n" + "node 0 free: 16384 MB\n" + "node 1 cpus: 8 9 10 11 12 13 14 15\n" + "node 1 size: 32768 MB\n" + "node 1 free: 20000 MB\n" + "node distances:\n" + "node 0 1\n" + " 0: 10 21\n" + " 1: 21 10" + ), + stderr="", + command="numactl -H", ) return CommandArtifact(exit_code=1, stdout="", stderr="", command=command) @@ -79,16 +98,18 @@ def mock_run_command(command, **kwargs): assert result.status == ExecutionStatus.OK assert data.mem_free == "2097459761152" assert data.mem_total == "2164113772544" - assert data.lsmem_output is not None - assert "memory_blocks" in data.lsmem_output - assert "summary" in data.lsmem_output - assert "raw_output" in data.lsmem_output - assert len(data.lsmem_output["memory_blocks"]) == 2 - assert data.lsmem_output["memory_blocks"][0]["range"] == "0x0000000000000000-0x000000007fffffff" - assert data.lsmem_output["memory_blocks"][0]["size"] == "2G" - assert data.lsmem_output["memory_blocks"][0]["state"] == "online" - assert data.lsmem_output["summary"]["memory_block_size"] == "128M" - assert data.lsmem_output["summary"]["total_online_memory"] == "128G" + assert data.lsmem_data is not None + assert len(data.lsmem_data.memory_blocks) == 2 + assert data.lsmem_data.memory_blocks[0].range == "0x0000000000000000-0x000000007fffffff" + assert data.lsmem_data.memory_blocks[0].size == "2G" + assert data.lsmem_data.memory_blocks[0].state == "online" + assert data.lsmem_data.summary.memory_block_size == "128M" + assert data.lsmem_data.summary.total_online_memory == "128G" + assert data.numa_topology is not None + assert len(data.numa_topology.nodes) == 2 + assert data.numa_topology.nodes[0].node_id == 0 + assert data.numa_topology.nodes[0].memory_size_mb == 32768 + assert data.numa_topology.distance_matrix[0][1] == 21 def test_run_windows(collector, conn_mock): @@ -105,7 +126,7 @@ def test_run_windows(collector, conn_mock): assert result.status == ExecutionStatus.OK assert data.mem_free == "12345678" assert data.mem_total == "123412341234" - assert data.lsmem_output is None + assert data.lsmem_data is None assert conn_mock.run_command.call_count == 1 @@ -127,7 +148,14 @@ def mock_run_command(command, **kwargs): exit_code=127, stdout="", stderr="lsmem: command not found", - command="/usr/bin/lsmem", + command="lsmem", + ) + elif "numactl" in command: + return CommandArtifact( + exit_code=127, + stdout="", + stderr="numactl: command not found", + command="numactl -H", ) return CommandArtifact(exit_code=1, stdout="", stderr="", command=command) @@ -138,9 +166,12 @@ def mock_run_command(command, **kwargs): assert result.status == ExecutionStatus.OK assert data.mem_free == "2097459761152" assert data.mem_total == "2164113772544" - assert data.lsmem_output is None + assert data.lsmem_data is None + assert data.numa_topology is None lsmem_events = [e for e in result.events if "lsmem" in e.description] assert len(lsmem_events) > 0 + numactl_events = [e for e in result.events if "numactl" in e.description] + assert len(numactl_events) > 0 def test_run_error(collector, conn_mock): @@ -179,25 +210,166 @@ def test_parse_lsmem_output(collector): result = collector._parse_lsmem_output(lsmem_output) - assert "raw_output" in result - assert "memory_blocks" in result - assert "summary" in result - assert result["raw_output"] == lsmem_output - assert len(result["memory_blocks"]) == 3 + assert result is not None + assert len(result.memory_blocks) == 3 + + assert result.memory_blocks[0].range == "0x0000000000000000-0x000000007fffffff" + assert result.memory_blocks[0].size == "2G" + assert result.memory_blocks[0].state == "online" + assert result.memory_blocks[0].removable == "yes" + assert result.memory_blocks[0].block == "0-15" + + assert result.memory_blocks[1].range == "0x0000000100000000-0x000000207fffffff" + assert result.memory_blocks[1].size == "126G" + assert result.memory_blocks[1].state == "online" + + assert result.memory_blocks[2].removable == "no" + assert result.memory_blocks[2].block == "2048-4095" + + assert result.summary.memory_block_size == "128M" + assert result.summary.total_online_memory == "254G" + assert result.summary.total_offline_memory == "0B" + + +def test_parse_lsmem_output_no_blocks(collector): + """Test parsing of lsmem output with no memory blocks.""" + lsmem_output = ( + "RANGE SIZE STATE REMOVABLE BLOCK\n" + "\n" + "Memory block size: 128M\n" + "Total online memory: 0G\n" + "Total offline memory: 0B\n" + ) + + result = collector._parse_lsmem_output(lsmem_output) + + assert result is None + + +def test_parse_lsmem_output_empty(collector): + """Test parsing of empty lsmem output.""" + result = collector._parse_lsmem_output("") + assert result is None + + +def test_parse_numactl_hardware_two_nodes(collector): + """Test parsing of numactl -H output with 2 NUMA nodes.""" + numactl_output = """available: 2 nodes (0-1) +node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +node 0 size: 32768 MB +node 0 free: 15234 MB +node 1 cpus: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 +node 1 size: 32768 MB +node 1 free: 20145 MB +node distances: +node 0 1 + 0: 10 21 + 1: 21 10""" + + result = collector._parse_numactl_hardware(numactl_output) + + assert result is not None + assert result.available_nodes == [0, 1] + assert len(result.nodes) == 2 + + # Check node 0 + assert result.nodes[0].node_id == 0 + assert result.nodes[0].cpus == list(range(16)) + assert result.nodes[0].memory_size_mb == 32768 + assert result.nodes[0].memory_free_mb == 15234 + + # Check node 1 + assert result.nodes[1].node_id == 1 + assert result.nodes[1].cpus == list(range(16, 32)) + assert result.nodes[1].memory_size_mb == 32768 + assert result.nodes[1].memory_free_mb == 20145 + + # Check distances + assert len(result.distances) == 4 + assert result.distance_matrix is not None + assert result.distance_matrix[0][0] == 10 + assert result.distance_matrix[0][1] == 21 + assert result.distance_matrix[1][0] == 21 + assert result.distance_matrix[1][1] == 10 + + +def test_parse_numactl_hardware_single_node(collector): + """Test parsing of numactl -H output with single NUMA node.""" + numactl_output = """available: 1 nodes (0) +node 0 cpus: 0 1 2 3 4 5 6 7 +node 0 size: 16384 MB +node 0 free: 8192 MB +node distances: +node 0 + 0: 10""" + + result = collector._parse_numactl_hardware(numactl_output) + + assert result is not None + assert result.available_nodes == [0] + assert len(result.nodes) == 1 + assert result.nodes[0].node_id == 0 + assert result.nodes[0].cpus == [0, 1, 2, 3, 4, 5, 6, 7] + assert result.nodes[0].memory_size_mb == 16384 + assert result.nodes[0].memory_free_mb == 8192 + assert len(result.distances) == 1 + assert result.distance_matrix[0][0] == 10 + + +def test_parse_numactl_hardware_no_memory_info(collector): + """Test parsing of numactl -H output without memory size/free info.""" + numactl_output = """available: 2 nodes (0-1) +node 0 cpus: 0 1 2 3 +node 1 cpus: 4 5 6 7 +node distances: +node 0 1 + 0: 10 21 + 1: 21 10""" + + result = collector._parse_numactl_hardware(numactl_output) + + assert result is not None + assert len(result.nodes) == 2 + assert result.nodes[0].memory_size_mb is None + assert result.nodes[0].memory_free_mb is None + assert result.nodes[1].memory_size_mb is None + assert result.nodes[1].memory_free_mb is None + + +def test_parse_numactl_hardware_empty_output(collector): + """Test parsing of empty numactl output.""" + result = collector._parse_numactl_hardware("") + assert result is None - assert result["memory_blocks"][0]["range"] == "0x0000000000000000-0x000000007fffffff" - assert result["memory_blocks"][0]["size"] == "2G" - assert result["memory_blocks"][0]["state"] == "online" - assert result["memory_blocks"][0]["removable"] == "yes" - assert result["memory_blocks"][0]["block"] == "0-15" - assert result["memory_blocks"][1]["range"] == "0x0000000100000000-0x000000207fffffff" - assert result["memory_blocks"][1]["size"] == "126G" - assert result["memory_blocks"][1]["state"] == "online" +def test_parse_numactl_hardware_four_nodes(collector): + """Test parsing of numactl -H output with 4 NUMA nodes.""" + numactl_output = """available: 4 nodes (0-3) +node 0 cpus: 0 1 2 3 +node 0 size: 8192 MB +node 0 free: 4096 MB +node 1 cpus: 4 5 6 7 +node 1 size: 8192 MB +node 1 free: 3000 MB +node 2 cpus: 8 9 10 11 +node 2 size: 8192 MB +node 2 free: 5000 MB +node 3 cpus: 12 13 14 15 +node 3 size: 8192 MB +node 3 free: 6000 MB +node distances: +node 0 1 2 3 + 0: 10 21 21 21 + 1: 21 10 21 21 + 2: 21 21 10 21 + 3: 21 21 21 10""" - assert result["memory_blocks"][2]["removable"] == "no" - assert result["memory_blocks"][2]["block"] == "2048-4095" + result = collector._parse_numactl_hardware(numactl_output) - assert result["summary"]["memory_block_size"] == "128M" - assert result["summary"]["total_online_memory"] == "254G" - assert result["summary"]["total_offline_memory"] == "0B" + assert result is not None + assert result.available_nodes == [0, 1, 2, 3] + assert len(result.nodes) == 4 + assert len(result.distances) == 16 + assert result.distance_matrix[0][0] == 10 + assert result.distance_matrix[0][3] == 21 + assert result.distance_matrix[3][3] == 10