Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions src/tests/ftest/control/changing_fabric_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""
(C) Copyright 2026 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""

import os

from ior_test_base import IorTestBase
from util.network_utils import get_common_provider, SUPPORTED_PROVIDERS

Check failure on line 10 in src/tests/ftest/control/changing_fabric_provider.py

View workflow job for this annotation

GitHub Actions / Pylint check

no-name-in-module, No name 'network_utils' in module 'util'

Check failure on line 10 in src/tests/ftest/control/changing_fabric_provider.py

View workflow job for this annotation

GitHub Actions / Pylint check

import-error, Unable to import 'util.network_utils'


class ChangingFabricProvider(IorTestBase):
    """Test class Description: Test changing the fabric provider without reformatting the storage

    Look for confirmation in the DAOS logs

    :avocado: recursive
    """

    def _get_server_provider(self):
        """Get the fabric provider currently set in the first server manager's yaml object.

        Returns:
            object: the value stored under 'provider' in the server configuration
        """
        return self.server_managers[0].manager.job.yaml.get_value('provider')

    def _stop_engines_and_agents(self):
        """Stop the DAOS system through dmg and then stop the agents."""
        self.server_managers[0].dmg.system_stop(False)
        self.stop_agents()

    def _write_server_provider(self, provider, label):
        """Set the provider in the server configuration and regenerate the server yaml file.

        Args:
            provider: fabric provider value to write into the server configuration
            label (str): which provider this is ("new" or "original"), used in messages only
        """
        server_job = self.server_managers[0].manager.job
        # Any error while rewriting the config is reported as a test failure with context.
        try:
            server_job.yaml.provider.value = provider
            server_job.create_yaml_file(yaml_data=server_job.yaml.get_yaml_data())
            self.log.info(
                "Successfully updated server config with %s provider: %s", label, provider)
        except Exception as error:
            self.fail(f"Failed to update server configuration with {label} provider: {error}")

    def _restart_servers_with_provider(self, label):
        """Restart the servers with the current configuration and query the system.

        Args:
            label (str): which provider is in use ("new" or "original"), used in messages only
        """
        try:
            self.restart_servers()
            self.server_managers[0].dmg.system_query()
            self.log.info("Server restart completed successfully")
        except Exception as error:
            self.fail(f"Failed to restart servers with {label} provider: {error}")

    def _restart_agents_and_dump_attachinfo(self):
        """Start the agents again and dump the attach info of the first agent manager."""
        self.log_step("Restarting DAOS agents")
        try:
            self.start_agent_managers()
            self.agent_managers[0].dump_attachinfo()
            self.log.info("Agent restart completed successfully")
        except Exception as error:
            self.fail(f"Failed to restart agents: {error}")

    def _check_provider(self, expected, action):
        """Fail the test if the configured provider does not match the expected one.

        NOTE(review): this reads the value back from the test's server yaml object; it does not
        look at the engine logs - confirm whether a log/RAS based check should be added.

        Args:
            expected: provider value the configuration should now hold
            action (str): name of the operation being verified ("change" or "restoration"),
                used in the failure message only
        """
        actual = self._get_server_provider()
        self.log.info("Current provider after restart: %s", actual)
        if actual != expected:
            self.fail(f"Provider {action} failed. Expected: {expected}, Actual: {actual}")

    def _verify_ior_read(self, label, provider, read_flags, intercept):
        """Read back the existing IOR data, reusing the existing pool and container.

        Args:
            label (str): which provider is in use ("new" or "original"), used in messages only
            provider: provider value reported in the failure message
            read_flags: IOR flags to apply before running the read
            intercept (str): path of the interception library passed to run_ior_with_pool
        """
        self.log_step(f"Running IOR read test with {label} provider")
        try:
            self.ior_cmd.flags.update(read_flags)
            self.run_ior_with_pool(intercept=intercept, create_pool=False, create_cont=False)
            self.log.info("IOR read test with %s provider completed successfully", label)
        except Exception as error:
            self.fail(f"IOR read test failed with {label} provider {provider}: {error}")

    def test_changing_fabric_provider(self):
        """Change the fabric provider and restore it without reformatting the storage.

        Test Description:
            Purpose of this test is to test the fabric provider can
            be changed without reformatting the storage. Confirm that
            the provider changed by looking for messages in
            the logs.

        Use case:
            Find a supported provider common to all hosts that differs from the current one.
            Write data with IOR using the original provider.
            Stop the engines and agents, write the new provider to the server config,
            restart the servers and agents and read the data back with IOR.
            Restore the original provider the same way and read the data back again.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium
        :avocado: tags=control
        :avocado: tags=ChangingFabricProvider,test_changing_fabric_provider
        """
        ior_read_flags = self.params.get("read_flags", "/run/ior/*")
        intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')

        # Get all providers supported by the interface in use
        self.log_step("Find common providers")
        common_providers = get_common_provider(
            self.log, self.host_info.all_hosts, self.test_env.interface)
        self.log.info("common providers: %s", common_providers)

        # Get a different provider than what is being used
        original_provider = self._get_server_provider()
        new_provider = None
        for provider in common_providers:
            if original_provider not in provider and provider in SUPPORTED_PROVIDERS:
                new_provider = provider
                break

        if new_provider is None:
            self.fail(
                f"No alternative provider found. Available: {common_providers}, "
                f"Current: {original_provider}")

        self.log.info("Original provider: %s, New provider: %s", original_provider, new_provider)

        # Run IOR with the original provider
        try:
            self.run_ior_with_pool(intercept=intercept, fail_on_warning=False)
            self.log.info("Initial IOR write completed successfully")
        except Exception as error:
            self.fail(
                f"Initial IOR write failed with original provider {original_provider}: {error}")

        # Stop all DAOS engines and agent processes
        self.log_step("Stop all DAOS engines and agents")
        self.agent_managers[0].dump_attachinfo()
        self._stop_engines_and_agents()

        # Update the provider and write a new server YAML file.
        self.log_step(
            f"Generate config at {self.test_env.server_config} "
            f"and update provider to {new_provider}")
        self._write_server_provider(new_provider, "new")

        # Get the daos server yaml data again and check values
        self.log.info(
            'self.server_managers[0].manager.job.yaml.get_yaml_data() = %s',
            self.server_managers[0].manager.job.yaml.get_yaml_data())

        # Restart server with the new config.
        self.log_step(f"Restarting server with the new provider {self._get_server_provider()}")
        self._restart_servers_with_provider("new")

        # Restart the daos_agent and dump agent info
        self._restart_agents_and_dump_attachinfo()

        # Verify the provider was actually changed
        self._check_provider(new_provider, "change")

        # TODO: Check RAS event in daos_control.log

        # IOR read file to verify system works with new provider
        self._verify_ior_read("new", new_provider, ior_read_flags, intercept)

        # Change the provider back to the original and verify the switch back works
        self.log_step(f"Restoring original provider: {original_provider}")

        # Stop engines and agents again
        self._stop_engines_and_agents()

        # Restore original provider
        self._write_server_provider(original_provider, "original")

        # Restart servers with original provider
        self.log_step("Restarting DAOS servers")
        self._restart_servers_with_provider("original")

        # Restart the daos_agent and dump agent info
        self._restart_agents_and_dump_attachinfo()

        # Verify restoration of original provider
        self._check_provider(original_provider, "restoration")

        # IOR read file to verify system works with original provider
        self._verify_ior_read("original", original_provider, ior_read_flags, intercept)

        self.log.info(
            "Test completed successfully - fabric provider was changed and restored "
            "without storage reformatting")

Check warning on line 166 in src/tests/ftest/control/changing_fabric_provider.py

View workflow job for this annotation

GitHub Actions / Pylint check

line-too-long, Line too long (124/100)
47 changes: 47 additions & 0 deletions src/tests/ftest/control/changing_fabric_provider.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Test parameters for control/changing_fabric_provider.py
hosts:
test_servers: 3
test_clients: 1
setup:
# NOTE(review): presumably disabled because the test stops and restarts the
# servers and agents itself - confirm.
start_agents_once: false
start_servers_once: false
timeout: 600
server_config:
name: daos_server
engines_per_host: 1
engines:
0:
log_mask: INFO
storage: auto
system_ram_reserved: 2
pool:
size: 90%
svcn: 1
container:
type: POSIX
control_method: daos
ior:
env_vars:
- D_LOG_MASK=INFO
- DD_MASK=all
- DD_SUBSYS=all
api: POSIX
client_processes:
np_16:
np: 16
test_file: testFile
repetitions: 1
# Fetched by the test as "read_flags" under /run/ior/* and applied to the IOR
# command before each read-back run.
read_flags: "-v -C -k -e -r -R -G 27"
iorflags:
flags: "-v -k -e -w -r -R -G 27"
transfer_size: '1M'
block_size: '100M'
write_x: 2
read_x: 1
objectclass:
oclass_SX:
dfs_oclass: "SX"
config_generate_params:
scm_only: True
# NOTE(review): fixed provider value here, while the test selects providers at
# run time from the hosts' common providers - confirm this entry is still needed.
net_provider: "ofi+sockets"
dfuse:
disable_caching: true
Loading