From ad9915e244780953d9be09baf30c6241e4e35124 Mon Sep 17 00:00:00 2001
From: Patrice Bechard
Date: Wed, 4 Feb 2026 19:22:37 +0000
Subject: [PATCH 1/2] make startup more robust with retries for login and for start page

---
Review notes (this area is ignored by `git am`):
 * config.py documents SNOW_BROWSER_TIMEOUT as milliseconds (now 60000), but
   instance.py passes the same constant as `timeout=` to requests.get(),
   which interprets the value in seconds.
 * goto_with_retry() catches every Exception and retries immediately, with no
   delay between attempts.
 * EXTRA_HTTP_HEADERS is parsed with json.loads() at import time of
   api/utils.py, so a malformed value raises during import.
 * _check_is_reachable() now references Optional; no import is added by this
   patch, so it is presumably already imported in instance.py -- confirm.

 src/browsergym/workarena/api/utils.py  |  6 ++-
 src/browsergym/workarena/config.py     |  2 +-
 src/browsergym/workarena/instance.py   |  4 +-
 src/browsergym/workarena/tasks/base.py | 13 +++---
 src/browsergym/workarena/utils.py      | 56 +++++++++++++++++++++++---
 5 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/src/browsergym/workarena/api/utils.py b/src/browsergym/workarena/api/utils.py
index 47a6b2a..59113aa 100644
--- a/src/browsergym/workarena/api/utils.py
+++ b/src/browsergym/workarena/api/utils.py
@@ -1,13 +1,15 @@
 import requests
-
+import json
 from ..instance import SNowInstance
-
+import os
 from requests.exceptions import HTTPError
 from time import sleep
 
 # ServiceNow API configuration
 SNOW_API_HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}
 
+if os.environ.get("EXTRA_HTTP_HEADERS"):
+    SNOW_API_HEADERS.update(json.loads(os.environ.get("EXTRA_HTTP_HEADERS")))
 
 def table_api_call(
     instance: SNowInstance,
diff --git a/src/browsergym/workarena/config.py b/src/browsergym/workarena/config.py
index 74a8bdc..c7aee0d 100644
--- a/src/browsergym/workarena/config.py
+++ b/src/browsergym/workarena/config.py
@@ -7,7 +7,7 @@
 
 # ServiceNow configuration
 SNOW_DATA_LOOKBACK_MINUTES = 5
-SNOW_BROWSER_TIMEOUT = 30000  # Milliseconds
+SNOW_BROWSER_TIMEOUT = 60000  # Milliseconds
 SNOW_JS_UTILS_FILEPATH = str(resources.files(utils).joinpath("js_utils.js"))
 SNOW_SUPPORTED_RELEASES = ["washingtondc"]
 
diff --git a/src/browsergym/workarena/instance.py b/src/browsergym/workarena/instance.py
index 39ca7ea..23208d7 100644
--- a/src/browsergym/workarena/instance.py
+++ b/src/browsergym/workarena/instance.py
@@ -172,13 +172,13 @@ def _check_is_hibernating(self):
                 f"ServiceNow instance is hibernating. Please navigate to {self.snow_url} wake it up."
             )
 
-    def _check_is_reachable(self):
+    def _check_is_reachable(self, headers: Optional[dict] = None):
         """
         Test that the ServiceNow instance is reachable
 
         """
         try:
-            requests.get(self.snow_url, timeout=SNOW_BROWSER_TIMEOUT)
+            requests.get(self.snow_url, timeout=SNOW_BROWSER_TIMEOUT, headers=headers)
         except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
             raise RuntimeError(
                 f"ServiceNow instance at {self.snow_url} is not reachable. Please check the URL."
diff --git a/src/browsergym/workarena/tasks/base.py b/src/browsergym/workarena/tasks/base.py
index 34a8c1e..f0e40ad 100644
--- a/src/browsergym/workarena/tasks/base.py
+++ b/src/browsergym/workarena/tasks/base.py
@@ -18,7 +18,7 @@
 from ..api.user import create_user
 from ..api.utils import table_api_call
 from ..config import SNOW_BROWSER_TIMEOUT, SNOW_JS_UTILS_FILEPATH
-from ..utils import url_login
+from ..utils import url_login, goto_with_retry
 from ..instance import SNowInstance
 
 
@@ -36,6 +36,7 @@ def __init__(
         final_rel_url: Optional[str] = None,
         user_roles: List[str] = ["admin"],
         has_description: bool = False,
+        create_user_on_setup: bool = True,
     ) -> None:
         """
         Initialize the task
@@ -54,7 +55,8 @@ def __init__(
             The roles to assign to the user (default: ["admin"])
         has_description: bool
             Whether the task has a description in L3 compositional tasks
-
+        create_user_on_setup: bool
+            Whether to create a user on setup (default: True)
         """
         super().__init__(seed)
 
@@ -77,6 +79,7 @@ def __init__(
         # Flag to ensure the task is setup only once
         self.task_is_setup = False
         self.delete_user_on_teardown = False
+        self.create_user_on_setup = create_user_on_setup
         self.user_roles = user_roles
         self.has_description = (
             has_description  # Whether the task has a description in L3 compositional tasks
@@ -127,7 +130,7 @@ def setup(self, page: playwright.sync_api.Page, do_start=True) -> tuple[str, dic
         page.set_default_timeout(SNOW_BROWSER_TIMEOUT)
 
         # Create a new user to run the task if this is the starting task
-        if do_start:
+        if do_start and self.create_user_on_setup:
             self._base_initial_instance = self.instance
             self._base_user_name, self._base_user_password, self._base_user_sysid = create_user(
                 instance=self.instance, user_roles=self.user_roles, random=self.random
@@ -179,8 +182,8 @@ def start(self, page: playwright.sync_api.Page) -> None:
             page=page,
         )
 
-        # Navigate to the task's url
-        page.goto(self.start_url)
+        # Navigate to the task's url with retry logic
+        goto_with_retry(page, self.start_url)
 
     def teardown(self) -> None:
         """
diff --git a/src/browsergym/workarena/utils.py b/src/browsergym/workarena/utils.py
index 4ba87a9..262278f 100644
--- a/src/browsergym/workarena/utils.py
+++ b/src/browsergym/workarena/utils.py
@@ -3,6 +3,7 @@
 
 """
 
+import logging
 import playwright.sync_api
 
 from browsergym.workarena.instance import SNowInstance
@@ -10,6 +11,50 @@
 from urllib import parse
 
 
+def goto_with_retry(
+    page: playwright.sync_api.Page,
+    url: str,
+    max_retries: int = 3,
+    validation_selector: str = None,
+    wait_for_state: str = "domcontentloaded",
+) -> None:
+    """
+    Navigate to URL with retry logic and optional element validation.
+
+    Parameters:
+    -----------
+    page: playwright.sync_api.Page
+        The Playwright page object
+    url: str
+        The URL to navigate to
+    max_retries: int
+        Maximum number of retry attempts (default: 3)
+    validation_selector: str
+        Optional CSS selector to wait for after page load
+    wait_for_state: str
+        Playwright load state to wait for (default: "domcontentloaded")
+
+    Raises:
+    -------
+    RuntimeError
+        If navigation fails after all retry attempts
+    """
+    last_error = None
+    for attempt in range(max_retries):
+        try:
+            page.goto(url)
+            page.wait_for_load_state(wait_for_state)
+            if validation_selector:
+                page.wait_for_selector(validation_selector, timeout=10000)
+            return
+        except Exception as e:
+            last_error = e
+            if attempt < max_retries - 1:
+                logging.warning(f"Navigation to {url} attempt {attempt + 1} failed: {e}. Retrying...")
+
+    raise RuntimeError(f"Failed to load {url} after {max_retries} attempts: {last_error}")
+
+
 def impersonate_user(username: str, page: playwright.sync_api.Page):
     """
     Impersonate a user in the ServiceNow interface
@@ -54,8 +99,8 @@ def ui_login(instance: SNowInstance, page: playwright.sync_api.Page):
     """
     (snow_username, snow_password) = instance.snow_credentials
 
-    # Navigate to instance
-    page.goto(instance.snow_url)
+    # Navigate to instance with retry logic
+    goto_with_retry(page, instance.snow_url)
 
     # If login is required, we'll be redirected to the login page
     if "log in | servicenow" in page.title().lower():
@@ -88,10 +133,9 @@ def url_login(instance: SNowInstance, page: playwright.sync_api.Page):
     snow_username = parse.quote(snow_username)
     snow_password = parse.quote(snow_password)
 
-    # Log in via URL
-    page.goto(
-        f"{instance.snow_url}/login.do?user_name={snow_username}&user_password={snow_password}&sys_action=sysverb_login"
-    )
+    # Log in via URL with retry logic
+    login_url = f"{instance.snow_url}/login.do?user_name={snow_username}&user_password={snow_password}&sys_action=sysverb_login"
+    goto_with_retry(page, login_url)
 
     # Check if we have been returned to the login page
     current_url = parse.urlparse(parse.unquote(page.evaluate("() => window.location.href")))

From e44c811c4a9d70f8ca74813dbddea87d7ca74d2e Mon Sep 17 00:00:00 2001
From: Patrice Bechard
Date: Wed, 4 Feb 2026 19:28:28 +0000
Subject: [PATCH 2/2] formatting

---
 src/browsergym/workarena/api/utils.py | 1 +
 src/browsergym/workarena/utils.py     | 4 +++-
 src/wa_action_traces.py               | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/browsergym/workarena/api/utils.py b/src/browsergym/workarena/api/utils.py
index 59113aa..036b5d8 100644
--- a/src/browsergym/workarena/api/utils.py
+++ b/src/browsergym/workarena/api/utils.py
@@ -11,6 +11,7 @@
 if os.environ.get("EXTRA_HTTP_HEADERS"):
     SNOW_API_HEADERS.update(json.loads(os.environ.get("EXTRA_HTTP_HEADERS")))
 
+
 def table_api_call(
     instance: SNowInstance,
     table: str,
diff --git a/src/browsergym/workarena/utils.py b/src/browsergym/workarena/utils.py
index 262278f..7b07946 100644
--- a/src/browsergym/workarena/utils.py
+++ b/src/browsergym/workarena/utils.py
@@ -50,7 +50,9 @@ def goto_with_retry(
         except Exception as e:
             last_error = e
             if attempt < max_retries - 1:
-                logging.warning(f"Navigation to {url} attempt {attempt + 1} failed: {e}. Retrying...")
+                logging.warning(
+                    f"Navigation to {url} attempt {attempt + 1} failed: {e}. Retrying..."
+                )
 
     raise RuntimeError(f"Failed to load {url} after {max_retries} attempts: {last_error}")
 
diff --git a/src/wa_action_traces.py b/src/wa_action_traces.py
index 69ade37..59a7a18 100644
--- a/src/wa_action_traces.py
+++ b/src/wa_action_traces.py
@@ -112,7 +112,7 @@ def extract_trace(task_cls, headless=True):
     env.reset()
 
     # For compositional tasks, we need to cheat on each subtask
-    if hasattr(env.task, 'subtasks'):
+    if hasattr(env.task, "subtasks"):
         # This is a compositional task, solve each subtask
         for subtask_idx in range(len(env.task.subtasks)):
             env.task.cheat(env.page, env.chat.messages, subtask_idx)