WIP: report progress from the guided agent and make the logger icon configurable

MarcCote · MarcCote · commit f3f849050b33 · 2025-07-15T13:00:41.000-07:00
diff --git a/debug_gym/agents/guided_agent.py b/debug_gym/agents/guided_agent.py
@@ -12,6 +12,10 @@
 class GuidedRewriteAgent(RewriteAgent):
     name: str = "guided_agent"
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)  # forward every argument to RewriteAgent unchanged
+        self.logger.set_no_live()  # NOTE(review): presumably turns off the logger's live display for interactive use; confirm
+
     def try_rewrite(self, task_name):
         # make a copy of the env for the llm
         from ipdb import set_trace
@@ -38,54 +42,131 @@ def try_rewrite(self, task_name):
         return info.done
 
     def run(self, task_name=None, debug=False):
-        self.logger.level = logging.DEBUG
-        self.llm.logger = DebugGymLogger(
-            name="LLM", level=logging.ERROR, log_dir=self.logger.log_file.parent
-        )
-        self.human = LLM.instantiate(llm_name="human", logger=self.logger)
-
-        self.history.reset()
-        info = self.env.reset(options={"task_name": task_name})
-        # initial state does not have prompt and response
-        self.history.step(info, None)
-
-        if info.done is True:
-            # msg = "Environment started with entrypoint passing without errors."
-            return True
-
-        highscore = info.score
-
-        for step in self.logger.tqdm(range(self.config["max_steps"])):
-            highscore = max(highscore, info.score)
-            self.logger.info(
-                f"Score: {info.score}/{info.max_score} ({info.score/info.max_score:.1%}) [Best: {highscore}]"
+        """Run the guided loop: the LLM tries a rewrite, then the human acts.
+
+        Each step first calls ``try_rewrite``; if that fails, the human LLM is
+        prompted and its tool call is applied to ``self.env``. Progress goes to
+        ``self.logger.report_progress`` as steps complete, once more when the
+        loop ends, and with status "error" if an exception escapes (the
+        exception is re-raised).
+
+        Args:
+            task_name: Task passed to ``self.env.reset`` and used as the
+                ``problem_id`` in progress reports.
+            debug: If true, call ``breakpoint()`` after each human response.
+
+        Returns:
+            ``True`` if the environment is already done right after reset,
+            otherwise the ``done`` flag of the last ``self.env`` observation.
+        """
+        step = 0
+        # Bound before the try so the except handler can tell whether reset() ran.
+        info = None
+        max_steps = self.config["max_steps"]
+        try:
+            self.logger.level = logging.DEBUG
+            # NOTE(review): logger.py bakes self.icon into the rich handler's format
+            # string when that handler is created, so assigning .icon afterwards may
+            # not change the prefix of an existing handler; confirm. The LLM logger
+            # below therefore receives its icon through the constructor.
+            self.logger.icon = "👤"
+            self.llm.logger = DebugGymLogger(
+                name="LLM",
+                level=logging.ERROR,
+                log_dir=self.logger.log_file.parent,
+                icon="🤖",
             )
+            self.human = LLM.instantiate(llm_name="human", logger=self.logger)
+
+            self.history.reset()
+            info = self.env.reset(options={"task_name": task_name})
+            # initial state does not have prompt and response
+            self.history.step(info, None)
+
+            if info.done is True:
+                # Environment started with entrypoint passing without errors.
+                self.logger.report_progress(
+                    problem_id=task_name,
+                    step=1,
+                    total_steps=1,
+                    score=info.score,
+                    max_score=info.max_score,
+                    status="resolved",
+                )
+                return True
 
-            llm_done = self.try_rewrite(task_name)
-            if llm_done:
-                msg = f"*** The rewrite-only agent with {self.llm.model_name} managed to solve the task with the current context. ***"
-                self.logger.info(colored(msg, "green"))
-                break
-            else:
-                msg = f"*** The rewrite-only agent with {self.llm.model_name} failed to solve the task with the current context. ***"
-                self.logger.info(colored(msg, "red"))
-
-            # If the LLM did not manage to solve the task, we continue with the guided approach.
-            prompt = self.build_prompt(info)
-            human_response = self.human(prompt, info.tools)
-
-            if debug:
-                breakpoint()
-
-            # step the environment with the human response
-            info = self.env.step(human_response.tool)
-            # log the human response
-            self.history.step(info, human_response)
+            highscore = info.score
 
-            if info.done:
+            for step in range(max_steps):
+                self.logger.info(f"\n{'='*20} STEP {step+1} {'='*20}\n")
+                highscore = max(highscore, info.score)
                 self.logger.info(
-                    "You managed to provide the patch that solves the task before the LLM. Congrats!"
+                    f"Step: {step + 1} | Score: {info.score}/{info.max_score} ({info.score/info.max_score:.1%}) [Best: {highscore}]"
                 )
-                break
 
-        return info.done
+                llm_done = self.try_rewrite(task_name)
+                if llm_done:
+                    msg = f"[green]*** The rewrite-only agent with {self.llm.model_name} managed to solve the task with the current context. ***[/green]"
+                    self.logger.info(msg)
+                    break
+                else:
+                    msg = f"[red]*** The rewrite-only agent with {self.llm.model_name} failed to solve the task with the current context. ***[/red]"
+                    self.logger.info(msg)
+
+                # If the LLM did not manage to solve the task, we continue with the guided approach.
+                prompt = self.build_prompt(info)
+                human_response = self.human(prompt, info.tools)
+
+                if debug:
+                    breakpoint()
+
+                # step the environment with the human response
+                info = self.env.step(human_response.tool)
+                # log the human response
+                self.history.step(info, human_response)
+
+                if info.done:
+                    self.logger.info(
+                        "You managed to provide the patch that solves the task before the LLM. Congrats!"
+                    )
+                    # early stop, set current step and total steps to be the same
+                    self.logger.report_progress(
+                        problem_id=task_name,
+                        step=step + 1,
+                        total_steps=step + 1,
+                        score=info.score,
+                        max_score=info.max_score,
+                        status="resolved",
+                    )
+                    break
+                # keep progress bar running until max_steps is reached
+                self.logger.report_progress(
+                    problem_id=task_name,
+                    step=step + 1,
+                    total_steps=max_steps + 1,
+                    score=info.score,
+                    max_score=info.max_score,
+                    status="running",
+                )
+            # loop ended (break or max_steps reached): report the final status
+            self.logger.report_progress(
+                problem_id=task_name,
+                step=step + 1,
+                total_steps=step + 1,
+                score=info.score,
+                max_score=info.max_score,
+                status="resolved" if info.done else "unresolved",
+            )
+
+            return info.done
+        except Exception:
+            # report any error that happens during the run
+            self.logger.report_progress(
+                problem_id=task_name,
+                step=step + 1,
+                total_steps=step + 1,
+                score=info.score if info is not None else 0,
+                max_score=info.max_score if info is not None else 1,
+                status="error",
+            )
+            raise
diff --git a/debug_gym/logger.py b/debug_gym/logger.py
@@ -420,6 +420,7 @@ def __init__(
         log_dir: str | None = None,
         level: str | int = logging.INFO,
         mode: str = "a",
+        icon: str = "🐸",
     ):
         super().__init__(name)
         # If var env "DEBUG_GYM_DEBUG" is set, turn on debug mode
@@ -428,6 +429,7 @@ def __init__(
 
         # Prevent the log messages from being propagated to the root logger
         self.propagate = False
+        self.icon = icon  # Icon to use in log messages
 
         self.setLevel(level)  # Set logger level, might be overridden by file handler
         self.log_file = None  # File handler for logging to a file
@@ -451,7 +453,9 @@ def _initialize_main_logger(self, level):
             rich_tracebacks=True,
             markup=True,
         )
-        rich_handler.setFormatter(logging.Formatter("🐸 [%(name)-12s]: %(message)s"))
+        rich_handler.setFormatter(
+            logging.Formatter(f"{self.icon} [%(name)-12s]: %(message)s")
+        )
         rich_handler.setLevel(level)
         self.addHandler(rich_handler)