10 changes: 10 additions & 0 deletions rdagent/scenarios/data_science/dev/runner/__init__.py
@@ -65,17 +65,27 @@ def implement_one_task(
output_spec = PythonBatchEditOut.get_spec(with_del=False)
extract_output_fn = PythonBatchEditOut.extract_output

# Status description
status_desc = self.scen.describe_current_status(
stage="Running",
step="coder",
max_loop=DS_RD_SETTING.runner_max_loop,
cur_loop=len(queried_former_failed_knowledge) - 1,
)

if prev_task_feedback.acceptable is False:
task_information_str = target_task.get_task_information()
# Use system_debugger for error fixing and debugging
system_prompt = T(".prompts:DSCoSTEER.system_debugger").r(
status_desc=status_desc,
task_desc=task_information_str,
out_spec=output_spec,
diff_mode=self.settings.diff_mode,
)
else:
# Use system_refine for hyperparameter tuning
system_prompt = T(".prompts:DSCoSTEER.system_refine").r(
status_desc=status_desc,
out_spec=output_spec,
diff_mode=self.settings.diff_mode,
)
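For reference, the branch added above reduces to the following standalone sketch; the `Feedback` dataclass and `choose_system_prompt` helper are hypothetical stand-ins for `prev_task_feedback` and the `T(...).r(...)` prompt selection in the diff:

from dataclasses import dataclass

@dataclass
class Feedback:
    acceptable: bool  # set by the evaluator step

def choose_system_prompt(feedback: Feedback) -> str:
    # A failed run is routed to the debugger prompt; an acceptable one is
    # routed to the hyperparameter-refinement prompt, mirroring implement_one_task.
    if feedback.acceptable is False:
        return "DSCoSTEER.system_debugger"
    return "DSCoSTEER.system_refine"

assert choose_system_prompt(Feedback(acceptable=False)) == "DSCoSTEER.system_debugger"
assert choose_system_prompt(Feedback(acceptable=True)) == "DSCoSTEER.system_refine"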
6 changes: 6 additions & 0 deletions rdagent/scenarios/data_science/dev/runner/eval.py
@@ -187,6 +187,12 @@ def evaluate(
enable_hyperparameter_tuning_check = c1 and c2 and c3 and c4

system_prompt = T(".prompts:DSCoSTEER_eval.system").r(
status_desc=self.scen.describe_current_status(
stage="Running",
step="evaluator",
max_loop=DS_RD_SETTING.runner_max_loop,
cur_loop=len(queried_former_failed_knowledge) - 1,
),
scenario=self.scen.get_scenario_all_desc(eda_output=implementation.file_dict.get("EDA.md", None)),
task_desc=target_task.get_task_information(),
enable_hyperparameter_tuning_check=enable_hyperparameter_tuning_check,
8 changes: 3 additions & 5 deletions rdagent/scenarios/data_science/dev/runner/prompts.yaml
@@ -1,6 +1,6 @@
DSCoSTEER_eval:
system: |-
{% include "scenarios.data_science.share:scen.role" %}
{{ status_desc }}
You will be provided with:
1. `Code base`: The code base of the solution
2. `The stdout of code execution and testing`: The generated stdout when executing the code base and corresponding testing
@@ -89,8 +89,7 @@ DSCoSTEER_eval:

DSCoSTEER:
system_debugger: |-
{% include "scenarios.data_science.share:scen.role" %}
You have finished the implementation of the whole workflow which has executed well on a sampled dataset. Now we are working on the full dataset.
{{ status_desc }}
The user has reported that the workflow failed to execute on the full dataset.
You will be provided with:
1. Code base.
@@ -120,8 +119,7 @@ DSCoSTEER:
{% endif %}

system_refine: |-
{% include "scenarios.data_science.share:scen.role" %}
You have finished the implementation of the whole workflow which has executed well on a sampled dataset. Now we are working on the full dataset.
{{ status_desc }}
The user has reported that the hyperparameters are not reasonable and the code didn't make the best use of the time limit.
You will be provided with:
1. Code base.
25 changes: 25 additions & 0 deletions rdagent/scenarios/data_science/scen/__init__.py
@@ -4,6 +4,7 @@
from typing import Dict

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.components.coder.CoSTEER import RD_Agent_TIMER_wrapper
from rdagent.components.coder.data_science.conf import get_ds_env
from rdagent.core.experiment import FBWorkspace
from rdagent.core.scenario import Scenario
@@ -61,6 +62,30 @@ def __init__(self, competition: str) -> None:
self._get_direction()
) # True indicates higher is better, False indicates lower is better
self.timeout_increase_count = 0
self.timer = RD_Agent_TIMER_wrapper.timer

def describe_current_status(self, stage: str, **kwargs) -> str:
"""
Generate a description of the current scenario status, including stage details and timing information.

Args:
stage (str): One of [Proposal, Coding, Running, Feedback].
**kwargs: Additional keyword arguments passed to the stage-specific description template.

Example:
describe_current_status("Running", step="evaluator", max_loop=3, cur_loop=1)
"""
# Overall status description
status_desc = T("scenarios.data_science.share:scen.status_desc").r(
stage=stage,
total_time=self.timer.all_duration,
remain_time=self.timer.remain_time(),
)

# Stage-specific description
try:
stage_desc = T(f"scenarios.data_science.share:scen.{stage.lower()}_desc").r(**kwargs)
return f"# Current Status Description\n{status_desc}\n## Current Stage Description\n{stage_desc}"
except Exception:
return f"# Current Status Description\n{status_desc}"

def reanalyze_competition_description(self):
self.raw_description = self._get_description()
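A minimal usage sketch of the new method; construction of `scen` is elided, the call shape follows the docstring example, and the commented output shape follows the f-strings above:

# Hypothetical call, mirroring the docstring example:
desc = scen.describe_current_status("Running", step="evaluator", max_loop=3, cur_loop=1)
print(desc)
# Expected shape of the result (template bodies come from share.yaml below):
# # Current Status Description
# <status_desc rendered with stage/total_time/remain_time>
# ## Current Stage Description
# <running_desc rendered with step/max_loop/cur_loop>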
16 changes: 16 additions & 0 deletions rdagent/scenarios/data_science/share.yaml
@@ -69,6 +69,22 @@ scen: # customizable
You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
input_path: "./workspace_input/"
cache_path: "./workspace_cache/"
status_desc: |-
You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
The user is iteratively improving a Kaggle competition solution. Each new iteration (trace) typically modifies the current State-of-the-Art (SOTA) solution. If the new trace outperforms the current SOTA, it becomes the new SOTA; otherwise, it is considered a failed experiment.
Each trace consists of the following stages:
[Proposal]: Identify challenges from previous traces and propose actionable hypotheses to address them. This stage sets the direction for the trace.
[Coding]: Write implementable Python code to realize the proposed hypothesis on a sampled dataset.
[Running]: Execute the implemented code on the full dataset, obtain validation scores, and refine the code if necessary.
[Feedback]: Analyze the implemented code, compare it with the previous SOTA, and generate feedback.
You are currently working on the [{{ stage }}] stage.
The total time limit for this competition is {{ total_time }}, with {{ remain_time }} remaining for the rest of the traces.
running_desc: |-
You have successfully implemented the workflow on a sampled dataset and we are now transitioning to the full dataset.
The code base will be iteratively evolved through a series of [evaluator] and [coder] steps in this stage.
The [evaluator] step executes the code base on the full dataset and evaluates it based on the execution result.
The [coder] step debugs or refines the code base based on the evaluator's feedback.
The maximum number of evolution steps is {{ max_loop }}, and you are currently on the [{{ step }}] step of loop {{ cur_loop }}.

component_description:
DataLoadSpec: |-
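As a quick check of the template variables, the final line of running_desc can be rendered directly with Jinja2; the values below are illustrative, and in RD-Agent the rendering goes through `T(...).r(...)` rather than a raw Template:

from jinja2 import Template

# Final line of running_desc, copied from share.yaml above:
running_desc = Template(
    "The maximum number of evolution steps is {{ max_loop }}, and you are "
    "currently on the [{{ step }}] step of loop {{ cur_loop }}."
)
print(running_desc.render(max_loop=3, step="evaluator", cur_loop=1))
# -> The maximum number of evolution steps is 3, and you are currently on the [evaluator] step of loop 1.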