10 changes: 10 additions & 0 deletions rdagent/scenarios/data_science/dev/runner/__init__.py
@@ -65,17 +65,27 @@ def implement_one_task(
output_spec = PythonBatchEditOut.get_spec(with_del=False)
extract_output_fn = PythonBatchEditOut.extract_output

# Status description
status_desc = self.scen.describe_current_status(
stage="Running",
step="coder",
max_loop=DS_RD_SETTING.runner_max_loop,
cur_loop=len(queried_former_failed_knowledge) - 1,
)

if prev_task_feedback.acceptable is False:
task_information_str = target_task.get_task_information()
# Use system_debugger for error fixing and debugging
system_prompt = T(".prompts:DSCoSTEER.system_debugger").r(
status_desc=status_desc,
task_desc=task_information_str,
out_spec=output_spec,
diff_mode=self.settings.diff_mode,
)
else:
# Use system_refine for hyperparameter tuning
system_prompt = T(".prompts:DSCoSTEER.system_refine").r(
status_desc=status_desc,
out_spec=output_spec,
diff_mode=self.settings.diff_mode,
)
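For reference, the branch added above reduces to the following standalone sketch; the `Feedback` dataclass and `choose_system_prompt` helper are hypothetical stand-ins for `prev_task_feedback` and the `T(...).r(...)` prompt selection in the diff:

from dataclasses import dataclass

@dataclass
class Feedback:
    acceptable: bool  # set by the evaluator step

def choose_system_prompt(feedback: Feedback) -> str:
    # A failed run is routed to the debugger prompt; an acceptable one is
    # routed to the hyperparameter-refinement prompt, mirroring implement_one_task.
    if feedback.acceptable is False:
        return "DSCoSTEER.system_debugger"
    return "DSCoSTEER.system_refine"

assert choose_system_prompt(Feedback(acceptable=False)) == "DSCoSTEER.system_debugger"
assert choose_system_prompt(Feedback(acceptable=True)) == "DSCoSTEER.system_refine"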
6 changes: 6 additions & 0 deletions rdagent/scenarios/data_science/dev/runner/eval.py
@@ -187,6 +187,12 @@ def evaluate(
enable_hyperparameter_tuning_check = c1 and c2 and c3 and c4

system_prompt = T(".prompts:DSCoSTEER_eval.system").r(
status_desc=self.scen.describe_current_status(
stage="Running",
step="evaluator",
max_loop=DS_RD_SETTING.runner_max_loop,
cur_loop=len(queried_former_failed_knowledge) - 1,
),
scenario=self.scen.get_scenario_all_desc(eda_output=implementation.file_dict.get("EDA.md", None)),
task_desc=target_task.get_task_information(),
enable_hyperparameter_tuning_check=enable_hyperparameter_tuning_check,
8 changes: 3 additions & 5 deletions rdagent/scenarios/data_science/dev/runner/prompts.yaml
@@ -1,6 +1,6 @@
DSCoSTEER_eval:
system: |-
{% include "scenarios.data_science.share:scen.role" %}
{{ status_desc }}
You will be provided with:
1. `Code base`: The code base of the solution
2. `The stdout of code execution and testing`: The generated stdout when executing the code base and corresponding testing
@@ -89,8 +89,7 @@ DSCoSTEER_eval:

DSCoSTEER:
system_debugger: |-
{% include "scenarios.data_science.share:scen.role" %}
You have finished the implementation of the whole workflow which has executed well on a sampled dataset. Now we are working on the full dataset.
{{ status_desc }}
The user has reported that the workflow failed to execute on the full dataset.
You will be provided with:
1. Code base.
@@ -120,8 +119,7 @@ DSCoSTEER:
{% endif %}

system_refine: |-
{% include "scenarios.data_science.share:scen.role" %}
You have finished the implementation of the whole workflow which has executed well on a sampled dataset. Now we are working on the full dataset.
{{ status_desc }}
The user has reported that the hyperparameters are not reasonable and the code didn't make the best use of the time limit.
You will be provided with:
1. Code base.
25 changes: 25 additions & 0 deletions rdagent/scenarios/data_science/scen/__init__.py
@@ -4,6 +4,7 @@
from typing import Dict

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.components.coder.CoSTEER import RD_Agent_TIMER_wrapper
from rdagent.components.coder.data_science.conf import get_ds_env
from rdagent.core.experiment import FBWorkspace
from rdagent.core.scenario import Scenario
@@ -61,6 +62,30 @@ def __init__(self, competition: str) -> None:
self._get_direction()
) # True indicates higher is better, False indicates lower is better
self.timeout_increase_count = 0
self.timer = RD_Agent_TIMER_wrapper.timer

def describe_current_status(self, stage: str, **kwargs) -> str:
"""
Generate a description of the current scenario status, including stage details and timing information.

Args:
stage (str): One of [Proposal, Coding, Running, Feedback].
**kwargs: Additional keyword arguments passed to the stage-specific description template.

Example:
describe_current_status("Running", step="evaluator", max_loop=3, cur_loop=1)
"""
# Overall status description
status_desc = T("scenarios.data_science.share:scen.status_desc").r(
stage=stage,
total_time=self.timer.all_duration,
remain_time=self.timer.remain_time(),
)

# Stage-specific description
try:
stage_desc = T(f"scenarios.data_science.share:scen.{stage.lower()}_desc").r(**kwargs)
return f"# Current Status Description\n{status_desc}\n## Current Stage Description\n{stage_desc}"
except Exception:
return f"# Current Status Description\n{status_desc}"

def reanalyze_competition_description(self):
self.raw_description = self._get_description()
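A minimal usage sketch of the new method; construction of `scen` is elided, the call shape follows the docstring example, and the commented output shape follows the f-strings above:

# Hypothetical call, mirroring the docstring example:
desc = scen.describe_current_status("Running", step="evaluator", max_loop=3, cur_loop=1)
print(desc)
# Expected shape of the result (template bodies come from share.yaml below):
# # Current Status Description
# <status_desc rendered with stage/total_time/remain_time>
# ## Current Stage Description
# <running_desc rendered with step/max_loop/cur_loop>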
16 changes: 16 additions & 0 deletions rdagent/scenarios/data_science/share.yaml
@@ -69,6 +69,22 @@ scen: # customizable
You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
input_path: "./workspace_input/"
cache_path: "./workspace_cache/"
status_desc: |-
You are a Kaggle Grandmaster and expert ML engineer with deep expertise in statistics, machine learning, and competition optimization.
The user is iteratively improving a Kaggle competition solution. Each new iteration (trace) typically modifies the current State-of-the-Art (SOTA) solution. If the new trace outperforms the current SOTA, it becomes the new SOTA; otherwise, it is considered a failed experiment.
Each trace consists of the following stages:
[Proposal]: Identify challenges from previous traces and propose actionable hypotheses to address them. This stage sets the direction for the trace.
[Coding]: Write implementable Python code to realize the proposed hypothesis on a sampled dataset.
[Running]: Execute the implemented code on the full dataset, obtain validation scores, and refine the code if necessary.
[Feedback]: Analyze the implemented code, compare it with the previous SOTA, and generate feedback.
You are currently working on the [{{ stage }}] stage.
The total time limit for this competition is {{ total_time }}, with {{ remain_time }} remaining for the rest of the traces.
running_desc: |-
You have successfully implemented the workflow on a sampled dataset and we are now transitioning to the full dataset.
The code base will be iteratively evolved through a series of [evaluator] and [coder] steps in this stage.
The [evaluator] step executes the code base on the full dataset and evaluates it based on the execution result.
The [coder] step debugs or refines the code base based on the evaluator's feedback.
The maximum number of evolution steps is {{ max_loop }}, and you are currently on the [{{ step }}] step of loop {{ cur_loop }}.

component_description:
DataLoadSpec: |-
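As a quick check of the template variables, the final line of running_desc can be rendered directly with Jinja2; the values below are illustrative, and in RD-Agent the rendering goes through `T(...).r(...)` rather than a raw Template:

from jinja2 import Template

# Final line of running_desc, copied from share.yaml above:
running_desc = Template(
    "The maximum number of evolution steps is {{ max_loop }}, and you are "
    "currently on the [{{ step }}] step of loop {{ cur_loop }}."
)
print(running_desc.render(max_loop=3, step="evaluator", cur_loop=1))
# -> The maximum number of evolution steps is 3, and you are currently on the [evaluator] step of loop 1.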