Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
780469c
add hypo select by llm (without time)
Hoder-zyf Jul 25, 2025
99f6f52
add time_info and log color
Hoder-zyf Jul 25, 2025
3229058
Merge remote-tracking branch 'origin/main' into hypo_select
jingyuanlm Jul 30, 2025
a41f917
select no smooth
jingyuanlm Jul 30, 2025
dc4d4fa
2 hypo
jingyuanlm Jul 31, 2025
b9a3259
Merge remote-tracking branch 'origin/main' into hypo_select
jingyuanlm Aug 1, 2025
837a1d5
merge mian 0806
Hoder-zyf Aug 6, 2025
4365802
Merge remote-tracking branch 'origin/main' into hypo_select
jingyuanlm Aug 13, 2025
268e05d
change select
jingyuanlm Aug 13, 2025
c41cf05
small change
jingyuanlm Aug 13, 2025
285686a
fix bug
jingyuanlm Aug 13, 2025
afe334f
fix bug and add hypothesis router and begin flag
jingyuanlm Aug 14, 2025
45fd7f0
fix bug v1
jingyuanlm Aug 14, 2025
0f4fd55
fix bug v2
jingyuanlm Aug 14, 2025
69bc380
fix feedback
jingyuanlm Aug 15, 2025
401f04b
add new model
jingyuanlm Aug 15, 2025
a952c95
add filter
jingyuanlm Aug 15, 2025
417a757
fix bug v3
jingyuanlm Aug 15, 2025
2ccbe1c
fix bug v4
jingyuanlm Aug 15, 2025
10277b4
change prompts v2
jingyuanlm Aug 15, 2025
a1f5581
fix bug v5
jingyuanlm Aug 15, 2025
98f7c5b
fix bug v6
jingyuanlm Aug 16, 2025
b308a47
fix hypo
jingyuanlm Aug 16, 2025
c080772
fix some bug(sota socre, prompts, ensemble prompts ) and add path le…
jingyuanlm Aug 18, 2025
5a473b2
fix bug v7
jingyuanlm Aug 18, 2025
2b9bbe7
fix bug v8
jingyuanlm Aug 19, 2025
ce9e85a
fix bug v9
jingyuanlm Aug 19, 2025
1547da8
fix bug v10
jingyuanlm Aug 19, 2025
b4e2c35
add ensemble/merge
jingyuanlm Aug 20, 2025
ead3213
fix bug v11
jingyuanlm Aug 20, 2025
118222a
fix merge
jingyuanlm Aug 20, 2025
da4ba06
change merge/ selector / sota socre bug/
jingyuanlm Aug 21, 2025
7692b01
change task_gen
jingyuanlm Aug 21, 2025
f974713
fix bug v12
jingyuanlm Aug 21, 2025
a7eca98
fix bug v13
jingyuanlm Aug 21, 2025
08869e8
fix bug v14
jingyuanlm Aug 22, 2025
44a7d3b
fix bug v15
jingyuanlm Aug 25, 2025
2e6644d
fix bug v16
jingyuanlm Aug 26, 2025
fd606f1
change int to float
jingyuanlm Aug 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions rdagent/app/data_science/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
"""The recommended time limit for running on full data"""
full_timeout: int = 3600
"""The timeout limit for running on full data"""
ensemble_timeout: int = 3600*5

### specific feature

Expand Down Expand Up @@ -103,6 +104,10 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
sota_count_threshold: int = 1
"""The threshold for SOTA count"""

ensemble_with_merge: bool = False

ratio_merge_or_ensemble: int = 70 # 70% for merge/ensemble

#### multi-trace: SOTA experiment selector
sota_exp_selector_name: str = "rdagent.scenarios.data_science.proposal.exp_gen.select.submit.GlobalSOTASelector"
"""The name of the SOTA experiment selector to use"""
Expand All @@ -116,7 +121,7 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
exp_gen_version_list: str = "v3,v2"

#### multi-trace: time for final multi-trace merge
merge_hours: int = 0
merge_hours: float = 0
"""The time for merge"""

#### multi-trace: max SOTA-retrieved number, used in AutoSOTAexpSelector
Expand All @@ -136,10 +141,13 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
show_hard_limit: bool = True

#### hypothesis critique and rewrite
enable_hypo_critique_rewrite: bool = True
enable_hypo_critique_rewrite: bool = False
"""Enable hypothesis critique and rewrite stages for improving hypothesis quality"""
enable_scale_check: bool = False

#### hypothesis selection method
llm_select_hypothesis: bool = True
"""Whether to use LLM to select hypothesis. If True, use LLM selection; if False, use the existing ranking method."""
#### enable runner code change summary
runner_enable_code_change_summary: bool = True

Expand Down
11 changes: 8 additions & 3 deletions rdagent/log/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from .base import Storage
from .storage import FileStorage
from .utils import get_caller_info
from .utils import LogColors, get_caller_info


class RDAgentLog(SingletonBaseClass):
Expand Down Expand Up @@ -127,10 +127,15 @@ def _log(self, level: str, msg: str, *, tag: str = "", raw: bool = False) -> Non
logger.add(sys.stderr)

def info(self, msg: str, *, tag: str = "", raw: bool = False) -> None:
# Use default color for info messages
self._log("info", msg, tag=tag, raw=raw)

def warning(self, msg: str, *, tag: str = "", raw: bool = False) -> None:
self._log("warning", msg, tag=tag, raw=raw)
# Add yellow color for warning messages
colored_msg = f"{LogColors.YELLOW}{msg}{LogColors.END}"
self._log("warning", colored_msg, tag=tag, raw=True)

def error(self, msg: str, *, tag: str = "", raw: bool = False) -> None:
self._log("error", msg, tag=tag, raw=raw)
# Add red color for error messages
colored_msg = f"{LogColors.RED}{msg}{LogColors.END}"
self._log("error", colored_msg, tag=tag, raw=True)
10 changes: 7 additions & 3 deletions rdagent/scenarios/data_science/dev/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,13 @@ exp_feedback:
- If overfitting is detected, provide a detailed analysis explaining how and why it occurs, referencing scenario description, code implementation, and validation scores to support your findings.
- If such discrepancies or risks are found:
- Clearly document these issues in `Reasoning`, referencing both scenario description and code implementation—not just validation scores.
- Set `"Evaluation Aligned With Task": "no"` and `"Replace Best Result": "no"`.
- Begin your `reasoning` with `[Evaluation error]`, explicitly stating the evaluation alignment issues causing experiment failure.
- If evaluation alignment passes, set `"Evaluation Aligned With Task": "yes"`, and then proceed to Step 3.
- Severity-based handling:
- Severe risk — likely to invert or invalidate the performance trend between validation and test (e.g., strong overfitting, label leakage, test distribution shift):
- Set "Evaluation Aligned With Task": "no" and "Replace Best Result": "no".
- Begin your reasoning with [Evaluation error], explicitly stating the evaluation alignment issues causing experiment failure.
- Mild/moderate risk — may cause slightly optimistic or biased validation scores but is unlikely to change the relative performance trend (e.g., scaling or PCA fit on full training data that’s also applied consistently to test):
- Set "Evaluation Aligned With Task": "yes" but note the potential bias in Reasoning.
- Proceed to Step 3 for result comparison.

Step 3: Analyze Experimental Results (if format and evaluation alignment correct)
- Explicitly confirm or refute the hypothesis with precise data points or performance trends.
Expand Down
Loading