-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy patheval.py
More file actions
63 lines (56 loc) · 2.9 KB
/
eval.py
File metadata and controls
63 lines (56 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from __future__ import annotations
from typing import Any
from umm.core.config import load_config
# Routing table for `umm eval`. Each entry is
#   (config section key, accepted benchmark names, module path, entry-point name).
# The order is significant only for the section-key fallback, where the first
# entry whose section key is present in the config wins.
_EVAL_ROUTES = (
    ("dpg_bench", ("dpg_bench",), "umm.cli.dpg_bench", "run_eval_command"),
    ("mme", ("mme",), "umm.cli.mme_eval", "run_mme_eval_command"),
    ("mmmu", ("mmmu",), "umm.cli.mmmu_eval", "run_mmmu_eval_command"),
    ("mmbench", ("mmbench",), "umm.cli.mmbench_eval", "run_mmbench_eval_command"),
    ("mmvet", ("mmvet",), "umm.cli.mmvet_eval", "run_mmvet_eval_command"),
    ("mathvista", ("mathvista",), "umm.cli.mathvista_eval", "run_mathvista_eval_command"),
    ("uni_mmmu", ("uni_mmmu",), "umm.cli.uni_mmmu", "run_eval_command"),
    ("wise", ("wise",), "umm.cli.wise", "run_wise_eval_command"),
    ("ueval", ("ueval",), "umm.cli.ueval_eval", "run_ueval_eval_command"),
    ("imgedit", ("imgedit",), "umm.cli.imgedit", "run_imgedit_eval_command"),
    ("gedit", ("gedit",), "umm.cli.gedit", "run_gedit_eval_command"),
    ("geneval", ("geneval",), "umm.cli.geneval", "run_eval_command"),
    ("unified_bench", ("unified_bench",), "umm.cli.unified_bench", "run_unified_bench_eval_command"),
    (
        "online_route",
        ("unipath_online_route", "planner_online_route"),
        "umm.post_training.unipath.planner.online_route_eval",
        "run_online_route_eval_command",
    ),
)


def _resolve_eval_route(benchmark: str, raw_cfg: Any):
    """Pick the ``(module path, entry-point name)`` pair for a config, or ``None``.

    An explicitly named benchmark is matched against every route first, so it
    cannot be shadowed by an unrelated section that also lives in the config.
    Only when the name matches no route does the presence of a top-level
    section key decide, checked in table order.
    """
    if benchmark:
        for _section, names, module_path, entry_point in _EVAL_ROUTES:
            if benchmark in names:
                return module_path, entry_point
    for section, _names, module_path, entry_point in _EVAL_ROUTES:
        if section in raw_cfg:
            return module_path, entry_point
    return None


def run_eval_command(args: Any) -> int:
    """Route the evaluation command to the runner for the configured benchmark.

    The benchmark name is read from ``eval.benchmark`` in the loaded config,
    falling back to a top-level string ``benchmark`` entry when the former is
    missing, null, or blank. Names are compared after stripping whitespace and
    lower-casing. If the name matches no known benchmark, the first known
    section key present at the top level of the config selects the runner.

    Args:
        args: Parsed CLI arguments; ``args.config`` is passed to ``load_config``
            and the whole object is forwarded unchanged to the selected runner.

    Returns:
        Whatever the selected benchmark runner returns.

    Raises:
        NotImplementedError: If neither the benchmark name nor any top-level
            section key identifies a supported benchmark.
    """
    import importlib

    raw_cfg = load_config(args.config)

    eval_section = raw_cfg.get("eval")
    eval_cfg = eval_section if isinstance(eval_section, dict) else {}

    # A null `eval.benchmark` counts as "not set"; stringifying it would yield
    # the truthy text "none" and block the top-level fallback below.
    configured = eval_cfg.get("benchmark")
    benchmark = "" if configured is None else str(configured).strip().lower()
    if not benchmark:
        top_level = raw_cfg.get("benchmark")
        if isinstance(top_level, str):
            benchmark = top_level.strip().lower()

    route = _resolve_eval_route(benchmark, raw_cfg)
    if route is None:
        raise NotImplementedError(f"`umm eval` benchmark '{benchmark}' is not supported yet (config: {args.config}).")

    # Lazy import to avoid pulling heavy deps that may not be installed
    # in every container image (e.g. geneval image lacks `datasets`).
    module_path, entry_point = route
    runner = getattr(importlib.import_module(module_path), entry_point)
    return runner(args)