From 3fbd8461a5ade5f14546d9f8b52c3217a0bdbee4 Mon Sep 17 00:00:00 2001 From: shravani-whoi Date: Tue, 28 Apr 2026 11:09:20 -0400 Subject: [PATCH 1/3] vastdb config --- src/improv/api/app.py | 12 ++++++++++-- src/improv/config.py | 34 ++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/improv/api/app.py b/src/improv/api/app.py index 9eda241..486b664 100644 --- a/src/improv/api/app.py +++ b/src/improv/api/app.py @@ -17,7 +17,8 @@ def create_app(config: "ImprovConfig") -> FastAPI: Registers service tables at startup via lifespan, injects ImageService via app.state for use in route dependencies. """ - from amplify_db_utils import DuckDBParquetStore + from amplify_db_utils import DuckDBParquetStore, DuckDBParquetConfig + from amplify_db_utils.vastdb_store import VastDBConfig, VastDBStore from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker @@ -28,7 +29,14 @@ def create_app(config: "ImprovConfig") -> FastAPI: @asynccontextmanager async def lifespan(app: FastAPI): - store = DuckDBParquetStore(config.db_config) + if isinstance(config.db_config, VastDBConfig): + store = VastDBStore(config.db_config) + elif isinstance(config.db_config, DuckDBParquetConfig): + store = DuckDBParquetStore(config.db_config) + else: + raise RuntimeError( + f"Unsupported db_config type: {type(config.db_config).__name__}" + ) engine = create_engine(config.database_url or "sqlite:///:memory:") Base.metadata.create_all(engine) Session = sessionmaker(bind=engine) diff --git a/src/improv/config.py b/src/improv/config.py index 06451af..4de5b51 100644 --- a/src/improv/config.py +++ b/src/improv/config.py @@ -20,6 +20,7 @@ from typing import TYPE_CHECKING from amplify_db_utils import DuckDBParquetConfig +from amplify_db_utils.vastdb_store import VastDBConfig if TYPE_CHECKING: from improv.ids import ImageIdParser @@ -30,7 +31,7 @@ @dataclass class ImprovConfig: # Columnar store — required - db_config: DuckDBParquetConfig + db_config: DuckDBParquetConfig | VastDBConfig # OLTP database — required for service mode; omit for batch-producer use database_url: str | None = None @@ -45,19 +46,28 @@ class ImprovConfig: def load_config() -> ImprovConfig: """Build ImprovConfig from environment variables.""" + backend = os.environ.get("IMPROV_DB_BACKEND", "duckdb").lower() db_root = os.environ.get("IMPROV_DB_ROOT") - if not db_root: - raise RuntimeError( - "IMPROV_DB_ROOT environment variable is required. " - "Set it to a local path or s3:// URL." + + if backend == "vastdb": + db_cfg = VastDBConfig( + endpoint=os.environ["IMPROV_VASTDB_ENDPOINT"], + access_key=os.environ["IMPROV_VASTDB_ACCESS_KEY"], + secret_key=os.environ["IMPROV_VASTDB_SECRET_KEY"], + bucket=os.environ["IMPROV_VASTDB_BUCKET"], + schema=os.environ["IMPROV_VASTDB_SCHEMA"], + add_written_at=True, + ) + elif backend == "duckdb": + db_root = os.environ.get("IMPROV_DB_ROOT") + if not db_root: + raise RuntimeError("IMPROV_DB_ROOT required for duckdb backend.") + db_cfg = DuckDBParquetConfig( + root=db_root, + s3_endpoint=os.environ.get("IMPROV_S3_ENDPOINT"), + s3_access_key=os.environ.get("IMPROV_S3_ACCESS_KEY"), + s3_secret_key=os.environ.get("IMPROV_S3_SECRET_KEY"), ) - - db_cfg = DuckDBParquetConfig( - root=db_root, - s3_endpoint=os.environ.get("IMPROV_S3_ENDPOINT"), - s3_access_key=os.environ.get("IMPROV_S3_ACCESS_KEY"), - s3_secret_key=os.environ.get("IMPROV_S3_SECRET_KEY"), - ) storage = None storage_path = os.environ.get("IMPROV_STORAGE_PATH") From 92aae675fd0168e8ea345e3036a238ca6c3cb881 Mon Sep 17 00:00:00 2001 From: shravani-whoi Date: Tue, 5 May 2026 10:22:25 -0400 Subject: [PATCH 2/3] working vastdb updates --- run_dev.py | 3 +++ tests/conftest.py | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 run_dev.py diff --git a/run_dev.py b/run_dev.py new file mode 100644 index 0000000..8428a8d --- /dev/null +++ b/run_dev.py @@ -0,0 +1,3 @@ +from improv.api.app import create_app +from improv.config import load_config +app = create_app(load_config()) diff --git a/tests/conftest.py b/tests/conftest.py index 5eef05d..b4fb536 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,16 @@ from improv.service import ImageService from improv.store.tables import register_service_tables +import os +import uuid +import pytest + +from amplify_db_utils import DuckDBParquetConfig, DuckDBParquetStore +from amplify_db_utils.vastdb_store import VastDBConfig, VastDBStore + +_BACKENDS = ["duckdb"] +if os.environ.get("IMPROV_TEST_VASTDB") == "1": + _BACKENDS.append("vastdb") # --------------------------------------------------------------------------- # Synthetic parsers for tests — no real instrument format assumed @@ -72,10 +82,27 @@ def parsers(alpha_parser, beta_parser) -> list: # Columnar store # --------------------------------------------------------------------------- -@pytest.fixture -def store(tmp_path): - config = DuckDBParquetConfig(root=str(tmp_path / "store")) - return DuckDBParquetStore(config) +@pytest.fixture(params=_BACKENDS) +def store(request, tmp_path): + if request.param == "duckdb": + cfg = DuckDBParquetConfig(root=str(tmp_path / "store")) + yield DuckDBParquetStore(cfg) + return + + # VastDB: per-test unique schema for isolation, torn down after. + cfg = VastDBConfig( + endpoint=os.environ["IMPROV_TEST_VASTDB_ENDPOINT"], + access_key=os.environ["IMPROV_TEST_VASTDB_ACCESS_KEY"], + secret_key=os.environ["IMPROV_TEST_VASTDB_SECRET_KEY"], + bucket=os.environ["IMPROV_TEST_VASTDB_BUCKET"], + schema=f"improv_test_{uuid.uuid4().hex[:8]}", + add_written_at=True, + ) + s = VastDBStore(cfg) + try: + yield s + finally: + s.drop_schema() @pytest.fixture From bf3a7e602b5fb87203043711408d9741ce9523f7 Mon Sep 17 00:00:00 2001 From: shravani-whoi <91488778+shravani-whoi@users.noreply.github.com> Date: Wed, 6 May 2026 15:43:23 -0400 Subject: [PATCH 3/3] Delete run_dev.py --- run_dev.py | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 run_dev.py diff --git a/run_dev.py b/run_dev.py deleted file mode 100644 index 8428a8d..0000000 --- a/run_dev.py +++ /dev/null @@ -1,3 +0,0 @@ -from improv.api.app import create_app -from improv.config import load_config -app = create_app(load_config())