diff --git a/src/improv/api/app.py b/src/improv/api/app.py
index 9eda241..486b664 100644
--- a/src/improv/api/app.py
+++ b/src/improv/api/app.py
@@ -17,7 +17,8 @@ def create_app(config: "ImprovConfig") -> FastAPI:
     Registers service tables at startup via lifespan, injects ImageService
     via app.state for use in route dependencies.
     """
-    from amplify_db_utils import DuckDBParquetStore
+    from amplify_db_utils import DuckDBParquetConfig, DuckDBParquetStore
+    from amplify_db_utils.vastdb_store import VastDBConfig, VastDBStore
     from sqlalchemy import create_engine
     from sqlalchemy.orm import sessionmaker
 
@@ -28,7 +29,15 @@ def create_app(config: "ImprovConfig") -> FastAPI:
 
     @asynccontextmanager
     async def lifespan(app: FastAPI):
-        store = DuckDBParquetStore(config.db_config)
+        # Choose the columnar store implementation matching the config type.
+        if isinstance(config.db_config, VastDBConfig):
+            store = VastDBStore(config.db_config)
+        elif isinstance(config.db_config, DuckDBParquetConfig):
+            store = DuckDBParquetStore(config.db_config)
+        else:
+            raise RuntimeError(
+                f"Unsupported db_config type: {type(config.db_config).__name__}"
+            )
         engine = create_engine(config.database_url or "sqlite:///:memory:")
         Base.metadata.create_all(engine)
         Session = sessionmaker(bind=engine)
diff --git a/src/improv/config.py b/src/improv/config.py
index 06451af..4de5b51 100644
--- a/src/improv/config.py
+++ b/src/improv/config.py
@@ -20,6 +20,7 @@
 from typing import TYPE_CHECKING
 
 from amplify_db_utils import DuckDBParquetConfig
+from amplify_db_utils.vastdb_store import VastDBConfig
 
 if TYPE_CHECKING:
     from improv.ids import ImageIdParser
@@ -30,7 +31,7 @@
 @dataclass
 class ImprovConfig:
     # Columnar store — required
-    db_config: DuckDBParquetConfig
+    db_config: DuckDBParquetConfig | VastDBConfig
 
     # OLTP database — required for service mode; omit for batch-producer use
     database_url: str | None = None
@@ -45,19 +46,33 @@ class ImprovConfig:
 
 def load_config() -> ImprovConfig:
     """Build ImprovConfig from environment variables."""
-    db_root = os.environ.get("IMPROV_DB_ROOT")
-    if not db_root:
-        raise RuntimeError(
-            "IMPROV_DB_ROOT environment variable is required. "
-            "Set it to a local path or s3:// URL."
+    backend = os.environ.get("IMPROV_DB_BACKEND", "duckdb").lower()
+
+    if backend == "vastdb":
+        db_cfg = VastDBConfig(
+            endpoint=os.environ["IMPROV_VASTDB_ENDPOINT"],
+            access_key=os.environ["IMPROV_VASTDB_ACCESS_KEY"],
+            secret_key=os.environ["IMPROV_VASTDB_SECRET_KEY"],
+            bucket=os.environ["IMPROV_VASTDB_BUCKET"],
+            schema=os.environ["IMPROV_VASTDB_SCHEMA"],
+            add_written_at=True,
+        )
+    elif backend == "duckdb":
+        db_root = os.environ.get("IMPROV_DB_ROOT")
+        if not db_root:
+            raise RuntimeError("IMPROV_DB_ROOT required for duckdb backend.")
+        db_cfg = DuckDBParquetConfig(
+            root=db_root,
+            s3_endpoint=os.environ.get("IMPROV_S3_ENDPOINT"),
+            s3_access_key=os.environ.get("IMPROV_S3_ACCESS_KEY"),
+            s3_secret_key=os.environ.get("IMPROV_S3_SECRET_KEY"),
         )
-
-    db_cfg = DuckDBParquetConfig(
-        root=db_root,
-        s3_endpoint=os.environ.get("IMPROV_S3_ENDPOINT"),
-        s3_access_key=os.environ.get("IMPROV_S3_ACCESS_KEY"),
-        s3_secret_key=os.environ.get("IMPROV_S3_SECRET_KEY"),
-    )
+    else:
+        # Fail fast: without this branch db_cfg would be unbound below.
+        raise RuntimeError(
+            f"Unsupported IMPROV_DB_BACKEND {backend!r}; "
+            "expected 'duckdb' or 'vastdb'."
+        )
 
     storage = None
     storage_path = os.environ.get("IMPROV_STORAGE_PATH")
diff --git a/tests/conftest.py b/tests/conftest.py
index 5eef05d..b4fb536 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,6 +16,17 @@
 from improv.service import ImageService
 from improv.store.tables import register_service_tables
 
+import os
+import uuid
+import pytest
+
+from amplify_db_utils import DuckDBParquetConfig, DuckDBParquetStore
+from amplify_db_utils.vastdb_store import VastDBConfig, VastDBStore
+
+# Store backends under test; VastDB is added only when IMPROV_TEST_VASTDB=1.
+_BACKENDS = ["duckdb"]
+if os.environ.get("IMPROV_TEST_VASTDB") == "1":
+    _BACKENDS.append("vastdb")
 
 # ---------------------------------------------------------------------------
 # Synthetic parsers for tests — no real instrument format assumed
@@ -72,10 +83,28 @@ def parsers(alpha_parser, beta_parser) -> list:
 # Columnar store
 # ---------------------------------------------------------------------------
 
-@pytest.fixture
-def store(tmp_path):
-    config = DuckDBParquetConfig(root=str(tmp_path / "store"))
-    return DuckDBParquetStore(config)
+@pytest.fixture(params=_BACKENDS)
+def store(request, tmp_path):
+    """Yield one columnar store per entry in ``_BACKENDS``."""
+    if request.param == "duckdb":
+        cfg = DuckDBParquetConfig(root=str(tmp_path / "store"))
+        yield DuckDBParquetStore(cfg)
+        return
+
+    # VastDB: per-test unique schema for isolation, torn down after.
+    cfg = VastDBConfig(
+        endpoint=os.environ["IMPROV_TEST_VASTDB_ENDPOINT"],
+        access_key=os.environ["IMPROV_TEST_VASTDB_ACCESS_KEY"],
+        secret_key=os.environ["IMPROV_TEST_VASTDB_SECRET_KEY"],
+        bucket=os.environ["IMPROV_TEST_VASTDB_BUCKET"],
+        schema=f"improv_test_{uuid.uuid4().hex[:8]}",
+        add_written_at=True,
+    )
+    s = VastDBStore(cfg)
+    try:
+        yield s
+    finally:
+        s.drop_schema()
 
 
 @pytest.fixture