From cf02c94f9c2997462c3fde364906420b541eb70e Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Mon, 4 May 2026 08:44:44 +0200 Subject: [PATCH] fix(phlower): reclaim freed pages so DROP TABLE actually shrinks the file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit prod-us PVC filled at 49 GB on 2026-05-04 04:47 UTC during the hourly purge. Emergency purge dropped invocations_legacy (26 GB), but the file didn't shrink — SQLite's DROP TABLE marks pages as free in the file's internal freelist without returning them to the OS, and the DROP transaction's WAL temporarily ballooned to fill what little headroom was left. Result: disk full, INSERTs failing with sqlite3.OperationalError, ~14h of records silently dropped through the buffer-cap safety net. Three changes: - PRAGMA auto_vacuum=INCREMENTAL set in _connect() before any tables are created. Tracks freed pages in a separate list so they can be reclaimed by PRAGMA incremental_vacuum. Must be set on a fresh DB — on existing DBs it requires a full VACUUM to apply, which is exactly why this PR pairs with a PVC drop / fresh-start deploy. - _reclaim_free_pages() called after DROP TABLE in purge_old_partitions(). Uses PRAGMA incremental_vacuum to truncate the freed pages off the file. The PRAGMA emits one result row per freed page so the cursor MUST be drained with fetchall() — without that, only one page gets reclaimed and the file barely shrinks. - SQLITE_DISK_USAGE_PCT_CAP default lowered from 85 → 75, giving more headroom for WAL spikes during a DROP. With incremental_vacuum keeping the file lean this should rarely fire, but the cushion matters when it does. --- src/phlower/config.py | 2 +- src/phlower/sqlite_store.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/phlower/config.py b/src/phlower/config.py index 277f979..f6b6688 100644 --- a/src/phlower/config.py +++ b/src/phlower/config.py @@ -65,7 +65,7 @@ class Config: default_factory=lambda: int(os.environ.get("SQLITE_INVOCATION_RETENTION_HOURS", "120")) ) sqlite_disk_usage_pct_cap: int = field( - default_factory=lambda: int(os.environ.get("SQLITE_DISK_USAGE_PCT_CAP", "85")) + default_factory=lambda: int(os.environ.get("SQLITE_DISK_USAGE_PCT_CAP", "75")) ) snapshot_interval_seconds: int = field( default_factory=lambda: int(os.environ.get("SNAPSHOT_INTERVAL_SECONDS", "60")) diff --git a/src/phlower/sqlite_store.py b/src/phlower/sqlite_store.py index 0a03071..78689a4 100644 --- a/src/phlower/sqlite_store.py +++ b/src/phlower/sqlite_store.py @@ -123,6 +123,14 @@ def __init__(self, db_path: str) -> None: def _connect(self, path: str) -> sqlite3.Connection: conn = sqlite3.connect(path, check_same_thread=False) + # Incremental auto_vacuum: tracks freed pages in a separate list so + # ``PRAGMA incremental_vacuum`` can return them to the OS without a + # full VACUUM. SQLite requires this PRAGMA to be set BEFORE any + # tables are created; on existing DBs it's a no-op without a + # follow-up full VACUUM. We set it here so a fresh DB picks it up + # at first init_schema() — keeps the file size tracking live data + # instead of the high-water mark. + conn.execute("PRAGMA auto_vacuum=INCREMENTAL") conn.execute("PRAGMA journal_mode=WAL") conn.execute("PRAGMA synchronous=NORMAL") conn.execute("PRAGMA busy_timeout=5000") @@ -336,6 +344,10 @@ def purge_old_partitions(self, retention_hours: int) -> int: Each DROP TABLE is a metadata operation (fast, predictable); the whole purge replaces the multi-minute row-by-row DELETE that previously starved the flush loop. + + After the drops, ``incremental_vacuum`` returns the freed pages + to the filesystem — without it SQLite holds onto them as internal + free pages and the file never shrinks, eventually filling the PVC. """ cutoff_ts = time.time() - retention_hours * 3600 cutoff_suffix = _suffix_for_ts(cutoff_ts) @@ -354,8 +366,32 @@ def purge_old_partitions(self, retention_hours: int) -> int: # Legacy tables: drop wholesale once their newest row is past # retention. Cheap to check — single MAX() per table. self._maybe_drop_legacy(cutoff_ts) + if dropped or not self._has_legacy_inv: + self._reclaim_free_pages() return dropped + def _reclaim_free_pages(self) -> None: + """Return any freed pages to the OS via incremental_vacuum. + + No-op unless the DB was created with ``auto_vacuum=INCREMENTAL``; + on those DBs it's fast (proportional to free-page count, not DB + size). Called after DROP TABLE so file size tracks live data. + + The PRAGMA emits one result row per freed page, so the cursor + MUST be drained — without ``fetchall()`` only one page gets + reclaimed and the file barely shrinks. + """ + try: + row = self._conn.execute("PRAGMA freelist_count").fetchone() + free_pages = row[0] if row else 0 + if free_pages == 0: + return + self._conn.execute("PRAGMA incremental_vacuum").fetchall() + self._conn.commit() + logger.info("incremental_vacuum reclaimed %d pages", free_pages) + except Exception: + logger.exception("incremental_vacuum failed") + def _maybe_drop_legacy(self, cutoff_ts: float) -> None: if self._has_legacy_inv: row = self._conn.execute(