From da193ac80102fefb88e434e872c09b18476717d2 Mon Sep 17 00:00:00 2001 From: pi-dal Date: Mon, 1 Jun 2026 10:05:26 +0800 Subject: [PATCH 1/2] feat: migrate graph backend to LadybugDB KuzuDB has been archived, and its extension server dependency makes OpenKL's graph and vector-search path brittle for fresh installs. LadybugDB is the active successor and preserves the embedded Cypher/vector-index API shape that OpenKL relies on, so this migrates the runtime backend while keeping the current CLI and graph schema behavior intact. Replace the runtime dependency and lockfile entry from kuzu to ladybug, update database initialization to create new graph stores under ~/.ok/ladybug, and add a legacy ~/.ok/kuzu warning instead of attempting implicit in-place reuse of old derived graph state. This keeps old Kuzu data untouched and makes the migration explicit for users. Update the doctor command and graph/vector-search modules for LadybugDB naming, tighten the type boundary around the migrated db/graph/vector modules, and keep the existing CREATE_VECTOR_INDEX / QUERY_VECTOR_INDEX path covered. The migration also includes smoke tests for schema creation, vector search, and legacy-path warning behavior. Refresh README and RFC references so the documented architecture points to LadybugDB and the new ~/.ok/ladybug location. Verification run before commit: - uv run pytest -q - uv run ruff check . && uv run ruff format --check . - uv run mypy openkl/db.py openkl/graph.py openkl/vector_search.py - isolated HOME CLI smoke covering ok doctor, mem add/search, graph cypher, and graph list-indexes --- README.md | 8 +++- openkl/cli.py | 6 +-- openkl/db.py | 30 ++++++++------ openkl/distill.py | 5 +-- openkl/graph.py | 38 ++++++++++-------- openkl/memory.py | 3 +- openkl/vector_search.py | 45 ++++++++++----------- pyproject.toml | 4 +- rfcs/0000-openkl-design.md | 4 +- tests/test_db_backend.py | 31 +++++++++++++++ tests/test_vector_search.py | 24 +++++++++++ uv.lock | 79 +++++++++++++++++++++---------------- 12 files changed, 182 insertions(+), 95 deletions(-) create mode 100644 tests/test_db_backend.py create mode 100644 tests/test_vector_search.py diff --git a/README.md b/README.md index f499b07..9b7369d 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ A local-first, open-source knowledge and memory layer for AI agents. OpenKL prov - **Memory Management**: Distilled insights, facts, and user-provided notes with temporal organization - **Grounding Store**: External knowledge corpus (docs, media, logs, transcripts) with automatic chunking -- **Knowledge Graph**: Structured entities and relationships with provenance using Kùzu DB +- **Knowledge Graph**: Structured entities and relationships with provenance using LadybugDB - **Citations**: Reproducible, verifiable, portable references with both transient and persisted modes - **Vector Search**: Native HNSW vector indexes with FastEmbed for semantic similarity - **Hybrid Search**: Cross-surface search across memory and grounding store @@ -115,12 +115,16 @@ uv run ok --help ## Architecture -OpenKL uses a file-based approach with an embedded Kùzu graph database: +OpenKL uses a file-based approach with an embedded LadybugDB graph database: - **Files**: Canonical content (grep-friendly) - **Graph**: Derived structure (fast retrieval) - **Citations**: Stable provenance and verification +### LadybugDB Migration + +OpenKL now uses LadybugDB instead of the archived KuzuDB package. Existing `~/.ok/kuzu` graph data is treated as a legacy derived index; keep it as a backup and rebuild into `~/.ok/ladybug` before relying on old graph state. + ## License Apache License 2.0 diff --git a/openkl/cli.py b/openkl/cli.py index 3141a0f..b06a773 100644 --- a/openkl/cli.py +++ b/openkl/cli.py @@ -35,11 +35,11 @@ def doctor( # Check Python packages try: - import kuzu # noqa: F401 + import ladybug # noqa: F401 - console.print("[green]✓[/green] Kùzu DB available") + console.print("[green]✓[/green] LadybugDB available") except ImportError: - console.print("[red]✗[/red] Kùzu DB not found") + console.print("[red]✗[/red] LadybugDB not found") try: import fastembed # noqa: F401 diff --git a/openkl/db.py b/openkl/db.py index 32c7975..f575214 100644 --- a/openkl/db.py +++ b/openkl/db.py @@ -1,18 +1,19 @@ """ -Database layer using Kùzu DB for graph storage and vector search. +Database layer using LadybugDB for graph storage and vector search. """ import logging from pathlib import Path -import kuzu +import ladybug as graphdb logger = logging.getLogger(__name__) # Default database path -DB_PATH = Path.home() / ".ok" / "kuzu" +DB_PATH = Path.home() / ".ok" / "ladybug" +LEGACY_KUZU_DB_PATH = Path.home() / ".ok" / "kuzu" -# Kùzu schema definitions +# LadybugDB schema definitions SCHEMA = [ # Memory nodes "CREATE NODE TABLE MemoryNote(id STRING PRIMARY KEY, text STRING, ts STRING, tags STRING[], vec FLOAT[384]);", @@ -31,22 +32,29 @@ ] # Global connection -_connection: kuzu.Connection | None = None +_connection: graphdb.Connection | None = None -def init_db(db_path: Path | None = None) -> kuzu.Connection: - """Initialize the Kùzu database with schema.""" +def init_db(db_path: Path | None = None) -> graphdb.Connection: + """Initialize the LadybugDB database with schema.""" global _connection if db_path is None: db_path = DB_PATH + if LEGACY_KUZU_DB_PATH.exists() and not DB_PATH.exists(): + logger.warning( + "Found legacy Kuzu database at %s. OpenKL now uses LadybugDB at %s. " + "Rebuild or migrate the derived graph before relying on old graph data.", + LEGACY_KUZU_DB_PATH, + DB_PATH, + ) # Ensure directory exists db_path.parent.mkdir(parents=True, exist_ok=True) # Create database and connection - db = kuzu.Database(str(db_path)) - conn = kuzu.Connection(db) + db = graphdb.Database(str(db_path)) + conn = graphdb.Connection(db) # Install and load vector extension try: @@ -72,7 +80,7 @@ def init_db(db_path: Path | None = None) -> kuzu.Connection: return conn -def get_connection() -> kuzu.Connection: +def get_connection() -> graphdb.Connection: """Get the database connection, initializing if needed.""" global _connection @@ -82,7 +90,7 @@ def get_connection() -> kuzu.Connection: return _connection -def close_connection(): +def close_connection() -> None: """Close the database connection.""" global _connection if _connection is not None: diff --git a/openkl/distill.py b/openkl/distill.py index afd74bf..c2d47c2 100644 --- a/openkl/distill.py +++ b/openkl/distill.py @@ -3,7 +3,6 @@ """ from pathlib import Path -from typing import Optional from rich.console import Console from rich.table import Table @@ -127,8 +126,8 @@ def create_memory_from_distillation( self, distilled_content: str, source_citations: list[str], - tags: Optional[list[str]] = None, - topics: Optional[list[str]] = None, + tags: list[str] | None = None, + topics: list[str] | None = None, ) -> str: """Create a memory from agent-distilled content with proper relationships.""" if not distilled_content.strip(): diff --git a/openkl/graph.py b/openkl/graph.py index 017207e..5df2c88 100644 --- a/openkl/graph.py +++ b/openkl/graph.py @@ -1,8 +1,8 @@ """ -Graph operations using Kùzu DB for OpenKL. +Graph operations using LadybugDB for OpenKL. """ -from typing import Any +from typing import Any, cast from rich.console import Console from rich.json import JSON @@ -16,11 +16,11 @@ class GraphManager: """Manages graph operations and Cypher queries.""" - def __init__(self): + def __init__(self) -> None: pass def run_cypher( - self, query: str, params: dict[str, Any] = None + self, query: str, params: dict[str, Any] | None = None ) -> list[dict[str, Any]]: """Execute a Cypher query and return results.""" if params is None: @@ -60,16 +60,18 @@ def get_entity_stats(self) -> dict[str, int]: # Count nodes result = conn.execute("MATCH (n) RETURN labels(n) as label, count(n) as count") for row in result: - label = row[0][0] if row[0] else "Unknown" - stats[f"{label}_count"] = row[1] + row_values = cast(list[Any], row) + label = row_values[0][0] if row_values[0] else "Unknown" + stats[f"{label}_count"] = row_values[1] - # Count relationships - Kùzu doesn't have type() function, so we'll count by relationship name + # Count relationships by name because the backend does not expose type(). rel_types = ["HAS_CHUNK", "Mentions", "MemMentions", "DerivedFrom", "HasTopic"] for rel_type in rel_types: result = conn.execute( f"MATCH ()-[r:{rel_type}]->() RETURN count(r) as count" ) - count = list(result)[0][0] if result else 0 + rows = cast(list[list[Any]], list(result)) + count = rows[0][0] if rows else 0 stats[f"{rel_type}_count"] = count return stats @@ -94,7 +96,9 @@ def get_memory_entities(self, memory_id: str) -> list[dict[str, Any]]: return self.run_cypher(query, {"id": memory_id}) - def print_results(self, results: list[dict[str, Any]], json_output: bool = False): + def print_results( + self, results: list[dict[str, Any]], json_output: bool = False + ) -> None: """Print query results.""" if not results: console.print("[yellow]No results found[/yellow]") @@ -108,7 +112,7 @@ def print_results(self, results: list[dict[str, Any]], json_output: bool = False if results: # Process first result to determine columns - first_result = self._process_kuzu_result(results[0]) + first_result = self._process_graph_result(results[0]) columns = [ col for col in first_result.keys() @@ -120,7 +124,7 @@ def print_results(self, results: list[dict[str, Any]], json_output: bool = False # Add rows for result in results: - processed_result = self._process_kuzu_result(result) + processed_result = self._process_graph_result(result) row_values = [] for col in columns: value = processed_result.get(col, "") @@ -132,10 +136,10 @@ def print_results(self, results: list[dict[str, Any]], json_output: bool = False console.print(table) - def _process_kuzu_result(self, result) -> dict[str, Any]: - """Process a Kùzu result object and filter out vector fields.""" + def _process_graph_result(self, result: Any) -> dict[str, Any]: + """Process a graph result object and filter out vector fields.""" if hasattr(result, "__dict__"): - # Handle Kùzu object - if it's a single column with an object, expand it + # If it's a single column with an object, expand it. result_dict = { k: v for k, v in result.__dict__.items() if not k.startswith("_") } @@ -160,9 +164,9 @@ def _process_kuzu_result(self, result) -> dict[str, Any]: if k.endswith("vec") or k == "vec": continue - # If the value is a Kùzu object, recursively process it + # If the value is a graph object, recursively process it. if hasattr(v, "__dict__"): - v = self._process_kuzu_result(v) + v = self._process_graph_result(v) # If the value is a list (like a vector), skip it elif isinstance(v, list) and len(v) > 10: # Likely a vector continue @@ -172,7 +176,7 @@ def _process_kuzu_result(self, result) -> dict[str, Any]: return filtered_result def _clean_string_representation(self, obj_str: str) -> str: - """Clean string representation of Kùzu objects to remove vectors.""" + """Clean string representation of graph objects to remove vectors.""" import re # Remove vector fields from string representation - more aggressive approach diff --git a/openkl/memory.py b/openkl/memory.py index a3af772..55316d1 100644 --- a/openkl/memory.py +++ b/openkl/memory.py @@ -183,7 +183,8 @@ def update( # Build update query updates = [] if text is not None: - updates.append(f"m.text = '{text.replace("'", "\\'")}'") + escaped_text = text.replace("'", "\\'") + updates.append(f"m.text = '{escaped_text}'") if tags is not None: tags_str = "[" + ", ".join([f"'{tag}'" for tag in tags]) + "]" updates.append(f"m.tags = {tags_str}") diff --git a/openkl/vector_search.py b/openkl/vector_search.py index 2553a5b..7471f73 100644 --- a/openkl/vector_search.py +++ b/openkl/vector_search.py @@ -1,13 +1,13 @@ """ -Vector search utilities leveraging Kùzu's native vector index capabilities. +Vector search utilities leveraging LadybugDB's native vector index capabilities. """ -from typing import Any +from typing import Any, cast from .db import get_connection -def _ensure_vector_extension_loaded(conn, verbose: bool = False): +def _ensure_vector_extension_loaded(conn: Any, verbose: bool = False) -> None: """Ensure the vector extension is loaded.""" try: # Try to load the vector extension @@ -25,7 +25,7 @@ def _ensure_vector_extension_loaded(conn, verbose: bool = False): raise RuntimeError(f"Failed to load vector extension: {e2}") from e2 -def create_vector_indexes(verbose: bool = False): +def create_vector_indexes(verbose: bool = False) -> None: """Create vector indexes for memory notes and chunks.""" conn = get_connection() @@ -89,7 +89,7 @@ def create_vector_indexes(verbose: bool = False): raise -def _ensure_vector_indexes_exist(conn, verbose: bool = False): +def _ensure_vector_indexes_exist(conn: Any, verbose: bool = False) -> None: """Ensure vector indexes exist, create them if they don't.""" try: # Check if memory index exists @@ -142,9 +142,9 @@ def _ensure_vector_indexes_exist(conn, verbose: bool = False): def search_memory_vectors( - query_vector: list[float], k: int = 5, verbose: bool = False + query_vector: Any, k: int = 5, verbose: bool = False ) -> list[dict[str, Any]]: - """Search memory notes using Kùzu's native vector index.""" + """Search memory notes using LadybugDB's native vector index.""" conn = get_connection() # Ensure vector extension is loaded @@ -175,9 +175,9 @@ def search_memory_vectors( results = [] for row in result: - memory_id, text, ts, tags, distance = row + memory_id, text, ts, tags, distance = cast(list[Any], row) # Convert distance to similarity (1 - distance for cosine similarity) - similarity = 1.0 - distance + similarity = 1.0 - float(distance) results.append( { "id": memory_id, @@ -193,9 +193,9 @@ def search_memory_vectors( def search_chunk_vectors( - query_vector: list[float], k: int = 5, verbose: bool = False + query_vector: Any, k: int = 5, verbose: bool = False ) -> list[dict[str, Any]]: - """Search document chunks using Kùzu's native vector index.""" + """Search document chunks using LadybugDB's native vector index.""" conn = get_connection() # Ensure vector extension is loaded @@ -228,9 +228,9 @@ def search_chunk_vectors( results = [] for row in result: - chunk_id, text, path, doc_id, distance = row + chunk_id, text, path, doc_id, distance = cast(list[Any], row) # Convert distance to similarity (1 - distance for cosine similarity) - similarity = 1.0 - distance + similarity = 1.0 - float(distance) results.append( { "id": chunk_id, @@ -261,16 +261,16 @@ def get_vector_stats() -> dict[str, Any]: # Count memory notes with vectors memory_result = conn.execute("MATCH (m:MemoryNote) RETURN count(m) as count") - memory_count = list(memory_result)[0][0] + memory_count = cast(list[list[Any]], list(memory_result))[0][0] # Count chunks with vectors chunk_result = conn.execute("MATCH (c:Chunk) RETURN count(c) as count") - chunk_count = list(chunk_result)[0][0] + chunk_count = cast(list[list[Any]], list(chunk_result))[0][0] # Get vector dimension try: vec_result = conn.execute("MATCH (m:MemoryNote) RETURN m.vec LIMIT 1") - vec_sample = list(vec_result)[0][0] + vec_sample = cast(list[list[Any]], list(vec_result))[0][0] vector_dim = len(vec_sample) if vec_sample else 0 except Exception: vector_dim = 0 @@ -301,14 +301,15 @@ def list_vector_indexes() -> list[dict[str, Any]]: result = conn.execute("CALL SHOW_INDEXES() RETURN *") indexes = [] for row in result: + row_values = cast(list[Any], row) indexes.append( { - "table_name": row[0], - "index_name": row[1], - "index_type": row[2], - "property_names": row[3], - "extension_loaded": row[4], - "index_definition": row[5], + "table_name": row_values[0], + "index_name": row_values[1], + "index_type": row_values[2], + "property_names": row_values[3], + "extension_loaded": row_values[4], + "index_definition": row_values[5], } ) return indexes diff --git a/pyproject.toml b/pyproject.toml index 366419c..7152bdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ ] dependencies = [ "typer>=0.9.0", - "kuzu>=0.8.0", + "ladybug>=0.17.0", "pyyaml>=6.0", "rich>=13.0", "fastembed>=0.2.0", @@ -82,6 +82,8 @@ disallow_untyped_defs = true [dependency-groups] dev = [ + "mypy>=1.0", "pre-commit>=4.3.0", + "pytest>=7.0", "ruff>=0.13.1", ] diff --git a/rfcs/0000-openkl-design.md b/rfcs/0000-openkl-design.md index 358d89f..c86e3c8 100644 --- a/rfcs/0000-openkl-design.md +++ b/rfcs/0000-openkl-design.md @@ -52,7 +52,7 @@ openkl/ ├─ citations/ # stable cite objects (JSONL) │ └─ .json └─ .ok/ # internal - ├─ kuzu/ # embedded Graph DB + ├─ ladybug/ # embedded Graph DB ├─ cache/ ├─ mapping.jsonl # docID → current path mapping └─ config.yml @@ -85,7 +85,7 @@ Insight text… ## 4. Graph + Index Design -**Engine**: [Kùzu DB](https://docs.kuzudb.com/) (embedded, Cypher, HNSW vectors, FT index) +**Engine**: [LadybugDB](https://docs.ladybugdb.com/) (embedded, Cypher, HNSW vectors, FT index) **Schema (conceptual)**: diff --git a/tests/test_db_backend.py b/tests/test_db_backend.py new file mode 100644 index 0000000..f2b2efa --- /dev/null +++ b/tests/test_db_backend.py @@ -0,0 +1,31 @@ +import logging + +import openkl.db as db_module +from openkl.db import close_connection, init_db + + +def test_init_db_creates_core_schema(tmp_path): + conn = init_db(tmp_path / "ladybug") + try: + result = conn.execute("MATCH (m:MemoryNote) RETURN count(m)") + assert list(result)[0][0] == 0 + finally: + close_connection() + + +def test_init_db_warns_when_legacy_kuzu_path_exists(tmp_path, monkeypatch, caplog): + legacy_path = tmp_path / "kuzu" + ladybug_path = tmp_path / "ladybug" + legacy_path.mkdir() + + monkeypatch.setattr(db_module, "LEGACY_KUZU_DB_PATH", legacy_path) + monkeypatch.setattr(db_module, "DB_PATH", ladybug_path) + + caplog.set_level(logging.WARNING, logger="openkl.db") + init_db() + try: + assert "Found legacy Kuzu database" in caplog.text + assert "OpenKL now uses LadybugDB" in caplog.text + assert ladybug_path.exists() + finally: + close_connection() diff --git a/tests/test_vector_search.py b/tests/test_vector_search.py new file mode 100644 index 0000000..acd06f3 --- /dev/null +++ b/tests/test_vector_search.py @@ -0,0 +1,24 @@ +from openkl.db import close_connection, init_db +from openkl.vector_search import create_vector_indexes, search_memory_vectors + + +def test_memory_vector_search_round_trip(tmp_path): + conn = init_db(tmp_path / "ladybug") + try: + conn.execute( + "CREATE (m:MemoryNote {id: $id, text: $text, ts: $ts, tags: $tags, vec: $vec})", + { + "id": "m-test", + "text": "ladybug migration", + "ts": "2026-05-31T00:00:00", + "tags": ["test"], + "vec": [0.1] * 384, + }, + ) + + create_vector_indexes() + + results = search_memory_vectors([0.1] * 384, k=1) + assert results[0]["id"] == "m-test" + finally: + close_connection() diff --git a/uv.lock b/uv.lock index 2f5a57c..0420fbf 100644 --- a/uv.lock +++ b/uv.lock @@ -553,37 +553,46 @@ wheels = [ ] [[package]] -name = "kuzu" -version = "0.11.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/66/fd/adbd05ccf81e6ad2674fcd3849d5d6ffeaf2141a9b8d1c1c4e282e923e1f/kuzu-0.11.2.tar.gz", hash = "sha256:9f224ec218ab165a18acaea903695779780d70335baf402d9b7f59ba389db0bd", size = 4902887, upload-time = "2025-08-21T05:17:00.152Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/91/bed837f5f49220a9f869da8a078b34a3484f210f7b57b267177821545c03/kuzu-0.11.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf4b25174cdb721aae47896ed62842d3859679607b493a9a6bbbcd9fb7fb3707", size = 3702618, upload-time = "2025-08-21T05:15:53.726Z" }, - { url = "https://files.pythonhosted.org/packages/72/8a/fd5e053b0055718afe00b6a99393a835c6254354128fbb7f66a35fd76089/kuzu-0.11.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:9a8567c53bfe282f4727782471ff718842ffead8c48c1762c1df9197408fc986", size = 4101371, upload-time = "2025-08-21T05:15:55.889Z" }, - { url = "https://files.pythonhosted.org/packages/ad/4b/e45cadc85bdc5079f432675bbe8d557600f0d4ab46fe24ef218374419902/kuzu-0.11.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d793bb5a0a14ada730a697eccac2a4c68b434b82692d985942900ef2003e099e", size = 6211974, upload-time = "2025-08-21T05:15:57.505Z" }, - { url = "https://files.pythonhosted.org/packages/10/ca/92d6a1e6452fcf06bfc423ce2cde819ace6b6e47921921cc8fae87c27780/kuzu-0.11.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1be4e9b6c93ca8591b1fb165f9b9a27d70a56af061831afcdfe7aebb89ee6ff", size = 6992196, upload-time = "2025-08-21T05:15:59.006Z" }, - { url = "https://files.pythonhosted.org/packages/49/6c/983fc6265dfc1169c87c4a0722f36ee665c5688e1166faeb4cd85e6af078/kuzu-0.11.2-cp310-cp310-win_amd64.whl", hash = "sha256:e0ec7a304c746a2a98ecfd7e7c3f6fe92c4dfee2e2565c0b7cb4cffd0c2e374a", size = 4303517, upload-time = "2025-08-21T05:16:00.814Z" }, - { url = "https://files.pythonhosted.org/packages/b5/14/8ae2c52657b93715052ecf47d70232f2c8d9ffe2d1ec3527c8e9c3cb2df5/kuzu-0.11.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf53b4f321a4c05882b14cef96d39a1e90fa993bab88a1554fb1565367553b8c", size = 3704177, upload-time = "2025-08-21T05:16:02.354Z" }, - { url = "https://files.pythonhosted.org/packages/2d/7a/bce7bb755e16f9ca855f76a3acc6cfa9fae88c4d6af9df3784c50b2120a5/kuzu-0.11.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:2d749883b74f5da5ff4a4b0635a98f6cc5165743995828924321d2ca797317cb", size = 4102372, upload-time = "2025-08-21T05:16:04.249Z" }, - { url = "https://files.pythonhosted.org/packages/c8/12/f5b1d51fcb78a86c078fb85cc53184ce962a3e86852d47d30e287a932e3f/kuzu-0.11.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:632507e5982928ed24fbb5e70ad143d7970bc4059046e77e0522707efbad303b", size = 6212492, upload-time = "2025-08-21T05:16:05.99Z" }, - { url = "https://files.pythonhosted.org/packages/81/96/d6e57af6ccf9e0697812ad3039c80b87b768cf2674833b0b23d317ea3427/kuzu-0.11.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5211884601f8f08ae97ba25006d0facde24077c5333411d944282b8a2068ab4", size = 6992888, upload-time = "2025-08-21T05:16:07.896Z" }, - { url = "https://files.pythonhosted.org/packages/40/ee/1f275ac5679a3f615ce0d9cf8c79001fdb535ccc8bc344e49b14484c7cd7/kuzu-0.11.2-cp311-cp311-win_amd64.whl", hash = "sha256:82a6c8bfe1278dc1010790e398bf772683797ef5c16052fa0f6f78bacbc59aa3", size = 4304064, upload-time = "2025-08-21T05:16:10.163Z" }, - { url = "https://files.pythonhosted.org/packages/73/ba/9f20d9e83681a0ddae8ec13046b116c34745fa0e66862d4e2d8414734ce0/kuzu-0.11.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aed88ffa695d07289a3d8557bd8f9e743298a4f4349208a60bbb06f4ebf15c26", size = 3703781, upload-time = "2025-08-21T05:16:12.232Z" }, - { url = "https://files.pythonhosted.org/packages/53/a0/bb815c0490f3d4d30389156369b9fe641e154f0d4b1e8340f09a76021922/kuzu-0.11.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:595824b03248af928e3faee57f6825d3a46920f2d3b9bd0c0bb7fc3fa097fce9", size = 4103990, upload-time = "2025-08-21T05:16:14.139Z" }, - { url = "https://files.pythonhosted.org/packages/a5/6f/97b647c0547a634a669055ff4cfd21a92ea3999aedc6a7fe9004f03f25e3/kuzu-0.11.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5674c6d9d26f5caa0c7ce6f34c02e4411894879aa5b2ce174fad576fa898523", size = 6211947, upload-time = "2025-08-21T05:16:16.48Z" }, - { url = "https://files.pythonhosted.org/packages/42/74/c7f1a1cfb08c05c91c5a94483be387e80fafab8923c4243a22e9cced5c1b/kuzu-0.11.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c61daf02da35b671f4c6f3c17105725c399a5e14b7349b00eafbcd24ac90034a", size = 6991879, upload-time = "2025-08-21T05:16:18.402Z" }, - { url = "https://files.pythonhosted.org/packages/54/9e/50d67d7bc08faed95ede6de1a6aa0d81079c98028ca99e32d09c2ab1aead/kuzu-0.11.2-cp312-cp312-win_amd64.whl", hash = "sha256:682096cd87dcbb8257f933ea4172d9dc5617a8d0a5bdd19cd66cf05b68881afd", size = 4305706, upload-time = "2025-08-21T05:16:20.244Z" }, - { url = "https://files.pythonhosted.org/packages/65/f0/5649a01af37def50293cd7c194afc19f09b343fd2b7f2b28e021a207f8ce/kuzu-0.11.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:17a11b67652e8b331c85cd1a1a30b32ee6783750084473abbab2aa1963ee2a3b", size = 3703740, upload-time = "2025-08-21T05:16:21.896Z" }, - { url = "https://files.pythonhosted.org/packages/24/e2/e0beb9080911fc1689899a42da0f83534949f43169fb80197def3ec1223f/kuzu-0.11.2-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:bdded35426210faeca8da11e8b4a54e60ccc0c1a832660d76587b5be133b0f55", size = 4104073, upload-time = "2025-08-21T05:16:23.819Z" }, - { url = "https://files.pythonhosted.org/packages/f2/4c/7a831c9c6e609692953db677f54788bd1dde4c9d34e6ba91f1e153d2e7fe/kuzu-0.11.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6116b609aac153f3523130b31295643d34a6c9509914c0fa9d804b26b23eee73", size = 6212263, upload-time = "2025-08-21T05:16:25.351Z" }, - { url = "https://files.pythonhosted.org/packages/47/95/615ef10b46b22ec1d33fdbba795e6e79733d9a244aabdeeb910f267ab36c/kuzu-0.11.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:09da5b8cb24dc6b281a6e4ac0f7f24226eb9909803b187e02d014da13ba57bcf", size = 6992492, upload-time = "2025-08-21T05:16:27.518Z" }, - { url = "https://files.pythonhosted.org/packages/a7/dd/2c905575913c743e6c67a5ca89a6b4ea9d9737238966d85d7e710f0d3e60/kuzu-0.11.2-cp313-cp313-win_amd64.whl", hash = "sha256:c663fb84682f8ebffbe7447a4e552a0e03bd29097d319084a2c53c2e032a780e", size = 4305267, upload-time = "2025-08-21T05:16:29.307Z" }, - { url = "https://files.pythonhosted.org/packages/89/05/44fbfc9055dba3f472ea4aaa8110635864d3441eede987526ef401680765/kuzu-0.11.2-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5c03fb95ffb9185c1519333f8ee92b7a9695aa7aa9a179e868a7d7bd13d10a16", size = 6216795, upload-time = "2025-08-21T05:16:30.944Z" }, - { url = "https://files.pythonhosted.org/packages/4f/ca/16c81dc68cc1e8918f8481e7ee89c28aa665c5cb36be7ad0fc1d0d295760/kuzu-0.11.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d857f0efddf26d5e2dc189facb84bf04a997e395972486669b418a470cc76034", size = 6996333, upload-time = "2025-08-21T05:16:32.568Z" }, - { url = "https://files.pythonhosted.org/packages/48/d8/9275c7e6312bd76dc670e8e2da68639757c22cf2c366e96527595a1d881c/kuzu-0.11.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb9e4641867c35b98ceaa604aa79832c0eeed41f5fd1b6da22b1c217b2f1b8ea", size = 6212202, upload-time = "2025-08-21T05:16:34.571Z" }, - { url = "https://files.pythonhosted.org/packages/88/89/67a977493c60bca3610845df13020711f357a5d80bf91549e4b48d877c2f/kuzu-0.11.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:553408d76a0b4fdecc1338b69b71d7bde42f6936d3b99d9852b30d33bda15978", size = 6992264, upload-time = "2025-08-21T05:16:36.316Z" }, - { url = "https://files.pythonhosted.org/packages/b6/49/869ceceb1d8a5ea032a35c734e55cfee919340889973623096da7eb94f6b/kuzu-0.11.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989a87fa13ffa39ab7773d968fe739ac4f8faf9ddb5dad72ced2eeef12180293", size = 6216814, upload-time = "2025-08-21T05:16:38.348Z" }, - { url = "https://files.pythonhosted.org/packages/bc/cd/933b34a246edb882a042eb402747167719222c05149b73b48ba7d310d554/kuzu-0.11.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e67420d04a9643fd6376a23b17b398a3e32bb0c2bd8abbf8d1e4697056596c7e", size = 6996343, upload-time = "2025-08-21T05:16:39.973Z" }, +name = "ladybug" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/78/29/a0e43961b719f80e274dae27b5bf14f82d6061f89b6878e54242c8ecfcee/ladybug-0.17.0.tar.gz", hash = "sha256:6edc8d14d0b17c659bb39af2c5432264c18e87d0a6dc042430553fc4304e790e", size = 10157573, upload-time = "2026-05-28T22:35:50.853Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/f9/ba1634fdbc63ea70f1ce1380e945d88a6888ee04e15c77df360c17085bd1/ladybug-0.17.0-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:0a861f447058cc8a1eb020389696f20ebf337fc95d1accc235571a09f6fbe947", size = 4176422, upload-time = "2026-05-28T22:34:51.663Z" }, + { url = "https://files.pythonhosted.org/packages/63/7e/04b3085d1ce69a245a93308be30c362c05e82e76eeff3a796fdba8dfc3a1/ladybug-0.17.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:fefcab39139d00fd28ef3626aadc87e44c981d89b3356cfef92889d85521c8dc", size = 4659486, upload-time = "2026-05-28T22:34:53.36Z" }, + { url = "https://files.pythonhosted.org/packages/b6/e8/558c351e94eb5ce0e8395e1f24137f69e48161c2896392d5611b1234ac56/ladybug-0.17.0-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0815e23b4f73dd1c03848659498c25adc50c3f9de695e77418f6be81108ac35", size = 7061307, upload-time = "2026-05-28T22:34:54.976Z" }, + { url = "https://files.pythonhosted.org/packages/9a/dd/403e5f8609dfc9bf0e2862d5a475adb3187a26f1a390856a21add4f13a10/ladybug-0.17.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0fd2ec61564bf5dd48efde85033579b0ada3d0650b1a1d2a389a51997178ee3f", size = 7915305, upload-time = "2026-05-28T22:34:56.722Z" }, + { url = "https://files.pythonhosted.org/packages/bc/75/fa4b1e752911dba5c23086dc86e6fac340784e9cd2138e1847246586406b/ladybug-0.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a35df06bc0bed039ce93d69b8fe47a8a6c418bc150165a84aede3224c137672c", size = 7888557, upload-time = "2026-05-28T22:34:58.444Z" }, + { url = "https://files.pythonhosted.org/packages/5d/aa/a726781c45d92cc0ba1ba7a4cd5871c0bbbc993d9bb0e30caf81eae96196/ladybug-0.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:353eab63e7ca22316e19b933f276aebbeeb62ec5a56a6eeac0e7109a82c5a461", size = 8773247, upload-time = "2026-05-28T22:35:00.212Z" }, + { url = "https://files.pythonhosted.org/packages/3f/28/bea65b841dfd7c59c5a69a5100d43048b82626b1a6bd33e8194f0e1962b1/ladybug-0.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:d5f305251e5e37d91e3f95e0f27fedbb3cd688352005c1bf8e4fcc36ce63276c", size = 5280512, upload-time = "2026-05-28T22:35:02.219Z" }, + { url = "https://files.pythonhosted.org/packages/10/12/c590b3f8d239104416057cb7394d5fcb9f4f530ba206daa9c1c2170bebf4/ladybug-0.17.0-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:c91a44d158d1b81fdd1380e38bcb98663277cc15ebf77048144d59575dcdfdd5", size = 4177662, upload-time = "2026-05-28T22:35:03.705Z" }, + { url = "https://files.pythonhosted.org/packages/d4/65/35242144f1aa650e2c2df9a6ff7a482d12955425c907a25dd24e67681553/ladybug-0.17.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:533a53023710d6e8df6531a4001ca7b96f73a38708b491bb96b5919c13042fe8", size = 4661177, upload-time = "2026-05-28T22:35:05.088Z" }, + { url = "https://files.pythonhosted.org/packages/be/59/a08816408293b62d1a48233000d0db2ce0a6d19e80fd4f2eb58aa57dff32/ladybug-0.17.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3708626e54f52688567cf6038ce74af4a3b991ae6b8e2f77abd24cb157c9fc77", size = 7062327, upload-time = "2026-05-28T22:35:06.637Z" }, + { url = "https://files.pythonhosted.org/packages/99/b6/81687d21dc4c44a3793e9e4a0390bae2e50ef52e4b4ea70bf49d0b687891/ladybug-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3ea17eb5abf86b89c0eebf1fa9c24ef8cb6d2274c30fa9df13e30690e7fd7af6", size = 7915818, upload-time = "2026-05-28T22:35:08.344Z" }, + { url = "https://files.pythonhosted.org/packages/8f/79/d2dc632bba752eebc304967f054a9f849509510c3427570c75c9b2e04415/ladybug-0.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a9906d02af550feeef83575b08a953411187ddab5f9a2190825affb9dbd74c6a", size = 7890285, upload-time = "2026-05-28T22:35:09.975Z" }, + { url = "https://files.pythonhosted.org/packages/ed/94/8ad9670bdba2c189cb7d2ef930b69f4341c5d6bb8659c49368aedfdc2cd6/ladybug-0.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7bb5c5608e8e013ed9536c9e49f5e565e5ffe6f93764fe48294b81f4643b202f", size = 8774367, upload-time = "2026-05-28T22:35:11.681Z" }, + { url = "https://files.pythonhosted.org/packages/ca/9b/727d0bde180ed96a34194d04cbf63672afdada17e4dd90c58dd9e6e57762/ladybug-0.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:738887b33dfdd94aa44a3bed094a4f165ed9ca702db4ef760b56cf947c87c37d", size = 5281537, upload-time = "2026-05-28T22:35:13.581Z" }, + { url = "https://files.pythonhosted.org/packages/d9/4e/21ccb1c2feafd98081470ae331e145592fd12ed6cde64fc8801ee5bba180/ladybug-0.17.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:774af1f777e3af8d6199558fec1bbed11a73efe07b2220de53e4c623db8796f8", size = 4178304, upload-time = "2026-05-28T22:35:15.108Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f7/a1779003b5e2b53006a17f1d5a443e5e9bd854cad0ea63e2070cb652bed3/ladybug-0.17.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:74f9d03ab4193fb41847f89fd32c9becdd1983a558dc5d00e23c1368760ea71f", size = 4663758, upload-time = "2026-05-28T22:35:16.624Z" }, + { url = "https://files.pythonhosted.org/packages/d1/4f/669c633ee3695c2fe589879103ee2639b53a595ff75c9aa284c9aa4ed2da/ladybug-0.17.0-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e7ee790f33a24a82a1179bb7b5a8e10c177f717de9845f1ab0241b58f3311ac4", size = 7058713, upload-time = "2026-05-28T22:35:18.515Z" }, + { url = "https://files.pythonhosted.org/packages/eb/95/bcd9635e9b5177a8fb2c4de24ea132053eb932bd281f77f8b7e446049b2a/ladybug-0.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be16051baddf815ddf608bed476d6cc8b6e8bdb603ca9da1ace7be5c0c3616c0", size = 7913797, upload-time = "2026-05-28T22:35:20.113Z" }, + { url = "https://files.pythonhosted.org/packages/38/0c/87472def5d2ef5f1a62c69ea4b9709304ce2f6ec0e829777503e323c55c9/ladybug-0.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aa2482403b4231ff828125bd574ff799393afc6b7c699ca31e61f7532396b0ec", size = 7885749, upload-time = "2026-05-28T22:35:21.988Z" }, + { url = "https://files.pythonhosted.org/packages/15/a6/280bcab11b8b35dc817209db30fc57b335af92e2ff00b3be78fb3f4a23e3/ladybug-0.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:60edc7a12c7a7f4cec4371797ee4361839adc9563fc623ec7b4bb258c1888145", size = 8771629, upload-time = "2026-05-28T22:35:23.624Z" }, + { url = "https://files.pythonhosted.org/packages/f2/8e/28238b764c813fa9a8430cc35609d26ba0b94a09c9c0e8debc0e17a16fcb/ladybug-0.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a53ae9dcc8636fda23c78430ac6fb547ea7ab3b6ac90cd78219deb7d9cd05a8", size = 5283482, upload-time = "2026-05-28T22:35:25.555Z" }, + { url = "https://files.pythonhosted.org/packages/d4/ad/ac807e3738d3f400058fcdb4354d86d7749bf140da94253c8f89c0fdeb7b/ladybug-0.17.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:c932ac3f677cbae443686f8fa506aceefa69dff26fc7464e88204f490c96febc", size = 4178530, upload-time = "2026-05-28T22:35:27.068Z" }, + { url = "https://files.pythonhosted.org/packages/3c/38/d625539302ce76e85213292aa64439379ac56df140445549fcbef666828e/ladybug-0.17.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:eff95469aeaad48042ab2291d3e4368ee10320cc52144c42258c2f0334dd13bc", size = 4663695, upload-time = "2026-05-28T22:35:28.599Z" }, + { url = "https://files.pythonhosted.org/packages/bb/2e/1f882abe36463b7b01a0bd30681f19e658ce085cfb3180bbb520b9101b5e/ladybug-0.17.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20514fa823b60db273ec05950e5d8667b865d9a92dce9a7f892366c036312693", size = 7058497, upload-time = "2026-05-28T22:35:29.97Z" }, + { url = "https://files.pythonhosted.org/packages/08/86/fee9f77217a5db2d273cf6b5f243360f201dee60457b557facfbb32cde66/ladybug-0.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ee8d6740438d4caff5d90b676e6fca48dcc3aafac97f6facf21f51913655ffa", size = 7913423, upload-time = "2026-05-28T22:35:31.62Z" }, + { url = "https://files.pythonhosted.org/packages/f5/0e/ddde4e0fe9ee887a5dedb514896ef98cfc7d36cc8456c913d5a220dc2b46/ladybug-0.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:04c77bdd3aa08b662635165293f768d26f865efb90c06db1f6929e9ca54bb555", size = 7886130, upload-time = "2026-05-28T22:35:33.306Z" }, + { url = "https://files.pythonhosted.org/packages/09/01/bdee64149a7f9f7880609d941a579d7d848083138c216c99ae06ec342855/ladybug-0.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa4979350522d113d8947535279c8ae464e21f49188b9fc817c14674b111e3ee", size = 8771760, upload-time = "2026-05-28T22:35:35.076Z" }, + { url = "https://files.pythonhosted.org/packages/20/7e/c92bcd1395e505cb22340f331c9fe2539b3d7099fa3c9a276d47f4adc5a9/ladybug-0.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:2527dd74ac08203f46787c47c732a76716200d323225b548233b2565afc1dc28", size = 5283061, upload-time = "2026-05-28T22:35:36.874Z" }, + { url = "https://files.pythonhosted.org/packages/1f/e8/c1b1a365e7eda91654935fe5951119bf91ff217107718f0dac7beb77b9cc/ladybug-0.17.0-cp314-cp314-macosx_13_0_arm64.whl", hash = "sha256:257bee3a30982681abcac3ede11308451df1c1b7e9a01ea03b29dc88ffc9f894", size = 4179850, upload-time = "2026-05-28T22:35:38.364Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/466c01b6ec05cd3bc8695c17ef89c89dfc5f5fd0f05b5764d6e788f1be01/ladybug-0.17.0-cp314-cp314-macosx_13_0_x86_64.whl", hash = "sha256:1e3cf3273a79801b7189eff4887fb09225f716960620f9673936e6c330f3b9f0", size = 4664133, upload-time = "2026-05-28T22:35:40.141Z" }, + { url = "https://files.pythonhosted.org/packages/6d/e8/4e7abc3cd9b38bbb48e5e3a2bd70f958e6d0cf180be64f9790140e0ae0f7/ladybug-0.17.0-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dcaa5ec52738b23a6b6647bc3a4310063fbd7d5868a453d1bd608a9da9a83890", size = 7060293, upload-time = "2026-05-28T22:35:41.948Z" }, + { url = "https://files.pythonhosted.org/packages/ba/ee/19005cf9fc3a1909cb2696301b2ba5746950cae87d7dfd01d4fa1b52e4d4/ladybug-0.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:45027585cfb1b16a6a7d5f58628618e0581a9b56e35e9570c87c3232dc5f9f45", size = 7913855, upload-time = "2026-05-28T22:35:43.55Z" }, + { url = "https://files.pythonhosted.org/packages/c2/bc/dfa7a35320c6576015d6883d605f88786ec5b2cc63017303b1b259562347/ladybug-0.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bb50a8fa65275bb7905365c0c343760ba609abb4ba4430c828363e5be832b664", size = 7887901, upload-time = "2026-05-28T22:35:45.208Z" }, + { url = "https://files.pythonhosted.org/packages/71/2f/e36a6d3277c92c50402945934e08c471907148dec474a7062ea799bc05ba/ladybug-0.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fae26ea3f2bf6ded9eee9357f695d04258d76f96822248f30969891c3c467966", size = 8772470, upload-time = "2026-05-28T22:35:47.068Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b7/6263c53afd1ff148dba328481e5809d4cd7d2c0a9096c7d7d51824fcf9da/ladybug-0.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:b13f1e31c73e0e635b8cd35372a9e74e709ae466c1deac136f157c39610b6794", size = 5447470, upload-time = "2026-05-28T22:35:49.083Z" }, ] [[package]] @@ -1288,7 +1297,7 @@ dependencies = [ { name = "beautifulsoup4" }, { name = "fastembed" }, { name = "kreuzberg" }, - { name = "kuzu" }, + { name = "ladybug" }, { name = "markdown" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -1308,7 +1317,9 @@ dev = [ [package.dev-dependencies] dev = [ + { name = "mypy" }, { name = "pre-commit" }, + { name = "pytest" }, { name = "ruff" }, ] @@ -1318,7 +1329,7 @@ requires-dist = [ { name = "beautifulsoup4" }, { name = "fastembed", specifier = ">=0.2.0" }, { name = "kreuzberg" }, - { name = "kuzu", specifier = ">=0.8.0" }, + { name = "ladybug", specifier = ">=0.17.0" }, { name = "markdown" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0" }, { name = "numpy", specifier = ">=1.21.0" }, @@ -1334,7 +1345,9 @@ provides-extras = ["dev"] [package.metadata.requires-dev] dev = [ + { name = "mypy", specifier = ">=1.0" }, { name = "pre-commit", specifier = ">=4.3.0" }, + { name = "pytest", specifier = ">=7.0" }, { name = "ruff", specifier = ">=0.13.1" }, ] From 00b499f6de2bd1ceb611c659b6cdf81f29b752b2 Mon Sep 17 00:00:00 2001 From: pi-dal Date: Mon, 1 Jun 2026 10:32:25 +0800 Subject: [PATCH 2/2] fix: harden LadybugDB review paths Address the actionable CodeRabbit review feedback on the LadybugDB migration.\n\nFail fast when the required LadybugDB VECTOR extension cannot be installed or loaded so OpenKL does not continue with a partially functional graph backend. Make schema creation idempotent with IF NOT EXISTS instead of relying on substring checks against exception messages.\n\nParameterize the MemoryManager.update existence check and SET clause so memory text and tags are passed through LadybugDB parameters instead of interpolated Cypher fragments. Validate vector-search inputs before touching the database, constrain k to a bounded positive integer, and serialize only finite numeric vector values into QUERY_VECTOR_INDEX calls.\n\nTighten tests around these behaviors: vector-extension failure now raises, vector search rejects invalid vectors and k before DB access, memory updates handle quoted text and tags through parameters, and the vector-search round trip asserts a non-empty result before indexing.\n\nVerification:\n- uv run pytest -q\n- uv run ruff check . && uv run ruff format --check .\n- uv run mypy openkl/db.py openkl/graph.py openkl/vector_search.py\n- HOME="/var/folders/vf/8zspxjpd00l_hmmb95ygld940000gn/T/tmp.fIREvsujDR" uv run ok doctor --- openkl/db.py | 33 +++++++++------------ openkl/memory.py | 19 ++++++------ openkl/vector_search.py | 56 +++++++++++++++++++++++++++++------- tests/test_db_backend.py | 26 ++++++++++++++++- tests/test_memory_manager.py | 32 +++++++++++++++++++++ tests/test_vector_search.py | 30 ++++++++++++++++++- 6 files changed, 156 insertions(+), 40 deletions(-) create mode 100644 tests/test_memory_manager.py diff --git a/openkl/db.py b/openkl/db.py index f575214..a46b6fe 100644 --- a/openkl/db.py +++ b/openkl/db.py @@ -16,19 +16,19 @@ # LadybugDB schema definitions SCHEMA = [ # Memory nodes - "CREATE NODE TABLE MemoryNote(id STRING PRIMARY KEY, text STRING, ts STRING, tags STRING[], vec FLOAT[384]);", + "CREATE NODE TABLE IF NOT EXISTS MemoryNote(id STRING PRIMARY KEY, text STRING, ts STRING, tags STRING[], vec FLOAT[384]);", # Grounding Store nodes - "CREATE NODE TABLE Doc(id STRING PRIMARY KEY, path STRING, sha256 STRING);", - "CREATE NODE TABLE Chunk(id STRING PRIMARY KEY, text STRING, span STRING, vec FLOAT[384]);", + "CREATE NODE TABLE IF NOT EXISTS Doc(id STRING PRIMARY KEY, path STRING, sha256 STRING);", + "CREATE NODE TABLE IF NOT EXISTS Chunk(id STRING PRIMARY KEY, text STRING, span STRING, vec FLOAT[384]);", # Entity and topic nodes - "CREATE NODE TABLE Entity(id STRING PRIMARY KEY, name STRING, type STRING);", - "CREATE NODE TABLE Topic(id STRING PRIMARY KEY, name STRING);", + "CREATE NODE TABLE IF NOT EXISTS Entity(id STRING PRIMARY KEY, name STRING, type STRING);", + "CREATE NODE TABLE IF NOT EXISTS Topic(id STRING PRIMARY KEY, name STRING);", # Relationships - "CREATE REL TABLE HAS_CHUNK(FROM Doc TO Chunk);", - "CREATE REL TABLE Mentions(FROM Chunk TO Entity);", - "CREATE REL TABLE MemMentions(FROM MemoryNote TO Entity);", - "CREATE REL TABLE DerivedFrom(FROM MemoryNote TO Chunk);", - "CREATE REL TABLE HasTopic(FROM MemoryNote TO Topic);", + "CREATE REL TABLE IF NOT EXISTS HAS_CHUNK(FROM Doc TO Chunk);", + "CREATE REL TABLE IF NOT EXISTS Mentions(FROM Chunk TO Entity);", + "CREATE REL TABLE IF NOT EXISTS MemMentions(FROM MemoryNote TO Entity);", + "CREATE REL TABLE IF NOT EXISTS DerivedFrom(FROM MemoryNote TO Chunk);", + "CREATE REL TABLE IF NOT EXISTS HasTopic(FROM MemoryNote TO Topic);", ] # Global connection @@ -62,18 +62,13 @@ def init_db(db_path: Path | None = None) -> graphdb.Connection: conn.execute("LOAD VECTOR;") logger.info("Vector extension installed and loaded") except Exception as e: - logger.warning(f"Failed to install vector extension: {e}") + logger.error("Failed to install vector extension: %s", e) + raise RuntimeError("Vector extension is required for OpenKL") from e # Create schema for stmt in SCHEMA: - try: - conn.execute(stmt) - logger.debug(f"Executed schema statement: {stmt[:50]}...") - except Exception as e: - # Ignore "already exists" errors - if "already exists" not in str(e).lower(): - logger.error(f"Failed to execute schema statement: {stmt}") - raise + conn.execute(stmt) + logger.debug(f"Executed schema statement: {stmt[:50]}...") _connection = conn logger.info(f"Database initialized at {db_path}") diff --git a/openkl/memory.py b/openkl/memory.py index 55316d1..6c30609 100644 --- a/openkl/memory.py +++ b/openkl/memory.py @@ -176,24 +176,25 @@ def update( conn = get_connection() # Check if memory exists - result = conn.execute(f"MATCH (m:MemoryNote {{id: '{memory_id}'}}) RETURN m") + result = conn.execute( + "MATCH (m:MemoryNote {id: $id}) RETURN m", {"id": memory_id} + ) if not list(result): return False # Build update query updates = [] + params: dict[str, Any] = {"id": memory_id} if text is not None: - escaped_text = text.replace("'", "\\'") - updates.append(f"m.text = '{escaped_text}'") + updates.append("m.text = $text") + params["text"] = text if tags is not None: - tags_str = "[" + ", ".join([f"'{tag}'" for tag in tags]) + "]" - updates.append(f"m.tags = {tags_str}") + updates.append("m.tags = $tags") + params["tags"] = tags if updates: - update_query = ( - f"MATCH (m:MemoryNote {{id: '{memory_id}'}}) SET {', '.join(updates)}" - ) - conn.execute(update_query) + update_query = f"MATCH (m:MemoryNote {{id: $id}}) SET {', '.join(updates)}" + conn.execute(update_query, params) # Update topics if provided if topics is not None: diff --git a/openkl/vector_search.py b/openkl/vector_search.py index 7471f73..d72f8e2 100644 --- a/openkl/vector_search.py +++ b/openkl/vector_search.py @@ -2,10 +2,48 @@ Vector search utilities leveraging LadybugDB's native vector index capabilities. """ +import math from typing import Any, cast from .db import get_connection +MAX_VECTOR_SEARCH_K = 1000 + + +def _normalize_query_vector(query_vector: Any) -> list[float]: + """Convert a vector-like value into a finite numeric list.""" + if hasattr(query_vector, "tolist"): + query_vector = query_vector.tolist() + + if not isinstance(query_vector, (list, tuple)): + raise TypeError("query_vector must be a list or tuple of numbers") + + try: + normalized = [float(value) for value in query_vector] + except (TypeError, ValueError) as exc: + raise ValueError("query_vector must contain only numeric values") from exc + + if not normalized: + raise ValueError("query_vector must not be empty") + if not all(math.isfinite(value) for value in normalized): + raise ValueError("query_vector must contain only finite numeric values") + + return normalized + + +def _normalize_k(k: int) -> int: + """Validate vector-search result count before query interpolation.""" + if not isinstance(k, int) or isinstance(k, bool): + raise TypeError("k must be an integer") + if not 1 <= k <= MAX_VECTOR_SEARCH_K: + raise ValueError(f"k must be between 1 and {MAX_VECTOR_SEARCH_K}") + return k + + +def _serialize_vector(values: list[float]) -> str: + """Serialize a validated numeric vector for LadybugDB vector procedures.""" + return "[" + ", ".join(f"{value:.17g}" for value in values) + "]" + def _ensure_vector_extension_loaded(conn: Any, verbose: bool = False) -> None: """Ensure the vector extension is loaded.""" @@ -145,6 +183,9 @@ def search_memory_vectors( query_vector: Any, k: int = 5, verbose: bool = False ) -> list[dict[str, Any]]: """Search memory notes using LadybugDB's native vector index.""" + query_vector = _normalize_query_vector(query_vector) + k = _normalize_k(k) + conn = get_connection() # Ensure vector extension is loaded @@ -153,12 +194,8 @@ def search_memory_vectors( # Ensure vector indexes exist _ensure_vector_indexes_exist(conn, verbose) - # Convert numpy array to list if needed - if hasattr(query_vector, "tolist"): - query_vector = query_vector.tolist() - # Create query with inline vector values - vector_str = str(query_vector) + vector_str = _serialize_vector(query_vector) query = f""" CALL QUERY_VECTOR_INDEX( 'MemoryNote', @@ -196,6 +233,9 @@ def search_chunk_vectors( query_vector: Any, k: int = 5, verbose: bool = False ) -> list[dict[str, Any]]: """Search document chunks using LadybugDB's native vector index.""" + query_vector = _normalize_query_vector(query_vector) + k = _normalize_k(k) + conn = get_connection() # Ensure vector extension is loaded @@ -204,12 +244,8 @@ def search_chunk_vectors( # Ensure vector indexes exist _ensure_vector_indexes_exist(conn, verbose) - # Convert numpy array to list if needed - if hasattr(query_vector, "tolist"): - query_vector = query_vector.tolist() - # Create query with inline vector values - vector_str = str(query_vector) + vector_str = _serialize_vector(query_vector) query = f""" CALL QUERY_VECTOR_INDEX( 'Chunk', diff --git a/tests/test_db_backend.py b/tests/test_db_backend.py index f2b2efa..2247758 100644 --- a/tests/test_db_backend.py +++ b/tests/test_db_backend.py @@ -1,5 +1,7 @@ import logging +import pytest + import openkl.db as db_module from openkl.db import close_connection, init_db @@ -8,11 +10,33 @@ def test_init_db_creates_core_schema(tmp_path): conn = init_db(tmp_path / "ladybug") try: result = conn.execute("MATCH (m:MemoryNote) RETURN count(m)") - assert list(result)[0][0] == 0 + row = next(iter(result)) + assert row[0] == 0 finally: close_connection() +def test_init_db_raises_when_vector_extension_is_unavailable(tmp_path, monkeypatch): + class FakeDatabase: + def __init__(self, path: str): + self.path = path + + class FakeConnection: + def __init__(self, db: FakeDatabase): + self.db = db + + def execute(self, stmt: str): + if stmt == "INSTALL VECTOR;": + raise RuntimeError("vector extension unavailable") + return [] + + monkeypatch.setattr(db_module.graphdb, "Database", FakeDatabase) + monkeypatch.setattr(db_module.graphdb, "Connection", FakeConnection) + + with pytest.raises(RuntimeError, match="Vector extension is required"): + init_db(tmp_path / "ladybug") + + def test_init_db_warns_when_legacy_kuzu_path_exists(tmp_path, monkeypatch, caplog): legacy_path = tmp_path / "kuzu" ladybug_path = tmp_path / "ladybug" diff --git a/tests/test_memory_manager.py b/tests/test_memory_manager.py new file mode 100644 index 0000000..49d379f --- /dev/null +++ b/tests/test_memory_manager.py @@ -0,0 +1,32 @@ +from openkl.db import close_connection, init_db +from openkl.memory import MemoryManager + + +def test_memory_update_parameterizes_text_and_tags(tmp_path): + conn = init_db(tmp_path / "ladybug") + manager = MemoryManager(tmp_path / "ok-home") + memory_id = "m-20260601-test" + + try: + conn.execute( + "CREATE (m:MemoryNote {id: $id, text: $text, ts: $ts, tags: $tags, vec: $vec})", + { + "id": memory_id, + "text": "original", + "ts": "2026-06-01T00:00:00", + "tags": ["old"], + "vec": [0.0] * 384, + }, + ) + + assert manager.update(memory_id, text="O'Reilly note", tags=["review's tag"]) + + result = conn.execute( + "MATCH (m:MemoryNote {id: $id}) RETURN m.text, m.tags", + {"id": memory_id}, + ) + row = next(iter(result)) + assert row[0] == "O'Reilly note" + assert row[1] == ["review's tag"] + finally: + close_connection() diff --git a/tests/test_vector_search.py b/tests/test_vector_search.py index acd06f3..aff7985 100644 --- a/tests/test_vector_search.py +++ b/tests/test_vector_search.py @@ -1,5 +1,12 @@ +import pytest + +import openkl.vector_search as vector_search from openkl.db import close_connection, init_db -from openkl.vector_search import create_vector_indexes, search_memory_vectors +from openkl.vector_search import ( + create_vector_indexes, + search_chunk_vectors, + search_memory_vectors, +) def test_memory_vector_search_round_trip(tmp_path): @@ -19,6 +26,27 @@ def test_memory_vector_search_round_trip(tmp_path): create_vector_indexes() results = search_memory_vectors([0.1] * 384, k=1) + assert results, "Expected at least one vector-search result" assert results[0]["id"] == "m-test" finally: close_connection() + + +def test_vector_search_rejects_non_numeric_vectors_before_db_access(monkeypatch): + def fail_get_connection(): + raise AssertionError("database should not be touched for invalid vectors") + + monkeypatch.setattr(vector_search, "get_connection", fail_get_connection) + + with pytest.raises(ValueError, match="query_vector must contain only numeric"): + search_memory_vectors(["not-a-number"], k=1) + + +def test_vector_search_rejects_invalid_k_before_db_access(monkeypatch): + def fail_get_connection(): + raise AssertionError("database should not be touched for invalid k") + + monkeypatch.setattr(vector_search, "get_connection", fail_get_connection) + + with pytest.raises(ValueError, match="k must be between"): + search_chunk_vectors([0.1] * 384, k=0)