Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ A local-first, open-source knowledge and memory layer for AI agents. OpenKL prov

- **Memory Management**: Distilled insights, facts, and user-provided notes with temporal organization
- **Grounding Store**: External knowledge corpus (docs, media, logs, transcripts) with automatic chunking
- **Knowledge Graph**: Structured entities and relationships with provenance using Kùzu DB
- **Knowledge Graph**: Structured entities and relationships with provenance using LadybugDB
- **Citations**: Reproducible, verifiable, portable references with both transient and persisted modes
- **Vector Search**: Native HNSW vector indexes with FastEmbed for semantic similarity
- **Hybrid Search**: Cross-surface search across memory and grounding store
Expand Down Expand Up @@ -115,12 +115,16 @@ uv run ok --help

## Architecture

OpenKL uses a file-based approach with an embedded Kùzu graph database:
OpenKL uses a file-based approach with an embedded LadybugDB graph database:

- **Files**: Canonical content (grep-friendly)
- **Graph**: Derived structure (fast retrieval)
- **Citations**: Stable provenance and verification

### LadybugDB Migration

OpenKL now uses LadybugDB instead of the archived Kùzu (KuzuDB) package. Existing graph data under `~/.ok/kuzu` is treated as a legacy derived index: keep it as a backup, and rebuild the graph into `~/.ok/ladybug` before relying on any graph state that was created with Kùzu.

## License

Apache License 2.0
Expand Down
6 changes: 3 additions & 3 deletions openkl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ def doctor(

# Check Python packages
try:
import kuzu # noqa: F401
import ladybug # noqa: F401

console.print("[green]✓[/green] Kùzu DB available")
console.print("[green]✓[/green] LadybugDB available")
except ImportError:
console.print("[red]✗[/red] Kùzu DB not found")
console.print("[red]✗[/red] LadybugDB not found")

try:
import fastembed # noqa: F401
Expand Down
63 changes: 33 additions & 30 deletions openkl/db.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,81 @@
"""
Database layer using Kùzu DB for graph storage and vector search.
Database layer using LadybugDB for graph storage and vector search.
"""

import logging
from pathlib import Path

import kuzu
import ladybug as graphdb
Comment thread
coderabbitai[bot] marked this conversation as resolved.

logger = logging.getLogger(__name__)

# Default database path
DB_PATH = Path.home() / ".ok" / "kuzu"
DB_PATH = Path.home() / ".ok" / "ladybug"
LEGACY_KUZU_DB_PATH = Path.home() / ".ok" / "kuzu"

# Kùzu schema definitions
# LadybugDB schema definitions
SCHEMA = [
# Memory nodes
"CREATE NODE TABLE MemoryNote(id STRING PRIMARY KEY, text STRING, ts STRING, tags STRING[], vec FLOAT[384]);",
"CREATE NODE TABLE IF NOT EXISTS MemoryNote(id STRING PRIMARY KEY, text STRING, ts STRING, tags STRING[], vec FLOAT[384]);",
# Grounding Store nodes
"CREATE NODE TABLE Doc(id STRING PRIMARY KEY, path STRING, sha256 STRING);",
"CREATE NODE TABLE Chunk(id STRING PRIMARY KEY, text STRING, span STRING, vec FLOAT[384]);",
"CREATE NODE TABLE IF NOT EXISTS Doc(id STRING PRIMARY KEY, path STRING, sha256 STRING);",
"CREATE NODE TABLE IF NOT EXISTS Chunk(id STRING PRIMARY KEY, text STRING, span STRING, vec FLOAT[384]);",
# Entity and topic nodes
"CREATE NODE TABLE Entity(id STRING PRIMARY KEY, name STRING, type STRING);",
"CREATE NODE TABLE Topic(id STRING PRIMARY KEY, name STRING);",
"CREATE NODE TABLE IF NOT EXISTS Entity(id STRING PRIMARY KEY, name STRING, type STRING);",
"CREATE NODE TABLE IF NOT EXISTS Topic(id STRING PRIMARY KEY, name STRING);",
# Relationships
"CREATE REL TABLE HAS_CHUNK(FROM Doc TO Chunk);",
"CREATE REL TABLE Mentions(FROM Chunk TO Entity);",
"CREATE REL TABLE MemMentions(FROM MemoryNote TO Entity);",
"CREATE REL TABLE DerivedFrom(FROM MemoryNote TO Chunk);",
"CREATE REL TABLE HasTopic(FROM MemoryNote TO Topic);",
"CREATE REL TABLE IF NOT EXISTS HAS_CHUNK(FROM Doc TO Chunk);",
"CREATE REL TABLE IF NOT EXISTS Mentions(FROM Chunk TO Entity);",
"CREATE REL TABLE IF NOT EXISTS MemMentions(FROM MemoryNote TO Entity);",
"CREATE REL TABLE IF NOT EXISTS DerivedFrom(FROM MemoryNote TO Chunk);",
"CREATE REL TABLE IF NOT EXISTS HasTopic(FROM MemoryNote TO Topic);",
]

# Global connection
_connection: kuzu.Connection | None = None
_connection: graphdb.Connection | None = None


def init_db(db_path: Path | None = None) -> kuzu.Connection:
"""Initialize the Kùzu database with schema."""
def init_db(db_path: Path | None = None) -> graphdb.Connection:
"""Initialize the LadybugDB database with schema."""
global _connection

if db_path is None:
db_path = DB_PATH
if LEGACY_KUZU_DB_PATH.exists() and not DB_PATH.exists():
logger.warning(
"Found legacy Kuzu database at %s. OpenKL now uses LadybugDB at %s. "
"Rebuild or migrate the derived graph before relying on old graph data.",
LEGACY_KUZU_DB_PATH,
DB_PATH,
)

# Ensure directory exists
db_path.parent.mkdir(parents=True, exist_ok=True)

# Create database and connection
db = kuzu.Database(str(db_path))
conn = kuzu.Connection(db)
db = graphdb.Database(str(db_path))
conn = graphdb.Connection(db)

# Install and load vector extension
try:
conn.execute("INSTALL VECTOR;")
conn.execute("LOAD VECTOR;")
logger.info("Vector extension installed and loaded")
except Exception as e:
logger.warning(f"Failed to install vector extension: {e}")
logger.error("Failed to install vector extension: %s", e)
raise RuntimeError("Vector extension is required for OpenKL") from e

# Create schema
for stmt in SCHEMA:
try:
conn.execute(stmt)
logger.debug(f"Executed schema statement: {stmt[:50]}...")
except Exception as e:
# Ignore "already exists" errors
if "already exists" not in str(e).lower():
logger.error(f"Failed to execute schema statement: {stmt}")
raise
conn.execute(stmt)
logger.debug(f"Executed schema statement: {stmt[:50]}...")

_connection = conn
logger.info(f"Database initialized at {db_path}")
return conn


def get_connection() -> kuzu.Connection:
def get_connection() -> graphdb.Connection:
"""Get the database connection, initializing if needed."""
global _connection

Expand All @@ -82,7 +85,7 @@ def get_connection() -> kuzu.Connection:
return _connection


def close_connection():
def close_connection() -> None:
"""Close the database connection."""
global _connection
if _connection is not None:
Expand Down
5 changes: 2 additions & 3 deletions openkl/distill.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""

from pathlib import Path
from typing import Optional

from rich.console import Console
from rich.table import Table
Expand Down Expand Up @@ -127,8 +126,8 @@ def create_memory_from_distillation(
self,
distilled_content: str,
source_citations: list[str],
tags: Optional[list[str]] = None,
topics: Optional[list[str]] = None,
tags: list[str] | None = None,
topics: list[str] | None = None,
) -> str:
"""Create a memory from agent-distilled content with proper relationships."""
if not distilled_content.strip():
Expand Down
38 changes: 21 additions & 17 deletions openkl/graph.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Graph operations using Kùzu DB for OpenKL.
Graph operations using LadybugDB for OpenKL.
"""

from typing import Any
from typing import Any, cast

from rich.console import Console
from rich.json import JSON
Expand All @@ -16,11 +16,11 @@
class GraphManager:
"""Manages graph operations and Cypher queries."""

def __init__(self):
def __init__(self) -> None:
pass

def run_cypher(
self, query: str, params: dict[str, Any] = None
self, query: str, params: dict[str, Any] | None = None
) -> list[dict[str, Any]]:
"""Execute a Cypher query and return results."""
if params is None:
Expand Down Expand Up @@ -60,16 +60,18 @@ def get_entity_stats(self) -> dict[str, int]:
# Count nodes
result = conn.execute("MATCH (n) RETURN labels(n) as label, count(n) as count")
for row in result:
label = row[0][0] if row[0] else "Unknown"
stats[f"{label}_count"] = row[1]
row_values = cast(list[Any], row)
label = row_values[0][0] if row_values[0] else "Unknown"
stats[f"{label}_count"] = row_values[1]

# Count relationships - Kùzu doesn't have type() function, so we'll count by relationship name
# Count relationships by name because the backend does not expose type().
rel_types = ["HAS_CHUNK", "Mentions", "MemMentions", "DerivedFrom", "HasTopic"]
for rel_type in rel_types:
result = conn.execute(
f"MATCH ()-[r:{rel_type}]->() RETURN count(r) as count"
)
count = list(result)[0][0] if result else 0
rows = cast(list[list[Any]], list(result))
count = rows[0][0] if rows else 0
stats[f"{rel_type}_count"] = count

return stats
Expand All @@ -94,7 +96,9 @@ def get_memory_entities(self, memory_id: str) -> list[dict[str, Any]]:

return self.run_cypher(query, {"id": memory_id})

def print_results(self, results: list[dict[str, Any]], json_output: bool = False):
def print_results(
self, results: list[dict[str, Any]], json_output: bool = False
) -> None:
"""Print query results."""
if not results:
console.print("[yellow]No results found[/yellow]")
Expand All @@ -108,7 +112,7 @@ def print_results(self, results: list[dict[str, Any]], json_output: bool = False

if results:
# Process first result to determine columns
first_result = self._process_kuzu_result(results[0])
first_result = self._process_graph_result(results[0])
columns = [
col
for col in first_result.keys()
Expand All @@ -120,7 +124,7 @@ def print_results(self, results: list[dict[str, Any]], json_output: bool = False

# Add rows
for result in results:
processed_result = self._process_kuzu_result(result)
processed_result = self._process_graph_result(result)
row_values = []
for col in columns:
value = processed_result.get(col, "")
Expand All @@ -132,10 +136,10 @@ def print_results(self, results: list[dict[str, Any]], json_output: bool = False

console.print(table)

def _process_kuzu_result(self, result) -> dict[str, Any]:
"""Process a Kùzu result object and filter out vector fields."""
def _process_graph_result(self, result: Any) -> dict[str, Any]:
"""Process a graph result object and filter out vector fields."""
if hasattr(result, "__dict__"):
# Handle Kùzu object - if it's a single column with an object, expand it
# If it's a single column with an object, expand it.
result_dict = {
k: v for k, v in result.__dict__.items() if not k.startswith("_")
}
Expand All @@ -160,9 +164,9 @@ def _process_kuzu_result(self, result) -> dict[str, Any]:
if k.endswith("vec") or k == "vec":
continue

# If the value is a Kùzu object, recursively process it
# If the value is a graph object, recursively process it.
if hasattr(v, "__dict__"):
v = self._process_kuzu_result(v)
v = self._process_graph_result(v)
# If the value is a list (like a vector), skip it
elif isinstance(v, list) and len(v) > 10: # Likely a vector
continue
Expand All @@ -172,7 +176,7 @@ def _process_kuzu_result(self, result) -> dict[str, Any]:
return filtered_result

def _clean_string_representation(self, obj_str: str) -> str:
"""Clean string representation of Kùzu objects to remove vectors."""
"""Clean string representation of graph objects to remove vectors."""
import re

# Remove vector fields from string representation - more aggressive approach
Expand Down
18 changes: 10 additions & 8 deletions openkl/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,23 +176,25 @@ def update(
conn = get_connection()

# Check if memory exists
result = conn.execute(f"MATCH (m:MemoryNote {{id: '{memory_id}'}}) RETURN m")
result = conn.execute(
"MATCH (m:MemoryNote {id: $id}) RETURN m", {"id": memory_id}
)
if not list(result):
return False

# Build update query
updates = []
params: dict[str, Any] = {"id": memory_id}
if text is not None:
updates.append(f"m.text = '{text.replace("'", "\\'")}'")
updates.append("m.text = $text")
params["text"] = text
if tags is not None:
tags_str = "[" + ", ".join([f"'{tag}'" for tag in tags]) + "]"
updates.append(f"m.tags = {tags_str}")
updates.append("m.tags = $tags")
params["tags"] = tags

if updates:
update_query = (
f"MATCH (m:MemoryNote {{id: '{memory_id}'}}) SET {', '.join(updates)}"
)
conn.execute(update_query)
update_query = f"MATCH (m:MemoryNote {{id: $id}}) SET {', '.join(updates)}"
conn.execute(update_query, params)

# Update topics if provided
if topics is not None:
Expand Down
Loading