From e0069fd8c6a480dfc1d1e62779297aa1dae8fd1a Mon Sep 17 00:00:00 2001 From: "david.delagneau" Date: Thu, 21 May 2026 09:13:07 -0600 Subject: [PATCH] feat: implement local indexer for project-knowledge and add memory hybrid search functionality --- .gitignore | 1 + core/services/local-rag-index.md | 102 ++++++++ scripts/aiw/README.md | 13 ++ scripts/aiw/indexer.py | 258 +++++++++++++++++++++ scripts/aiw/test_indexer.py | 56 +++++ scripts/mcp/aiw-context-mcp/README.md | 9 + scripts/mcp/aiw-context-mcp/server.py | 98 ++++++++ scripts/mcp/aiw-context-mcp/test_server.py | 38 +++ 8 files changed, 575 insertions(+) create mode 100644 core/services/local-rag-index.md create mode 100644 scripts/aiw/indexer.py create mode 100644 scripts/aiw/test_indexer.py diff --git a/.gitignore b/.gitignore index 66a4b3d..c945ff4 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,4 @@ project-knowledge/.obsidian/cache/ # AI Workspace local service runtime .aiw/runtime/ +.aiw/indexes/ diff --git a/core/services/local-rag-index.md b/core/services/local-rag-index.md new file mode 100644 index 0000000..ad803a9 --- /dev/null +++ b/core/services/local-rag-index.md @@ -0,0 +1,102 @@ +--- +type: service-design +status: active +updated: 2026-05-21 +tags: + - ai-workspace + - rag + - index +--- + +# Local RAG Index + +## Goal + +Add retrieval over canonical workspace memory without replacing the human-readable `project-knowledge/` vault. + +The local index is derived and disposable. If the index disagrees with Markdown, the Markdown wins. + +--- + +## Current Implementation + +The first implementation is dependency-free and lexical: + +```text +scripts/aiw/indexer.py +``` + +It reads: + +```text +project-knowledge/**/*.md +``` + +and writes: + +```text +.aiw/indexes/<profile>/project-knowledge.jsonl +.aiw/indexes/<profile>/manifest.json +``` + +It skips: + +```text +project-knowledge/09-templates/ +``` + +so Obsidian templates do not appear as real memory. 
+ +--- + +## Commands + +Build the index: + +```bash +python3 scripts/aiw/indexer.py build --profile fidelity +``` + +Check index status: + +```bash +python3 scripts/aiw/indexer.py status --profile fidelity +``` + +Search the index: + +```bash +python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity +``` + +--- + +## MCP Exposure + +`aiw-context-mcp` exposes: + +```text +memory_hybrid_search +``` + +Current behavior: + +- searches the derived local index when it exists +- returns cited paths, headings, snippets, scores, hashes, and mtimes +- falls back to live Markdown search when no index exists +- remains read-only + +--- + +## Future Upgrade Path + +This layer can later add: + +- full-text ranking +- embeddings +- Qdrant or Chroma as a local vector store +- hybrid lexical + semantic search +- reranking +- Mattermost evidence indexing with strict source filters + +Do not make the vector store canonical. It should remain rebuildable from Markdown and selected evidence. diff --git a/scripts/aiw/README.md b/scripts/aiw/README.md index b4cd790..7871bf1 100644 --- a/scripts/aiw/README.md +++ b/scripts/aiw/README.md @@ -32,6 +32,18 @@ python3 scripts/aiw/services.py start --profile fidelity --group inbox The service manager unifies startup and status. It does not move capture behavior into the MCP. +## Local project-knowledge index + +The workspace includes a dependency-free local indexer for canonical Markdown memory. The index is derived from `project-knowledge/` and written under `.aiw/indexes/<profile>/`; it is safe to delete and rebuild. + +```bash +python3 scripts/aiw/indexer.py build --profile fidelity +python3 scripts/aiw/indexer.py status --profile fidelity +python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity +``` + +`aiw-context-mcp` exposes the same derived search through the read-only `memory_hybrid_search` tool and falls back to live Markdown search if the index has not been built yet. 
#!/usr/bin/env python3
"""Dependency-free local indexer for AI Workspace canonical Markdown memory.

This is intentionally a small lexical/hybrid-ready index. It keeps
`project-knowledge/` as the source of truth and writes a derived, disposable
JSONL index under `.aiw/indexes/<profile>/`.
"""

from __future__ import annotations

import argparse
import hashlib
import json
import re
import time
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any


ROOT = Path(__file__).resolve().parents[2]
INDEX_ROOT = ROOT / ".aiw" / "indexes"
DEFAULT_PROFILE = "fidelity"
MAX_CHARS = 1800
OVERLAP_CHARS = 180

# Top-level vault folder holding Obsidian templates; never indexed as memory.
_TEMPLATES_DIR = "09-templates"
# The pattern itself requires two or more characters, so no length filter is needed.
_TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_-]{1,}")
_WHITESPACE_RE = re.compile(r"\s+")
# ATX heading: one to six '#' followed by whitespace or end of line, so that
# '#tag' lines are not mistaken for headings.
_HEADING_RE = re.compile(r"#{1,6}(?:\s+|$)")
# Opening/closing marker of a fenced code block.
_FENCE_RE = re.compile(r"`{3,}|~{3,}")


@dataclass(frozen=True)
class Chunk:
    """One indexed slice of a Markdown file (serialised as one JSONL row)."""

    chunk_id: str  # first 16 hex chars of a sha256 over path, positions and text
    path: str  # POSIX-style path of the source file relative to ROOT
    heading: str  # heading of the section the chunk came from ("" before any heading)
    text: str  # chunk body
    mtime: float  # st_mtime of the source file when it was indexed
    sha256: str  # sha256 hex digest of the source file's bytes


def project_knowledge_dir(profile: str) -> Path:
    """Return the Markdown vault for *profile*.

    A profile-specific vault under ``profiles/<profile>/project-knowledge`` wins
    when it exists; otherwise the shared ``project-knowledge`` folder is used.
    """
    candidate = ROOT / "profiles" / profile / "project-knowledge"
    if candidate.exists():
        return candidate
    return ROOT / "project-knowledge"


def index_dir(profile: str) -> Path:
    """Return the directory holding the derived index files for *profile*."""
    return INDEX_ROOT / profile


def index_path(profile: str) -> Path:
    """Return the JSONL chunk index file for *profile*."""
    return index_dir(profile) / "project-knowledge.jsonl"


def manifest_path(profile: str) -> Path:
    """Return the manifest file describing the last build for *profile*."""
    return index_dir(profile) / "manifest.json"


def normalize_space(text: str) -> str:
    """Collapse every whitespace run to a single space and trim the ends."""
    return _WHITESPACE_RE.sub(" ", text).strip()


def tokens(text: str) -> set[str]:
    """Return the set of lowercase search tokens (two or more characters) in *text*."""
    return set(_TOKEN_RE.findall(text.lower()))


def iter_markdown_files(base: Path) -> list[Path]:
    """Return the sorted Markdown files under *base*, skipping the templates folder.

    The exclusion compares the first path component instead of a string prefix
    so it does not depend on the platform's path separator. Directories whose
    name happens to end in ``.md`` are ignored.
    """
    return [
        path
        for path in sorted(base.rglob("*.md"))
        if path.is_file() and path.relative_to(base).parts[0] != _TEMPLATES_DIR
    ]


def heading_for_line(line: str, current: str) -> str:
    """Return the heading text of *line*, or *current* when it is not a heading.

    Only ATX headings (``#`` to ``######`` followed by whitespace or end of
    line) count. A heading marker with no text also yields *current*.
    """
    stripped = line.strip()
    match = _HEADING_RE.match(stripped)
    if match is None:
        return current
    return stripped[match.end():].strip() or current


def _fence_run(stripped: str) -> str:
    """Return the leading run of fence characters in *stripped*, or ``""``."""
    match = _FENCE_RE.match(stripped)
    return match.group(0) if match else ""


def split_sections(text: str) -> list[tuple[str, str]]:
    """Split Markdown *text* into ``(heading, body)`` sections.

    A new section starts at each heading whose text differs from the current
    heading; the heading line itself is kept in the body. Lines inside fenced
    code blocks never start a section, so ``# comment`` lines in shell snippets
    stay with their surrounding text. Sections with an empty body are dropped.
    """
    sections: list[tuple[str, list[str]]] = [("", [])]
    current_heading = ""
    open_fence = ""  # marker that opened the fenced block we are inside, if any
    for line in text.splitlines():
        stripped = line.strip()
        run = _fence_run(stripped)
        if open_fence:
            # A fence closes on a line made only of the same character, at
            # least as long as the opening marker.
            if run and stripped == run and run[0] == open_fence[0] and len(run) >= len(open_fence):
                open_fence = ""
            sections[-1][1].append(line)
            continue
        if run:
            open_fence = run
            sections[-1][1].append(line)
            continue
        new_heading = heading_for_line(line, current_heading)
        if new_heading != current_heading and _HEADING_RE.match(stripped):
            current_heading = new_heading
            sections.append((current_heading, [line]))
        else:
            sections[-1][1].append(line)
    result: list[tuple[str, str]] = []
    for heading, lines in sections:
        body = "\n".join(lines).strip()
        if body:
            result.append((heading, body))
    return result


def chunk_text(section_text: str, max_chars: int = MAX_CHARS, overlap_chars: int = OVERLAP_CHARS) -> list[str]:
    """Split *section_text* into overlapping chunks of at most *max_chars*.

    A chunk is cut at the last paragraph break or sentence end in its second
    half when one exists. Consecutive chunks overlap by *overlap_chars*.

    Raises:
        ValueError: if *max_chars* is not positive.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")
    overlap = max(0, overlap_chars)
    text = section_text.strip()
    if len(text) <= max_chars:
        return [text] if text else []
    chunks: list[str] = []
    start = 0
    while start < len(text):
        end = min(len(text), start + max_chars)
        if end < len(text):
            boundary = max(text.rfind("\n\n", start, end), text.rfind(". ", start, end))
            if boundary > start + max_chars // 2:
                end = boundary + 1
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        if end >= len(text):
            break
        # Always move forward: an overlap as large as the chunk would otherwise
        # pin `start` in place and loop forever.
        next_start = end - overlap
        start = next_start if next_start > start else end
    return chunks


def build_chunks(profile: str) -> list[Chunk]:
    """Read every indexable Markdown file for *profile* and return its chunks."""
    base = project_knowledge_dir(profile)
    chunks: list[Chunk] = []
    for path in iter_markdown_files(base):
        data = path.read_bytes()
        raw = data.decode("utf-8", errors="replace")
        rel = path.relative_to(ROOT).as_posix()
        # Hash the bytes on disk so the digest matches the file itself even
        # when it has CRLF newlines or invalid UTF-8.
        digest = hashlib.sha256(data).hexdigest()
        mtime = path.stat().st_mtime
        for section_index, (heading, section) in enumerate(split_sections(raw)):
            for chunk_index, chunk in enumerate(chunk_text(section)):
                chunk_digest = hashlib.sha256(f"{rel}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16]
                chunks.append(Chunk(chunk_id=chunk_digest, path=rel, heading=heading, text=chunk, mtime=mtime, sha256=digest))
    return chunks


def _atomic_write_text(path: Path, content: str) -> None:
    """Write *content* to a sibling temp file, then rename it over *path*."""
    tmp = path.with_name(path.name + ".tmp")
    tmp.write_text(content, encoding="utf-8")
    tmp.replace(path)


def write_index(profile: str) -> dict[str, Any]:
    """Rebuild the JSONL index and manifest for *profile*; return the manifest.

    Both files are written to a temp file and renamed into place so a
    concurrent reader never sees a half-written index.
    """
    index_dir(profile).mkdir(parents=True, exist_ok=True)
    chunks = build_chunks(profile)
    target = index_path(profile)
    _atomic_write_text(
        target,
        "".join(json.dumps(asdict(chunk), ensure_ascii=False, sort_keys=True) + "\n" for chunk in chunks),
    )
    manifest = {
        "profile": profile,
        "source": project_knowledge_dir(profile).relative_to(ROOT).as_posix(),
        "canonical": False,
        "derived_from": "project-knowledge",
        "index_type": "lexical-markdown-chunks",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "file_count": len({chunk.path for chunk in chunks}),
        "chunk_count": len(chunks),
        "index_path": target.relative_to(ROOT).as_posix(),
    }
    _atomic_write_text(
        manifest_path(profile),
        json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
    )
    return manifest


def read_index(profile: str) -> list[dict[str, Any]]:
    """Load the index rows for *profile*; return ``[]`` when no index exists.

    Reading is best-effort: blank lines, lines that are not valid JSON and
    JSON values that are not objects are skipped, so callers can rely on every
    row being a dict.
    """
    path = index_path(profile)
    if not path.is_file():
        return []
    rows: list[dict[str, Any]] = []
    with path.open(encoding="utf-8", errors="replace") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(row, dict):
                rows.append(row)
    return rows


def _load_manifest(profile: str) -> dict[str, Any] | None:
    """Return the manifest for *profile*, or ``None`` when missing or unreadable."""
    path = manifest_path(profile)
    if not path.is_file():
        return None
    try:
        manifest = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):  # ValueError covers JSON and decoding errors
        return None
    return manifest if isinstance(manifest, dict) else None


def score_chunk(query: str, query_tokens: set[str], chunk: dict[str, Any]) -> float:
    """Return the lexical relevance of *chunk* for *query* (``0.0`` for no match).

    The score is ``5.0`` per exact occurrence of the query in path, heading
    and text, plus ``1.25`` per shared token, plus ``1.5`` when the query
    occurs in the heading and ``1.0`` when it occurs in the path.
    """
    needle = query.lower()
    path = str(chunk.get("path") or "").lower()
    heading = str(chunk.get("heading") or "").lower()
    text = str(chunk.get("text") or "").lower()
    haystack = f"{path} {heading} {text}"
    # str.count("") returns len + 1, so an empty query must not count as a hit.
    exact = haystack.count(needle) if needle else 0
    overlap = len(query_tokens & tokens(haystack))
    if exact == 0 and overlap == 0:
        return 0.0
    heading_bonus = 1.5 if needle and needle in heading else 0.0
    path_bonus = 1.0 if needle and needle in path else 0.0
    return exact * 5.0 + overlap * 1.25 + heading_bonus + path_bonus


def snippet_for(query: str, text: str, width: int = 520) -> str:
    """Return up to *width* characters of *text* around the first match.

    The window is centred on the first exact occurrence of *query*, else on
    the earliest occurrence of any query token, else it starts at the top.
    Whitespace in the snippet is normalised.
    """
    lowered = text.lower()
    index = lowered.find(query.lower()) if query else -1
    if index < 0:
        positions = [pos for pos in (lowered.find(term) for term in tokens(query)) if pos >= 0]
        index = min(positions) if positions else 0
    start = max(0, index - width // 2)
    end = min(len(text), start + width)
    return normalize_space(text[start:end])


def search_index(profile: str, query: str, limit: int = 10) -> dict[str, Any]:
    """Search the derived index for *profile* and return a JSON-ready payload.

    Matches are ordered by descending score, then path, then chunk id. The
    manifest is included when it is readable and is ``{}`` otherwise.

    Raises:
        SystemExit: if *query* is empty after stripping.
    """
    query = query.strip()
    if not query:
        raise SystemExit("query is required")
    query_tokens = tokens(query)
    scored: list[tuple[float, dict[str, Any]]] = []
    for row in read_index(profile):
        score = score_chunk(query, query_tokens, row)
        if score > 0:
            scored.append((score, row))
    # Coerce the tie-break keys to str so rows with null fields still sort.
    scored.sort(key=lambda item: (-item[0], str(item[1].get("path") or ""), str(item[1].get("chunk_id") or "")))
    matches = [
        {
            "score": round(score, 3),
            "path": row.get("path"),
            "heading": row.get("heading"),
            "chunk_id": row.get("chunk_id"),
            "snippet": snippet_for(query, str(row.get("text") or "")),
            "mtime": row.get("mtime"),
            "sha256": row.get("sha256"),
        }
        for score, row in scored[: max(limit, 0)]
    ]
    manifest = _load_manifest(profile) or {}
    return {"profile": profile, "query": query, "canonical": False, "source": "derived-index", "manifest": manifest, "matches": matches}


def _age_seconds(created_at: Any) -> int | None:
    """Return whole seconds since the ISO timestamp *created_at*, or ``None``."""
    if not created_at:
        return None
    try:
        created = datetime.fromisoformat(str(created_at))
    except ValueError:
        return None
    return int(time.time() - created.timestamp())


def status(profile: str) -> dict[str, Any]:
    """Describe the index for *profile*.

    Without a readable manifest the profile is reported as not indexed.
    Otherwise the manifest is returned with ``indexed``, ``index_bytes`` and
    ``age_seconds`` added.
    """
    path = index_path(profile)
    manifest = _load_manifest(profile)
    if manifest is None:
        return {"profile": profile, "indexed": False, "index_path": path.relative_to(ROOT).as_posix()}
    indexed = path.is_file()
    manifest["indexed"] = indexed
    manifest["index_bytes"] = path.stat().st_size if indexed else 0
    manifest["age_seconds"] = _age_seconds(manifest.get("created_at"))
    return manifest


def main() -> None:
    """Command-line entry point: ``build``, ``status`` or ``search``; prints JSON."""
    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(dest="command", required=True)
    for name in ("build", "status"):
        subparsers.add_parser(name).add_argument("--profile", default=DEFAULT_PROFILE)
    search = subparsers.add_parser("search")
    search.add_argument("query")
    search.add_argument("--profile", default=DEFAULT_PROFILE)
    search.add_argument("--limit", type=int, default=10)
    args = parser.parse_args()
    if args.command == "build":
        payload = write_index(args.profile)
    elif args.command == "search":
        # Keep the result size between 1 and 50 regardless of what was passed.
        payload = search_index(args.profile, args.query, limit=max(1, min(args.limit, 50)))
    else:
        payload = status(args.profile)
    print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))


if __name__ == "__main__":
    main()
class IndexerTests(unittest.TestCase):
    """End-to-end checks that run the indexer against a throwaway workspace root."""

    def test_build_skips_templates_and_searches_canonical_files(self) -> None:
        """Templates are left out of the index and real notes are searchable."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            real = root / "project-knowledge" / "03-context" / "project.md"
            template = root / "project-knowledge" / "09-templates" / "daily.md"
            real.parent.mkdir(parents=True)
            template.parent.mkdir(parents=True)
            real.write_text("# XFlow\nDismissal lifecycle context", encoding="utf-8")
            template.write_text("# XFlow\nTemplate-only text", encoding="utf-8")

            with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
                manifest = indexer.write_index("fidelity")
                result = indexer.search_index("fidelity", "dismissal lifecycle", limit=5)

        self.assertEqual(manifest["file_count"], 1)
        self.assertEqual(len(result["matches"]), 1)
        # Normalise separators so the assertion does not depend on the platform.
        self.assertIn("03-context/project.md", Path(result["matches"][0]["path"]).as_posix())

    def test_status_reports_unindexed_profile(self) -> None:
        """A profile without a manifest is reported as not indexed."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
                result = indexer.status("fidelity")

        self.assertFalse(result["indexed"])
        self.assertIn(".aiw/indexes/fidelity/project-knowledge.jsonl", Path(result["index_path"]).as_posix())

    def test_cli_search_payload_is_json_serializable(self) -> None:
        """The payload the CLI prints for a real search round-trips through JSON."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            note = root / "project-knowledge" / "01-current" / "current-work.md"
            note.parent.mkdir(parents=True)
            note.write_text("# Current work\nDismissal lifecycle sequencing", encoding="utf-8")

            with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
                indexer.write_index("fidelity")
                payload = indexer.search_index("fidelity", "dismissal lifecycle", limit=5)

        # Serialise with the same options main() uses, then compare the round trip.
        decoded = json.loads(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))
        self.assertEqual(len(decoded["matches"]), 1)
        self.assertEqual(decoded, payload)
# Same token pattern as `tokens` in scripts/aiw/indexer.py; the pattern itself
# requires two or more characters, so no extra length filter is needed.
_SEARCH_TOKEN_RE = re.compile(r"[a-z0-9][a-z0-9_-]{1,}")


def index_path(profile: str) -> Path:
    """Return the derived JSONL index file for *profile* under ``INDEX_ROOT``."""
    return INDEX_ROOT / profile / "project-knowledge.jsonl"


def index_manifest_path(profile: str) -> Path:
    """Return the index manifest file for *profile* under ``INDEX_ROOT``."""
    return INDEX_ROOT / profile / "manifest.json"


def search_tokens(text: str) -> set[str]:
    """Return the set of lowercase search tokens (two or more characters) in *text*."""
    return set(_SEARCH_TOKEN_RE.findall(text.lower()))


def read_project_index(profile: str) -> list[dict[str, Any]]:
    """Load the index rows for *profile*; return ``[]`` when no index exists.

    Reading is best-effort: blank lines, lines that are not valid JSON and
    JSON values that are not objects are skipped, so every returned row
    supports ``.get``.
    """
    path = index_path(profile)
    if not path.is_file():
        return []
    rows: list[dict[str, Any]] = []
    with path.open(encoding="utf-8", errors="replace") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(row, dict):
                rows.append(row)
    return rows


def indexed_snippet(query: str, text: str, width: int = 520) -> str:
    """Return up to *width* characters of *text* around the first match.

    The window is centred on the first exact occurrence of *query*, else on
    the earliest occurrence of any query token, else it starts at the top.
    Whitespace runs in the snippet are collapsed to single spaces.
    """
    lowered = text.lower()
    index = lowered.find(query.lower()) if query else -1
    if index < 0:
        positions = [pos for pos in (lowered.find(term) for term in search_tokens(query)) if pos >= 0]
        index = min(positions) if positions else 0
    start = max(0, index - width // 2)
    end = min(len(text), start + width)
    return re.sub(r"\s+", " ", text[start:end]).strip()


def score_index_row(query: str, query_tokens: set[str], row: dict[str, Any]) -> float:
    """Return the lexical relevance of *row* for *query* (``0.0`` for no match).

    The score is ``5.0`` per exact occurrence of the query in path, heading
    and text, plus ``1.25`` per shared token, plus ``1.5`` when the query
    occurs in the heading and ``1.0`` when it occurs in the path.
    """
    needle = query.lower()
    path = str(row.get("path") or "").lower()
    heading = str(row.get("heading") or "").lower()
    text = str(row.get("text") or "").lower()
    haystack = f"{path} {heading} {text}"
    # str.count("") returns len + 1, so an empty query must not count as a hit.
    exact = haystack.count(needle) if needle else 0
    overlap = len(query_tokens & search_tokens(haystack))
    if exact == 0 and overlap == 0:
        return 0.0
    heading_bonus = 1.5 if needle and needle in heading else 0.0
    path_bonus = 1.0 if needle and needle in path else 0.0
    return exact * 5.0 + overlap * 1.25 + heading_bonus + path_bonus


def read_index_manifest(profile: str) -> dict[str, Any]:
    """Return the index manifest for *profile*, or ``{}`` when missing or unreadable."""
    path = index_manifest_path(profile)
    if not path.is_file():
        return {}
    try:
        manifest = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):  # ValueError covers JSON and decoding errors
        return {}
    return manifest if isinstance(manifest, dict) else {}


def memory_hybrid_search(args: dict[str, Any]) -> dict[str, Any]:
    """Search the derived project-knowledge index, falling back to live Markdown.

    Reads ``profile`` (default ``"fidelity"``), ``query`` (required) and
    ``limit`` (default 10, capped at 50) from *args*. When the index has no
    rows, the result of ``project_search_memory`` is returned with ``source``
    and ``index_available`` added. Otherwise matches are ordered by descending
    score, then path, then chunk id.
    """
    profile = str(args.get("profile") or "fidelity")
    query = str(args.get("query") or "").strip()
    if not query:
        return tool_error("query is required")
    limit = clamp_limit(args.get("limit"), default=10, maximum=50)
    rows = read_project_index(profile)
    if not rows:
        live = project_search_memory({"profile": profile, "query": query, "limit": limit})
        fallback = live.get("structuredContent") if isinstance(live, dict) else None
        if not isinstance(fallback, dict):
            # NOTE(review): presumably an error result without structured
            # content; it is passed through unchanged rather than raising.
            return live
        fallback["source"] = "live-project-knowledge-fallback"
        fallback["index_available"] = False
        return tool_result(fallback)
    query_tokens = search_tokens(query)
    scored: list[tuple[float, dict[str, Any]]] = []
    for row in rows:
        score = score_index_row(query, query_tokens, row)
        if score > 0:
            scored.append((score, row))
    # Coerce the tie-break keys to str so rows with null fields still sort.
    scored.sort(key=lambda item: (-item[0], str(item[1].get("path") or ""), str(item[1].get("chunk_id") or "")))
    matches = []
    for score, row in scored[:limit]:
        text = str(row.get("text") or "")
        matches.append({
            "score": round(score, 3),
            "path": row.get("path"),
            "heading": row.get("heading"),
            # Rows without an id get a stable one derived from their text.
            "chunk_id": row.get("chunk_id") or hashlib.sha256(text.encode("utf-8")).hexdigest()[:16],
            "snippet": indexed_snippet(query, text),
            "mtime": row.get("mtime"),
            "sha256": row.get("sha256"),
        })
    return tool_result({"profile": profile, "canonical": False, "source": "derived-project-knowledge-index", "index_available": True, "manifest": read_index_manifest(profile), "query": query, "matches": matches})
def test_initialize_response_declares_resources(self) -> None: @@ -158,6 +159,43 @@ class ContextMCPTests(unittest.TestCase): self.assertEqual(len(result["matches"]), 1) self.assertIn("03-context/project.md", result["matches"][0]["path"]) + def test_memory_hybrid_search_uses_index_when_available(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + index = root / ".aiw" / "indexes" / "fidelity" / "project-knowledge.jsonl" + manifest = root / ".aiw" / "indexes" / "fidelity" / "manifest.json" + index.parent.mkdir(parents=True) + index.write_text(json.dumps({ + "chunk_id": "abc", + "path": "project-knowledge/03-context/project.md", + "heading": "XFlow", + "text": "Dismissal lifecycle sequencing for XFlow", + "mtime": 1.0, + "sha256": "hash", + }) + "\n", encoding="utf-8") + manifest.write_text(json.dumps({"chunk_count": 1}), encoding="utf-8") + + with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", root / ".aiw" / "indexes"): + result = server.memory_hybrid_search({"profile": "fidelity", "query": "dismissal lifecycle"})["structuredContent"] + + self.assertTrue(result["index_available"]) + self.assertEqual(result["source"], "derived-project-knowledge-index") + self.assertEqual(result["matches"][0]["chunk_id"], "abc") + + def test_memory_hybrid_search_falls_back_without_index(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + real = root / "project-knowledge" / "03-context" / "project.md" + real.parent.mkdir(parents=True) + real.write_text("Important XFlow context", encoding="utf-8") + + with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", root / ".aiw" / "indexes"): + result = server.memory_hybrid_search({"profile": "fidelity", "query": "XFlow"})["structuredContent"] + + self.assertFalse(result["index_available"]) + self.assertEqual(result["source"], "live-project-knowledge-fallback") + self.assertEqual(len(result["matches"]), 1) + def 
test_previous_workday_skips_weekend(self) -> None: monday = date(2026, 5, 18)