feat: implement local indexer for project-knowledge and add memory hybrid search functionality

2026-05-21 09:13:07 -06:00
parent fc2abda588
commit e0069fd8c6
8 changed files with 575 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,4 @@ project-knowledge/.obsidian/cache/
 # AI Workspace local service runtime
 .aiw/runtime/
 .aiw/indexes/
--- a/core/services/local-rag-index.md
+++ b/core/services/local-rag-index.md
@@ -0,0 +1,102 @@
 ---
 type: service-design
 status: active
 updated: 2026-05-21
 tags:
  - ai-workspace
  - rag
  - index
 ---
 # Local RAG Index
 ## Goal
 Add retrieval over canonical workspace memory without replacing the human-readable `project-knowledge/` vault.
 The local index is derived and disposable. If the index disagrees with Markdown, the Markdown wins.
 ---
 ## Current Implementation
 The first implementation is dependency-free and lexical:
 ```text
 scripts/aiw/indexer.py
 ```
 It reads (preferring `profiles/<profile>/project-knowledge/` when that directory exists):
 ```text
 project-knowledge/**/*.md
 ```
 and writes:
 ```text
 .aiw/indexes/<profile>/project-knowledge.jsonl
 .aiw/indexes/<profile>/manifest.json
 ```
 It skips:
 ```text
 project-knowledge/09-templates/
 ```
 so Obsidian templates do not appear as real memory.
 ---
 ## Commands
 Build the index:
 ```bash
 python3 scripts/aiw/indexer.py build --profile fidelity
 ```
 Check index status:
 ```bash
 python3 scripts/aiw/indexer.py status --profile fidelity
 ```
 Search the index:
 ```bash
 python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity
 ```
 ---
 ## MCP Exposure
 `aiw-context-mcp` exposes:
 ```text
 memory_hybrid_search
 ```
 Current behavior:
 - searches the derived local index when it exists
 - returns cited paths, headings, snippets, scores, hashes, and mtimes
 - falls back to live Markdown search when no index exists or the index contains no readable rows
 - remains read-only
 ---
 ## Future Upgrade Path
 This layer can later add:
 - full-text ranking
 - embeddings
 - Qdrant or Chroma as a local vector store
 - hybrid lexical + semantic search
 - reranking
 - Mattermost evidence indexing with strict source filters
 Do not make the vector store canonical. It should remain rebuildable from Markdown and selected evidence.
--- a/scripts/aiw/README.md
+++ b/scripts/aiw/README.md
@@ -32,6 +32,18 @@ python3 scripts/aiw/services.py start --profile fidelity --group inbox
 The service manager unifies startup and status. It does not move capture behavior into the MCP.
 ## Local project-knowledge index
 The workspace includes a dependency-free local indexer for canonical Markdown memory. The index is derived from `project-knowledge/` and written under `.aiw/indexes/<profile>/`; it is safe to delete and rebuild.
 ```bash
 python3 scripts/aiw/indexer.py build --profile fidelity
 python3 scripts/aiw/indexer.py status --profile fidelity
 python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity
 ```
 `aiw-context-mcp` exposes the same derived search through the read-only `memory_hybrid_search` tool and falls back to live Markdown search if the index has not been built yet.
 ## Robustness features
 - Manifest validation before lifecycle actions.
@@ -47,4 +59,5 @@ The service manager unifies startup and status. It does not move capture behavio
 ```bash
 python3 scripts/aiw/test_services.py
 python3 scripts/aiw/test_indexer.py
 ```
--- a/scripts/aiw/indexer.py
+++ b/scripts/aiw/indexer.py
@@ -0,0 +1,258 @@
 #!/usr/bin/env python3
 """Dependency-free local indexer for AI Workspace canonical Markdown memory.
 This is intentionally a small lexical/hybrid-ready index. It keeps
 `project-knowledge/` as the source of truth and writes a derived, disposable
 JSONL index under `.aiw/indexes/<profile>/`.
 """
 from __future__ import annotations
 import argparse
 import hashlib
 import json
 import re
 import time
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 # Repository root: two directory levels above the directory containing this script.
 ROOT = Path(__file__).resolve().parents[2]
 # Derived indexes are written below this directory, one subdirectory per profile.
 INDEX_ROOT = ROOT / ".aiw" / "indexes"
 # Profile used by the CLI when --profile is not supplied.
 DEFAULT_PROFILE = "fidelity"
 # Default maximum chunk length, in characters, used by chunk_text().
 MAX_CHARS = 1800
 # Default number of trailing characters repeated at the start of the next chunk.
 OVERLAP_CHARS = 180
@dataclass(frozen=True)
 class Chunk:
    """One indexed piece of a Markdown file; serialized as a single JSONL row."""

    # Short identifier: first 16 hex characters of a SHA-256 over the path,
    # section index, chunk index, and chunk text.
    chunk_id: str
    # Source file path relative to the repository root.
    path: str
    # Heading of the section the chunk was taken from ("" before any heading).
    heading: str
    # The chunk's text.
    text: str
    # Modification time (st_mtime) of the source file when it was indexed.
    mtime: float
    # SHA-256 hex digest of the source file's content.
    sha256: str
 def project_knowledge_dir(profile: str) -> Path:
    """Return the Markdown vault to index for *profile*.

    A profile-specific vault at ``profiles/<profile>/project-knowledge`` wins
    when it exists; otherwise the shared ``project-knowledge`` directory at
    the repository root is used.
    """
    profile_vault = ROOT / "profiles" / profile / "project-knowledge"
    return profile_vault if profile_vault.exists() else ROOT / "project-knowledge"
 def index_dir(profile: str) -> Path:
    """Return the directory holding the derived index files for *profile*."""
    return INDEX_ROOT.joinpath(profile)
 def index_path(profile: str) -> Path:
    """Return the path of the JSONL chunk index for *profile*."""
    return index_dir(profile).joinpath("project-knowledge.jsonl")
 def manifest_path(profile: str) -> Path:
    """Return the path of the index manifest for *profile*."""
    return index_dir(profile).joinpath("manifest.json")
 def normalize_space(text: str) -> str:
    """Collapse each run of whitespace in *text* to one space and trim both ends."""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
 def tokens(text: str) -> set[str]:
    """Return the distinct lowercase search terms found in *text*.

    A term starts with a letter or digit and continues with letters, digits,
    ``_`` or ``-``; single-character terms are never returned.
    """
    found = re.findall(r"[a-z0-9][a-z0-9_-]{1,}", text.lower())
    terms: set[str] = set()
    for term in found:
        if len(term) > 1:
            terms.add(term)
    return terms
 def iter_markdown_files(base: Path) -> list[Path]:
    """Return every Markdown file under *base*, sorted, excluding templates.

    Files inside the top-level ``09-templates`` directory are skipped so
    templates never show up as real memory. The check compares path
    components rather than a ``/``-joined string, so it also works where the
    path separator is a backslash. Directories whose name happens to end in
    ``.md`` are ignored because they cannot be read as text.
    """
    return [
        path
        for path in sorted(base.rglob("*.md"))
        if path.is_file() and path.relative_to(base).parts[0] != "09-templates"
    ]
 def heading_for_line(line: str, current: str) -> str:
    """Return the heading text if *line* is a Markdown heading, else *current*.

    Only ATX headings count: one to six ``#`` characters followed by
    whitespace or the end of the line. Lines such as ``#tag`` (an Obsidian
    tag) or a run of seven or more hashes are ordinary text, so they leave
    the current heading unchanged. A heading with no text also keeps
    *current*.
    """
    stripped = line.strip()
    hashes = len(stripped) - len(stripped.lstrip("#"))
    if not 1 <= hashes <= 6:
        return current
    rest = stripped[hashes:]
    # "#tag" has no separator after the hashes, so it is not a heading.
    if rest and not rest[0].isspace():
        return current
    return rest.strip() or current
 def split_sections(text: str) -> list[tuple[str, str]]:
    """Split Markdown *text* into ``(heading, body)`` sections.

    A new section starts at each heading line whose text differs from the
    current heading; the heading line itself is kept as the first line of the
    section body. Text before the first heading belongs to a section with an
    empty heading. Sections whose body is empty after stripping are dropped.

    Lines inside fenced code blocks (```` ``` ```` or ``~~~``) are never
    treated as headings, so ``# comment`` lines in shell or Python snippets
    stay inside the section that contains the snippet.
    """
    sections: list[tuple[str, list[str]]] = [("", [])]
    current_heading = ""
    open_fence = ""  # the three fence characters of the code block we are inside, if any
    for line in text.splitlines():
        stripped = line.strip()
        if open_fence:
            # Inside a code block: only a matching fence line ends it.
            if stripped.startswith(open_fence):
                open_fence = ""
            sections[-1][1].append(line)
            continue
        fence = stripped[:3]
        # A backtick fence may not carry backticks in its info string; this
        # keeps inline code such as "```x``` text" from opening a block.
        if fence == "~~~" or (fence == "```" and "`" not in stripped.lstrip("`")):
            open_fence = fence
            sections[-1][1].append(line)
            continue
        new_heading = heading_for_line(line, current_heading)
        if new_heading != current_heading and stripped.startswith("#"):
            current_heading = new_heading
            sections.append((current_heading, [line]))
        else:
            sections[-1][1].append(line)
    result: list[tuple[str, str]] = []
    for heading, lines in sections:
        body = "\n".join(lines).strip()
        if body:
            result.append((heading, body))
    return result
 def chunk_text(section_text: str, max_chars: int = MAX_CHARS, overlap_chars: int = OVERLAP_CHARS) -> list[str]:
    """Split *section_text* into overlapping chunks of at most *max_chars*.

    Text that fits in one chunk is returned as a single-element list, and
    empty text yields an empty list. Longer text is cut at a paragraph break
    or sentence end when one falls in the second half of the window, and each
    following chunk starts *overlap_chars* before the previous cut.

    Raises:
        ValueError: if *max_chars* is not positive and there is text to split.
    """
    text = section_text.strip()
    if not text:
        return []
    if max_chars <= 0:
        # A non-positive window could never consume the text.
        raise ValueError("max_chars must be positive")
    if len(text) <= max_chars:
        return [text]
    chunks: list[str] = []
    start = 0
    while start < len(text):
        end = min(len(text), start + max_chars)
        if end < len(text):
            boundary = max(text.rfind("\n\n", start, end), text.rfind(". ", start, end))
            # Only honour a boundary that keeps the chunk at least half full.
            if boundary > start + max_chars // 2:
                end = boundary + 1
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        if end >= len(text):
            break
        next_start = end - overlap_chars
        # Guarantee forward progress: if the overlap would put us back at or
        # before the current start, continue from the cut without overlap.
        start = next_start if next_start > start else end
    return chunks
 def build_chunks(profile: str) -> list[Chunk]:
    """Read every indexable Markdown file for *profile* and return its chunks.

    Each file is read once as bytes. The ``sha256`` recorded on its chunks is
    the digest of those exact bytes, so it matches what external tools such
    as ``sha256sum`` report even for CRLF or non-UTF-8 files. The text is
    decoded as UTF-8 with undecodable bytes replaced.
    """
    base = project_knowledge_dir(profile)
    chunks: list[Chunk] = []
    for path in iter_markdown_files(base):
        data = path.read_bytes()
        digest = hashlib.sha256(data).hexdigest()
        raw = data.decode("utf-8", errors="replace")
        rel = str(path.relative_to(ROOT))
        mtime = path.stat().st_mtime
        for section_index, (heading, section) in enumerate(split_sections(raw)):
            for chunk_index, chunk in enumerate(chunk_text(section)):
                # The id covers position as well as text so identical text in
                # two places still gets distinct ids.
                chunk_digest = hashlib.sha256(f"{rel}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16]
                chunks.append(Chunk(chunk_id=chunk_digest, path=rel, heading=heading, text=chunk, mtime=mtime, sha256=digest))
    return chunks
 def write_index(profile: str) -> dict[str, Any]:
    """Rebuild the JSONL index and manifest for *profile*; return the manifest.

    The index directory is created when missing. Each chunk becomes one JSON
    line, and the manifest records the counts plus repository-relative paths.
    """
    index_dir(profile).mkdir(parents=True, exist_ok=True)
    chunks = build_chunks(profile)
    rows = [json.dumps(item.__dict__, ensure_ascii=False, sort_keys=True) + "\n" for item in chunks]
    with index_path(profile).open("w", encoding="utf-8") as handle:
        handle.writelines(rows)
    indexed_files = {item.path for item in chunks}
    source_dir = project_knowledge_dir(profile).relative_to(ROOT)
    manifest = {
        "profile": profile,
        "source": str(source_dir),
        "canonical": False,
        "derived_from": "project-knowledge",
        "index_type": "lexical-markdown-chunks",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "file_count": len(indexed_files),
        "chunk_count": len(chunks),
        "index_path": str(index_path(profile).relative_to(ROOT)),
    }
    serialized = json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
    manifest_path(profile).write_text(serialized, encoding="utf-8")
    return manifest
 def read_index(profile: str) -> list[dict[str, Any]]:
    """Load the JSONL index rows for *profile*; return ``[]`` when none exist.

    The file is iterated line by line instead of using ``str.splitlines()``.
    ``splitlines()`` also breaks on characters such as U+2028, U+2029 and
    U+0085, which ``json.dumps(..., ensure_ascii=False)`` writes unescaped
    inside a row; that would cut the row in half and silently drop it.
    Unparsable lines and rows that are not JSON objects are skipped, so
    callers can rely on dict rows.
    """
    path = index_path(profile)
    if not path.is_file():
        return []
    rows: list[dict[str, Any]] = []
    with path.open(encoding="utf-8", errors="replace") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                # Best-effort read: the index is derived and rebuildable.
                continue
            if isinstance(row, dict):
                rows.append(row)
    return rows
 def score_chunk(query: str, query_tokens: set[str], chunk: dict[str, Any]) -> float:
    """Return a lexical relevance score for *chunk*; ``0.0`` means no match.

    The score adds 5 points per exact occurrence of the query in the combined
    path/heading/text, 1.25 per shared token, 1.5 when the heading contains
    the query, and 1.0 when the path contains it.
    """
    needle = query.lower()
    body = str(chunk.get("text") or "")
    haystack = f"{chunk.get('path', '')} {chunk.get('heading', '')} {body}".lower()
    exact_hits = haystack.count(needle)
    shared_terms = len(query_tokens & tokens(haystack))
    if not exact_hits and not shared_terms:
        return 0.0
    score = exact_hits * 5.0 + shared_terms * 1.25
    if needle in str(chunk.get("heading") or "").lower():
        score += 1.5
    if needle in str(chunk.get("path") or "").lower():
        score += 1.0
    return score
 def snippet_for(query: str, text: str, width: int = 520) -> str:
    """Return up to *width* characters of *text* around the first match.

    The window is centred on the first exact occurrence of the query, or
    failing that on the earliest occurrence of any query token; with no hit
    at all it starts at the beginning. Whitespace in the result is collapsed.
    """
    lowered = text.lower()
    position = lowered.find(query.lower()) if query else -1
    if position < 0:
        hits: list[int] = []
        for term in tokens(query):
            where = lowered.find(term)
            if where >= 0:
                hits.append(where)
        position = min(hits) if hits else 0
    start = max(0, position - width // 2)
    stop = min(len(text), start + width)
    return normalize_space(text[start:stop])
 def search_index(profile: str, query: str, limit: int = 10) -> dict[str, Any]:
    """Search the derived index for *profile* and return a result payload.

    Matches are ordered by descending score, then by path and chunk id so the
    order is stable. A negative *limit* is treated as zero. An unreadable
    manifest, or one that is not a JSON object, is reported as an empty
    ``manifest`` rather than failing the search, because the index itself may
    still be usable.

    Raises:
        SystemExit: if *query* is empty after stripping.
    """
    query = query.strip()
    if not query:
        raise SystemExit("query is required")
    rows = read_index(profile)
    query_tokens = tokens(query)
    scored: list[tuple[float, dict[str, Any]]] = []
    for row in rows:
        score = score_chunk(query, query_tokens, row)
        if score > 0:
            scored.append((score, row))
    scored.sort(key=lambda item: (-item[0], item[1].get("path", ""), item[1].get("chunk_id", "")))
    matches = []
    # max(..., 0) keeps a negative limit from slicing off the end of the list.
    for score, row in scored[:max(limit, 0)]:
        matches.append({
            "score": round(score, 3),
            "path": row.get("path"),
            "heading": row.get("heading"),
            "chunk_id": row.get("chunk_id"),
            "snippet": snippet_for(query, str(row.get("text") or "")),
            "mtime": row.get("mtime"),
            "sha256": row.get("sha256"),
        })
    manifest: dict[str, Any] = {}
    manifest_file = manifest_path(profile)
    if manifest_file.is_file():
        try:
            loaded = json.loads(manifest_file.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            loaded = None
        if isinstance(loaded, dict):
            manifest = loaded
    return {"profile": profile, "query": query, "canonical": False, "source": "derived-index", "manifest": manifest, "matches": matches}
 def status(profile: str) -> dict[str, Any]:
    """Report whether *profile* has an index, based on its manifest.

    Without a manifest the result is a small ``indexed: False`` payload.
    Otherwise the manifest is returned with ``indexed``, ``index_bytes`` and
    ``age_seconds`` added. A manifest that cannot be parsed, or is not a JSON
    object, is replaced by a minimal payload carrying a ``manifest_error``
    message instead of raising. ``age_seconds`` is ``None`` when
    ``created_at`` is missing or not a valid ISO timestamp.
    """
    manifest_file = manifest_path(profile)
    if not manifest_file.is_file():
        return {"profile": profile, "indexed": False, "index_path": str(index_path(profile).relative_to(ROOT))}
    try:
        manifest = json.loads(manifest_file.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        manifest = None
    if not isinstance(manifest, dict):
        # The index is disposable, so report the problem instead of crashing.
        manifest = {
            "profile": profile,
            "index_path": str(index_path(profile).relative_to(ROOT)),
            "manifest_error": "manifest.json is not a valid JSON object",
        }
    path = index_path(profile)
    indexed = path.is_file()
    manifest["indexed"] = indexed
    manifest["index_bytes"] = path.stat().st_size if indexed else 0
    age_seconds = None
    created_at = manifest.get("created_at")
    if created_at:
        try:
            age_seconds = int(time.time() - datetime.fromisoformat(created_at).timestamp())
        except (TypeError, ValueError):
            age_seconds = None
    manifest["age_seconds"] = age_seconds
    return manifest
 def main() -> None:
    """Parse the command line, run ``build``, ``status`` or ``search``, and print JSON."""
    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(dest="command", required=True)
    for name in ("build", "status"):
        subparsers.add_parser(name).add_argument("--profile", default=DEFAULT_PROFILE)
    search_parser = subparsers.add_parser("search")
    search_parser.add_argument("query")
    search_parser.add_argument("--profile", default=DEFAULT_PROFILE)
    search_parser.add_argument("--limit", type=int, default=10)
    args = parser.parse_args()

    if args.command == "build":
        payload = write_index(args.profile)
    elif args.command == "search":
        # Keep the requested limit within 1..50.
        bounded_limit = max(1, min(args.limit, 50))
        payload = search_index(args.profile, args.query, limit=bounded_limit)
    else:
        payload = status(args.profile)
    print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))
 # Run the CLI only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/scripts/aiw/test_indexer.py
+++ b/scripts/aiw/test_indexer.py
@@ -0,0 +1,56 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 import importlib.util
 import json
 import sys
 import tempfile
 import unittest
 from pathlib import Path
 from unittest.mock import patch
 # Load indexer.py from the same directory as this test file by path.
 INDEXER_PATH = Path(__file__).with_name("indexer.py")
 SPEC = importlib.util.spec_from_file_location("aiw_indexer", INDEXER_PATH)
 indexer = importlib.util.module_from_spec(SPEC)
 assert SPEC.loader is not None
 # Register the module under its spec name before executing it.
 sys.modules[SPEC.name] = indexer
 SPEC.loader.exec_module(indexer)
 class IndexerTests(unittest.TestCase):
    """Tests for the local indexer, run against a temporary repository root."""

    def test_build_skips_templates_and_searches_canonical_files(self) -> None:
        """Templates are excluded from the index; real notes are searchable."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            real = root / "project-knowledge" / "03-context" / "project.md"
            template = root / "project-knowledge" / "09-templates" / "daily.md"
            real.parent.mkdir(parents=True)
            template.parent.mkdir(parents=True)
            real.write_text("# XFlow\nDismissal lifecycle context", encoding="utf-8")
            template.write_text("# XFlow\nTemplate-only text", encoding="utf-8")
            with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
                manifest = indexer.write_index("fidelity")
                result = indexer.search_index("fidelity", "dismissal lifecycle", limit=5)
        self.assertEqual(manifest["file_count"], 1)
        self.assertEqual(len(result["matches"]), 1)
        self.assertIn("03-context/project.md", result["matches"][0]["path"])

    def test_status_reports_unindexed_profile(self) -> None:
        """Without a manifest, status reports the profile as not indexed."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
                result = indexer.status("fidelity")
        self.assertFalse(result["indexed"])
        self.assertIn(".aiw/indexes/fidelity/project-knowledge.jsonl", result["index_path"])

    def test_cli_search_payload_is_json_serializable(self) -> None:
        """The payload produced by a real search survives a JSON round trip."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            note = root / "project-knowledge" / "01-current" / "current-work.md"
            note.parent.mkdir(parents=True)
            note.write_text("# Current work\nDismissal lifecycle sequencing", encoding="utf-8")
            with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
                indexer.write_index("fidelity")
                payload = indexer.search_index("fidelity", "dismissal lifecycle", limit=5)
        # Serialize the same way the CLI does, then check the content survived.
        encoded = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True)
        self.assertIsInstance(encoded, str)
        decoded = json.loads(encoded)
        self.assertEqual(len(decoded["matches"]), 1)
        self.assertIn("current-work.md", decoded["matches"][0]["path"])
 # Allow running this test file directly with `python3 scripts/aiw/test_indexer.py`.
 if __name__ == "__main__":
    unittest.main()
--- a/scripts/mcp/aiw-context-mcp/README.md
+++ b/scripts/mcp/aiw-context-mcp/README.md
@@ -42,10 +42,19 @@ python3 scripts/mcp/aiw-context-mcp/server.py --transport stdio
 - `communication_thread_context`
 - `project_current_context`
 - `project_search_memory`
 - `memory_hybrid_search`
 - `photos_latest`
 All tools are read-only. Mattermost tools read `ai/inbox/mattermost-mirror/`; photo tools list local Photo Inbox files without embedding image data; project tools read canonical Markdown under `project-knowledge/`.
 `memory_hybrid_search` reads the derived local index built by:
 ```bash
 python3 scripts/aiw/indexer.py build --profile fidelity
 ```
 If the index is missing or contains no readable rows, it falls back to bounded live Markdown search over `project-knowledge/`. The index is not canonical memory; `project-knowledge/` remains the source of truth.
 Mattermost latest/date/standup tools filter to the active profile's context channels by default. For Fidelity, that list lives in `profiles/fidelity/context-sources.json`. Pass explicit `channels` to override the profile list, or `include_all_channels: true` when broad unfiltered mirror evidence is intentionally needed.
 ## Resources
--- a/scripts/mcp/aiw-context-mcp/server.py
+++ b/scripts/mcp/aiw-context-mcp/server.py
@@ -9,8 +9,10 @@ owned by the AI Workspace Service Manager.
 from __future__ import annotations
 import argparse
 import hashlib
 import json
 import os
 import re
 import sys
 import urllib.parse
 from datetime import date, datetime, timedelta
@@ -25,6 +27,7 @@ PROTOCOL_VERSION = "2025-06-18"
 SERVER_NAME = "aiw-context-mcp"
 SERVER_VERSION = "0.1.0"
 LOCAL_ENV = ROOT / "scripts" / "mattermost-proxy" / ".env"
 # Directory holding the derived per-profile indexes (<profile>/project-knowledge.jsonl and manifest.json).
 INDEX_ROOT = ROOT / ".aiw" / "indexes"
 def load_local_env(path: Path = LOCAL_ENV) -> None:
@@ -283,6 +286,100 @@ def project_search_memory(args: dict[str, Any]) -> dict[str, Any]:
    return tool_result({"profile": profile, "canonical": True, "query": query, "matches": matches})
 def index_path(profile: str) -> Path:
    """Return the path of the derived JSONL index for *profile*."""
    return INDEX_ROOT.joinpath(profile, "project-knowledge.jsonl")
 def index_manifest_path(profile: str) -> Path:
    """Return the path of the index manifest for *profile*."""
    return INDEX_ROOT.joinpath(profile, "manifest.json")
 def search_tokens(text: str) -> set[str]:
    """Return the distinct lowercase search terms found in *text*.

    A term starts with a letter or digit and continues with letters, digits,
    ``_`` or ``-``; single-character terms are never returned.
    """
    found = re.findall(r"[a-z0-9][a-z0-9_-]{1,}", text.lower())
    terms: set[str] = set()
    for term in found:
        if len(term) > 1:
            terms.add(term)
    return terms
 def read_project_index(profile: str) -> list[dict[str, Any]]:
    """Load the derived JSONL index rows for *profile*; ``[]`` when none exist.

    The file is iterated line by line instead of using ``str.splitlines()``.
    ``splitlines()`` also breaks on characters such as U+2028, U+2029 and
    U+0085, which can appear unescaped inside a JSON string; that would cut a
    row in half and silently drop it. Unparsable lines and rows that are not
    JSON objects are skipped, so callers can rely on dict rows.
    """
    path = index_path(profile)
    if not path.is_file():
        return []
    rows: list[dict[str, Any]] = []
    with path.open(encoding="utf-8", errors="replace") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError:
                # Best-effort read: the index is derived and rebuildable.
                continue
            if isinstance(row, dict):
                rows.append(row)
    return rows
 def indexed_snippet(query: str, text: str, width: int = 520) -> str:
    """Return up to *width* characters of *text* around the first match.

    The window is centred on the first exact occurrence of the query, or
    failing that on the earliest occurrence of any query token; with no hit
    at all it starts at the beginning. Whitespace in the result is collapsed.
    """
    lowered = text.lower()
    position = lowered.find(query.lower()) if query else -1
    if position < 0:
        hits: list[int] = []
        for term in search_tokens(query):
            where = lowered.find(term)
            if where >= 0:
                hits.append(where)
        position = min(hits) if hits else 0
    start = max(0, position - width // 2)
    stop = min(len(text), start + width)
    window = text[start:stop]
    return re.sub(r"\s+", " ", window).strip()
 def score_index_row(query: str, query_tokens: set[str], row: dict[str, Any]) -> float:
    """Return a lexical relevance score for an index *row*; ``0.0`` means no match.

    The score adds 5 points per exact occurrence of the query in the combined
    path/heading/text, 1.25 per shared token, 1.5 when the heading contains
    the query, and 1.0 when the path contains it.
    """
    needle = query.lower()
    body = str(row.get("text") or "")
    haystack = f"{row.get('path', '')} {row.get('heading', '')} {body}".lower()
    exact_hits = haystack.count(needle)
    shared_terms = len(query_tokens & search_tokens(haystack))
    if not exact_hits and not shared_terms:
        return 0.0
    score = exact_hits * 5.0 + shared_terms * 1.25
    if needle in str(row.get("heading") or "").lower():
        score += 1.5
    if needle in str(row.get("path") or "").lower():
        score += 1.0
    return score
 def read_index_manifest(profile: str) -> dict[str, Any]:
    """Return the index manifest for *profile*, or ``{}`` when unusable.

    A missing file, invalid JSON, undecodable bytes, or JSON that is not an
    object all yield an empty dict, so the return value is always a dict as
    annotated.
    """
    path = index_manifest_path(profile)
    if not path.is_file():
        return {}
    try:
        manifest = json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        return {}
    return manifest if isinstance(manifest, dict) else {}
 def memory_hybrid_search(args: dict[str, Any]) -> dict[str, Any]:
    """Search the derived project-knowledge index, falling back to live search.

    When the index yields no rows, the query is delegated to
    ``project_search_memory`` and the result is tagged as a fallback.
    Otherwise rows are scored lexically and the best ``limit`` matches are
    returned together with the index manifest.
    """
    profile = str(args.get("profile") or "fidelity")
    query = str(args.get("query") or "").strip()
    if not query:
        return tool_error("query is required")
    limit = clamp_limit(args.get("limit"), default=10, maximum=50)

    rows = read_project_index(profile)
    if not rows:
        live = project_search_memory({"profile": profile, "query": query, "limit": limit})["structuredContent"]
        live["source"] = "live-project-knowledge-fallback"
        live["index_available"] = False
        return tool_result(live)

    def to_match(score: float, row: dict[str, Any]) -> dict[str, Any]:
        # Rows without a stored chunk_id get one derived from their text.
        text = str(row.get("text") or "")
        return {
            "score": round(score, 3),
            "path": row.get("path"),
            "heading": row.get("heading"),
            "chunk_id": row.get("chunk_id") or hashlib.sha256(text.encode("utf-8")).hexdigest()[:16],
            "snippet": indexed_snippet(query, text),
            "mtime": row.get("mtime"),
            "sha256": row.get("sha256"),
        }

    query_tokens = search_tokens(query)
    ranked = [(score_index_row(query, query_tokens, row), row) for row in rows]
    ranked = [pair for pair in ranked if pair[0] > 0]
    # Highest score first; path and chunk id break ties deterministically.
    ranked.sort(key=lambda item: (-item[0], item[1].get("path", ""), item[1].get("chunk_id", "")))
    matches = [to_match(score, row) for score, row in ranked[:limit]]
    return tool_result({"profile": profile, "canonical": False, "source": "derived-project-knowledge-index", "index_available": True, "manifest": read_index_manifest(profile), "query": query, "matches": matches})
 def photos_latest(args: dict[str, Any]) -> dict[str, Any]:
    profile = str(args.get("profile") or "fidelity")
    limit = clamp_limit(args.get("limit"), default=20, maximum=100)
@@ -373,6 +470,7 @@ TOOLS: dict[str, dict[str, Any]] = {
    "communication_thread_context": {"handler": communication_thread_context, "description": "Read Mattermost mirror evidence for a thread id.", "properties": {"profile": {"type": "string"}, "thread_id": {"type": "string"}, "limit": {"type": "integer"}}},
    "project_current_context": {"handler": project_current_context, "description": "Read canonical current-work and work-items context.", "properties": {"profile": {"type": "string"}}},
    "project_search_memory": {"handler": project_search_memory, "description": "Search canonical project-knowledge Markdown files.", "properties": {"profile": {"type": "string"}, "query": {"type": "string"}, "limit": {"type": "integer"}}},
    "memory_hybrid_search": {"handler": memory_hybrid_search, "description": "Search the derived local project-knowledge index with lexical scoring and source citations; falls back to live Markdown search if no index exists.", "properties": {"profile": {"type": "string"}, "query": {"type": "string"}, "limit": {"type": "integer"}}},
    "photos_latest": {"handler": photos_latest, "description": "List recent local Photo Inbox files without embedding image data.", "properties": {"profile": {"type": "string"}, "limit": {"type": "integer"}}},
 }
--- a/scripts/mcp/aiw-context-mcp/test_server.py
+++ b/scripts/mcp/aiw-context-mcp/test_server.py
@@ -32,6 +32,7 @@ class ContextMCPTests(unittest.TestCase):
        names = {tool["name"] for tool in response["result"]["tools"]}
        self.assertIn("project_search_memory", names)
        self.assertIn("memory_hybrid_search", names)
        self.assertIn("communication_latest", names)
    def test_initialize_response_declares_resources(self) -> None:
@@ -158,6 +159,43 @@ class ContextMCPTests(unittest.TestCase):
        self.assertEqual(len(result["matches"]), 1)
        self.assertIn("03-context/project.md", result["matches"][0]["path"])
    def test_memory_hybrid_search_uses_index_when_available(self) -> None:
        """An existing index is searched and reported as the result source."""
        row = {
            "chunk_id": "abc",
            "path": "project-knowledge/03-context/project.md",
            "heading": "XFlow",
            "text": "Dismissal lifecycle sequencing for XFlow",
            "mtime": 1.0,
            "sha256": "hash",
        }
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            index_root = root / ".aiw" / "indexes"
            profile_dir = index_root / "fidelity"
            profile_dir.mkdir(parents=True)
            (profile_dir / "project-knowledge.jsonl").write_text(json.dumps(row) + "\n", encoding="utf-8")
            (profile_dir / "manifest.json").write_text(json.dumps({"chunk_count": 1}), encoding="utf-8")
            with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", index_root):
                response = server.memory_hybrid_search({"profile": "fidelity", "query": "dismissal lifecycle"})
                result = response["structuredContent"]
        self.assertTrue(result["index_available"])
        self.assertEqual(result["source"], "derived-project-knowledge-index")
        self.assertEqual(result["matches"][0]["chunk_id"], "abc")
    def test_memory_hybrid_search_falls_back_without_index(self) -> None:
        """Without an index, the tool answers from live Markdown search."""
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            note_dir = root / "project-knowledge" / "03-context"
            note_dir.mkdir(parents=True)
            (note_dir / "project.md").write_text("Important XFlow context", encoding="utf-8")
            index_root = root / ".aiw" / "indexes"
            with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", index_root):
                response = server.memory_hybrid_search({"profile": "fidelity", "query": "XFlow"})
                result = response["structuredContent"]
        self.assertFalse(result["index_available"])
        self.assertEqual(result["source"], "live-project-knowledge-fallback")
        self.assertEqual(len(result["matches"]), 1)
    def test_previous_workday_skips_weekend(self) -> None:
        monday = date(2026, 5, 18)