From f0d3cd4ce940362c6e901195cc010e6be816c866 Mon Sep 17 00:00:00 2001 From: "david.delagneau" Date: Thu, 21 May 2026 10:21:52 -0600 Subject: [PATCH] feat: enhance profile path resolution and add example profiles for better project adaptability --- README.md | 1 + docs/architecture.md | 2 +- docs/profiles.md | 4 +- profiles/example/workspace.json | 8 +++ profiles/fidelity/workspace.json | 8 +++ scripts/aiw/README.md | 21 +++++++ scripts/aiw/indexer.py | 29 +++++---- scripts/aiw/profile.py | 72 ++++++++++++++++++++++ scripts/aiw/test_indexer.py | 24 +++++++- scripts/aiw/test_profile.py | 47 ++++++++++++++ scripts/mcp/aiw-context-mcp/server.py | 30 ++++----- scripts/mcp/aiw-context-mcp/test_server.py | 22 ++++++- 12 files changed, 234 insertions(+), 34 deletions(-) create mode 100644 profiles/example/workspace.json create mode 100644 profiles/fidelity/workspace.json create mode 100644 scripts/aiw/profile.py create mode 100644 scripts/aiw/test_profile.py diff --git a/README.md b/README.md index 5e89a3e..c3aee4e 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,7 @@ Indexes live under `.aiw/indexes/` and are ignored because they are rebuildable ```bash python3 scripts/aiw/test_services.py +python3 scripts/aiw/test_profile.py python3 scripts/aiw/test_indexer.py python3 scripts/mcp/aiw-context-mcp/test_server.py python3 scripts/iphone-photo-inbox/test_receiver.py diff --git a/docs/architecture.md b/docs/architecture.md index 27b825a..f352597 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -36,7 +36,7 @@ AI clients and agent workflows ## Current Repository Shape -The current repo still keeps the first real profile's vault at root-level `project-knowledge/`. That is acceptable during migration, but reusable code should increasingly resolve paths from profile configuration rather than hardcoding Fidelity-specific locations. +The current repo still keeps the first real profile's vault at root-level `project-knowledge/`. 
That is acceptable during migration, but reusable code should resolve paths from `profiles/<profile>/workspace.json` rather than hardcoding Fidelity-specific locations. Target direction: diff --git a/docs/profiles.md b/docs/profiles.md index abe0a8d..efbe768 100644 --- a/docs/profiles.md +++ b/docs/profiles.md @@ -46,7 +46,7 @@ Human-readable summary for agents and developers: ### `workspace.json` -Planned path configuration. Initial versions can point to current paths: +Profile path configuration. Initial versions can point to current paths: ```json { @@ -58,6 +58,8 @@ Planned path configuration. Initial versions can point to current paths: } ``` +Reusable scripts should resolve these paths through `scripts/aiw/profile.py`. + ### `services.json` Profile-specific local service manifest for `scripts/aiw/services.py`. diff --git a/profiles/example/workspace.json b/profiles/example/workspace.json new file mode 100644 index 0000000..6f2b6b5 --- /dev/null +++ b/profiles/example/workspace.json @@ -0,0 +1,8 @@ +{ + "profile": "example", + "display_name": "Example Project", + "description": "Sanitized example profile for adapting AI Workspace to a new project.", + "knowledge_dir": "workspaces/example/project-knowledge", + "inbox_dir": "workspaces/example/inbox", + "index_dir": ".aiw/indexes/example" +} diff --git a/profiles/fidelity/workspace.json b/profiles/fidelity/workspace.json new file mode 100644 index 0000000..ebd4581 --- /dev/null +++ b/profiles/fidelity/workspace.json @@ -0,0 +1,8 @@ +{ + "profile": "fidelity", + "display_name": "Fidelity", + "description": "Current Fidelity AI Workspace profile. 
Paths intentionally point to the existing root-level vault and inbox until the data migration phase.", + "knowledge_dir": "project-knowledge", + "inbox_dir": "ai/inbox", + "index_dir": ".aiw/indexes/fidelity" +} diff --git a/scripts/aiw/README.md b/scripts/aiw/README.md index 7871bf1..5c70f99 100644 --- a/scripts/aiw/README.md +++ b/scripts/aiw/README.md @@ -44,6 +44,26 @@ python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity `aiw-context-mcp` exposes the same derived search through the read-only `memory_hybrid_search` tool and falls back to live Markdown search if the index has not been built yet. +## Profile path configuration + +Reusable scripts resolve profile-specific paths through: + +```text +profiles/<profile>/workspace.json +``` + +Current fields: + +```json +{ + "knowledge_dir": "project-knowledge", + "inbox_dir": "ai/inbox", + "index_dir": ".aiw/indexes/fidelity" +} +``` + +Use `scripts/aiw/profile.py` from new scripts instead of hardcoding root-level `project-knowledge/` or `ai/inbox/` paths. + ## Robustness features - Manifest validation before lifecycle actions. 
@@ -59,5 +79,6 @@ python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity ```bash python3 scripts/aiw/test_services.py +python3 scripts/aiw/test_profile.py python3 scripts/aiw/test_indexer.py ``` diff --git a/scripts/aiw/indexer.py b/scripts/aiw/indexer.py index e4c6a10..01f8590 100644 --- a/scripts/aiw/indexer.py +++ b/scripts/aiw/indexer.py @@ -12,6 +12,7 @@ import argparse import hashlib import json import re +import sys import time from dataclasses import dataclass from datetime import datetime, timezone @@ -20,11 +21,13 @@ from typing import Any ROOT = Path(__file__).resolve().parents[2] -INDEX_ROOT = ROOT / ".aiw" / "indexes" DEFAULT_PROFILE = "fidelity" MAX_CHARS = 1800 OVERLAP_CHARS = 180 +sys.path.insert(0, str(Path(__file__).resolve().parent)) +import profile as aiw_profile # noqa: E402 + @dataclass(frozen=True) class Chunk: @@ -37,15 +40,15 @@ class Chunk: def project_knowledge_dir(profile: str) -> Path: - profile_base = ROOT / "profiles" / profile - candidate = profile_base / "project-knowledge" - if candidate.exists(): - return candidate - return ROOT / "project-knowledge" + return aiw_profile.knowledge_dir(profile, root=ROOT) def index_dir(profile: str) -> Path: - return INDEX_ROOT / profile + return aiw_profile.index_dir(profile, root=ROOT) + + +def rel(path: Path) -> str: + return aiw_profile.relative_to_root(path, root=ROOT) def index_path(profile: str) -> Path: @@ -120,13 +123,13 @@ def build_chunks(profile: str) -> list[Chunk]: chunks: list[Chunk] = [] for path in iter_markdown_files(base): raw = path.read_text(encoding="utf-8", errors="replace") - rel = str(path.relative_to(ROOT)) + rel_path = rel(path) digest = hashlib.sha256(raw.encode("utf-8", errors="replace")).hexdigest() mtime = path.stat().st_mtime for section_index, (heading, section) in enumerate(split_sections(raw)): for chunk_index, chunk in enumerate(chunk_text(section)): - chunk_digest = 
hashlib.sha256(f"{rel}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16] - chunks.append(Chunk(chunk_id=chunk_digest, path=rel, heading=heading, text=chunk, mtime=mtime, sha256=digest)) + chunk_digest = hashlib.sha256(f"{rel_path}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16] + chunks.append(Chunk(chunk_id=chunk_digest, path=rel_path, heading=heading, text=chunk, mtime=mtime, sha256=digest)) return chunks @@ -140,14 +143,14 @@ def write_index(profile: str) -> dict[str, Any]: files = sorted({chunk.path for chunk in chunks}) manifest = { "profile": profile, - "source": str(project_knowledge_dir(profile).relative_to(ROOT)), + "source": rel(project_knowledge_dir(profile)), "canonical": False, "derived_from": "project-knowledge", "index_type": "lexical-markdown-chunks", "created_at": datetime.now(timezone.utc).isoformat(), "file_count": len(files), "chunk_count": len(chunks), - "index_path": str(index_path(profile).relative_to(ROOT)), + "index_path": rel(index_path(profile)), } manifest_path(profile).write_text(json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8") return manifest @@ -225,7 +228,7 @@ def search_index(profile: str, query: str, limit: int = 10) -> dict[str, Any]: def status(profile: str) -> dict[str, Any]: manifest_file = manifest_path(profile) if not manifest_file.is_file(): - return {"profile": profile, "indexed": False, "index_path": str(index_path(profile).relative_to(ROOT))} + return {"profile": profile, "indexed": False, "index_path": rel(index_path(profile))} manifest = json.loads(manifest_file.read_text(encoding="utf-8")) path = index_path(profile) manifest["indexed"] = path.is_file() diff --git a/scripts/aiw/profile.py b/scripts/aiw/profile.py new file mode 100644 index 0000000..5a93b53 --- /dev/null +++ b/scripts/aiw/profile.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +"""Profile path resolution for AI Workspace scripts. 
+ +Profiles own their configuration. Reusable scripts should call this module +instead of hardcoding root-level project paths. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + + +ROOT = Path(__file__).resolve().parents[2] + + +DEFAULT_WORKSPACE = { + "knowledge_dir": "project-knowledge", + "inbox_dir": "ai/inbox", + "index_dir": ".aiw/indexes/{profile}", +} + + +def workspace_config_path(profile: str, root: Path | None = None) -> Path: + base = root or ROOT + return base / "profiles" / profile / "workspace.json" + + +def load_workspace_config(profile: str, root: Path | None = None) -> dict[str, Any]: + base = root or ROOT + config = dict(DEFAULT_WORKSPACE) + config["profile"] = profile + path = workspace_config_path(profile, root=base) + if path.is_file(): + try: + loaded = json.loads(path.read_text(encoding="utf-8")) + if isinstance(loaded, dict): + config.update(loaded) + except json.JSONDecodeError: + pass + return config + + +def resolve_path(raw: str | None, *, profile: str, root: Path | None = None, fallback: str) -> Path: + base = root or ROOT + value = (raw or fallback).format(profile=profile) + path = Path(value).expanduser() + return path if path.is_absolute() else base / path + + +def knowledge_dir(profile: str, root: Path | None = None) -> Path: + config = load_workspace_config(profile, root=root) + return resolve_path(config.get("knowledge_dir"), profile=profile, root=root, fallback="project-knowledge") + + +def inbox_dir(profile: str, root: Path | None = None) -> Path: + config = load_workspace_config(profile, root=root) + return resolve_path(config.get("inbox_dir"), profile=profile, root=root, fallback="ai/inbox") + + +def index_dir(profile: str, root: Path | None = None) -> Path: + config = load_workspace_config(profile, root=root) + return resolve_path(config.get("index_dir"), profile=profile, root=root, fallback=".aiw/indexes/{profile}") + + +def relative_to_root(path: Path, root: Path | 
None = None) -> str: + base = root or ROOT + try: + return str(path.relative_to(base)) + except ValueError: + return str(path) diff --git a/scripts/aiw/test_indexer.py b/scripts/aiw/test_indexer.py index 93c4bf3..d18313c 100644 --- a/scripts/aiw/test_indexer.py +++ b/scripts/aiw/test_indexer.py @@ -30,7 +30,7 @@ class IndexerTests(unittest.TestCase): real.write_text("# XFlow\nDismissal lifecycle context", encoding="utf-8") template.write_text("# XFlow\nTemplate-only text", encoding="utf-8") - with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"): + with patch.object(indexer, "ROOT", root): manifest = indexer.write_index("fidelity") result = indexer.search_index("fidelity", "dismissal lifecycle", limit=5) @@ -41,7 +41,7 @@ class IndexerTests(unittest.TestCase): def test_status_reports_unindexed_profile(self) -> None: with tempfile.TemporaryDirectory() as tmp: root = Path(tmp) - with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"): + with patch.object(indexer, "ROOT", root): result = indexer.status("fidelity") self.assertFalse(result["indexed"]) @@ -51,6 +51,26 @@ class IndexerTests(unittest.TestCase): payload = {"matches": [{"path": "project-knowledge/01-current/current-work.md", "score": 1.0}]} self.assertIsInstance(json.dumps(payload), str) + def test_build_uses_workspace_json_paths(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + config = root / "profiles" / "demo" / "workspace.json" + real = root / "workspaces" / "demo" / "project-knowledge" / "03-context" / "project.md" + config.parent.mkdir(parents=True) + real.parent.mkdir(parents=True) + config.write_text(json.dumps({ + "knowledge_dir": "workspaces/demo/project-knowledge", + "index_dir": ".aiw/indexes/demo", + }), encoding="utf-8") + real.write_text("# Demo\nReusable profile memory", encoding="utf-8") + + with patch.object(indexer, "ROOT", root): + manifest = 
indexer.write_index("demo") + result = indexer.search_index("demo", "profile memory", limit=5) + + self.assertEqual(manifest["source"], "workspaces/demo/project-knowledge") + self.assertEqual(result["matches"][0]["path"], "workspaces/demo/project-knowledge/03-context/project.md") + if __name__ == "__main__": unittest.main() diff --git a/scripts/aiw/test_profile.py b/scripts/aiw/test_profile.py new file mode 100644 index 0000000..018e0fe --- /dev/null +++ b/scripts/aiw/test_profile.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import importlib.util +import json +import sys +import tempfile +import unittest +from pathlib import Path + + +PROFILE_PATH = Path(__file__).with_name("profile.py") +SPEC = importlib.util.spec_from_file_location("aiw_profile", PROFILE_PATH) +profile = importlib.util.module_from_spec(SPEC) +assert SPEC.loader is not None +sys.modules[SPEC.name] = profile +SPEC.loader.exec_module(profile) + + +class ProfileTests(unittest.TestCase): + def test_workspace_config_resolves_profile_paths(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + config = root / "profiles" / "demo" / "workspace.json" + config.parent.mkdir(parents=True) + config.write_text(json.dumps({ + "knowledge_dir": "workspaces/demo/project-knowledge", + "inbox_dir": "workspaces/demo/inbox", + "index_dir": ".aiw/indexes/demo", + }), encoding="utf-8") + + self.assertEqual(profile.knowledge_dir("demo", root=root), root / "workspaces" / "demo" / "project-knowledge") + self.assertEqual(profile.inbox_dir("demo", root=root), root / "workspaces" / "demo" / "inbox") + self.assertEqual(profile.index_dir("demo", root=root), root / ".aiw" / "indexes" / "demo") + + def test_defaults_preserve_current_root_paths(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + + self.assertEqual(profile.knowledge_dir("missing", root=root), root / "project-knowledge") + self.assertEqual(profile.inbox_dir("missing", 
root=root), root / "ai" / "inbox") + self.assertEqual(profile.index_dir("missing", root=root), root / ".aiw" / "indexes" / "missing") + + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/mcp/aiw-context-mcp/server.py b/scripts/mcp/aiw-context-mcp/server.py index 6da93a0..3ad3ef1 100644 --- a/scripts/mcp/aiw-context-mcp/server.py +++ b/scripts/mcp/aiw-context-mcp/server.py @@ -27,7 +27,9 @@ PROTOCOL_VERSION = "2025-06-18" SERVER_NAME = "aiw-context-mcp" SERVER_VERSION = "0.1.0" LOCAL_ENV = ROOT / "scripts" / "mattermost-proxy" / ".env" -INDEX_ROOT = ROOT / ".aiw" / "indexes" +AIW_SCRIPT_DIR = ROOT / "scripts" / "aiw" +sys.path.insert(0, str(AIW_SCRIPT_DIR)) +import profile as aiw_profile # noqa: E402 def load_local_env(path: Path = LOCAL_ENV) -> None: @@ -47,22 +49,20 @@ def load_local_env(path: Path = LOCAL_ENV) -> None: def profile_dir(profile: str) -> Path: - if profile == "fidelity": - return ROOT candidate = ROOT / "profiles" / profile return candidate if candidate.exists() else ROOT def knowledge_dir(profile: str) -> Path: - base = profile_dir(profile) - candidate = base / "project-knowledge" - return candidate if candidate.exists() else ROOT / "project-knowledge" + return aiw_profile.knowledge_dir(profile, root=ROOT) def inbox_dir(profile: str) -> Path: - base = profile_dir(profile) - candidate = base / "ai" / "inbox" - return candidate if candidate.exists() else ROOT / "ai" / "inbox" + return aiw_profile.inbox_dir(profile, root=ROOT) + + +def rel(path: Path) -> str: + return aiw_profile.relative_to_root(path, root=ROOT) def mattermost_mirror_dir(profile: str) -> Path: @@ -257,7 +257,7 @@ def project_current_context(args: dict[str, Any]) -> dict[str, Any]: result = [] for path in files: if path.is_file(): - result.append({"path": str(path.relative_to(ROOT)), "text": path.read_text(encoding="utf-8", errors="replace")}) + result.append({"path": rel(path), "text": path.read_text(encoding="utf-8", errors="replace")}) return 
tool_result({"profile": profile, "canonical": True, "files": result}) @@ -270,8 +270,8 @@ def project_search_memory(args: dict[str, Any]) -> dict[str, Any]: base = knowledge_dir(profile) matches: list[dict[str, Any]] = [] for path in sorted(base.rglob("*.md")): - rel = path.relative_to(base) - if str(rel).startswith("09-templates/"): + relative_to_base = path.relative_to(base) + if str(relative_to_base).startswith("09-templates/"): continue text = path.read_text(encoding="utf-8", errors="replace") lowered = text.lower() @@ -280,18 +280,18 @@ def project_search_memory(args: dict[str, Any]) -> dict[str, Any]: continue start = max(0, index - 220) end = min(len(text), index + len(query) + 220) - matches.append({"path": str(path.relative_to(ROOT)), "snippet": text[start:end].strip()}) + matches.append({"path": rel(path), "snippet": text[start:end].strip()}) if len(matches) >= limit: break return tool_result({"profile": profile, "canonical": True, "query": query, "matches": matches}) def index_path(profile: str) -> Path: - return INDEX_ROOT / profile / "project-knowledge.jsonl" + return aiw_profile.index_dir(profile, root=ROOT) / "project-knowledge.jsonl" def index_manifest_path(profile: str) -> Path: - return INDEX_ROOT / profile / "manifest.json" + return aiw_profile.index_dir(profile, root=ROOT) / "manifest.json" def search_tokens(text: str) -> set[str]: diff --git a/scripts/mcp/aiw-context-mcp/test_server.py b/scripts/mcp/aiw-context-mcp/test_server.py index c6d8877..1bb738e 100644 --- a/scripts/mcp/aiw-context-mcp/test_server.py +++ b/scripts/mcp/aiw-context-mcp/test_server.py @@ -175,7 +175,7 @@ class ContextMCPTests(unittest.TestCase): }) + "\n", encoding="utf-8") manifest.write_text(json.dumps({"chunk_count": 1}), encoding="utf-8") - with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", root / ".aiw" / "indexes"): + with patch.object(server, "ROOT", root): result = server.memory_hybrid_search({"profile": "fidelity", "query": "dismissal 
lifecycle"})["structuredContent"] self.assertTrue(result["index_available"]) @@ -189,13 +189,31 @@ class ContextMCPTests(unittest.TestCase): real.parent.mkdir(parents=True) real.write_text("Important XFlow context", encoding="utf-8") - with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", root / ".aiw" / "indexes"): + with patch.object(server, "ROOT", root): result = server.memory_hybrid_search({"profile": "fidelity", "query": "XFlow"})["structuredContent"] self.assertFalse(result["index_available"]) self.assertEqual(result["source"], "live-project-knowledge-fallback") self.assertEqual(len(result["matches"]), 1) + def test_project_context_uses_workspace_json_paths(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + config = root / "profiles" / "demo" / "workspace.json" + current = root / "workspaces" / "demo" / "project-knowledge" / "01-current" / "current-work.md" + work_items = root / "workspaces" / "demo" / "project-knowledge" / "01-current" / "work-items.md" + config.parent.mkdir(parents=True) + current.parent.mkdir(parents=True) + config.write_text(json.dumps({"knowledge_dir": "workspaces/demo/project-knowledge"}), encoding="utf-8") + current.write_text("# Current\nDemo current work", encoding="utf-8") + work_items.write_text("# Work Items", encoding="utf-8") + + with patch.object(server, "ROOT", root): + result = server.project_current_context({"profile": "demo"})["structuredContent"] + + self.assertEqual(result["files"][0]["path"], "workspaces/demo/project-knowledge/01-current/current-work.md") + self.assertIn("Demo current work", result["files"][0]["text"]) + def test_previous_workday_skips_weekend(self) -> None: monday = date(2026, 5, 18)