feat: enhance profile path resolution and add example profiles for better project adaptability

This commit is contained in:
2026-05-21 10:21:52 -06:00
parent fb8a6ba2d9
commit f0d3cd4ce9
12 changed files with 234 additions and 34 deletions

View File

@@ -44,6 +44,26 @@ python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity
`aiw-context-mcp` exposes the same derived search through the read-only `memory_hybrid_search` tool and falls back to live Markdown search if the index has not been built yet.
## Profile path configuration
Reusable scripts resolve profile-specific paths through:
```text
profiles/<profile>/workspace.json
```
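Current fields (relative paths are resolved against the repository root, absolute paths are used as-is, and `{profile}` inside a value expands to the profile name):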
```json
{
"knowledge_dir": "project-knowledge",
"inbox_dir": "ai/inbox",
"index_dir": ".aiw/indexes/fidelity"
}
```
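New scripts should resolve these locations through the helpers in `scripts/aiw/profile.py` (`knowledge_dir`, `inbox_dir`, `index_dir`) instead of hardcoding root-level `project-knowledge/` or `ai/inbox/` paths. A profile without a `workspace.json` falls back to the root-level defaults.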
## Robustness features
- Manifest validation before lifecycle actions.
@@ -59,5 +79,6 @@ python3 scripts/aiw/indexer.py search "dismissal lifecycle" --profile fidelity
```bash
python3 scripts/aiw/test_services.py
python3 scripts/aiw/test_profile.py
python3 scripts/aiw/test_indexer.py
```

View File

@@ -12,6 +12,7 @@ import argparse
import hashlib
import json
import re
import sys
import time
from dataclasses import dataclass
from datetime import datetime, timezone
@@ -20,11 +21,13 @@ from typing import Any
ROOT = Path(__file__).resolve().parents[2]
INDEX_ROOT = ROOT / ".aiw" / "indexes"
DEFAULT_PROFILE = "fidelity"
MAX_CHARS = 1800
OVERLAP_CHARS = 180
sys.path.insert(0, str(Path(__file__).resolve().parent))
import profile as aiw_profile # noqa: E402
@dataclass(frozen=True)
class Chunk:
@@ -37,15 +40,15 @@ class Chunk:
def project_knowledge_dir(profile: str) -> Path:
profile_base = ROOT / "profiles" / profile
candidate = profile_base / "project-knowledge"
if candidate.exists():
return candidate
return ROOT / "project-knowledge"
return aiw_profile.knowledge_dir(profile, root=ROOT)
def index_dir(profile: str) -> Path:
return INDEX_ROOT / profile
return aiw_profile.index_dir(profile, root=ROOT)
def rel(path: Path) -> str:
    """Render *path* for display, relative to this module's ``ROOT``.

    Delegates to ``aiw_profile.relative_to_root`` so the indexer reports paths
    the same way as the shared profile helper.
    """
    shown = aiw_profile.relative_to_root(path, root=ROOT)
    return shown
def index_path(profile: str) -> Path:
@@ -120,13 +123,13 @@ def build_chunks(profile: str) -> list[Chunk]:
chunks: list[Chunk] = []
for path in iter_markdown_files(base):
raw = path.read_text(encoding="utf-8", errors="replace")
rel = str(path.relative_to(ROOT))
rel_path = rel(path)
digest = hashlib.sha256(raw.encode("utf-8", errors="replace")).hexdigest()
mtime = path.stat().st_mtime
for section_index, (heading, section) in enumerate(split_sections(raw)):
for chunk_index, chunk in enumerate(chunk_text(section)):
chunk_digest = hashlib.sha256(f"{rel}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16]
chunks.append(Chunk(chunk_id=chunk_digest, path=rel, heading=heading, text=chunk, mtime=mtime, sha256=digest))
chunk_digest = hashlib.sha256(f"{rel_path}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16]
chunks.append(Chunk(chunk_id=chunk_digest, path=rel_path, heading=heading, text=chunk, mtime=mtime, sha256=digest))
return chunks
@@ -140,14 +143,14 @@ def write_index(profile: str) -> dict[str, Any]:
files = sorted({chunk.path for chunk in chunks})
manifest = {
"profile": profile,
"source": str(project_knowledge_dir(profile).relative_to(ROOT)),
"source": rel(project_knowledge_dir(profile)),
"canonical": False,
"derived_from": "project-knowledge",
"index_type": "lexical-markdown-chunks",
"created_at": datetime.now(timezone.utc).isoformat(),
"file_count": len(files),
"chunk_count": len(chunks),
"index_path": str(index_path(profile).relative_to(ROOT)),
"index_path": rel(index_path(profile)),
}
manifest_path(profile).write_text(json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8")
return manifest
@@ -225,7 +228,7 @@ def search_index(profile: str, query: str, limit: int = 10) -> dict[str, Any]:
def status(profile: str) -> dict[str, Any]:
manifest_file = manifest_path(profile)
if not manifest_file.is_file():
return {"profile": profile, "indexed": False, "index_path": str(index_path(profile).relative_to(ROOT))}
return {"profile": profile, "indexed": False, "index_path": rel(index_path(profile))}
manifest = json.loads(manifest_file.read_text(encoding="utf-8"))
path = index_path(profile)
manifest["indexed"] = path.is_file()

72
scripts/aiw/profile.py Normal file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""Profile path resolution for AI Workspace scripts.
Profiles own their configuration. Reusable scripts should call this module
instead of hardcoding root-level project paths.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
# Default anchor for relative paths: two directory levels above the directory
# that contains this file.
ROOT = Path(__file__).resolve().parents[2]
# Settings used when a profile's workspace.json is missing or omits a field.
# Values are path templates; "{profile}" is replaced with the profile name
# when the path is resolved.
DEFAULT_WORKSPACE = {
"knowledge_dir": "project-knowledge",
"inbox_dir": "ai/inbox",
"index_dir": ".aiw/indexes/{profile}",
}
def workspace_config_path(profile: str, root: Path | None = None) -> Path:
base = root or ROOT
return base / "profiles" / profile / "workspace.json"
def load_workspace_config(profile: str, root: Path | None = None) -> dict[str, Any]:
    """Return the effective workspace settings for *profile*.

    Starts from a copy of ``DEFAULT_WORKSPACE``, records the profile name under
    the ``"profile"`` key, then overlays whatever the profile's
    ``workspace.json`` provides. Loading is best-effort: a missing, unreadable,
    or malformed file never raises; the defaults are returned instead and a
    ``RuntimeWarning`` explains why the file was ignored.

    Args:
        profile: Profile name.
        root: Directory that contains ``profiles/``. Falls back to the
            module-level ``ROOT`` when omitted.

    Returns:
        A new dict; mutating it does not affect ``DEFAULT_WORKSPACE``.
    """
    import warnings  # function-scope so the module's import block is untouched

    base = root or ROOT
    config: dict[str, Any] = dict(DEFAULT_WORKSPACE)
    config["profile"] = profile

    path = workspace_config_path(profile, root=base)
    if not path.is_file():
        return config

    try:
        loaded = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers permission problems and the file vanishing after the
        # is_file() check. None of these should take down a caller that can
        # work with the defaults.
        warnings.warn(
            f"ignoring workspace config {path}: {exc}",
            RuntimeWarning,
            stacklevel=2,
        )
        return config

    if isinstance(loaded, dict):
        config.update(loaded)
    else:
        warnings.warn(
            f"ignoring workspace config {path}: expected a JSON object, "
            f"got {type(loaded).__name__}",
            RuntimeWarning,
            stacklevel=2,
        )
    return config
def resolve_path(raw: str | None, *, profile: str, root: Path | None = None, fallback: str) -> Path:
base = root or ROOT
value = (raw or fallback).format(profile=profile)
path = Path(value).expanduser()
return path if path.is_absolute() else base / path
def knowledge_dir(profile: str, root: Path | None = None) -> Path:
    """Return the project-knowledge directory configured for *profile*.

    Reads the ``knowledge_dir`` setting from the profile's workspace config and
    resolves it with ``resolve_path``; ``project-knowledge`` is the fallback.
    """
    configured = load_workspace_config(profile, root=root).get("knowledge_dir")
    return resolve_path(
        configured,
        profile=profile,
        root=root,
        fallback="project-knowledge",
    )
def inbox_dir(profile: str, root: Path | None = None) -> Path:
    """Return the inbox directory configured for *profile*.

    Reads the ``inbox_dir`` setting from the profile's workspace config and
    resolves it with ``resolve_path``; ``ai/inbox`` is the fallback.
    """
    configured = load_workspace_config(profile, root=root).get("inbox_dir")
    return resolve_path(
        configured,
        profile=profile,
        root=root,
        fallback="ai/inbox",
    )
def index_dir(profile: str, root: Path | None = None) -> Path:
    """Return the derived-index directory configured for *profile*.

    Reads the ``index_dir`` setting from the profile's workspace config and
    resolves it with ``resolve_path``; ``.aiw/indexes/{profile}`` is the
    fallback template.
    """
    configured = load_workspace_config(profile, root=root).get("index_dir")
    return resolve_path(
        configured,
        profile=profile,
        root=root,
        fallback=".aiw/indexes/{profile}",
    )
def relative_to_root(path: Path, root: Path | None = None) -> str:
base = root or ROOT
try:
return str(path.relative_to(base))
except ValueError:
return str(path)

View File

@@ -30,7 +30,7 @@ class IndexerTests(unittest.TestCase):
real.write_text("# XFlow\nDismissal lifecycle context", encoding="utf-8")
template.write_text("# XFlow\nTemplate-only text", encoding="utf-8")
with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
with patch.object(indexer, "ROOT", root):
manifest = indexer.write_index("fidelity")
result = indexer.search_index("fidelity", "dismissal lifecycle", limit=5)
@@ -41,7 +41,7 @@ class IndexerTests(unittest.TestCase):
def test_status_reports_unindexed_profile(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
root = Path(tmp)
with patch.object(indexer, "ROOT", root), patch.object(indexer, "INDEX_ROOT", root / ".aiw" / "indexes"):
with patch.object(indexer, "ROOT", root):
result = indexer.status("fidelity")
self.assertFalse(result["indexed"])
@@ -51,6 +51,26 @@ class IndexerTests(unittest.TestCase):
payload = {"matches": [{"path": "project-knowledge/01-current/current-work.md", "score": 1.0}]}
self.assertIsInstance(json.dumps(payload), str)
def test_build_uses_workspace_json_paths(self) -> None:
    """Indexing follows the knowledge/index locations from workspace.json."""
    settings = {
        "knowledge_dir": "workspaces/demo/project-knowledge",
        "index_dir": ".aiw/indexes/demo",
    }
    with tempfile.TemporaryDirectory() as scratch:
        workspace = Path(scratch)
        settings_file = workspace / "profiles" / "demo" / "workspace.json"
        note = workspace / "workspaces" / "demo" / "project-knowledge" / "03-context" / "project.md"

        for parent in (settings_file.parent, note.parent):
            parent.mkdir(parents=True)
        settings_file.write_text(json.dumps(settings), encoding="utf-8")
        note.write_text("# Demo\nReusable profile memory", encoding="utf-8")

        # Only ROOT is patched: the index location must come from the config.
        with patch.object(indexer, "ROOT", workspace):
            manifest = indexer.write_index("demo")
            result = indexer.search_index("demo", "profile memory", limit=5)

        top_match = result["matches"][0]
        self.assertEqual(manifest["source"], "workspaces/demo/project-knowledge")
        self.assertEqual(top_match["path"], "workspaces/demo/project-knowledge/03-context/project.md")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
from __future__ import annotations
import importlib.util
import json
import sys
import tempfile
import unittest
from pathlib import Path
# Load the sibling profile.py by file path under the module name "aiw_profile"
# rather than through a plain ``import profile``.
# NOTE(review): presumably this avoids picking up the standard-library
# ``profile`` module of the same name - confirm.
PROFILE_PATH = Path(__file__).with_name("profile.py")
SPEC = importlib.util.spec_from_file_location("aiw_profile", PROFILE_PATH)
# Validate before first use; an explicit raise also survives ``python -O``,
# which strips assert statements.
if SPEC is None or SPEC.loader is None:
    raise ImportError(f"cannot load profile module from {PROFILE_PATH}")
profile = importlib.util.module_from_spec(SPEC)
# Register before executing, as in the importlib recipe for importing a
# source file directly.
sys.modules[SPEC.name] = profile
SPEC.loader.exec_module(profile)
class ProfileTests(unittest.TestCase):
    """Path-resolution checks for the profile helper module."""

    def test_workspace_config_resolves_profile_paths(self) -> None:
        """Values from workspace.json are resolved against the given root."""
        overrides = {
            "knowledge_dir": "workspaces/demo/project-knowledge",
            "inbox_dir": "workspaces/demo/inbox",
            "index_dir": ".aiw/indexes/demo",
        }
        with tempfile.TemporaryDirectory() as scratch:
            workspace = Path(scratch)
            profile_home = workspace / "profiles" / "demo"
            profile_home.mkdir(parents=True)
            (profile_home / "workspace.json").write_text(json.dumps(overrides), encoding="utf-8")

            self.assertEqual(
                profile.knowledge_dir("demo", root=workspace),
                workspace.joinpath("workspaces", "demo", "project-knowledge"),
            )
            self.assertEqual(
                profile.inbox_dir("demo", root=workspace),
                workspace.joinpath("workspaces", "demo", "inbox"),
            )
            self.assertEqual(
                profile.index_dir("demo", root=workspace),
                workspace.joinpath(".aiw", "indexes", "demo"),
            )

    def test_defaults_preserve_current_root_paths(self) -> None:
        """A profile without workspace.json resolves to the root-level defaults."""
        with tempfile.TemporaryDirectory() as scratch:
            workspace = Path(scratch)

            self.assertEqual(
                profile.knowledge_dir("missing", root=workspace),
                workspace.joinpath("project-knowledge"),
            )
            self.assertEqual(
                profile.inbox_dir("missing", root=workspace),
                workspace.joinpath("ai", "inbox"),
            )
            self.assertEqual(
                profile.index_dir("missing", root=workspace),
                workspace.joinpath(".aiw", "indexes", "missing"),
            )
if __name__ == "__main__":
unittest.main()

View File

@@ -27,7 +27,9 @@ PROTOCOL_VERSION = "2025-06-18"
SERVER_NAME = "aiw-context-mcp"
SERVER_VERSION = "0.1.0"
LOCAL_ENV = ROOT / "scripts" / "mattermost-proxy" / ".env"
INDEX_ROOT = ROOT / ".aiw" / "indexes"
AIW_SCRIPT_DIR = ROOT / "scripts" / "aiw"
sys.path.insert(0, str(AIW_SCRIPT_DIR))
import profile as aiw_profile # noqa: E402
def load_local_env(path: Path = LOCAL_ENV) -> None:
@@ -47,22 +49,20 @@ def load_local_env(path: Path = LOCAL_ENV) -> None:
def profile_dir(profile: str) -> Path:
if profile == "fidelity":
return ROOT
candidate = ROOT / "profiles" / profile
return candidate if candidate.exists() else ROOT
def knowledge_dir(profile: str) -> Path:
base = profile_dir(profile)
candidate = base / "project-knowledge"
return candidate if candidate.exists() else ROOT / "project-knowledge"
return aiw_profile.knowledge_dir(profile, root=ROOT)
def inbox_dir(profile: str) -> Path:
base = profile_dir(profile)
candidate = base / "ai" / "inbox"
return candidate if candidate.exists() else ROOT / "ai" / "inbox"
return aiw_profile.inbox_dir(profile, root=ROOT)
def rel(path: Path) -> str:
    """Render *path* for tool output, relative to this module's ``ROOT``.

    Delegates to ``aiw_profile.relative_to_root`` so the server reports paths
    the same way as the shared profile helper.
    """
    shown = aiw_profile.relative_to_root(path, root=ROOT)
    return shown
def mattermost_mirror_dir(profile: str) -> Path:
@@ -257,7 +257,7 @@ def project_current_context(args: dict[str, Any]) -> dict[str, Any]:
result = []
for path in files:
if path.is_file():
result.append({"path": str(path.relative_to(ROOT)), "text": path.read_text(encoding="utf-8", errors="replace")})
result.append({"path": rel(path), "text": path.read_text(encoding="utf-8", errors="replace")})
return tool_result({"profile": profile, "canonical": True, "files": result})
@@ -270,8 +270,8 @@ def project_search_memory(args: dict[str, Any]) -> dict[str, Any]:
base = knowledge_dir(profile)
matches: list[dict[str, Any]] = []
for path in sorted(base.rglob("*.md")):
rel = path.relative_to(base)
if str(rel).startswith("09-templates/"):
relative_to_base = path.relative_to(base)
if str(relative_to_base).startswith("09-templates/"):
continue
text = path.read_text(encoding="utf-8", errors="replace")
lowered = text.lower()
@@ -280,18 +280,18 @@ def project_search_memory(args: dict[str, Any]) -> dict[str, Any]:
continue
start = max(0, index - 220)
end = min(len(text), index + len(query) + 220)
matches.append({"path": str(path.relative_to(ROOT)), "snippet": text[start:end].strip()})
matches.append({"path": rel(path), "snippet": text[start:end].strip()})
if len(matches) >= limit:
break
return tool_result({"profile": profile, "canonical": True, "query": query, "matches": matches})
def index_path(profile: str) -> Path:
return INDEX_ROOT / profile / "project-knowledge.jsonl"
return aiw_profile.index_dir(profile, root=ROOT) / "project-knowledge.jsonl"
def index_manifest_path(profile: str) -> Path:
return INDEX_ROOT / profile / "manifest.json"
return aiw_profile.index_dir(profile, root=ROOT) / "manifest.json"
def search_tokens(text: str) -> set[str]:

View File

@@ -175,7 +175,7 @@ class ContextMCPTests(unittest.TestCase):
}) + "\n", encoding="utf-8")
manifest.write_text(json.dumps({"chunk_count": 1}), encoding="utf-8")
with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", root / ".aiw" / "indexes"):
with patch.object(server, "ROOT", root):
result = server.memory_hybrid_search({"profile": "fidelity", "query": "dismissal lifecycle"})["structuredContent"]
self.assertTrue(result["index_available"])
@@ -189,13 +189,31 @@ class ContextMCPTests(unittest.TestCase):
real.parent.mkdir(parents=True)
real.write_text("Important XFlow context", encoding="utf-8")
with patch.object(server, "ROOT", root), patch.object(server, "INDEX_ROOT", root / ".aiw" / "indexes"):
with patch.object(server, "ROOT", root):
result = server.memory_hybrid_search({"profile": "fidelity", "query": "XFlow"})["structuredContent"]
self.assertFalse(result["index_available"])
self.assertEqual(result["source"], "live-project-knowledge-fallback")
self.assertEqual(len(result["matches"]), 1)
def test_project_context_uses_workspace_json_paths(self) -> None:
    """project_current_context reads files from the configured knowledge_dir."""
    with tempfile.TemporaryDirectory() as scratch:
        workspace = Path(scratch)
        settings_file = workspace / "profiles" / "demo" / "workspace.json"
        current_dir = workspace / "workspaces" / "demo" / "project-knowledge" / "01-current"

        settings_file.parent.mkdir(parents=True)
        current_dir.mkdir(parents=True)
        settings_file.write_text(
            json.dumps({"knowledge_dir": "workspaces/demo/project-knowledge"}),
            encoding="utf-8",
        )
        (current_dir / "current-work.md").write_text("# Current\nDemo current work", encoding="utf-8")
        (current_dir / "work-items.md").write_text("# Work Items", encoding="utf-8")

        with patch.object(server, "ROOT", workspace):
            response = server.project_current_context({"profile": "demo"})
        result = response["structuredContent"]

        first_file = result["files"][0]
        self.assertEqual(first_file["path"], "workspaces/demo/project-knowledge/01-current/current-work.md")
        self.assertIn("Demo current work", first_file["text"])
def test_previous_workday_skips_weekend(self) -> None:
monday = date(2026, 5, 18)