feat: enhance profile path resolution and add example profiles for better project adaptability
This commit is contained in:
@@ -12,6 +12,7 @@ import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
@@ -20,11 +21,13 @@ from typing import Any
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
INDEX_ROOT = ROOT / ".aiw" / "indexes"
|
||||
DEFAULT_PROFILE = "fidelity"
|
||||
MAX_CHARS = 1800
|
||||
OVERLAP_CHARS = 180
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
import profile as aiw_profile # noqa: E402
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Chunk:
|
||||
@@ -37,15 +40,15 @@ class Chunk:
|
||||
|
||||
|
||||
def project_knowledge_dir(profile: str) -> Path:
|
||||
profile_base = ROOT / "profiles" / profile
|
||||
candidate = profile_base / "project-knowledge"
|
||||
if candidate.exists():
|
||||
return candidate
|
||||
return ROOT / "project-knowledge"
|
||||
return aiw_profile.knowledge_dir(profile, root=ROOT)
|
||||
|
||||
|
||||
def index_dir(profile: str) -> Path:
|
||||
return INDEX_ROOT / profile
|
||||
return aiw_profile.index_dir(profile, root=ROOT)
|
||||
|
||||
|
||||
def rel(path: Path) -> str:
|
||||
return aiw_profile.relative_to_root(path, root=ROOT)
|
||||
|
||||
|
||||
def index_path(profile: str) -> Path:
|
||||
@@ -120,13 +123,13 @@ def build_chunks(profile: str) -> list[Chunk]:
|
||||
chunks: list[Chunk] = []
|
||||
for path in iter_markdown_files(base):
|
||||
raw = path.read_text(encoding="utf-8", errors="replace")
|
||||
rel = str(path.relative_to(ROOT))
|
||||
rel_path = rel(path)
|
||||
digest = hashlib.sha256(raw.encode("utf-8", errors="replace")).hexdigest()
|
||||
mtime = path.stat().st_mtime
|
||||
for section_index, (heading, section) in enumerate(split_sections(raw)):
|
||||
for chunk_index, chunk in enumerate(chunk_text(section)):
|
||||
chunk_digest = hashlib.sha256(f"{rel}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16]
|
||||
chunks.append(Chunk(chunk_id=chunk_digest, path=rel, heading=heading, text=chunk, mtime=mtime, sha256=digest))
|
||||
chunk_digest = hashlib.sha256(f"{rel_path}\n{section_index}\n{chunk_index}\n{chunk}".encode("utf-8")).hexdigest()[:16]
|
||||
chunks.append(Chunk(chunk_id=chunk_digest, path=rel_path, heading=heading, text=chunk, mtime=mtime, sha256=digest))
|
||||
return chunks
|
||||
|
||||
|
||||
@@ -140,14 +143,14 @@ def write_index(profile: str) -> dict[str, Any]:
|
||||
files = sorted({chunk.path for chunk in chunks})
|
||||
manifest = {
|
||||
"profile": profile,
|
||||
"source": str(project_knowledge_dir(profile).relative_to(ROOT)),
|
||||
"source": rel(project_knowledge_dir(profile)),
|
||||
"canonical": False,
|
||||
"derived_from": "project-knowledge",
|
||||
"index_type": "lexical-markdown-chunks",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"file_count": len(files),
|
||||
"chunk_count": len(chunks),
|
||||
"index_path": str(index_path(profile).relative_to(ROOT)),
|
||||
"index_path": rel(index_path(profile)),
|
||||
}
|
||||
manifest_path(profile).write_text(json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
return manifest
|
||||
@@ -225,7 +228,7 @@ def search_index(profile: str, query: str, limit: int = 10) -> dict[str, Any]:
|
||||
def status(profile: str) -> dict[str, Any]:
|
||||
manifest_file = manifest_path(profile)
|
||||
if not manifest_file.is_file():
|
||||
return {"profile": profile, "indexed": False, "index_path": str(index_path(profile).relative_to(ROOT))}
|
||||
return {"profile": profile, "indexed": False, "index_path": rel(index_path(profile))}
|
||||
manifest = json.loads(manifest_file.read_text(encoding="utf-8"))
|
||||
path = index_path(profile)
|
||||
manifest["indexed"] = path.is_file()
|
||||
|
||||
Reference in New Issue
Block a user