feat: implement AI Workspace service manager with lifecycle control for local services
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
This directory contains helpers that automate:
|
||||
|
||||
- AI Workspace local service lifecycle
|
||||
- context aggregation
|
||||
- canonical memory access
|
||||
- standup generation
|
||||
@@ -15,6 +16,18 @@ The project-agnostic memory interface lives in:
|
||||
|
||||
Recommended commands:
|
||||
|
||||
```bash
|
||||
python3 scripts/aiw/services.py status --profile fidelity
|
||||
python3 scripts/aiw/services.py doctor --profile fidelity
|
||||
python3 scripts/aiw/services.py start --profile fidelity
|
||||
```
|
||||
|
||||
The service manager reads `profiles/<profile>/services.json` and manages local
|
||||
services such as the Mattermost proxy mirror and Photo Inbox. Runtime PID/state
|
||||
and logs stay under `.aiw/runtime/`.
|
||||
|
||||
Recommended memory commands:
|
||||
|
||||
```bash
|
||||
bash scripts/memory/memory.sh health
|
||||
bash scripts/memory/memory.sh search "PDIAP-15765"
|
||||
|
||||
37
scripts/aiw/README.md
Normal file
37
scripts/aiw/README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# AI Workspace Service Manager
|
||||
|
||||
The service manager is the local lifecycle layer for AI Workspace services.
|
||||
|
||||
It reads `profiles/<profile>/services.json`, starts/stops enabled services, records logs under `.aiw/runtime/logs/`, and keeps PID/state files under `.aiw/runtime/`.
|
||||
|
||||
## Common commands
|
||||
|
||||
```bash
|
||||
python3 scripts/aiw/services.py status --profile fidelity
|
||||
python3 scripts/aiw/services.py doctor --profile fidelity
|
||||
python3 scripts/aiw/services.py start --profile fidelity
|
||||
python3 scripts/aiw/services.py stop --profile fidelity
|
||||
python3 scripts/aiw/services.py logs mattermost-proxy --profile fidelity
|
||||
```
|
||||
|
||||
Start a subset by group:
|
||||
|
||||
```bash
|
||||
python3 scripts/aiw/services.py start --profile fidelity --group communication
|
||||
python3 scripts/aiw/services.py start --profile fidelity --group inbox
|
||||
```
|
||||
|
||||
## Current Fidelity services
|
||||
|
||||
- `mattermost-proxy`: runs the local Mattermost proxy mirror.
|
||||
- `mattermost-desktop`: launches Mattermost Desktop through the proxy.
|
||||
- `photo-inbox`: runs the local HTTP photo receiver.
|
||||
- `aiw-context-mcp`: reserved placeholder for the future read-only context MCP server.
|
||||
|
||||
The service manager unifies startup and status. It does not move capture behavior into the MCP.
|
||||
|
||||
## Tests
|
||||
|
||||
```bash
|
||||
python3 scripts/aiw/test_services.py
|
||||
```
|
||||
370
scripts/aiw/services.py
Normal file
370
scripts/aiw/services.py
Normal file
@@ -0,0 +1,370 @@
|
||||
#!/usr/bin/env python3
|
||||
"""AI Workspace local service manager.
|
||||
|
||||
This is the profile-aware lifecycle layer for local capture/query services. It is
|
||||
intentionally small and dependency-free so it can run before the future desktop
|
||||
UI or MCP server exists.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
RUNTIME_DIR = ROOT / ".aiw" / "runtime"
|
||||
PID_DIR = RUNTIME_DIR / "pids"
|
||||
LOG_DIR = RUNTIME_DIR / "logs"
|
||||
STATE_DIR = RUNTIME_DIR / "state"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ServiceRef:
|
||||
name: str
|
||||
config: dict[str, Any]
|
||||
|
||||
|
||||
def ensure_runtime() -> None:
|
||||
for path in [PID_DIR, LOG_DIR, STATE_DIR]:
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def manifest_path(profile: str) -> Path:
|
||||
return ROOT / "profiles" / profile / "services.json"
|
||||
|
||||
|
||||
def load_manifest(profile: str) -> dict[str, Any]:
|
||||
path = manifest_path(profile)
|
||||
if not path.is_file():
|
||||
raise SystemExit(f"services manifest not found: {path}")
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def service_items(manifest: dict[str, Any], include_disabled: bool = False) -> list[ServiceRef]:
|
||||
services = manifest.get("services") or {}
|
||||
refs: list[ServiceRef] = []
|
||||
for name, config in services.items():
|
||||
if not include_disabled and not config.get("enabled", True):
|
||||
continue
|
||||
refs.append(ServiceRef(name, config))
|
||||
return refs
|
||||
|
||||
|
||||
def select_services(manifest: dict[str, Any], names: list[str], group: str | None, include_disabled: bool = False) -> list[ServiceRef]:
|
||||
refs = service_items(manifest, include_disabled=include_disabled)
|
||||
by_name = {ref.name: ref for ref in refs}
|
||||
if names:
|
||||
selected: list[ServiceRef] = []
|
||||
missing: list[str] = []
|
||||
for name in names:
|
||||
ref = by_name.get(name)
|
||||
if ref is None:
|
||||
missing.append(name)
|
||||
else:
|
||||
selected.append(ref)
|
||||
if missing:
|
||||
raise SystemExit("unknown or disabled service(s): " + ", ".join(missing))
|
||||
return selected
|
||||
if group:
|
||||
return [ref for ref in refs if group in (ref.config.get("groups") or [])]
|
||||
return refs
|
||||
|
||||
|
||||
def pid_path(profile: str, service: str) -> Path:
|
||||
return PID_DIR / profile / f"{service}.pid"
|
||||
|
||||
|
||||
def state_path(profile: str, service: str) -> Path:
|
||||
return STATE_DIR / profile / f"{service}.json"
|
||||
|
||||
|
||||
def log_path(profile: str, service: str) -> Path:
|
||||
return LOG_DIR / profile / f"{service}.log"
|
||||
|
||||
|
||||
def read_pid(profile: str, service: str) -> int | None:
|
||||
path = pid_path(profile, service)
|
||||
if not path.is_file():
|
||||
return None
|
||||
try:
|
||||
return int(path.read_text(encoding="utf-8").strip())
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def is_running(pid: int | None) -> bool:
|
||||
if not pid or pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
except PermissionError:
|
||||
return True
|
||||
try:
|
||||
result = subprocess.run(["ps", "-o", "stat=", "-p", str(pid)], check=False, capture_output=True, text=True)
|
||||
if result.returncode != 0:
|
||||
return False
|
||||
state = result.stdout.strip()
|
||||
if state.startswith("Z"):
|
||||
return False
|
||||
except OSError:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def write_state(profile: str, service: str, state: dict[str, Any]) -> None:
|
||||
path = state_path(profile, service)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(state, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def read_state(profile: str, service: str) -> dict[str, Any]:
|
||||
path = state_path(profile, service)
|
||||
if not path.is_file():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
|
||||
|
||||
def health_ok(config: dict[str, Any], timeout: float = 1.0) -> tuple[bool | None, str]:
|
||||
health = config.get("health") or {}
|
||||
kind = health.get("type")
|
||||
if not kind:
|
||||
return None, "no health check"
|
||||
if kind == "tcp":
|
||||
host = str(health.get("host") or "127.0.0.1")
|
||||
port = int(health.get("port") or 0)
|
||||
try:
|
||||
with socket.create_connection((host, port), timeout=timeout):
|
||||
return True, f"tcp {host}:{port} ok"
|
||||
except OSError as error:
|
||||
return False, f"tcp {host}:{port} failed: {error}"
|
||||
if kind == "http":
|
||||
url = str(health.get("url") or "")
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=timeout) as response:
|
||||
ok = 200 <= int(response.status) < 400
|
||||
return ok, f"http {url} status {response.status}"
|
||||
except (urllib.error.URLError, TimeoutError, OSError) as error:
|
||||
return False, f"http {url} failed: {error}"
|
||||
return None, f"unknown health type: {kind}"
|
||||
|
||||
|
||||
def wait_for_health(config: dict[str, Any], seconds: float = 8.0) -> tuple[bool | None, str]:
|
||||
deadline = time.time() + seconds
|
||||
last: tuple[bool | None, str] = (None, "no health check")
|
||||
while time.time() <= deadline:
|
||||
last = health_ok(config)
|
||||
if last[0] is True or last[0] is None:
|
||||
return last
|
||||
time.sleep(0.4)
|
||||
return last
|
||||
|
||||
|
||||
def start_service(profile: str, ref: ServiceRef, manifest: dict[str, Any], started: set[str]) -> None:
|
||||
if ref.name in started:
|
||||
return
|
||||
for dependency in ref.config.get("depends_on") or []:
|
||||
dep_config = (manifest.get("services") or {}).get(dependency)
|
||||
if not dep_config or not dep_config.get("enabled", True):
|
||||
raise SystemExit(f"{ref.name} depends on missing/disabled service: {dependency}")
|
||||
start_service(profile, ServiceRef(dependency, dep_config), manifest, started)
|
||||
|
||||
kind = ref.config.get("kind", "process")
|
||||
command = ref.config.get("command") or []
|
||||
if not command:
|
||||
raise SystemExit(f"{ref.name} has no command")
|
||||
|
||||
if kind != "app-launcher":
|
||||
pid = read_pid(profile, ref.name)
|
||||
if is_running(pid):
|
||||
ok, detail = health_ok(ref.config)
|
||||
status = "running" if ok is not False else "running unhealthy"
|
||||
print(f"{ref.name}: {status} ({detail})")
|
||||
started.add(ref.name)
|
||||
return
|
||||
ok, detail = health_ok(ref.config)
|
||||
if ok is True:
|
||||
print(f"{ref.name}: externally running ({detail}); not starting duplicate")
|
||||
started.add(ref.name)
|
||||
return
|
||||
|
||||
path = log_path(profile, ref.name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with path.open("ab") as log_file:
|
||||
log_file.write(f"\n--- start {time.strftime('%Y-%m-%d %H:%M:%S')} ---\n".encode("utf-8"))
|
||||
if kind == "app-launcher":
|
||||
result = subprocess.run(command, cwd=ROOT, stdout=log_file, stderr=subprocess.STDOUT, check=False)
|
||||
write_state(profile, ref.name, {"last_launch_exit": result.returncode, "launched_at": time.time()})
|
||||
print(f"{ref.name}: launched (exit {result.returncode})")
|
||||
else:
|
||||
process = subprocess.Popen(command, cwd=ROOT, stdout=log_file, stderr=subprocess.STDOUT, start_new_session=True)
|
||||
pid_file = pid_path(profile, ref.name)
|
||||
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
pid_file.write_text(str(process.pid) + "\n", encoding="utf-8")
|
||||
ok, detail = wait_for_health(ref.config)
|
||||
state = "started" if ok is not False else "started but health check failed"
|
||||
write_state(profile, ref.name, {"pid": process.pid, "started_at": time.time(), "health": detail})
|
||||
print(f"{ref.name}: {state} pid={process.pid} ({detail})")
|
||||
started.add(ref.name)
|
||||
|
||||
|
||||
def stop_service(profile: str, ref: ServiceRef) -> None:
|
||||
kind = ref.config.get("kind", "process")
|
||||
if kind == "app-launcher":
|
||||
print(f"{ref.name}: launcher service has no managed process")
|
||||
return
|
||||
pid = read_pid(profile, ref.name)
|
||||
if not is_running(pid):
|
||||
ok, detail = health_ok(ref.config)
|
||||
if ok is True:
|
||||
print(f"{ref.name}: externally running ({detail}); no managed pid to stop")
|
||||
return
|
||||
print(f"{ref.name}: not running")
|
||||
pid_path(profile, ref.name).unlink(missing_ok=True)
|
||||
return
|
||||
assert pid is not None
|
||||
try:
|
||||
os.killpg(pid, signal.SIGTERM)
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
except PermissionError:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
deadline = time.time() + 5
|
||||
while time.time() < deadline and is_running(pid):
|
||||
time.sleep(0.2)
|
||||
if is_running(pid):
|
||||
try:
|
||||
os.killpg(pid, signal.SIGKILL)
|
||||
except Exception:
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
print(f"{ref.name}: killed pid={pid}")
|
||||
else:
|
||||
print(f"{ref.name}: stopped")
|
||||
pid_path(profile, ref.name).unlink(missing_ok=True)
|
||||
write_state(profile, ref.name, {"stopped_at": time.time()})
|
||||
|
||||
|
||||
def status_service(profile: str, ref: ServiceRef) -> None:
|
||||
enabled = ref.config.get("enabled", True)
|
||||
kind = ref.config.get("kind", "process")
|
||||
if not enabled:
|
||||
print(f"{ref.name}: disabled")
|
||||
return
|
||||
if kind == "app-launcher":
|
||||
state = read_state(profile, ref.name)
|
||||
launched = state.get("launched_at")
|
||||
suffix = f"last launched {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(launched))}" if launched else "not launched by manager"
|
||||
print(f"{ref.name}: launcher ({suffix})")
|
||||
return
|
||||
pid = read_pid(profile, ref.name)
|
||||
running = is_running(pid)
|
||||
ok, detail = health_ok(ref.config)
|
||||
if running and ok is not False:
|
||||
label = "running"
|
||||
elif running:
|
||||
label = "unhealthy"
|
||||
elif ok is True:
|
||||
label = "externally running"
|
||||
else:
|
||||
label = "stopped"
|
||||
print(f"{ref.name}: {label} pid={pid or '-'} ({detail})")
|
||||
|
||||
|
||||
def tail_log(profile: str, service: str, lines: int) -> None:
|
||||
path = log_path(profile, service)
|
||||
if not path.is_file():
|
||||
print(f"no log file: {path}")
|
||||
return
|
||||
content = path.read_text(encoding="utf-8", errors="replace").splitlines()
|
||||
for line in content[-lines:]:
|
||||
print(line)
|
||||
|
||||
|
||||
def run_doctor(profile: str, manifest: dict[str, Any]) -> None:
|
||||
print(f"AI Workspace doctor profile={profile}")
|
||||
print(f"workspace: {ROOT}")
|
||||
print(f"manifest: {manifest_path(profile)}")
|
||||
ensure_runtime()
|
||||
print(f"runtime: {RUNTIME_DIR}")
|
||||
for ref in service_items(manifest, include_disabled=True):
|
||||
enabled = ref.config.get("enabled", True)
|
||||
command = ref.config.get("command") or []
|
||||
first = command[0] if command else ""
|
||||
command_path = ROOT / first if first and not os.path.isabs(first) else Path(first) if first else None
|
||||
command_ok = bool(command_path and (command_path.exists() or shutil_which(first)))
|
||||
ok, detail = health_ok(ref.config)
|
||||
enabled_text = "enabled" if enabled else "disabled"
|
||||
health_text = detail if ok is not None else "no health check"
|
||||
print(f"- {ref.name}: {enabled_text}; command={'ok' if command_ok else 'missing'}; {health_text}")
|
||||
|
||||
|
||||
def shutil_which(command: str) -> str | None:
|
||||
paths = os.environ.get("PATH", "").split(os.pathsep)
|
||||
for directory in paths:
|
||||
candidate = Path(directory) / command
|
||||
if candidate.exists() and os.access(candidate, os.X_OK):
|
||||
return str(candidate)
|
||||
return None
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("action", choices=["start", "stop", "restart", "status", "logs", "doctor"])
|
||||
parser.add_argument("services", nargs="*", help="Optional service names for start/stop/restart/status/logs.")
|
||||
parser.add_argument("--profile", default=os.getenv("AIW_PROJECT_PROFILE", "fidelity"))
|
||||
parser.add_argument("--group", default="", help="Start/stop/status services in a group, e.g. communication or inbox.")
|
||||
parser.add_argument("--lines", type=int, default=80, help="Number of log lines for logs action.")
|
||||
args = parser.parse_args()
|
||||
|
||||
ensure_runtime()
|
||||
manifest = load_manifest(args.profile)
|
||||
|
||||
if args.action == "doctor":
|
||||
run_doctor(args.profile, manifest)
|
||||
return
|
||||
|
||||
include_disabled = args.action == "status"
|
||||
refs = select_services(manifest, args.services, args.group or None, include_disabled=include_disabled)
|
||||
|
||||
if args.action == "start":
|
||||
started: set[str] = set()
|
||||
for ref in refs:
|
||||
start_service(args.profile, ref, manifest, started)
|
||||
elif args.action == "stop":
|
||||
for ref in reversed(refs):
|
||||
stop_service(args.profile, ref)
|
||||
elif args.action == "restart":
|
||||
for ref in reversed(refs):
|
||||
stop_service(args.profile, ref)
|
||||
started = set()
|
||||
for ref in refs:
|
||||
start_service(args.profile, ref, manifest, started)
|
||||
elif args.action == "status":
|
||||
for ref in refs:
|
||||
status_service(args.profile, ref)
|
||||
elif args.action == "logs":
|
||||
if not args.services:
|
||||
raise SystemExit("logs requires at least one service name")
|
||||
for service in args.services:
|
||||
print(f"==> {service} <==")
|
||||
tail_log(args.profile, service, args.lines)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
123
scripts/aiw/test_services.py
Normal file
123
scripts/aiw/test_services.py
Normal file
@@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import io
|
||||
import socket
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from contextlib import redirect_stdout
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
SERVICES_PATH = Path(__file__).with_name("services.py")
|
||||
SPEC = importlib.util.spec_from_file_location("aiw_services", SERVICES_PATH)
|
||||
services = importlib.util.module_from_spec(SPEC)
|
||||
assert SPEC.loader is not None
|
||||
sys.modules[SPEC.name] = services
|
||||
SPEC.loader.exec_module(services)
|
||||
|
||||
|
||||
def sample_manifest() -> dict:
|
||||
return {
|
||||
"services": {
|
||||
"alpha": {
|
||||
"enabled": True,
|
||||
"kind": "process",
|
||||
"command": ["python3", "-c", "import time; time.sleep(60)"],
|
||||
"groups": ["core"],
|
||||
},
|
||||
"beta": {
|
||||
"enabled": True,
|
||||
"kind": "process",
|
||||
"command": ["python3", "-c", "import time; time.sleep(60)"],
|
||||
"groups": ["capture"],
|
||||
"depends_on": ["alpha"],
|
||||
},
|
||||
"disabled": {
|
||||
"enabled": False,
|
||||
"kind": "process",
|
||||
"command": ["python3", "-c", "import time; time.sleep(60)"],
|
||||
"groups": ["core"],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class ServiceManagerTests(unittest.TestCase):
|
||||
def test_select_services_excludes_disabled_by_default(self) -> None:
|
||||
selected = services.select_services(sample_manifest(), names=[], group=None)
|
||||
|
||||
self.assertEqual([item.name for item in selected], ["alpha", "beta"])
|
||||
|
||||
def test_select_services_can_include_disabled_for_status(self) -> None:
|
||||
selected = services.select_services(sample_manifest(), names=[], group=None, include_disabled=True)
|
||||
|
||||
self.assertEqual([item.name for item in selected], ["alpha", "beta", "disabled"])
|
||||
|
||||
def test_select_services_filters_by_group(self) -> None:
|
||||
selected = services.select_services(sample_manifest(), names=[], group="capture")
|
||||
|
||||
self.assertEqual([item.name for item in selected], ["beta"])
|
||||
|
||||
def test_health_ok_tcp_reports_open_port(self) -> None:
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
|
||||
server.bind(("127.0.0.1", 0))
|
||||
server.listen(1)
|
||||
port = server.getsockname()[1]
|
||||
|
||||
ok, detail = services.health_ok({"health": {"type": "tcp", "host": "127.0.0.1", "port": port}})
|
||||
|
||||
self.assertTrue(ok)
|
||||
self.assertIn(f"127.0.0.1:{port}", detail)
|
||||
|
||||
def test_read_pid_ignores_invalid_pid_file(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
pid_dir = Path(tmp) / "pids"
|
||||
target = pid_dir / "fidelity"
|
||||
target.mkdir(parents=True)
|
||||
(target / "alpha.pid").write_text("not-a-pid\n", encoding="utf-8")
|
||||
|
||||
with patch.object(services, "PID_DIR", pid_dir):
|
||||
self.assertIsNone(services.read_pid("fidelity", "alpha"))
|
||||
|
||||
def test_start_and_stop_managed_process(self) -> None:
|
||||
manifest = {
|
||||
"services": {
|
||||
"sleeper": {
|
||||
"enabled": True,
|
||||
"kind": "process",
|
||||
"command": [sys.executable, "-c", "import time; time.sleep(60)"],
|
||||
"restart": "never",
|
||||
}
|
||||
}
|
||||
}
|
||||
ref = services.ServiceRef("sleeper", manifest["services"]["sleeper"])
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
root = Path(tmp)
|
||||
with patch.object(services, "PID_DIR", root / "pids"), \
|
||||
patch.object(services, "LOG_DIR", root / "logs"), \
|
||||
patch.object(services, "STATE_DIR", root / "state"):
|
||||
started: set[str] = set()
|
||||
services.ensure_runtime()
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", ResourceWarning)
|
||||
with redirect_stdout(io.StringIO()):
|
||||
services.start_service("test", ref, manifest, started)
|
||||
|
||||
pid = services.read_pid("test", "sleeper")
|
||||
self.assertTrue(services.is_running(pid))
|
||||
self.assertIn("sleeper", started)
|
||||
|
||||
with redirect_stdout(io.StringIO()):
|
||||
services.stop_service("test", ref)
|
||||
self.assertFalse(services.is_running(pid))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user