feat: enhance service manager with manifest validation, log rotation, and command existence checks
This commit is contained in:
@@ -28,6 +28,9 @@ RUNTIME_DIR = ROOT / ".aiw" / "runtime"
|
||||
PID_DIR = RUNTIME_DIR / "pids"
|
||||
LOG_DIR = RUNTIME_DIR / "logs"
|
||||
STATE_DIR = RUNTIME_DIR / "state"
|
||||
DEFAULT_LOG_MAX_BYTES = 5 * 1024 * 1024
|
||||
DEFAULT_LOG_BACKUPS = 3
|
||||
DEFAULT_STOP_TIMEOUT_SECONDS = 5.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -52,6 +55,30 @@ def load_manifest(profile: str) -> dict[str, Any]:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def validate_manifest(manifest: dict[str, Any]) -> list[str]:
    """Check a services manifest and collect human-readable problems.

    Validation never raises on malformed values: wrong-typed ``kind``,
    ``restart`` or ``depends_on`` entries (for example a JSON array where a
    string is expected) are reported as errors rather than crashing.

    Args:
        manifest: Parsed manifest; expected to hold a ``services`` mapping
            from service name to a per-service config object.

    Returns:
        A list of error strings, each prefixed with the offending service
        name; empty when nothing is wrong. When ``services`` is missing or
        is not a dict, a single-element list is returned immediately.
    """
    services = manifest.get("services")
    if not isinstance(services, dict):
        return ["manifest must contain a services object"]

    supported_kinds = {"process", "app-launcher", "mcp"}
    supported_restarts = {"never", "on-failure", "always"}
    errors: list[str] = []

    for name, config in services.items():
        if not isinstance(config, dict):
            errors.append(f"{name}: service config must be an object")
            continue

        # Only enabled services (the default) need something to run.
        command = config.get("command")
        if config.get("enabled", True) and (not isinstance(command, list) or not command):
            errors.append(f"{name}: enabled services require a non-empty command list")

        # The isinstance guard matters: an unhashable value such as [] or {}
        # would make the set membership test itself raise TypeError.
        kind = config.get("kind", "process")
        if not isinstance(kind, str) or kind not in supported_kinds:
            errors.append(f"{name}: unsupported kind {kind!r}")

        restart = config.get("restart", "never")
        if not isinstance(restart, str) or restart not in supported_restarts:
            errors.append(f"{name}: unsupported restart policy {restart!r}")

        depends_on = config.get("depends_on") or []
        if isinstance(depends_on, (str, bytes)) or not hasattr(depends_on, "__iter__"):
            # A bare string would otherwise be walked character by character,
            # and a scalar would fail to iterate at all.
            errors.append(f"{name}: depends_on must be a list of service names")
            continue
        for dependency in depends_on:
            # Non-string entries can never name a service and may be unhashable.
            if not isinstance(dependency, str) or dependency not in services:
                errors.append(f"{name}: depends on unknown service {dependency!r}")

    return errors
|
||||
|
||||
|
||||
def service_items(manifest: dict[str, Any], include_disabled: bool = False) -> list[ServiceRef]:
|
||||
services = manifest.get("services") or {}
|
||||
refs: list[ServiceRef] = []
|
||||
@@ -94,6 +121,34 @@ def log_path(profile: str, service: str) -> Path:
|
||||
return LOG_DIR / profile / f"{service}.log"
|
||||
|
||||
|
||||
def resolve_workspace_path(raw: str) -> Path:
    """Turn a manifest path string into a concrete path.

    A leading ``~`` is expanded first. Absolute results are returned as-is;
    anything else is anchored at the module-level ``ROOT`` directory.
    """
    expanded = Path(raw).expanduser()
    if expanded.is_absolute():
        return expanded
    return ROOT / expanded
|
||||
|
||||
|
||||
def command_exists(command: str) -> bool:
    """Report whether *command* refers to something that can be executed.

    Args:
        command: Either a path (absolute, or containing ``/``) or a bare
            command name.

    Returns:
        ``False`` for an empty string. For path-like commands, ``True`` only
        when the path, resolved through ``resolve_workspace_path``, is a
        regular file the current user may execute. For bare names, whether
        the file's ``shutil_which`` helper finds the command.
    """
    if not command:
        return False

    if Path(command).is_absolute() or "/" in command:
        resolved = resolve_workspace_path(command)
        # is_file() rather than exists(): directories normally carry the
        # execute (search) bit, so a directory path would otherwise be
        # reported as a runnable command.
        return resolved.is_file() and os.access(resolved, os.X_OK)

    return shutil_which(command) is not None
|
||||
|
||||
|
||||
def rotate_log_if_needed(path: Path, max_bytes: int = DEFAULT_LOG_MAX_BYTES, backups: int = DEFAULT_LOG_BACKUPS) -> None:
    """Shift a log file into numbered backups once it reaches *max_bytes*.

    Nothing happens when either limit is non-positive, when the log does not
    exist, or when it is still smaller than *max_bytes*. Otherwise the
    highest-numbered backup is deleted, each remaining ``<log>.N`` is renamed
    to ``<log>.N+1`` (highest index first so nothing is overwritten), and the
    live log becomes ``<log>.1``.

    Args:
        path: The live log file.
        max_bytes: Size at or above which rotation is triggered.
        backups: Number of numbered backup files to keep.
    """
    rotation_disabled = max_bytes <= 0 or backups <= 0
    if rotation_disabled or not path.exists():
        return
    if path.stat().st_size < max_bytes:
        return

    def numbered(index: int) -> Path:
        # Backup names append ".<index>" after the log's existing suffix.
        return path.with_suffix(f"{path.suffix}.{index}")

    # Drop the oldest backup so the shift below has a free slot.
    numbered(backups).unlink(missing_ok=True)

    index = backups - 1
    while index > 0:
        older = numbered(index)
        if older.exists():
            older.replace(numbered(index + 1))
        index -= 1

    path.replace(numbered(1))
|
||||
|
||||
|
||||
def read_pid(profile: str, service: str) -> int | None:
|
||||
path = pid_path(profile, service)
|
||||
if not path.is_file():
|
||||
@@ -189,6 +244,8 @@ def start_service(profile: str, ref: ServiceRef, manifest: dict[str, Any], start
|
||||
command = ref.config.get("command") or []
|
||||
if not command:
|
||||
raise SystemExit(f"{ref.name} has no command")
|
||||
if not command_exists(str(command[0])):
|
||||
raise SystemExit(f"{ref.name} command is not executable or not found: {command[0]}")
|
||||
|
||||
if kind != "app-launcher":
|
||||
pid = read_pid(profile, ref.name)
|
||||
@@ -206,6 +263,7 @@ def start_service(profile: str, ref: ServiceRef, manifest: dict[str, Any], start
|
||||
|
||||
path = log_path(profile, ref.name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
rotate_log_if_needed(path, int(ref.config.get("log_max_bytes", DEFAULT_LOG_MAX_BYTES)), int(ref.config.get("log_backups", DEFAULT_LOG_BACKUPS)))
|
||||
with path.open("ab") as log_file:
|
||||
log_file.write(f"\n--- start {time.strftime('%Y-%m-%d %H:%M:%S')} ---\n".encode("utf-8"))
|
||||
if kind == "app-launcher":
|
||||
@@ -245,7 +303,7 @@ def stop_service(profile: str, ref: ServiceRef) -> None:
|
||||
pass
|
||||
except PermissionError:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
deadline = time.time() + 5
|
||||
deadline = time.time() + float(ref.config.get("stop_timeout_seconds", DEFAULT_STOP_TIMEOUT_SECONDS))
|
||||
while time.time() < deadline and is_running(pid):
|
||||
time.sleep(0.2)
|
||||
if is_running(pid):
|
||||
@@ -302,16 +360,36 @@ def run_doctor(profile: str, manifest: dict[str, Any]) -> None:
|
||||
print(f"manifest: {manifest_path(profile)}")
|
||||
ensure_runtime()
|
||||
print(f"runtime: {RUNTIME_DIR}")
|
||||
errors = validate_manifest(manifest)
|
||||
if errors:
|
||||
print("manifest: invalid")
|
||||
for error in errors:
|
||||
print(f" ! {error}")
|
||||
else:
|
||||
print("manifest: ok")
|
||||
for ref in service_items(manifest, include_disabled=True):
|
||||
enabled = ref.config.get("enabled", True)
|
||||
command = ref.config.get("command") or []
|
||||
first = command[0] if command else ""
|
||||
command_path = ROOT / first if first and not os.path.isabs(first) else Path(first) if first else None
|
||||
command_ok = bool(command_path and (command_path.exists() or shutil_which(first)))
|
||||
ok, detail = health_ok(ref.config)
|
||||
command_ok = command_exists(first) if first else False
|
||||
enabled_text = "enabled" if enabled else "disabled"
|
||||
if not enabled:
|
||||
print(f"- {ref.name}: {enabled_text}; command={'ok' if command_ok else 'missing'}; health skipped")
|
||||
continue
|
||||
ok, detail = health_ok(ref.config)
|
||||
health_text = detail if ok is not None else "no health check"
|
||||
print(f"- {ref.name}: {enabled_text}; command={'ok' if command_ok else 'missing'}; {health_text}")
|
||||
doctor = ref.config.get("doctor") or {}
|
||||
for command_name in doctor.get("required_commands") or []:
|
||||
print(f" required command {command_name}: {'ok' if command_exists(command_name) else 'missing'}")
|
||||
for command_name in doctor.get("optional_commands") or []:
|
||||
print(f" optional command {command_name}: {'ok' if command_exists(command_name) else 'missing'}")
|
||||
for raw_path in doctor.get("required_paths") or []:
|
||||
path = resolve_workspace_path(str(raw_path))
|
||||
print(f" required path {raw_path}: {'ok' if path.exists() else 'missing'}")
|
||||
for raw_path in doctor.get("optional_paths") or []:
|
||||
path = resolve_workspace_path(str(raw_path))
|
||||
print(f" optional path {raw_path}: {'ok' if path.exists() else 'missing'}")
|
||||
|
||||
|
||||
def shutil_which(command: str) -> str | None:
|
||||
@@ -339,6 +417,10 @@ def main() -> None:
|
||||
run_doctor(args.profile, manifest)
|
||||
return
|
||||
|
||||
errors = validate_manifest(manifest)
|
||||
if errors:
|
||||
raise SystemExit("invalid services manifest:\n" + "\n".join(f"- {error}" for error in errors))
|
||||
|
||||
include_disabled = args.action == "status"
|
||||
refs = select_services(manifest, args.services, args.group or None, include_disabled=include_disabled)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user