diff --git a/.opencode/commands/slack-import.md b/.opencode/commands/slack-import.md new file mode 100644 index 0000000..1ca9cc0 --- /dev/null +++ b/.opencode/commands/slack-import.md @@ -0,0 +1,59 @@ +--- +description: Import a historical Slack export and refine workspace memory from it +--- + +Use a Slack export as a historical context source for the workspace. + +Interpret this as historical recovery, not as current truth and not as model training. + +Inputs: + +- `$ARGUMENTS` may contain an export path, channel names, or date filters +- if no explicit path is given in the arguments, use `FIDELITY_SLACK_EXPORT_PATH` when available +- otherwise, if `archives/slack/export/` exists, use it as the default import source +- if no channels are specified, auto-detect channels whose folder names start with `fidelity` +- if no message limit is specified, auto-tune message selection based on archive size + +First, run the importer: + +!`if [ -n "$ARGUMENTS" ]; then python3 scripts/slack/import_slack_export.py $ARGUMENTS; elif [ -n "$FIDELITY_SLACK_EXPORT_PATH" ]; then python3 scripts/slack/import_slack_export.py --export-path "$FIDELITY_SLACK_EXPORT_PATH" --channel-prefix fidelity; elif [ -d archives/slack/export ]; then python3 scripts/slack/import_slack_export.py --export-path archives/slack/export --channel-prefix fidelity; else echo "Provide Slack import arguments, set FIDELITY_SLACK_EXPORT_PATH, or place an extracted export in archives/slack/export."; fi` + +Read: + +@ai/AGENTS.md +@ai/context/project.md +@ai/context/people/index.md +@ai/context/people/manager.md +@ai/state/current.md +@ai/state/work-items.md +@knowledge/agent-memory-rules.md +@knowledge/memory-promotion-rules.md + +Imported summary, if present: + +!`if [ -s scripts/slack/generated/slack_summary.md ]; then cat scripts/slack/generated/slack_summary.md; else echo "No Slack summary generated."; fi` + +Imported Slack context, if present: + +!`if [ -s scripts/slack/generated/slack_context.jsonl ]; then cat 
scripts/slack/generated/slack_context.jsonl; else echo "No Slack context generated."; fi` + +Instructions: + +- treat the Slack archive as historical evidence +- assume this may be a large multi-year export +- promote durable project-relevant context automatically when confidence is high +- prefer promoting: + - repeated Jira IDs and titles still relevant to current understanding + - durable role/person associations + - recurring architecture or debugging patterns + - past approvals or decisions that still matter +- prioritize high-signal messages such as Jira references, approvals, scope changes, root-cause notes, points, and persistent technical constraints +- avoid promoting outdated daily status unless it changes current understanding +- update existing memory when the archive clarifies or corrects it +- if historical facts are ambiguous or likely outdated, summarize them as archived context instead of promoting them + +Return: + +1. What was imported +2. Which files were updated +3. Which historical facts were promoted or intentionally left as archive-only context diff --git a/archives/slack/.gitignore b/archives/slack/.gitignore new file mode 100644 index 0000000..7c9d611 --- /dev/null +++ b/archives/slack/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore +!README.md diff --git a/archives/slack/README.md b/archives/slack/README.md new file mode 100644 index 0000000..38083dd --- /dev/null +++ b/archives/slack/README.md @@ -0,0 +1,29 @@ +# Slack Archive + +Use this directory for historical Slack exports that should be available to the workspace when old evidence is needed. + +## Recommended Layout + +Preferred: + +- `archives/slack/export/` + Extracted Slack export directory + +Optional: + +- `archives/slack/export.zip` + Raw zip kept for reference only + +## Important + +- The current importer works with the extracted export directory, not the zip directly. +- Keep raw archive content here, not under `ai/` or `knowledge/`. 
+- This directory is ignored by git except for this README and `.gitignore`. + +## Why Here + +This keeps: + +- raw historical evidence separate from curated memory +- project memory clean +- historical context available when the agent needs to recover old Jira, people, or decision context diff --git a/scripts/README.md b/scripts/README.md index 12e1389..76d6532 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -6,6 +6,7 @@ This directory contains helpers that automate: - standup generation - manager update drafting - Mattermost-ready message formatting +- historical Slack import The default workspace Mattermost extractor now lives in: @@ -43,3 +44,14 @@ Expected behavior: - return a non-zero exit code on failure OpenCode can then use that output to refresh `ai/inbox/mattermost-latest.md` proactively. + +Historical Slack exports can also be imported through: + +- `python3 scripts/slack/import_slack_export.py ...` +- `/slack-import ...` + +Recommended raw archive location: + +- `archives/slack/export/` + +The importer can auto-detect `fidelity*` channels and auto-tune message selection for very large exports. diff --git a/scripts/slack/.gitignore b/scripts/slack/.gitignore new file mode 100644 index 0000000..000255f --- /dev/null +++ b/scripts/slack/.gitignore @@ -0,0 +1,2 @@ +.env +generated/ diff --git a/scripts/slack/README.md b/scripts/slack/README.md new file mode 100644 index 0000000..c4e1515 --- /dev/null +++ b/scripts/slack/README.md @@ -0,0 +1,68 @@ +# Slack History Import + +This directory contains helpers to ingest historical Slack exports as context sources for the workspace. + +## Goal + +Use old Slack history to recover durable project context, stakeholder relationships, Jira references, and past decisions without treating the raw archive as current truth. 
+ +## Recommended Use + +- import selected channels, not the whole export blindly +- prefer recent or relevant historical windows +- promote only durable, project-relevant facts into workspace memory +- keep raw imported artifacts under `generated/` +- keep the raw Slack export under `archives/slack/export/` when you want the workspace to find it later +- for a first pass on a large export, let the importer auto-detect `fidelity*` channels and auto-tune message selection + +## Supported Export Shape + +The importer expects the standard Slack export structure: + +- one folder per channel +- one JSON file per day inside the channel folder +- optional `users.json` at the export root + +## Usage + +Manual example: + +```bash +python3 scripts/slack/import_slack_export.py \ + --export-path /absolute/path/to/slack-export \ + --channel-prefix fidelity \ + --output-dir scripts/slack/generated +``` + +This generates: + +- `scripts/slack/generated/slack_context.jsonl` +- `scripts/slack/generated/slack_summary.md` + +Use the OpenCode command `/slack-import ...` for the guided workflow. 
#!/usr/bin/env python3
"""Import a historical Slack export into workspace-friendly artifacts.

The script walks a standard Slack export (one folder per channel, one
``YYYY-MM-DD.json`` file per day, optional ``users.json`` at the root),
normalizes every non-empty message, selects a bounded subset that favors
recent and high-signal messages, and writes two files into the output
directory:

- ``slack_context.jsonl``: one normalized message per line
- ``slack_summary.md``: counts per channel, participant and Jira ID

The export location is taken from ``--export-path``; when that is omitted
the ``FIDELITY_SLACK_EXPORT_PATH`` environment variable is used, and after
that the ``archives/slack/export`` directory relative to the current
working directory.
"""

import argparse
import json
import os
import re
import sys
from collections import Counter
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple


# Environment variable and directory consulted when --export-path is omitted.
EXPORT_PATH_ENV_VAR = "FIDELITY_SLACK_EXPORT_PATH"
DEFAULT_EXPORT_DIR = "archives/slack/export"

# Matches Jira-style issue keys such as "ABC-123".
JIRA_RE = re.compile(r"\b[A-Z][A-Z0-9]+-\d+\b")

# Each pattern that matches a message adds a fixed bonus in score_message().
HIGH_SIGNAL_PATTERNS = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in [
        r"\broot cause\b",
        r"\bapproved?\b",
        r"\bpoints?\b",
        r"\bepic\b",
        r"\bregression\b",
        r"\bauth(?:enticated)?\b",
        r"\breproduc(?:e|ible|ibility)\b",
        r"\bgraphql\b",
        r"\bapollo\b",
        r"\brest\b",
        r"\bxflow\b",
        r"\bfid4\b",
        r"\bfeature flag\b",
        r"\btitle\b",
        r"\bscope\b",
        r"\bdone\b",
        r"\bin progress\b",
        r"\bblocked?\b",
    ]
]


def _warn(message: str) -> None:
    """Print a non-fatal diagnostic to stderr so stdout stays clean."""
    print(f"warning: {message}", file=sys.stderr)


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The parsed namespace. ``export_path`` is an empty string when the
        option was not supplied; see ``resolve_export_path`` for fallbacks.
    """
    parser = argparse.ArgumentParser(
        description="Import a Slack export into workspace-friendly JSONL and summary artifacts."
    )
    parser.add_argument(
        "--export-path",
        default="",
        help=(
            "Path to the Slack export root. When omitted, falls back to the "
            f"{EXPORT_PATH_ENV_VAR} environment variable and then to {DEFAULT_EXPORT_DIR}/."
        ),
    )
    parser.add_argument(
        "--channels",
        default="",
        help="Comma-separated channel names to import. When omitted, auto-detect channels by prefix.",
    )
    parser.add_argument(
        "--channel-prefix",
        default="fidelity",
        help="Default channel prefix to auto-detect when --channels is omitted.",
    )
    parser.add_argument(
        "--all-channels",
        action="store_true",
        help="Import every channel folder instead of using prefix-based auto-detection.",
    )
    parser.add_argument(
        "--since",
        default="",
        help="Optional lower bound date in YYYY-MM-DD.",
    )
    parser.add_argument(
        "--until",
        default="",
        help="Optional upper bound date in YYYY-MM-DD.",
    )
    parser.add_argument(
        "--max-messages",
        type=int,
        default=0,
        help="Maximum number of messages to emit after filtering. 0 means auto-tune based on export size.",
    )
    parser.add_argument(
        "--recent-days",
        type=int,
        default=180,
        help="Recent window used for prioritizing current relevance when selecting from very large archives.",
    )
    parser.add_argument(
        "--output-dir",
        default="scripts/slack/generated",
        help="Directory where generated artifacts will be written.",
    )
    return parser.parse_args(argv)


def resolve_export_path(
    explicit: str,
    environ: Optional[Mapping[str, str]] = None,
    default_dir: str = DEFAULT_EXPORT_DIR,
) -> Optional[Path]:
    """Pick the export root from the CLI value, the environment, or the default.

    Args:
        explicit: Value of ``--export-path``; empty when not supplied.
        environ: Mapping consulted for ``FIDELITY_SLACK_EXPORT_PATH``.
            Defaults to ``os.environ``.
        default_dir: Directory used as a last resort, only if it exists.

    Returns:
        The resolved absolute path, or ``None`` when no source is available.
        An explicit or environment-provided path is returned even if it does
        not exist, so the caller can report it by name.
    """
    env = os.environ if environ is None else environ
    candidate = explicit or env.get(EXPORT_PATH_ENV_VAR, "")
    if candidate:
        return Path(candidate).expanduser().resolve()

    fallback = Path(default_dir)
    if fallback.is_dir():
        return fallback.resolve()
    return None


def parse_date(raw: str) -> Optional[datetime]:
    """Parse a ``YYYY-MM-DD`` string into a naive datetime.

    Returns ``None`` for an empty string.

    Raises:
        ValueError: If ``raw`` is non-empty and not in ``YYYY-MM-DD`` form.
    """
    if not raw:
        return None
    return datetime.strptime(raw, "%Y-%m-%d")


def load_users(export_root: Path) -> Dict[str, str]:
    """Build a user-id to display-name map from ``users.json``.

    The name is the first non-empty value among ``name``,
    ``profile.display_name``, ``profile.real_name`` and the user id.
    A missing, unreadable or malformed file yields an empty map so the
    import can continue with raw user ids.
    """
    users_path = export_root / "users.json"
    if not users_path.exists():
        return {}

    try:
        payload = json.loads(users_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        _warn(f"ignoring unreadable {users_path}: {exc}")
        return {}

    if not isinstance(payload, list):
        _warn(f"ignoring {users_path}: expected a JSON list")
        return {}

    user_map: Dict[str, str] = {}
    for user in payload:
        if not isinstance(user, dict):
            continue
        user_id = user.get("id", "")
        profile = user.get("profile")
        if not isinstance(profile, dict):
            profile = {}
        name = (
            user.get("name")
            or profile.get("display_name")
            or profile.get("real_name")
            or user_id
        )
        if user_id:
            user_map[user_id] = str(name)
    return user_map


def available_channels(export_root: Path) -> List[str]:
    """Return the sorted names of all directories directly under the export root."""
    return sorted(path.name for path in export_root.iterdir() if path.is_dir())


def resolve_channels(export_root: Path, channels: List[str], channel_prefix: str, all_channels: bool) -> List[str]:
    """Decide which channel folders to import.

    Precedence: explicit ``channels`` as given, then every folder when
    ``all_channels`` is set, then folders whose name starts with
    ``channel_prefix`` (case-insensitive, leading ``#`` ignored). If the
    prefix is empty or matches nothing, every folder is returned.
    """
    if channels:
        return channels

    available = available_channels(export_root)
    if all_channels:
        return available

    prefix = channel_prefix.strip().lstrip("#").lower()
    if prefix:
        matched = [name for name in available if name.lower().startswith(prefix)]
        if matched:
            return matched

    return available


def iter_channel_files(export_root: Path, channels: Sequence[str]) -> Iterable[Tuple[str, Path]]:
    """Yield ``(channel_name, day_file)`` for every ``*.json`` file of the channels.

    With an empty ``channels`` every directory under the root is visited.
    Names that are not directories are skipped. Channels and files are
    yielded in sorted order.
    """
    if channels:
        candidates = [export_root / name for name in channels]
    else:
        candidates = [path for path in export_root.iterdir() if path.is_dir()]

    for channel_dir in sorted(candidates):
        if not channel_dir.is_dir():
            continue
        for day_file in sorted(channel_dir.glob("*.json")):
            yield channel_dir.name, day_file


def date_in_range(day_file: Path, since: Optional[datetime], until: Optional[datetime]) -> bool:
    """Tell whether a day file's date lies within ``[since, until]`` (inclusive).

    The date is read from the file stem. Files whose stem is not
    ``YYYY-MM-DD`` are always rejected, even when no bounds are given.
    """
    try:
        file_day = datetime.strptime(day_file.stem, "%Y-%m-%d")
    except ValueError:
        return False
    if since and file_day < since:
        return False
    if until and file_day > until:
        return False
    return True


def resolve_username(message: Dict[str, Any], user_map: Dict[str, str]) -> str:
    """Return a readable author for a raw message.

    Order: mapped name for ``user``, then the message's own ``username``,
    then the raw user id, then ``"unknown"``.
    """
    user_id = message.get("user", "")
    if user_id and user_id in user_map:
        return user_map[user_id]
    if message.get("username"):
        return str(message["username"])
    return user_id or "unknown"


def message_datetime(ts_value: str) -> Optional[datetime]:
    """Convert a Slack ``ts`` value (epoch seconds) to an aware local datetime.

    Returns ``None`` when the value is not a usable number. That includes
    non-numeric text, NaN, infinity and values the platform cannot
    represent, all of which make ``datetime.fromtimestamp`` raise.
    """
    try:
        return datetime.fromtimestamp(float(ts_value)).astimezone()
    except (TypeError, ValueError, OverflowError, OSError):
        return None


def message_timestamp(ts_value: str) -> str:
    """Return the ISO-8601 form of a Slack ``ts`` value, or ``""`` if unusable."""
    moment = message_datetime(ts_value)
    return moment.isoformat() if moment else ""


def normalize_message(channel: str, raw: Dict[str, Any], user_map: Dict[str, str]) -> Optional[Dict[str, Any]]:
    """Turn one raw export entry into a normalized record.

    Returns ``None`` for entries that are not JSON objects or whose ``text``
    is missing, not a string, or blank. ``reply_count`` and ``message_id``
    are only present when the raw entry carries them.
    """
    if not isinstance(raw, dict):
        return None

    raw_text = raw.get("text")
    if not isinstance(raw_text, str):
        return None
    text = raw_text.strip()
    if not text:
        return None

    subtype = raw.get("subtype", "")
    record: Dict[str, Any] = {
        "source": "slack",
        "channel": channel,
        "timestamp": message_timestamp(str(raw.get("ts", ""))),
        "username": resolve_username(raw, user_map),
        "message": text,
        "type": subtype or "message",
        "thread_ts": raw.get("thread_ts") or None,
    }

    if raw.get("reply_count") is not None:
        record["reply_count"] = raw.get("reply_count")
    if raw.get("ts"):
        record["message_id"] = str(raw["ts"])

    return record


def auto_max_messages(total_messages: int) -> int:
    """Pick an output budget from the number of candidate messages.

    Up to 1500 messages are kept in full; larger archives are capped at
    2500, 4000 or 6000 depending on size.
    """
    if total_messages <= 1500:
        return total_messages
    if total_messages <= 8000:
        return 2500
    if total_messages <= 25000:
        return 4000
    return 6000


def _parse_iso_timestamp(timestamp: Any) -> Optional[datetime]:
    """Parse a record's ISO timestamp; return None when absent or malformed."""
    if not isinstance(timestamp, str) or not timestamp:
        return None
    try:
        return datetime.fromisoformat(timestamp)
    except ValueError:
        return None


def _is_recent(moment: Optional[datetime], cutoff: Optional[datetime]) -> bool:
    """Tell whether ``moment`` is at or after ``cutoff``; False if either is missing."""
    if moment is None or cutoff is None:
        return False
    try:
        return moment >= cutoff
    except TypeError:
        # Naive and aware datetimes cannot be compared; treat as not recent.
        return False


def _chronological_key(item: Dict[str, Any]) -> Tuple[float, str]:
    """Sort key ordering records by instant rather than by timestamp text.

    Comparing ISO strings is wrong when UTC offsets differ (for example
    across a DST change), so the parsed epoch value is used. Records with
    no usable timestamp sort first.
    """
    timestamp = item.get("timestamp", "")
    text = timestamp if isinstance(timestamp, str) else ""
    moment = _parse_iso_timestamp(text)
    if moment is None:
        return (float("-inf"), text)
    try:
        return (moment.timestamp(), text)
    except (OverflowError, OSError, ValueError):
        return (float("-inf"), text)


def score_message(item: Dict[str, Any], recent_cutoff: Optional[datetime]) -> int:
    """Score how worth keeping a normalized message is.

    Points: 8 per Jira key, 4 per matching high-signal pattern, 1 each for
    being in a thread, having replies and having a known author, 6 for being
    at or after ``recent_cutoff``, and 2 for text of 120 characters or more.
    """
    score = 0
    text = item.get("message", "")
    username = item.get("username", "")

    score += len(JIRA_RE.findall(text)) * 8
    score += 4 * sum(1 for pattern in HIGH_SIGNAL_PATTERNS if pattern.search(text))

    if item.get("thread_ts"):
        score += 1
    if item.get("reply_count"):
        score += 1

    if username and username != "unknown":
        score += 1

    if _is_recent(_parse_iso_timestamp(item.get("timestamp", "")), recent_cutoff):
        score += 6

    if len(text) >= 120:
        score += 2

    return score


def select_messages(messages: List[Dict[str, Any]], max_messages: int, recent_days: int) -> List[Dict[str, Any]]:
    """Reduce ``messages`` to at most ``max_messages`` entries.

    When the input already fits, it is returned unchanged. Otherwise the
    newest messages from the last ``recent_days`` days get up to
    ``max(max_messages // 2, 1000)`` slots (never more than the whole
    budget), the best-scoring older messages fill the rest, and any slots
    the older group cannot fill go back to recent messages. The result is
    in chronological order.

    Args:
        messages: Normalized records as produced by ``normalize_message``.
        max_messages: Upper bound on the result size; a value of zero or
            less yields an empty list.
        recent_days: Size of the "recent" window in days, counted back from
            the current local time.
    """
    if max_messages <= 0:
        return []
    if len(messages) <= max_messages:
        return messages

    recent_cutoff = datetime.now().astimezone() - timedelta(days=recent_days)

    recent_messages: List[Dict[str, Any]] = []
    older_messages: List[Dict[str, Any]] = []
    for item in messages:
        moment = _parse_iso_timestamp(item.get("timestamp", ""))
        if _is_recent(moment, recent_cutoff):
            recent_messages.append(item)
        else:
            older_messages.append(item)

    # "Newest N" below must not depend on the caller having sorted the input.
    recent_messages.sort(key=_chronological_key)

    # Cap the recent share at the total budget: without the cap the older
    # share went negative and a negative slice end returned nearly everything.
    recent_budget = min(len(recent_messages), max_messages, max(max_messages // 2, 1000))
    older_budget = min(len(older_messages), max_messages - recent_budget)
    # Hand slots the older group cannot use back to recent messages.
    recent_budget = min(len(recent_messages), max_messages - older_budget)

    # Explicit start index: a "[-0:]" slice would return the whole list.
    selected_recent = recent_messages[len(recent_messages) - recent_budget:]

    scored_older = sorted(
        older_messages,
        key=lambda item: (score_message(item, recent_cutoff), _chronological_key(item)),
        reverse=True,
    )
    selected_older = scored_older[:older_budget]

    selected = selected_recent + selected_older
    selected.sort(key=_chronological_key)
    return selected


def collect_messages(
    export_root: Path,
    channels: Sequence[str],
    since: Optional[datetime],
    until: Optional[datetime],
    max_messages: int,
    recent_days: int,
) -> List[Dict[str, Any]]:
    """Read, normalize, order and select messages from the export.

    Day files outside ``[since, until]`` are skipped. Files that cannot be
    read or parsed, or that do not contain a JSON list, are skipped with a
    warning on stderr. A ``max_messages`` of zero or less means the budget
    is chosen by ``auto_max_messages``.
    """
    user_map = load_users(export_root)
    messages: List[Dict[str, Any]] = []

    for channel, day_file in iter_channel_files(export_root, channels):
        if not date_in_range(day_file, since, until):
            continue
        try:
            payload = json.loads(day_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            _warn(f"skipping unreadable {day_file}: {exc}")
            continue
        if not isinstance(payload, list):
            _warn(f"skipping {day_file}: expected a JSON list")
            continue

        for raw in payload:
            record = normalize_message(channel, raw, user_map)
            if record:
                messages.append(record)

    messages.sort(key=_chronological_key)
    tuned_max = max_messages if max_messages > 0 else auto_max_messages(len(messages))
    return select_messages(messages, tuned_max, recent_days)


def write_jsonl(messages: List[Dict[str, Any]], path: Path) -> None:
    """Write one JSON object per line to ``path``, creating parent directories.

    An empty message list produces an empty file.
    """
    lines = [json.dumps(item, ensure_ascii=False) for item in messages]
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8")


def build_summary(messages: List[Dict[str, Any]], channels: Sequence[str]) -> str:
    """Render the Markdown summary of an import.

    Lists the message count, the requested channels, the ten busiest
    channels and participants, and the twenty most mentioned Jira keys,
    followed by fixed usage guidance.
    """
    channel_counter = Counter(item["channel"] for item in messages)
    user_counter = Counter(item["username"] for item in messages)
    jira_counter: Counter = Counter()

    for item in messages:
        jira_counter.update(JIRA_RE.findall(item.get("message", "")))

    lines = [
        "# Slack Import Summary",
        "",
        f"- Messages imported: {len(messages)}",
        f"- Channels imported: {', '.join(channels) if channels else 'all detected channels'}",
        "",
        "## Top Channels",
    ]

    for channel, count in channel_counter.most_common(10):
        lines.append(f"- {channel}: {count}")

    lines.extend(["", "## Top Participants"])
    for username, count in user_counter.most_common(10):
        lines.append(f"- {username}: {count}")

    lines.extend(["", "## Jira IDs Mentioned"])
    if jira_counter:
        for jira_id, count in jira_counter.most_common(20):
            lines.append(f"- {jira_id}: {count}")
    else:
        lines.append("- None detected")

    lines.extend(
        [
            "",
            "## Guidance",
            "- Treat this archive as historical context, not current truth.",
            "- Prefer promoting durable patterns, repeated approvals, role mappings, Jira references, and architectural context.",
            "- Avoid promoting outdated status unless it still affects current understanding.",
        ]
    )

    return "\n".join(lines) + "\n"


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the import and return a process exit code.

    Returns 0 on success, 1 when the export location is missing or unusable,
    and 2 when ``--since`` or ``--until`` is not a valid date.
    """
    args = parse_args(argv)

    export_root = resolve_export_path(args.export_path)
    if export_root is None:
        print(
            f"No Slack export found: pass --export-path, set {EXPORT_PATH_ENV_VAR}, "
            f"or place an extracted export in {DEFAULT_EXPORT_DIR}.",
            file=sys.stderr,
        )
        return 1
    if not export_root.exists():
        print(f"Export path not found: {export_root}", file=sys.stderr)
        return 1
    if not export_root.is_dir():
        # Typically the raw zip; only the extracted directory is supported.
        print(f"Export path is not a directory (extract the export first): {export_root}", file=sys.stderr)
        return 1

    try:
        since = parse_date(args.since)
        until = parse_date(args.until)
    except ValueError as exc:
        print(f"Invalid date, expected YYYY-MM-DD: {exc}", file=sys.stderr)
        return 2

    channels = [item.strip().lstrip("#") for item in args.channels.split(",") if item.strip()]
    channels = resolve_channels(export_root, channels, args.channel_prefix, args.all_channels)

    missing = [name for name in channels if not (export_root / name).is_dir()]
    if missing:
        _warn(f"channel folders not found in export: {', '.join(missing)}")

    messages = collect_messages(
        export_root=export_root,
        channels=channels,
        since=since,
        until=until,
        max_messages=args.max_messages,
        recent_days=args.recent_days,
    )

    output_dir = Path(args.output_dir).expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    jsonl_path = output_dir / "slack_context.jsonl"
    summary_path = output_dir / "slack_summary.md"

    write_jsonl(messages, jsonl_path)
    summary_path.write_text(build_summary(messages, channels), encoding="utf-8")

    print(f"Imported {len(messages)} Slack messages")
    print(f"Wrote JSONL: {jsonl_path}")
    print(f"Wrote summary: {summary_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())