From b886c61afd9a67bc70eb276af0f1e06a22a0eda1 Mon Sep 17 00:00:00 2001 From: "david.delagneau" Date: Tue, 19 May 2026 16:03:16 -0600 Subject: [PATCH] feat: enhance Mattermost proxy with improved post ID deduplication and file tracking --- scripts/mattermost-proxy/.env.example | 7 +--- scripts/mattermost-proxy/README.md | 10 ++---- scripts/mattermost-proxy/launch-mattermost.sh | 13 ------- scripts/mattermost-proxy/mattermost_mirror.py | 35 ++++++++++++++----- 4 files changed, 30 insertions(+), 35 deletions(-) diff --git a/scripts/mattermost-proxy/.env.example b/scripts/mattermost-proxy/.env.example index 9df3795..32abb2b 100644 --- a/scripts/mattermost-proxy/.env.example +++ b/scripts/mattermost-proxy/.env.example @@ -22,10 +22,5 @@ MATTERMOST_MIRROR_CHANNEL_IDS= # Keep disabled by default to avoid large files. MATTERMOST_MIRROR_WRITE_RAW=0 -# Mattermost desktop app bundle / binary. +# Mattermost desktop app bundle. MATTERMOST_APP_PATH=/Applications/Mattermost.app -MATTERMOST_APP_BIN=/Applications/Mattermost.app/Contents/MacOS/Mattermost - -# Default uses macOS `open -n ... --args` because direct Electron binary launch -# can crash sandboxed desktop apps. Set to `binary` only for debugging. -MATTERMOST_MIRROR_LAUNCH_MODE=open diff --git a/scripts/mattermost-proxy/README.md b/scripts/mattermost-proxy/README.md index f947ad1..e719b5e 100644 --- a/scripts/mattermost-proxy/README.md +++ b/scripts/mattermost-proxy/README.md @@ -43,11 +43,7 @@ The helper intentionally uses `open -n /Applications/Mattermost.app --args ...` instead of invoking `/Applications/Mattermost.app/Contents/MacOS/Mattermost` directly. Direct binary launch can crash sandboxed Electron apps with Mach rendezvous errors because their expected app/container parent process is -missing. If you need the old behavior for debugging, set: - -```bash -MATTERMOST_MIRROR_LAUNCH_MODE=binary scripts/mattermost-proxy/launch-mattermost.sh -``` +missing. ## Output layout @@ -64,6 +60,8 @@ ai/inbox/mattermost-mirror/ Use `latest.md` or `latest.jsonl` for quick AI context. Use date-rotated `messages/...` files for previous-workday or channel/date-specific analysis. +The mirror writes any post payload it sees, including older messages returned when the desktop app loads channel history or a thread. It dedupes by `post_id`, so scrolling back through useful history is a safe way to backfill missing local evidence without creating repeated entries. + ## Normalized message schema Each line in the normalized JSONL contains: @@ -102,6 +100,4 @@ Each line in the normalized JSONL contains: - `MATTERMOST_MIRROR_LATEST_LIMIT`: number of messages in `latest.*`, default `200`. - `MATTERMOST_MIRROR_CHANNEL_IDS`: optional comma-separated channel ID allowlist. - `MATTERMOST_MIRROR_WRITE_RAW`: set to `1` to save compact raw REST/WebSocket evidence. -- `MATTERMOST_APP_BIN`: Mattermost Desktop binary path. - `MATTERMOST_APP_PATH`: Mattermost Desktop `.app` bundle path. -- `MATTERMOST_MIRROR_LAUNCH_MODE`: `open` by default; `binary` only for debugging. diff --git a/scripts/mattermost-proxy/launch-mattermost.sh b/scripts/mattermost-proxy/launch-mattermost.sh index 3487ff1..274cde5 100755 --- a/scripts/mattermost-proxy/launch-mattermost.sh +++ b/scripts/mattermost-proxy/launch-mattermost.sh @@ -11,21 +11,8 @@ if [ -f "$SCRIPT_DIR/.env" ]; then fi APP_PATH="${MATTERMOST_APP_PATH:-/Applications/Mattermost.app}" -APP_NAME="${MATTERMOST_APP_NAME:-Mattermost}" -APP_BIN="${MATTERMOST_APP_BIN:-$APP_PATH/Contents/MacOS/Mattermost}" PROXY_HOST="${MATTERMOST_MIRROR_LISTEN_HOST:-127.0.0.1}" PROXY_PORT="${MATTERMOST_MIRROR_LISTEN_PORT:-8080}" -LAUNCH_MODE="${MATTERMOST_MIRROR_LAUNCH_MODE:-open}" - -if [ "$LAUNCH_MODE" = "binary" ]; then - if [ ! -x "$APP_BIN" ]; then - echo "Mattermost app binary not found or not executable: $APP_BIN" >&2 - echo "Set MATTERMOST_APP_BIN in scripts/mattermost-proxy/.env if needed." >&2 - exit 1 - fi - - exec "$APP_BIN" --proxy-server="http://${PROXY_HOST}:${PROXY_PORT}" -fi if [ ! -d "$APP_PATH" ]; then echo "Mattermost app bundle not found: $APP_PATH" >&2 diff --git a/scripts/mattermost-proxy/mattermost_mirror.py b/scripts/mattermost-proxy/mattermost_mirror.py index de8e327..61f96b3 100644 --- a/scripts/mattermost-proxy/mattermost_mirror.py +++ b/scripts/mattermost-proxy/mattermost_mirror.py @@ -54,6 +54,7 @@ class MattermostMirror: self.latest_md_path = self.out_dir / "latest.md" self.seen_post_ids: set[str] = set() + self.seen_by_file: dict[Path, set[str]] = {} self.users: dict[str, str] = {} self.channels: dict[str, str] = {} self.state: dict[str, Any] = {"channels": {}, "users": {}, "updated_at": None} @@ -81,11 +82,22 @@ class MattermostMirror: self.state = {"channels": {}, "users": {}, "updated_at": None} def _load_recent_seen_ids(self) -> None: - # Bound startup work: latest.jsonl contains the dedupe window. Daily files also - # protect same-day restarts below. + # Bound startup work: latest.jsonl contains the hot dedupe window. Daily + # files are loaded lazily when older/backfilled messages are encountered. for path in [self.latest_jsonl_path, self._daily_messages_path(datetime.now(timezone.utc))]: if not path.exists(): continue + try: + ids = self._load_seen_ids_for_file(path) + self.seen_post_ids.update(ids) + except Exception: + continue + + def _load_seen_ids_for_file(self, path: Path) -> set[str]: + if path in self.seen_by_file: + return self.seen_by_file[path] + ids: set[str] = set() + if path.exists(): try: with path.open("r", encoding="utf-8") as handle: for line in handle: @@ -94,9 +106,11 @@ class MattermostMirror: obj = json.loads(line) post_id = obj.get("post_id") if post_id: - self.seen_post_ids.add(post_id) + ids.add(post_id) except Exception: - continue + ids = set() + self.seen_by_file[path] = ids + return ids def _atomic_write_text(self, path: Path, text: str) -> None: path.parent.mkdir(parents=True, exist_ok=True) @@ -231,12 +245,15 @@ class MattermostMirror: def _write_message(self, msg: dict[str, Any]) -> None: post_id = msg["post_id"] - if post_id in self.seen_post_ids: - return - self.seen_post_ids.add(post_id) - created_dt = self._dt_from_ms(msg.get("created_at_ms")) - self._append_jsonl(self._daily_messages_path(created_dt), msg) + daily_path = self._daily_messages_path(created_dt) + daily_seen = self._load_seen_ids_for_file(daily_path) + if post_id in self.seen_post_ids or post_id in daily_seen: + return + + self.seen_post_ids.add(post_id) + daily_seen.add(post_id) + self._append_jsonl(daily_path, msg) self._update_state(msg) self._update_latest(msg) self._update_index(created_dt, msg)