feat: enhance Mattermost proxy with improved post ID deduplication and file tracking

This commit is contained in:
2026-05-19 16:03:16 -06:00
parent 9dd731f758
commit b886c61afd
4 changed files with 30 additions and 35 deletions

View File

@@ -22,10 +22,5 @@ MATTERMOST_MIRROR_CHANNEL_IDS=
# Keep disabled by default to avoid large files. # Keep disabled by default to avoid large files.
MATTERMOST_MIRROR_WRITE_RAW=0 MATTERMOST_MIRROR_WRITE_RAW=0
# Mattermost desktop app bundle / binary. # Mattermost desktop app bundle.
MATTERMOST_APP_PATH=/Applications/Mattermost.app MATTERMOST_APP_PATH=/Applications/Mattermost.app
MATTERMOST_APP_BIN=/Applications/Mattermost.app/Contents/MacOS/Mattermost
# Default uses macOS `open -n ... --args` because direct Electron binary launch
# can crash sandboxed desktop apps. Set to `binary` only for debugging.
MATTERMOST_MIRROR_LAUNCH_MODE=open

View File

@@ -43,11 +43,7 @@ The helper intentionally uses `open -n /Applications/Mattermost.app --args ...`
instead of invoking `/Applications/Mattermost.app/Contents/MacOS/Mattermost` instead of invoking `/Applications/Mattermost.app/Contents/MacOS/Mattermost`
directly. Direct binary launch can crash sandboxed Electron apps with Mach directly. Direct binary launch can crash sandboxed Electron apps with Mach
rendezvous errors because their expected app/container parent process is rendezvous errors because their expected app/container parent process is
missing. If you need the old behavior for debugging, set: missing.
```bash
MATTERMOST_MIRROR_LAUNCH_MODE=binary scripts/mattermost-proxy/launch-mattermost.sh
```
## Output layout ## Output layout
@@ -64,6 +60,8 @@ ai/inbox/mattermost-mirror/
Use `latest.md` or `latest.jsonl` for quick AI context. Use date-rotated `messages/...` files for previous-workday or channel/date-specific analysis. Use `latest.md` or `latest.jsonl` for quick AI context. Use date-rotated `messages/...` files for previous-workday or channel/date-specific analysis.
The mirror writes any post payload it sees, including older messages returned when the desktop app loads channel history or a thread. It deduplicates by `post_id`, checking both the recently seen IDs and the daily file for the post's creation date, so scrolling back through useful history is a safe way to backfill missing local evidence without creating repeated entries.
## Normalized message schema ## Normalized message schema
Each line in the normalized JSONL contains: Each line in the normalized JSONL contains:
@@ -102,6 +100,4 @@ Each line in the normalized JSONL contains:
- `MATTERMOST_MIRROR_LATEST_LIMIT`: number of messages in `latest.*`, default `200`. - `MATTERMOST_MIRROR_LATEST_LIMIT`: number of messages in `latest.*`, default `200`.
- `MATTERMOST_MIRROR_CHANNEL_IDS`: optional comma-separated channel ID allowlist. - `MATTERMOST_MIRROR_CHANNEL_IDS`: optional comma-separated channel ID allowlist.
- `MATTERMOST_MIRROR_WRITE_RAW`: set to `1` to save compact raw REST/WebSocket evidence. - `MATTERMOST_MIRROR_WRITE_RAW`: set to `1` to save compact raw REST/WebSocket evidence.
- `MATTERMOST_APP_BIN`: Mattermost Desktop binary path.
- `MATTERMOST_APP_PATH`: Mattermost Desktop `.app` bundle path. - `MATTERMOST_APP_PATH`: Mattermost Desktop `.app` bundle path.
- `MATTERMOST_MIRROR_LAUNCH_MODE`: `open` by default; `binary` only for debugging.

View File

@@ -11,21 +11,8 @@ if [ -f "$SCRIPT_DIR/.env" ]; then
fi fi
APP_PATH="${MATTERMOST_APP_PATH:-/Applications/Mattermost.app}" APP_PATH="${MATTERMOST_APP_PATH:-/Applications/Mattermost.app}"
APP_NAME="${MATTERMOST_APP_NAME:-Mattermost}"
APP_BIN="${MATTERMOST_APP_BIN:-$APP_PATH/Contents/MacOS/Mattermost}"
PROXY_HOST="${MATTERMOST_MIRROR_LISTEN_HOST:-127.0.0.1}" PROXY_HOST="${MATTERMOST_MIRROR_LISTEN_HOST:-127.0.0.1}"
PROXY_PORT="${MATTERMOST_MIRROR_LISTEN_PORT:-8080}" PROXY_PORT="${MATTERMOST_MIRROR_LISTEN_PORT:-8080}"
LAUNCH_MODE="${MATTERMOST_MIRROR_LAUNCH_MODE:-open}"
if [ "$LAUNCH_MODE" = "binary" ]; then
if [ ! -x "$APP_BIN" ]; then
echo "Mattermost app binary not found or not executable: $APP_BIN" >&2
echo "Set MATTERMOST_APP_BIN in scripts/mattermost-proxy/.env if needed." >&2
exit 1
fi
exec "$APP_BIN" --proxy-server="http://${PROXY_HOST}:${PROXY_PORT}"
fi
if [ ! -d "$APP_PATH" ]; then if [ ! -d "$APP_PATH" ]; then
echo "Mattermost app bundle not found: $APP_PATH" >&2 echo "Mattermost app bundle not found: $APP_PATH" >&2

View File

@@ -54,6 +54,7 @@ class MattermostMirror:
self.latest_md_path = self.out_dir / "latest.md" self.latest_md_path = self.out_dir / "latest.md"
self.seen_post_ids: set[str] = set() self.seen_post_ids: set[str] = set()
self.seen_by_file: dict[Path, set[str]] = {}
self.users: dict[str, str] = {} self.users: dict[str, str] = {}
self.channels: dict[str, str] = {} self.channels: dict[str, str] = {}
self.state: dict[str, Any] = {"channels": {}, "users": {}, "updated_at": None} self.state: dict[str, Any] = {"channels": {}, "users": {}, "updated_at": None}
@@ -81,11 +82,22 @@ class MattermostMirror:
self.state = {"channels": {}, "users": {}, "updated_at": None} self.state = {"channels": {}, "users": {}, "updated_at": None}
def _load_recent_seen_ids(self) -> None: def _load_recent_seen_ids(self) -> None:
# Bound startup work: latest.jsonl contains the dedupe window. Daily files also # Bound startup work: latest.jsonl contains the hot dedupe window. Daily
# protect same-day restarts below. # files are loaded lazily when older/backfilled messages are encountered.
for path in [self.latest_jsonl_path, self._daily_messages_path(datetime.now(timezone.utc))]: for path in [self.latest_jsonl_path, self._daily_messages_path(datetime.now(timezone.utc))]:
if not path.exists(): if not path.exists():
continue continue
try:
ids = self._load_seen_ids_for_file(path)
self.seen_post_ids.update(ids)
except Exception:
continue
def _load_seen_ids_for_file(self, path: Path) -> set[str]:
if path in self.seen_by_file:
return self.seen_by_file[path]
ids: set[str] = set()
if path.exists():
try: try:
with path.open("r", encoding="utf-8") as handle: with path.open("r", encoding="utf-8") as handle:
for line in handle: for line in handle:
@@ -94,9 +106,11 @@ class MattermostMirror:
obj = json.loads(line) obj = json.loads(line)
post_id = obj.get("post_id") post_id = obj.get("post_id")
if post_id: if post_id:
self.seen_post_ids.add(post_id) ids.add(post_id)
except Exception: except Exception:
continue ids = set()
self.seen_by_file[path] = ids
return ids
def _atomic_write_text(self, path: Path, text: str) -> None: def _atomic_write_text(self, path: Path, text: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
@@ -231,12 +245,15 @@ class MattermostMirror:
def _write_message(self, msg: dict[str, Any]) -> None: def _write_message(self, msg: dict[str, Any]) -> None:
post_id = msg["post_id"] post_id = msg["post_id"]
if post_id in self.seen_post_ids:
return
self.seen_post_ids.add(post_id)
created_dt = self._dt_from_ms(msg.get("created_at_ms")) created_dt = self._dt_from_ms(msg.get("created_at_ms"))
self._append_jsonl(self._daily_messages_path(created_dt), msg) daily_path = self._daily_messages_path(created_dt)
daily_seen = self._load_seen_ids_for_file(daily_path)
if post_id in self.seen_post_ids or post_id in daily_seen:
return
self.seen_post_ids.add(post_id)
daily_seen.add(post_id)
self._append_jsonl(daily_path, msg)
self._update_state(msg) self._update_state(msg)
self._update_latest(msg) self._update_latest(msg)
self._update_index(created_dt, msg) self._update_index(created_dt, msg)