feat: enhance Mattermost proxy with improved post ID deduplication and file tracking
This commit is contained in:
@@ -22,10 +22,5 @@ MATTERMOST_MIRROR_CHANNEL_IDS=
|
||||
# Keep disabled by default to avoid large files.
|
||||
MATTERMOST_MIRROR_WRITE_RAW=0
|
||||
|
||||
# Mattermost desktop app bundle / binary.
|
||||
# Mattermost desktop app bundle.
|
||||
MATTERMOST_APP_PATH=/Applications/Mattermost.app
|
||||
MATTERMOST_APP_BIN=/Applications/Mattermost.app/Contents/MacOS/Mattermost
|
||||
|
||||
# Default uses macOS `open -n ... --args` because direct Electron binary launch
|
||||
# can crash sandboxed desktop apps. Set to `binary` only for debugging.
|
||||
MATTERMOST_MIRROR_LAUNCH_MODE=open
|
||||
|
||||
@@ -43,11 +43,7 @@ The helper intentionally uses `open -n /Applications/Mattermost.app --args ...`
|
||||
instead of invoking `/Applications/Mattermost.app/Contents/MacOS/Mattermost`
|
||||
directly. Direct binary launch can crash sandboxed Electron apps with Mach
|
||||
rendezvous errors because their expected app/container parent process is
|
||||
missing. If you need the old behavior for debugging, set:
|
||||
|
||||
```bash
|
||||
MATTERMOST_MIRROR_LAUNCH_MODE=binary scripts/mattermost-proxy/launch-mattermost.sh
|
||||
```
|
||||
missing.
|
||||
|
||||
## Output layout
|
||||
|
||||
@@ -64,6 +60,8 @@ ai/inbox/mattermost-mirror/
|
||||
|
||||
Use `latest.md` or `latest.jsonl` for quick AI context. Use date-rotated `messages/...` files for previous-workday or channel/date-specific analysis.
|
||||
|
||||
The mirror writes any post payload it sees, including older messages returned when the desktop app loads channel history or a thread. It dedupes by `post_id`, so scrolling back through useful history is a safe way to backfill missing local evidence without creating repeated entries.
|
||||
|
||||
## Normalized message schema
|
||||
|
||||
Each line in the normalized JSONL contains:
|
||||
@@ -102,6 +100,4 @@ Each line in the normalized JSONL contains:
|
||||
- `MATTERMOST_MIRROR_LATEST_LIMIT`: number of messages in `latest.*`, default `200`.
|
||||
- `MATTERMOST_MIRROR_CHANNEL_IDS`: optional comma-separated channel ID allowlist.
|
||||
- `MATTERMOST_MIRROR_WRITE_RAW`: set to `1` to save compact raw REST/WebSocket evidence.
|
||||
- `MATTERMOST_APP_BIN`: Mattermost Desktop binary path.
|
||||
- `MATTERMOST_APP_PATH`: Mattermost Desktop `.app` bundle path.
|
||||
- `MATTERMOST_MIRROR_LAUNCH_MODE`: `open` by default; `binary` only for debugging.
|
||||
|
||||
@@ -11,21 +11,8 @@ if [ -f "$SCRIPT_DIR/.env" ]; then
|
||||
fi
|
||||
|
||||
APP_PATH="${MATTERMOST_APP_PATH:-/Applications/Mattermost.app}"
|
||||
APP_NAME="${MATTERMOST_APP_NAME:-Mattermost}"
|
||||
APP_BIN="${MATTERMOST_APP_BIN:-$APP_PATH/Contents/MacOS/Mattermost}"
|
||||
PROXY_HOST="${MATTERMOST_MIRROR_LISTEN_HOST:-127.0.0.1}"
|
||||
PROXY_PORT="${MATTERMOST_MIRROR_LISTEN_PORT:-8080}"
|
||||
LAUNCH_MODE="${MATTERMOST_MIRROR_LAUNCH_MODE:-open}"
|
||||
|
||||
if [ "$LAUNCH_MODE" = "binary" ]; then
|
||||
if [ ! -x "$APP_BIN" ]; then
|
||||
echo "Mattermost app binary not found or not executable: $APP_BIN" >&2
|
||||
echo "Set MATTERMOST_APP_BIN in scripts/mattermost-proxy/.env if needed." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exec "$APP_BIN" --proxy-server="http://${PROXY_HOST}:${PROXY_PORT}"
|
||||
fi
|
||||
|
||||
if [ ! -d "$APP_PATH" ]; then
|
||||
echo "Mattermost app bundle not found: $APP_PATH" >&2
|
||||
|
||||
@@ -54,6 +54,7 @@ class MattermostMirror:
|
||||
self.latest_md_path = self.out_dir / "latest.md"
|
||||
|
||||
self.seen_post_ids: set[str] = set()
|
||||
self.seen_by_file: dict[Path, set[str]] = {}
|
||||
self.users: dict[str, str] = {}
|
||||
self.channels: dict[str, str] = {}
|
||||
self.state: dict[str, Any] = {"channels": {}, "users": {}, "updated_at": None}
|
||||
@@ -81,11 +82,22 @@ class MattermostMirror:
|
||||
self.state = {"channels": {}, "users": {}, "updated_at": None}
|
||||
|
||||
def _load_recent_seen_ids(self) -> None:
|
||||
# Bound startup work: latest.jsonl contains the dedupe window. Daily files also
|
||||
# protect same-day restarts below.
|
||||
# Bound startup work: latest.jsonl contains the hot dedupe window. Daily
|
||||
# files other than today's are loaded lazily when older/backfilled messages are encountered.
|
||||
for path in [self.latest_jsonl_path, self._daily_messages_path(datetime.now(timezone.utc))]:
|
||||
if not path.exists():
|
||||
continue
|
||||
try:
|
||||
ids = self._load_seen_ids_for_file(path)
|
||||
self.seen_post_ids.update(ids)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
def _load_seen_ids_for_file(self, path: Path) -> set[str]:
|
||||
if path in self.seen_by_file:
|
||||
return self.seen_by_file[path]
|
||||
ids: set[str] = set()
|
||||
if path.exists():
|
||||
try:
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
for line in handle:
|
||||
@@ -94,9 +106,11 @@ class MattermostMirror:
|
||||
obj = json.loads(line)
|
||||
post_id = obj.get("post_id")
|
||||
if post_id:
|
||||
self.seen_post_ids.add(post_id)
|
||||
ids.add(post_id)
|
||||
except Exception:
|
||||
continue
|
||||
ids = set()
|
||||
self.seen_by_file[path] = ids
|
||||
return ids
|
||||
|
||||
def _atomic_write_text(self, path: Path, text: str) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -231,12 +245,15 @@ class MattermostMirror:
|
||||
|
||||
def _write_message(self, msg: dict[str, Any]) -> None:
|
||||
post_id = msg["post_id"]
|
||||
if post_id in self.seen_post_ids:
|
||||
return
|
||||
self.seen_post_ids.add(post_id)
|
||||
|
||||
created_dt = self._dt_from_ms(msg.get("created_at_ms"))
|
||||
self._append_jsonl(self._daily_messages_path(created_dt), msg)
|
||||
daily_path = self._daily_messages_path(created_dt)
|
||||
daily_seen = self._load_seen_ids_for_file(daily_path)
|
||||
if post_id in self.seen_post_ids or post_id in daily_seen:
|
||||
return
|
||||
|
||||
self.seen_post_ids.add(post_id)
|
||||
daily_seen.add(post_id)
|
||||
self._append_jsonl(daily_path, msg)
|
||||
self._update_state(msg)
|
||||
self._update_latest(msg)
|
||||
self._update_index(created_dt, msg)
|
||||
|
||||
Reference in New Issue
Block a user