feat: enhance Mattermost proxy with improved post ID deduplication and file tracking
This commit is contained in:
@@ -22,10 +22,5 @@ MATTERMOST_MIRROR_CHANNEL_IDS=
|
|||||||
# Keep disabled by default to avoid large files.
|
# Keep disabled by default to avoid large files.
|
||||||
MATTERMOST_MIRROR_WRITE_RAW=0
|
MATTERMOST_MIRROR_WRITE_RAW=0
|
||||||
|
|
||||||
# Mattermost desktop app bundle / binary.
|
# Mattermost desktop app bundle.
|
||||||
MATTERMOST_APP_PATH=/Applications/Mattermost.app
|
MATTERMOST_APP_PATH=/Applications/Mattermost.app
|
||||||
MATTERMOST_APP_BIN=/Applications/Mattermost.app/Contents/MacOS/Mattermost
|
|
||||||
|
|
||||||
# Default uses macOS `open -n ... --args` because direct Electron binary launch
|
|
||||||
# can crash sandboxed desktop apps. Set to `binary` only for debugging.
|
|
||||||
MATTERMOST_MIRROR_LAUNCH_MODE=open
|
|
||||||
|
|||||||
@@ -43,11 +43,7 @@ The helper intentionally uses `open -n /Applications/Mattermost.app --args ...`
|
|||||||
instead of invoking `/Applications/Mattermost.app/Contents/MacOS/Mattermost`
|
instead of invoking `/Applications/Mattermost.app/Contents/MacOS/Mattermost`
|
||||||
directly. Direct binary launch can crash sandboxed Electron apps with Mach
|
directly. Direct binary launch can crash sandboxed Electron apps with Mach
|
||||||
rendezvous errors because their expected app/container parent process is
|
rendezvous errors because their expected app/container parent process is
|
||||||
missing. If you need the old behavior for debugging, set:
|
missing.
|
||||||
|
|
||||||
```bash
|
|
||||||
MATTERMOST_MIRROR_LAUNCH_MODE=binary scripts/mattermost-proxy/launch-mattermost.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
## Output layout
|
## Output layout
|
||||||
|
|
||||||
@@ -64,6 +60,8 @@ ai/inbox/mattermost-mirror/
|
|||||||
|
|
||||||
Use `latest.md` or `latest.jsonl` for quick AI context. Use date-rotated `messages/...` files for previous-workday or channel/date-specific analysis.
|
Use `latest.md` or `latest.jsonl` for quick AI context. Use date-rotated `messages/...` files for previous-workday or channel/date-specific analysis.
|
||||||
|
|
||||||
|
The mirror writes any post payload it sees, including older messages returned when the desktop app loads channel history or a thread. It deduplicates by `post_id`, so scrolling back through useful history is a safe way to backfill missing local evidence without creating repeated entries.
|
||||||
|
|
||||||
## Normalized message schema
|
## Normalized message schema
|
||||||
|
|
||||||
Each line in the normalized JSONL contains:
|
Each line in the normalized JSONL contains:
|
||||||
@@ -102,6 +100,4 @@ Each line in the normalized JSONL contains:
|
|||||||
- `MATTERMOST_MIRROR_LATEST_LIMIT`: number of messages in `latest.*`, default `200`.
|
- `MATTERMOST_MIRROR_LATEST_LIMIT`: number of messages in `latest.*`, default `200`.
|
||||||
- `MATTERMOST_MIRROR_CHANNEL_IDS`: optional comma-separated channel ID allowlist.
|
- `MATTERMOST_MIRROR_CHANNEL_IDS`: optional comma-separated channel ID allowlist.
|
||||||
- `MATTERMOST_MIRROR_WRITE_RAW`: set to `1` to save compact raw REST/WebSocket evidence.
|
- `MATTERMOST_MIRROR_WRITE_RAW`: set to `1` to save compact raw REST/WebSocket evidence.
|
||||||
- `MATTERMOST_APP_BIN`: Mattermost Desktop binary path.
|
|
||||||
- `MATTERMOST_APP_PATH`: Mattermost Desktop `.app` bundle path.
|
- `MATTERMOST_APP_PATH`: Mattermost Desktop `.app` bundle path.
|
||||||
- `MATTERMOST_MIRROR_LAUNCH_MODE`: `open` by default; `binary` only for debugging.
|
|
||||||
|
|||||||
@@ -11,21 +11,8 @@ if [ -f "$SCRIPT_DIR/.env" ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
APP_PATH="${MATTERMOST_APP_PATH:-/Applications/Mattermost.app}"
|
APP_PATH="${MATTERMOST_APP_PATH:-/Applications/Mattermost.app}"
|
||||||
APP_NAME="${MATTERMOST_APP_NAME:-Mattermost}"
|
|
||||||
APP_BIN="${MATTERMOST_APP_BIN:-$APP_PATH/Contents/MacOS/Mattermost}"
|
|
||||||
PROXY_HOST="${MATTERMOST_MIRROR_LISTEN_HOST:-127.0.0.1}"
|
PROXY_HOST="${MATTERMOST_MIRROR_LISTEN_HOST:-127.0.0.1}"
|
||||||
PROXY_PORT="${MATTERMOST_MIRROR_LISTEN_PORT:-8080}"
|
PROXY_PORT="${MATTERMOST_MIRROR_LISTEN_PORT:-8080}"
|
||||||
LAUNCH_MODE="${MATTERMOST_MIRROR_LAUNCH_MODE:-open}"
|
|
||||||
|
|
||||||
if [ "$LAUNCH_MODE" = "binary" ]; then
|
|
||||||
if [ ! -x "$APP_BIN" ]; then
|
|
||||||
echo "Mattermost app binary not found or not executable: $APP_BIN" >&2
|
|
||||||
echo "Set MATTERMOST_APP_BIN in scripts/mattermost-proxy/.env if needed." >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
exec "$APP_BIN" --proxy-server="http://${PROXY_HOST}:${PROXY_PORT}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -d "$APP_PATH" ]; then
|
if [ ! -d "$APP_PATH" ]; then
|
||||||
echo "Mattermost app bundle not found: $APP_PATH" >&2
|
echo "Mattermost app bundle not found: $APP_PATH" >&2
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ class MattermostMirror:
|
|||||||
self.latest_md_path = self.out_dir / "latest.md"
|
self.latest_md_path = self.out_dir / "latest.md"
|
||||||
|
|
||||||
self.seen_post_ids: set[str] = set()
|
self.seen_post_ids: set[str] = set()
|
||||||
|
self.seen_by_file: dict[Path, set[str]] = {}
|
||||||
self.users: dict[str, str] = {}
|
self.users: dict[str, str] = {}
|
||||||
self.channels: dict[str, str] = {}
|
self.channels: dict[str, str] = {}
|
||||||
self.state: dict[str, Any] = {"channels": {}, "users": {}, "updated_at": None}
|
self.state: dict[str, Any] = {"channels": {}, "users": {}, "updated_at": None}
|
||||||
@@ -81,11 +82,22 @@ class MattermostMirror:
|
|||||||
self.state = {"channels": {}, "users": {}, "updated_at": None}
|
self.state = {"channels": {}, "users": {}, "updated_at": None}
|
||||||
|
|
||||||
def _load_recent_seen_ids(self) -> None:
|
def _load_recent_seen_ids(self) -> None:
|
||||||
# Bound startup work: latest.jsonl contains the dedupe window. Daily files also
|
# Bound startup work: latest.jsonl contains the hot dedupe window. Daily
|
||||||
# protect same-day restarts below.
|
# files other than today's (UTC) are loaded lazily when older/backfilled messages are encountered.
|
||||||
for path in [self.latest_jsonl_path, self._daily_messages_path(datetime.now(timezone.utc))]:
|
for path in [self.latest_jsonl_path, self._daily_messages_path(datetime.now(timezone.utc))]:
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
continue
|
continue
|
||||||
|
try:
|
||||||
|
ids = self._load_seen_ids_for_file(path)
|
||||||
|
self.seen_post_ids.update(ids)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
def _load_seen_ids_for_file(self, path: Path) -> set[str]:
|
||||||
|
if path in self.seen_by_file:
|
||||||
|
return self.seen_by_file[path]
|
||||||
|
ids: set[str] = set()
|
||||||
|
if path.exists():
|
||||||
try:
|
try:
|
||||||
with path.open("r", encoding="utf-8") as handle:
|
with path.open("r", encoding="utf-8") as handle:
|
||||||
for line in handle:
|
for line in handle:
|
||||||
@@ -94,9 +106,11 @@ class MattermostMirror:
|
|||||||
obj = json.loads(line)
|
obj = json.loads(line)
|
||||||
post_id = obj.get("post_id")
|
post_id = obj.get("post_id")
|
||||||
if post_id:
|
if post_id:
|
||||||
self.seen_post_ids.add(post_id)
|
ids.add(post_id)
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
ids = set()
|
||||||
|
self.seen_by_file[path] = ids
|
||||||
|
return ids
|
||||||
|
|
||||||
def _atomic_write_text(self, path: Path, text: str) -> None:
|
def _atomic_write_text(self, path: Path, text: str) -> None:
|
||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -231,12 +245,15 @@ class MattermostMirror:
|
|||||||
|
|
||||||
def _write_message(self, msg: dict[str, Any]) -> None:
|
def _write_message(self, msg: dict[str, Any]) -> None:
|
||||||
post_id = msg["post_id"]
|
post_id = msg["post_id"]
|
||||||
if post_id in self.seen_post_ids:
|
|
||||||
return
|
|
||||||
self.seen_post_ids.add(post_id)
|
|
||||||
|
|
||||||
created_dt = self._dt_from_ms(msg.get("created_at_ms"))
|
created_dt = self._dt_from_ms(msg.get("created_at_ms"))
|
||||||
self._append_jsonl(self._daily_messages_path(created_dt), msg)
|
daily_path = self._daily_messages_path(created_dt)
|
||||||
|
daily_seen = self._load_seen_ids_for_file(daily_path)
|
||||||
|
if post_id in self.seen_post_ids or post_id in daily_seen:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.seen_post_ids.add(post_id)
|
||||||
|
daily_seen.add(post_id)
|
||||||
|
self._append_jsonl(daily_path, msg)
|
||||||
self._update_state(msg)
|
self._update_state(msg)
|
||||||
self._update_latest(msg)
|
self._update_latest(msg)
|
||||||
self._update_index(created_dt, msg)
|
self._update_index(created_dt, msg)
|
||||||
|
|||||||
Reference in New Issue
Block a user