feat: Add previous workday mode to Mattermost extractor and enhance sync script

This commit is contained in:
2026-04-13 10:18:59 -06:00
parent e65471f3c2
commit 07e198a641
6 changed files with 132 additions and 16 deletions

View File

@@ -55,3 +55,9 @@ Recommended raw archive location:
- `archives/slack/export/`
The importer can auto-detect `fidelity*` channels and auto-tune message selection for very large exports.
The Mattermost extractor can also fetch the most recent prior day that has channel activity, for use in standups. It starts at the previous calendar day and, if that day has no activity, steps back one day at a time (up to `--max-lookback-days`, default 7) until it finds one. This covers Mondays, weekends, holidays, and out-of-office (OOO) gaps.
```bash
bash scripts/mattermost/sync.sh --previous-workday
```

View File

@@ -60,6 +60,20 @@ bash scripts/mattermost/sync.sh
OpenCode can use this script directly. If `FIDELITY_MATTERMOST_SYNC_CMD` is not set, the workspace plugins will fall back to this wrapper automatically.
Previous workday mode for standups:
```bash
bash scripts/mattermost/sync.sh --previous-workday
```
This mode starts at the previous calendar day and steps back one day at a time until it finds a day with Mattermost activity in the configured channels, giving up after `--max-lookback-days` days (default 7). It handles Mondays, weekends, holidays, and out-of-office (OOO) gaps without relying on workspace logs.
Useful options:
- `--today YYYY-MM-DD`
- `--max-lookback-days N`
- `--output-file PATH`
## Bootstrap
You can initialize the local runtime with:

View File

@@ -3,9 +3,11 @@
import json
import logging
import os
import re
import ssl
import sys
from datetime import datetime, timedelta
from argparse import ArgumentParser, Namespace
from datetime import date, datetime, time, timedelta
from pathlib import Path
from typing import Any, Dict, List
from urllib import error, parse, request
@@ -17,6 +19,7 @@ DEFAULT_MAX_MESSAGES = 200
MAX_PER_PAGE = 200
DEFAULT_OUTPUT_FILE = str(SCRIPT_DIR / "generated" / "mattermost_context.jsonl")
REQUEST_TIMEOUT = 15
DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
LOGGER = logging.getLogger("mattermost_context")
@@ -29,6 +32,8 @@ CHANNEL_SPECS: List[Dict[str, str]] = []
WINDOW_HOURS = DEFAULT_WINDOW_HOURS
MAX_MESSAGES = DEFAULT_MAX_MESSAGES
CUTOFF_TIMESTAMP_MS = 0
RANGE_START_TIMESTAMP_MS = 0
RANGE_END_TIMESTAMP_MS = 0
OUTPUT_FILE = DEFAULT_OUTPUT_FILE
REQUEST_HEADERS: Dict[str, str] = {}
SSL_CONTEXT: ssl.SSLContext | None = None
@@ -40,6 +45,44 @@ class MattermostAPIError(RuntimeError):
pass
def parse_args() -> Namespace:
parser = ArgumentParser(description="Extract Mattermost messages as JSONL context.")
parser.add_argument(
"--previous-workday",
action="store_true",
help="Fetch the latest prior calendar day with Mattermost activity instead of a fixed recent window.",
)
parser.add_argument(
"--today",
default=date.today().isoformat(),
help="Reference date in YYYY-MM-DD format. Defaults to today.",
)
parser.add_argument(
"--max-lookback-days",
type=int,
default=int(os.getenv("MATTERMOST_MAX_LOOKBACK_DAYS", "7")),
help="Maximum days to search backward with --previous-workday.",
)
parser.add_argument(
"--window-hours",
type=int,
default=0,
help="Override MESSAGE_WINDOW_HOURS for normal recent-window mode.",
)
parser.add_argument(
"--output-file",
default="",
help="Override MATTERMOST_OUTPUT_FILE.",
)
return parser.parse_args()
def parse_iso_date(raw_value: str) -> date:
    """Parse a strict ``YYYY-MM-DD`` string into a ``date``.

    Args:
        raw_value: Candidate date text.

    Returns:
        The corresponding ``date``.

    Raises:
        ValueError: If the text is not exactly ``YYYY-MM-DD`` or does not
            name a real calendar day. The message is the same in both cases.
    """
    message = f"Invalid date '{raw_value}'. Use YYYY-MM-DD."
    # fullmatch, not match: the pattern's `$` would otherwise accept a
    # trailing newline and defer the failure to strptime.
    if not DATE_RE.fullmatch(raw_value):
        raise ValueError(message)
    try:
        return datetime.strptime(raw_value, "%Y-%m-%d").date()
    except ValueError as err:
        # Well-formed but impossible dates (e.g. 2024-02-30) land here.
        raise ValueError(message) from err
def parse_bool_env(name: str, default: bool = False) -> bool:
raw_value = os.getenv(name)
if raw_value is None:
@@ -133,18 +176,18 @@ def build_ssl_context() -> ssl.SSLContext:
return ssl.create_default_context()
def configure() -> None:
def configure(args: Namespace) -> None:
global MATTERMOST_URL, CHANNEL_SPECS, WINDOW_HOURS, MAX_MESSAGES, CUTOFF_TIMESTAMP_MS, OUTPUT_FILE
global REQUEST_HEADERS, SSL_CONTEXT
global RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS, REQUEST_HEADERS, SSL_CONTEXT
global MATTERMOST_TEAM_NAME, MATTERMOST_TEAM_ID
load_dotenv_file()
MATTERMOST_URL = require_env("MATTERMOST_URL").rstrip("/")
token = require_env("MATTERMOST_TOKEN")
CHANNEL_SPECS = parse_channel_specs()
WINDOW_HOURS = int(os.getenv("MESSAGE_WINDOW_HOURS", str(DEFAULT_WINDOW_HOURS)))
WINDOW_HOURS = args.window_hours or int(os.getenv("MESSAGE_WINDOW_HOURS", str(DEFAULT_WINDOW_HOURS)))
MAX_MESSAGES = int(os.getenv("MAX_MESSAGES", str(DEFAULT_MAX_MESSAGES)))
OUTPUT_FILE = os.getenv("MATTERMOST_OUTPUT_FILE", DEFAULT_OUTPUT_FILE).strip() or DEFAULT_OUTPUT_FILE
OUTPUT_FILE = args.output_file or os.getenv("MATTERMOST_OUTPUT_FILE", DEFAULT_OUTPUT_FILE).strip() or DEFAULT_OUTPUT_FILE
MATTERMOST_TEAM_NAME = os.getenv("MATTERMOST_TEAM_NAME", "").strip()
MATTERMOST_TEAM_ID = os.getenv("MATTERMOST_TEAM_ID", "").strip()
@@ -155,6 +198,8 @@ def configure() -> None:
cutoff = datetime.now().astimezone() - timedelta(hours=WINDOW_HOURS)
CUTOFF_TIMESTAMP_MS = int(cutoff.timestamp() * 1000)
RANGE_START_TIMESTAMP_MS = 0
RANGE_END_TIMESTAMP_MS = 0
REQUEST_HEADERS = {
"Authorization": f"Bearer {token}",
@@ -194,6 +239,8 @@ def get_channel_posts(channel_id: str) -> List[Dict[str, Any]]:
collected: List[Dict[str, Any]] = []
page = 0
per_page = min(MAX_PER_PAGE, MAX_MESSAGES)
start_timestamp_ms = RANGE_START_TIMESTAMP_MS or CUTOFF_TIMESTAMP_MS
end_timestamp_ms = RANGE_END_TIMESTAMP_MS
while len(collected) < MAX_MESSAGES:
payload = api_get_json(
@@ -213,9 +260,11 @@ def get_channel_posts(channel_id: str) -> List[Dict[str, Any]]:
continue
created_at = int(post.get("create_at", 0))
if created_at < CUTOFF_TIMESTAMP_MS:
if created_at < start_timestamp_ms:
reached_cutoff = True
continue
if end_timestamp_ms and created_at >= end_timestamp_ms:
continue
collected.append(post)
if len(collected) >= MAX_MESSAGES:
@@ -375,10 +424,10 @@ def is_system_message(post: Dict[str, Any]) -> bool:
return post_type.startswith("system_")
def extract_messages() -> List[Dict[str, Any]]:
def extract_messages(resolved_channels: List[Dict[str, Any]] | None = None) -> List[Dict[str, Any]]:
all_messages: List[Dict[str, Any]] = []
for channel in resolve_channels():
for channel in resolved_channels or resolve_channels():
channel_id = channel.get("id", "")
channel_name = channel.get("name", "") or channel_id
channel_display_name = channel.get("display_name", "") or channel_name
@@ -418,6 +467,38 @@ def extract_messages() -> List[Dict[str, Any]]:
return all_messages
def day_range_ms(day: date) -> tuple[int, int]:
    """Return the half-open ``[start, end)`` bounds of a local calendar day.

    Args:
        day: Calendar day, interpreted in the system's local timezone.

    Returns:
        ``(start_ms, end_ms)`` as epoch milliseconds, where ``start_ms`` is
        local midnight of ``day`` and ``end_ms`` is local midnight of the
        following day.
    """
    start = datetime.combine(day, time.min).astimezone()
    # Resolve the next midnight on its own instead of adding 24 hours to
    # `start`: `start` carries a fixed UTC offset, so a plain +1 day would
    # be an hour off on days when the local offset changes (DST).
    end = datetime.combine(day + timedelta(days=1), time.min).astimezone()
    return int(start.timestamp() * 1000), int(end.timestamp() * 1000)
def set_fetch_range(start_ms: int, end_ms: int) -> None:
    """Store the fetch window bounds in the module-level range globals.

    Args:
        start_ms: Value assigned to ``RANGE_START_TIMESTAMP_MS``.
        end_ms: Value assigned to ``RANGE_END_TIMESTAMP_MS``.
    """
    global RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS
    RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS = start_ms, end_ms
def extract_previous_workday_messages(args: Namespace) -> tuple[List[Dict[str, Any]], date | None, int]:
    """Find the most recent prior calendar day that has any messages.

    Starting at the day before ``args.today``, each candidate day is set as
    the fetch range and extracted in turn until one yields messages or
    ``args.max_lookback_days`` days have been tried.

    Args:
        args: Parsed CLI options; reads ``today`` (YYYY-MM-DD) and
            ``max_lookback_days``.

    Returns:
        ``(messages, selected_day, skipped_days)``. ``skipped_days`` is the
        number of empty days stepped over before ``selected_day``. When no
        day in the lookback window has messages, the result is
        ``([], None, max_lookback_days)``.

    Raises:
        ValueError: If ``today`` is not a valid date or ``max_lookback_days``
            is not greater than 0.
    """
    # Validate every argument before resolving channels, so bad input fails
    # without doing any channel resolution work first.
    today = parse_iso_date(args.today)
    max_lookback_days = args.max_lookback_days
    if max_lookback_days <= 0:
        raise ValueError("--max-lookback-days must be greater than 0.")

    # Resolve once and reuse the result for every candidate day.
    resolved_channels = resolve_channels()

    for skipped_days in range(max_lookback_days):
        candidate_day = today - timedelta(days=skipped_days + 1)
        start_ms, end_ms = day_range_ms(candidate_day)
        # The range is module-level state that the post fetcher reads.
        set_fetch_range(start_ms, end_ms)
        messages = extract_messages(resolved_channels)
        if messages:
            return messages, candidate_day, skipped_days
        LOGGER.info("No messages found for %s; expanding lookback.", candidate_day.isoformat())

    return [], None, max_lookback_days
def format_messages(messages: List[Dict[str, Any]]) -> str:
lines: List[str] = []
@@ -473,8 +554,20 @@ def main() -> int:
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
try:
configure()
messages = extract_messages()
args = parse_args()
configure(args)
if args.previous_workday:
messages, selected_day, skipped_days = extract_previous_workday_messages(args)
if selected_day:
LOGGER.info(
"Selected previous workday %s after skipping %s inactive calendar day(s).",
selected_day.isoformat(),
skipped_days,
)
else:
LOGGER.info("No previous workday messages found within %s day(s).", skipped_days)
else:
messages = extract_messages()
output = format_messages(messages)
print(output)
save_to_file(output)

View File

@@ -12,4 +12,4 @@ else
PYTHON_BIN="python3"
fi
exec "$PYTHON_BIN" "$SCRIPT_DIR/mattermost_context.py"
exec "$PYTHON_BIN" "$SCRIPT_DIR/mattermost_context.py" "$@"