feat: Add previous workday mode to Mattermost extractor and enhance sync script
This commit is contained in:
@@ -3,9 +3,11 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from argparse import ArgumentParser, Namespace
|
||||
from datetime import date, datetime, time, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
from urllib import error, parse, request
|
||||
@@ -17,6 +19,7 @@ DEFAULT_MAX_MESSAGES = 200
|
||||
MAX_PER_PAGE = 200
|
||||
DEFAULT_OUTPUT_FILE = str(SCRIPT_DIR / "generated" / "mattermost_context.jsonl")
|
||||
REQUEST_TIMEOUT = 15
|
||||
DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
|
||||
|
||||
|
||||
LOGGER = logging.getLogger("mattermost_context")
|
||||
@@ -29,6 +32,8 @@ CHANNEL_SPECS: List[Dict[str, str]] = []
|
||||
WINDOW_HOURS = DEFAULT_WINDOW_HOURS
|
||||
MAX_MESSAGES = DEFAULT_MAX_MESSAGES
|
||||
CUTOFF_TIMESTAMP_MS = 0
|
||||
RANGE_START_TIMESTAMP_MS = 0
|
||||
RANGE_END_TIMESTAMP_MS = 0
|
||||
OUTPUT_FILE = DEFAULT_OUTPUT_FILE
|
||||
REQUEST_HEADERS: Dict[str, str] = {}
|
||||
SSL_CONTEXT: ssl.SSLContext | None = None
|
||||
@@ -40,6 +45,44 @@ class MattermostAPIError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
def parse_args() -> Namespace:
|
||||
parser = ArgumentParser(description="Extract Mattermost messages as JSONL context.")
|
||||
parser.add_argument(
|
||||
"--previous-workday",
|
||||
action="store_true",
|
||||
help="Fetch the latest prior calendar day with Mattermost activity instead of a fixed recent window.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--today",
|
||||
default=date.today().isoformat(),
|
||||
help="Reference date in YYYY-MM-DD format. Defaults to today.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-lookback-days",
|
||||
type=int,
|
||||
default=int(os.getenv("MATTERMOST_MAX_LOOKBACK_DAYS", "7")),
|
||||
help="Maximum days to search backward with --previous-workday.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--window-hours",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Override MESSAGE_WINDOW_HOURS for normal recent-window mode.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-file",
|
||||
default="",
|
||||
help="Override MATTERMOST_OUTPUT_FILE.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def parse_iso_date(raw_value: str) -> date:
    """Convert a strict ``YYYY-MM-DD`` string into a ``date``.

    Args:
        raw_value: Text to parse; it must be exactly four digits, a dash, two
            digits, a dash, two digits.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: if the text has the wrong shape or names a day that does
            not exist (for example ``2024-02-30``). Both cases carry the same
            message so callers see one consistent error.
    """
    message = f"Invalid date '{raw_value}'. Use YYYY-MM-DD."
    # fullmatch rejects a trailing newline, which "$" alone would tolerate.
    if not DATE_RE.fullmatch(raw_value):
        raise ValueError(message)
    try:
        return datetime.strptime(raw_value, "%Y-%m-%d").date()
    except ValueError as err:
        # The shape was right but the calendar date is impossible; replace
        # strptime's low-level wording with the user-facing message.
        raise ValueError(message) from err
|
||||
|
||||
|
||||
def parse_bool_env(name: str, default: bool = False) -> bool:
|
||||
raw_value = os.getenv(name)
|
||||
if raw_value is None:
|
||||
@@ -133,18 +176,18 @@ def build_ssl_context() -> ssl.SSLContext:
|
||||
return ssl.create_default_context()
|
||||
|
||||
|
||||
def configure() -> None:
|
||||
def configure(args: Namespace) -> None:
|
||||
global MATTERMOST_URL, CHANNEL_SPECS, WINDOW_HOURS, MAX_MESSAGES, CUTOFF_TIMESTAMP_MS, OUTPUT_FILE
|
||||
global REQUEST_HEADERS, SSL_CONTEXT
|
||||
global RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS, REQUEST_HEADERS, SSL_CONTEXT
|
||||
global MATTERMOST_TEAM_NAME, MATTERMOST_TEAM_ID
|
||||
|
||||
load_dotenv_file()
|
||||
MATTERMOST_URL = require_env("MATTERMOST_URL").rstrip("/")
|
||||
token = require_env("MATTERMOST_TOKEN")
|
||||
CHANNEL_SPECS = parse_channel_specs()
|
||||
WINDOW_HOURS = int(os.getenv("MESSAGE_WINDOW_HOURS", str(DEFAULT_WINDOW_HOURS)))
|
||||
WINDOW_HOURS = args.window_hours or int(os.getenv("MESSAGE_WINDOW_HOURS", str(DEFAULT_WINDOW_HOURS)))
|
||||
MAX_MESSAGES = int(os.getenv("MAX_MESSAGES", str(DEFAULT_MAX_MESSAGES)))
|
||||
OUTPUT_FILE = os.getenv("MATTERMOST_OUTPUT_FILE", DEFAULT_OUTPUT_FILE).strip() or DEFAULT_OUTPUT_FILE
|
||||
OUTPUT_FILE = args.output_file or os.getenv("MATTERMOST_OUTPUT_FILE", DEFAULT_OUTPUT_FILE).strip() or DEFAULT_OUTPUT_FILE
|
||||
MATTERMOST_TEAM_NAME = os.getenv("MATTERMOST_TEAM_NAME", "").strip()
|
||||
MATTERMOST_TEAM_ID = os.getenv("MATTERMOST_TEAM_ID", "").strip()
|
||||
|
||||
@@ -155,6 +198,8 @@ def configure() -> None:
|
||||
|
||||
cutoff = datetime.now().astimezone() - timedelta(hours=WINDOW_HOURS)
|
||||
CUTOFF_TIMESTAMP_MS = int(cutoff.timestamp() * 1000)
|
||||
RANGE_START_TIMESTAMP_MS = 0
|
||||
RANGE_END_TIMESTAMP_MS = 0
|
||||
|
||||
REQUEST_HEADERS = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
@@ -194,6 +239,8 @@ def get_channel_posts(channel_id: str) -> List[Dict[str, Any]]:
|
||||
collected: List[Dict[str, Any]] = []
|
||||
page = 0
|
||||
per_page = min(MAX_PER_PAGE, MAX_MESSAGES)
|
||||
start_timestamp_ms = RANGE_START_TIMESTAMP_MS or CUTOFF_TIMESTAMP_MS
|
||||
end_timestamp_ms = RANGE_END_TIMESTAMP_MS
|
||||
|
||||
while len(collected) < MAX_MESSAGES:
|
||||
payload = api_get_json(
|
||||
@@ -213,9 +260,11 @@ def get_channel_posts(channel_id: str) -> List[Dict[str, Any]]:
|
||||
continue
|
||||
|
||||
created_at = int(post.get("create_at", 0))
|
||||
if created_at < CUTOFF_TIMESTAMP_MS:
|
||||
if created_at < start_timestamp_ms:
|
||||
reached_cutoff = True
|
||||
continue
|
||||
if end_timestamp_ms and created_at >= end_timestamp_ms:
|
||||
continue
|
||||
|
||||
collected.append(post)
|
||||
if len(collected) >= MAX_MESSAGES:
|
||||
@@ -375,10 +424,10 @@ def is_system_message(post: Dict[str, Any]) -> bool:
|
||||
return post_type.startswith("system_")
|
||||
|
||||
|
||||
def extract_messages() -> List[Dict[str, Any]]:
|
||||
def extract_messages(resolved_channels: List[Dict[str, Any]] | None = None) -> List[Dict[str, Any]]:
|
||||
all_messages: List[Dict[str, Any]] = []
|
||||
|
||||
for channel in resolve_channels():
|
||||
for channel in resolved_channels or resolve_channels():
|
||||
channel_id = channel.get("id", "")
|
||||
channel_name = channel.get("name", "") or channel_id
|
||||
channel_display_name = channel.get("display_name", "") or channel_name
|
||||
@@ -418,6 +467,38 @@ def extract_messages() -> List[Dict[str, Any]]:
|
||||
return all_messages
|
||||
|
||||
|
||||
def day_range_ms(day: date) -> tuple[int, int]:
    """Return the span of a local calendar day as epoch milliseconds.

    Args:
        day: The calendar day, interpreted in the system's local timezone.

    Returns:
        ``(start_ms, end_ms)`` where ``start_ms`` is local midnight at the
        beginning of ``day`` and ``end_ms`` is local midnight at the beginning
        of the following day.
    """
    start = datetime.combine(day, time.min).astimezone()
    # Resolve the next midnight from the calendar rather than adding 24 hours
    # to ``start``: astimezone() attaches a fixed UTC offset, so on a
    # daylight-saving transition day (23 or 25 hours long) a plain
    # ``start + timedelta(days=1)`` lands an hour away from the real boundary.
    end = datetime.combine(day + timedelta(days=1), time.min).astimezone()
    return int(start.timestamp() * 1000), int(end.timestamp() * 1000)
|
||||
|
||||
|
||||
def set_fetch_range(start_ms: int, end_ms: int) -> None:
    """Record an explicit fetch window in the module-level range globals.

    Args:
        start_ms: Value stored in ``RANGE_START_TIMESTAMP_MS``.
        end_ms: Value stored in ``RANGE_END_TIMESTAMP_MS``.
    """
    global RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS
    RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS = start_ms, end_ms
|
||||
|
||||
|
||||
def extract_previous_workday_messages(args: Namespace) -> tuple[List[Dict[str, Any]], date | None, int]:
    """Find the most recent earlier calendar day that has messages.

    Starting with the day before ``args.today`` and stepping back one calendar
    day at a time, the module-level fetch range is pointed at each candidate
    day and ``extract_messages`` is run, until a day yields at least one
    message or ``args.max_lookback_days`` days have been tried.

    Args:
        args: Parsed options; ``today`` (YYYY-MM-DD string) and
            ``max_lookback_days`` (int) are read.

    Returns:
        ``(messages, selected_day, skipped_days)``. When a day with messages is
        found, ``skipped_days`` is the number of empty days passed over before
        it. When none is found the result is ``([], None, max_lookback_days)``.

    Raises:
        ValueError: if ``args.today`` is not a valid date or
            ``args.max_lookback_days`` is not greater than 0.
    """
    today = parse_iso_date(args.today)
    max_lookback_days = args.max_lookback_days
    # Validate every input before resolving channels, so a bad option is
    # reported without first doing the channel lookup (presumably API
    # requests — confirm against resolve_channels).
    if max_lookback_days <= 0:
        raise ValueError("--max-lookback-days must be greater than 0.")

    # Resolve once and reuse the result for every candidate day.
    resolved_channels = resolve_channels()

    for skipped_days in range(max_lookback_days):
        candidate_day = today - timedelta(days=skipped_days + 1)
        start_ms, end_ms = day_range_ms(candidate_day)
        # The range stays set to the last candidate after this function returns.
        set_fetch_range(start_ms, end_ms)
        messages = extract_messages(resolved_channels)
        if messages:
            return messages, candidate_day, skipped_days

        LOGGER.info("No messages found for %s; expanding lookback.", candidate_day.isoformat())

    return [], None, max_lookback_days
|
||||
|
||||
|
||||
def format_messages(messages: List[Dict[str, Any]]) -> str:
|
||||
lines: List[str] = []
|
||||
|
||||
@@ -473,8 +554,20 @@ def main() -> int:
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||
|
||||
try:
|
||||
configure()
|
||||
messages = extract_messages()
|
||||
args = parse_args()
|
||||
configure(args)
|
||||
if args.previous_workday:
|
||||
messages, selected_day, skipped_days = extract_previous_workday_messages(args)
|
||||
if selected_day:
|
||||
LOGGER.info(
|
||||
"Selected previous workday %s after skipping %s inactive calendar day(s).",
|
||||
selected_day.isoformat(),
|
||||
skipped_days,
|
||||
)
|
||||
else:
|
||||
LOGGER.info("No previous workday messages found within %s day(s).", skipped_days)
|
||||
else:
|
||||
messages = extract_messages()
|
||||
output = format_messages(messages)
|
||||
print(output)
|
||||
save_to_file(output)
|
||||
|
||||
Reference in New Issue
Block a user