|
|
|
|
@@ -3,9 +3,11 @@
|
|
|
|
|
import json
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import ssl
|
|
|
|
|
import sys
|
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
|
from argparse import ArgumentParser, Namespace
|
|
|
|
|
from datetime import date, datetime, time, timedelta
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Any, Dict, List
|
|
|
|
|
from urllib import error, parse, request
|
|
|
|
|
@@ -17,6 +19,7 @@ DEFAULT_MAX_MESSAGES = 200
|
|
|
|
|
MAX_PER_PAGE = 200
|
|
|
|
|
DEFAULT_OUTPUT_FILE = str(SCRIPT_DIR / "generated" / "mattermost_context.jsonl")
|
|
|
|
|
REQUEST_TIMEOUT = 15
|
|
|
|
|
DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LOGGER = logging.getLogger("mattermost_context")
|
|
|
|
|
@@ -29,6 +32,8 @@ CHANNEL_SPECS: List[Dict[str, str]] = []
|
|
|
|
|
WINDOW_HOURS = DEFAULT_WINDOW_HOURS
|
|
|
|
|
MAX_MESSAGES = DEFAULT_MAX_MESSAGES
|
|
|
|
|
CUTOFF_TIMESTAMP_MS = 0
|
|
|
|
|
RANGE_START_TIMESTAMP_MS = 0
|
|
|
|
|
RANGE_END_TIMESTAMP_MS = 0
|
|
|
|
|
OUTPUT_FILE = DEFAULT_OUTPUT_FILE
|
|
|
|
|
REQUEST_HEADERS: Dict[str, str] = {}
|
|
|
|
|
SSL_CONTEXT: ssl.SSLContext | None = None
|
|
|
|
|
@@ -40,6 +45,44 @@ class MattermostAPIError(RuntimeError):
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_args() -> Namespace:
    """Parse the process command line for the Mattermost extractor.

    Recognized options:
        --previous-workday   boolean flag (stored as ``previous_workday``).
        --today              reference date string; defaults to today's ISO date.
        --max-lookback-days  int; defaults to MATTERMOST_MAX_LOOKBACK_DAYS or 7.
        --window-hours       int; defaults to 0.
        --output-file        string; defaults to "".

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    # Dynamic defaults are computed once, up front, so the option table below
    # stays purely declarative.
    today_default = date.today().isoformat()
    lookback_default = int(os.getenv("MATTERMOST_MAX_LOOKBACK_DAYS", "7"))

    option_table = (
        (
            "--previous-workday",
            {
                "action": "store_true",
                "help": "Fetch the latest prior calendar day with Mattermost activity instead of a fixed recent window.",
            },
        ),
        (
            "--today",
            {
                "default": today_default,
                "help": "Reference date in YYYY-MM-DD format. Defaults to today.",
            },
        ),
        (
            "--max-lookback-days",
            {
                "type": int,
                "default": lookback_default,
                "help": "Maximum days to search backward with --previous-workday.",
            },
        ),
        (
            "--window-hours",
            {
                "type": int,
                "default": 0,
                "help": "Override MESSAGE_WINDOW_HOURS for normal recent-window mode.",
            },
        ),
        (
            "--output-file",
            {
                "default": "",
                "help": "Override MATTERMOST_OUTPUT_FILE.",
            },
        ),
    )

    cli = ArgumentParser(description="Extract Mattermost messages as JSONL context.")
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_iso_date(raw_value: str) -> date:
    """Parse a strict ``YYYY-MM-DD`` string into a ``date``.

    Args:
        raw_value: Text expected to be exactly four digits, a dash, two
            digits, a dash, and two digits.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If the text is not shaped like YYYY-MM-DD, or is shaped
            correctly but does not name a real calendar date (for example
            ``2024-02-30``). The message is the same in both cases.
    """
    # fullmatch rather than match: the "$" anchor in DATE_RE also matches just
    # before a trailing newline, which would let "2024-01-01\n" through.
    if not DATE_RE.fullmatch(raw_value):
        raise ValueError(f"Invalid date '{raw_value}'. Use YYYY-MM-DD.")
    try:
        return datetime.strptime(raw_value, "%Y-%m-%d").date()
    except ValueError as exc:
        # Well-shaped but impossible dates get the same actionable message
        # instead of strptime's generic one; the cause stays chained.
        raise ValueError(f"Invalid date '{raw_value}'. Use YYYY-MM-DD.") from exc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_bool_env(name: str, default: bool = False) -> bool:
|
|
|
|
|
raw_value = os.getenv(name)
|
|
|
|
|
if raw_value is None:
|
|
|
|
|
@@ -133,18 +176,18 @@ def build_ssl_context() -> ssl.SSLContext:
|
|
|
|
|
return ssl.create_default_context()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def configure() -> None:
|
|
|
|
|
def configure(args: Namespace) -> None:
|
|
|
|
|
global MATTERMOST_URL, CHANNEL_SPECS, WINDOW_HOURS, MAX_MESSAGES, CUTOFF_TIMESTAMP_MS, OUTPUT_FILE
|
|
|
|
|
global REQUEST_HEADERS, SSL_CONTEXT
|
|
|
|
|
global RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS, REQUEST_HEADERS, SSL_CONTEXT
|
|
|
|
|
global MATTERMOST_TEAM_NAME, MATTERMOST_TEAM_ID
|
|
|
|
|
|
|
|
|
|
load_dotenv_file()
|
|
|
|
|
MATTERMOST_URL = require_env("MATTERMOST_URL").rstrip("/")
|
|
|
|
|
token = require_env("MATTERMOST_TOKEN")
|
|
|
|
|
CHANNEL_SPECS = parse_channel_specs()
|
|
|
|
|
WINDOW_HOURS = int(os.getenv("MESSAGE_WINDOW_HOURS", str(DEFAULT_WINDOW_HOURS)))
|
|
|
|
|
WINDOW_HOURS = args.window_hours or int(os.getenv("MESSAGE_WINDOW_HOURS", str(DEFAULT_WINDOW_HOURS)))
|
|
|
|
|
MAX_MESSAGES = int(os.getenv("MAX_MESSAGES", str(DEFAULT_MAX_MESSAGES)))
|
|
|
|
|
OUTPUT_FILE = os.getenv("MATTERMOST_OUTPUT_FILE", DEFAULT_OUTPUT_FILE).strip() or DEFAULT_OUTPUT_FILE
|
|
|
|
|
OUTPUT_FILE = args.output_file or os.getenv("MATTERMOST_OUTPUT_FILE", DEFAULT_OUTPUT_FILE).strip() or DEFAULT_OUTPUT_FILE
|
|
|
|
|
MATTERMOST_TEAM_NAME = os.getenv("MATTERMOST_TEAM_NAME", "").strip()
|
|
|
|
|
MATTERMOST_TEAM_ID = os.getenv("MATTERMOST_TEAM_ID", "").strip()
|
|
|
|
|
|
|
|
|
|
@@ -155,6 +198,8 @@ def configure() -> None:
|
|
|
|
|
|
|
|
|
|
cutoff = datetime.now().astimezone() - timedelta(hours=WINDOW_HOURS)
|
|
|
|
|
CUTOFF_TIMESTAMP_MS = int(cutoff.timestamp() * 1000)
|
|
|
|
|
RANGE_START_TIMESTAMP_MS = 0
|
|
|
|
|
RANGE_END_TIMESTAMP_MS = 0
|
|
|
|
|
|
|
|
|
|
REQUEST_HEADERS = {
|
|
|
|
|
"Authorization": f"Bearer {token}",
|
|
|
|
|
@@ -194,6 +239,8 @@ def get_channel_posts(channel_id: str) -> List[Dict[str, Any]]:
|
|
|
|
|
collected: List[Dict[str, Any]] = []
|
|
|
|
|
page = 0
|
|
|
|
|
per_page = min(MAX_PER_PAGE, MAX_MESSAGES)
|
|
|
|
|
start_timestamp_ms = RANGE_START_TIMESTAMP_MS or CUTOFF_TIMESTAMP_MS
|
|
|
|
|
end_timestamp_ms = RANGE_END_TIMESTAMP_MS
|
|
|
|
|
|
|
|
|
|
while len(collected) < MAX_MESSAGES:
|
|
|
|
|
payload = api_get_json(
|
|
|
|
|
@@ -213,9 +260,11 @@ def get_channel_posts(channel_id: str) -> List[Dict[str, Any]]:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
created_at = int(post.get("create_at", 0))
|
|
|
|
|
if created_at < CUTOFF_TIMESTAMP_MS:
|
|
|
|
|
if created_at < start_timestamp_ms:
|
|
|
|
|
reached_cutoff = True
|
|
|
|
|
continue
|
|
|
|
|
if end_timestamp_ms and created_at >= end_timestamp_ms:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
collected.append(post)
|
|
|
|
|
if len(collected) >= MAX_MESSAGES:
|
|
|
|
|
@@ -375,10 +424,10 @@ def is_system_message(post: Dict[str, Any]) -> bool:
|
|
|
|
|
return post_type.startswith("system_")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_messages() -> List[Dict[str, Any]]:
|
|
|
|
|
def extract_messages(resolved_channels: List[Dict[str, Any]] | None = None) -> List[Dict[str, Any]]:
|
|
|
|
|
all_messages: List[Dict[str, Any]] = []
|
|
|
|
|
|
|
|
|
|
for channel in resolve_channels():
|
|
|
|
|
for channel in resolved_channels or resolve_channels():
|
|
|
|
|
channel_id = channel.get("id", "")
|
|
|
|
|
channel_name = channel.get("name", "") or channel_id
|
|
|
|
|
channel_display_name = channel.get("display_name", "") or channel_name
|
|
|
|
|
@@ -418,6 +467,38 @@ def extract_messages() -> List[Dict[str, Any]]:
|
|
|
|
|
return all_messages
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def day_range_ms(day: date) -> tuple[int, int]:
    """Return the epoch-millisecond bounds of a local calendar day.

    Args:
        day: The calendar day, interpreted in the system's local timezone.

    Returns:
        A ``(start_ms, end_ms)`` tuple: local midnight at the start of ``day``
        and local midnight at the start of the following day, both as
        milliseconds since the Unix epoch.
    """
    start = datetime.combine(day, time.min).astimezone()
    # Resolve the next local midnight on its own instead of adding 24 hours to
    # `start`: astimezone() attaches a fixed UTC offset, so `start + 1 day`
    # is off by an hour on days where the local UTC offset changes (DST).
    end = datetime.combine(day + timedelta(days=1), time.min).astimezone()
    return int(start.timestamp() * 1000), int(end.timestamp() * 1000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_fetch_range(start_ms: int, end_ms: int) -> None:
    """Store an explicit fetch range in the module-level range globals.

    Args:
        start_ms: Value assigned to ``RANGE_START_TIMESTAMP_MS``.
        end_ms: Value assigned to ``RANGE_END_TIMESTAMP_MS``.
    """
    global RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS
    # Both bounds are updated together in one statement.
    RANGE_START_TIMESTAMP_MS, RANGE_END_TIMESTAMP_MS = start_ms, end_ms
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_previous_workday_messages(args: Namespace) -> tuple[List[Dict[str, Any]], date | None, int]:
|
|
|
|
|
today = parse_iso_date(args.today)
|
|
|
|
|
resolved_channels = resolve_channels()
|
|
|
|
|
max_lookback_days = args.max_lookback_days
|
|
|
|
|
if max_lookback_days <= 0:
|
|
|
|
|
raise ValueError("--max-lookback-days must be greater than 0.")
|
|
|
|
|
|
|
|
|
|
for skipped_days in range(max_lookback_days):
|
|
|
|
|
candidate_day = today - timedelta(days=skipped_days + 1)
|
|
|
|
|
start_ms, end_ms = day_range_ms(candidate_day)
|
|
|
|
|
set_fetch_range(start_ms, end_ms)
|
|
|
|
|
messages = extract_messages(resolved_channels)
|
|
|
|
|
if messages:
|
|
|
|
|
return messages, candidate_day, skipped_days
|
|
|
|
|
|
|
|
|
|
LOGGER.info("No messages found for %s; expanding lookback.", candidate_day.isoformat())
|
|
|
|
|
|
|
|
|
|
return [], None, max_lookback_days
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_messages(messages: List[Dict[str, Any]]) -> str:
|
|
|
|
|
lines: List[str] = []
|
|
|
|
|
|
|
|
|
|
@@ -473,8 +554,20 @@ def main() -> int:
|
|
|
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
configure()
|
|
|
|
|
messages = extract_messages()
|
|
|
|
|
args = parse_args()
|
|
|
|
|
configure(args)
|
|
|
|
|
if args.previous_workday:
|
|
|
|
|
messages, selected_day, skipped_days = extract_previous_workday_messages(args)
|
|
|
|
|
if selected_day:
|
|
|
|
|
LOGGER.info(
|
|
|
|
|
"Selected previous workday %s after skipping %s inactive calendar day(s).",
|
|
|
|
|
selected_day.isoformat(),
|
|
|
|
|
skipped_days,
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
LOGGER.info("No previous workday messages found within %s day(s).", skipped_days)
|
|
|
|
|
else:
|
|
|
|
|
messages = extract_messages()
|
|
|
|
|
output = format_messages(messages)
|
|
|
|
|
print(output)
|
|
|
|
|
save_to_file(output)
|
|
|
|
|
|