feat: Enhance Slack import and memory promotion rules to better capture multi-channel, multi-year contributions and improve user role identification
This commit is contained in:
@@ -51,6 +51,7 @@ For very large multi-year exports, the importer is designed to be selective by d
|
||||
- preserves coverage across channels and years, not only recent history
|
||||
- prefers recent messages plus older high-signal messages
|
||||
- preserves strong Jira-linked messages even when they are old
|
||||
- highlights repeated human participants with cross-channel, multi-year, or high-signal involvement so they can be added to people memory
|
||||
- prioritizes Jira IDs, approvals, scope changes, root-cause notes, points, and durable technical patterns
|
||||
|
||||
Override behavior if needed:
|
||||
|
||||
@@ -49,6 +49,42 @@ HIGH_SIGNAL_PATTERNS = [
|
||||
r"\bblocked?\b",
|
||||
]
|
||||
]
|
||||
# Topic name -> compiled, case-insensitive pattern. A message is tagged with
# every topic whose pattern is found anywhere in its text.
#
# Every alternation is wrapped in \b ... \b, so each alternative must match a
# whole word or phrase. Word *stems* therefore need an explicit \w* suffix,
# otherwise the trailing \b prevents them from matching any inflected form.
TOPIC_PATTERNS = {
    "xflow_swiftui": re.compile(
        r"\b(xflow|swiftui|viewmaker|delegate|lifecycle|navigation|next button|markdown modal|validation)\b",
        re.IGNORECASE,
    ),
    "rest_graphql": re.compile(
        r"\b(rest|graphql|apollo|feature flag|transport)\b",
        re.IGNORECASE,
    ),
    "pipeline_ci": re.compile(
        r"\b(apex|apexkit|pipeline|ci|preview macro|analytics|build|archive|sampleapp)\b",
        re.IGNORECASE,
    ),
    "auth_repro": re.compile(
        # `reproduc\w*` covers reproduce / reproduced / reproducing /
        # reproducible; a bare `reproduc` followed by \b never matches them.
        r"\b(auth|authenticated|non-auth|reproduc\w*|teenidentitycheck|dob|regression|external report)\b",
        re.IGNORECASE,
    ),
    "process_communication": re.compile(
        r"\b(approved|title|description|scope|points|jira|pr|wording|send this|manager)\b",
        re.IGNORECASE,
    ),
}

# Role hint name -> compiled, case-insensitive pattern. Each pattern that is
# found in a message counts as one role hit towards the author's signal score.
ROLE_HINT_PATTERNS = {
    "manager_or_lead": re.compile(
        r"\b(approved|use this for the description|send it to|did you make the story|can start on this|estimate|points)\b",
        re.IGNORECASE,
    ),
    "xflow_ios_engineer": re.compile(
        r"\b(xflow|swiftui|viewmaker|delegate|lifecycle|navigation|validation|next button)\b",
        re.IGNORECASE,
    ),
    "build_pipeline_engineer": re.compile(
        r"\b(apex|apexkit|pipeline|ci|analytics|preview macro|archive|sampleapp|jenkins|sonarqube)\b",
        re.IGNORECASE,
    ),
}
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
@@ -107,7 +143,7 @@ def parse_date(raw: str) -> Optional[datetime]:
|
||||
return datetime.strptime(raw, "%Y-%m-%d")
|
||||
|
||||
|
||||
def load_users(export_root: Path) -> Dict[str, str]:
|
||||
def load_users(export_root: Path) -> Dict[str, Dict[str, str]]:
|
||||
users_path = export_root / "users.json"
|
||||
if not users_path.exists():
|
||||
return {}
|
||||
@@ -117,18 +153,17 @@ def load_users(export_root: Path) -> Dict[str, str]:
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
|
||||
user_map: Dict[str, str] = {}
|
||||
user_map: Dict[str, Dict[str, str]] = {}
|
||||
for user in payload:
|
||||
user_id = user.get("id", "")
|
||||
profile = user.get("profile", {}) or {}
|
||||
name = (
|
||||
user.get("name")
|
||||
or profile.get("display_name")
|
||||
or profile.get("real_name")
|
||||
or user_id
|
||||
)
|
||||
handle = user.get("name") or user_id
|
||||
display_name = profile.get("real_name") or profile.get("display_name") or handle
|
||||
if user_id:
|
||||
user_map[user_id] = name
|
||||
user_map[user_id] = {
|
||||
"handle": handle,
|
||||
"display_name": display_name,
|
||||
}
|
||||
return user_map
|
||||
|
||||
|
||||
@@ -178,13 +213,15 @@ def date_in_range(day_file: Path, since: Optional[datetime], until: Optional[dat
|
||||
return True
|
||||
|
||||
|
||||
def resolve_username(message: Dict[str, Any], user_map: Dict[str, str]) -> str:
|
||||
def resolve_user_identity(message: Dict[str, Any], user_map: Dict[str, Dict[str, str]]) -> Dict[str, str]:
    """Return the Slack handle and display name for the author of *message*.

    Resolution order:
      1. The message's ``user`` id, when it has an entry in *user_map*.
      2. The message's ``username`` field, used for both handle and name.
      3. The raw ``user`` id, or ``"unknown"`` when the message has none.

    Args:
        message: Raw message dict; only ``user`` and ``username`` are read.
        user_map: Mapping of user id to ``{"handle", "display_name"}``.

    Returns:
        A new dict with ``handle`` and ``display_name`` keys.
    """
    user_id = message.get("user", "")
    if user_id and user_id in user_map:
        # Return a copy so callers cannot mutate the shared user_map entry.
        return dict(user_map[user_id])

    if message.get("username"):
        username = str(message["username"])
        return {"handle": username, "display_name": username}

    fallback = user_id or "unknown"
    return {"handle": fallback, "display_name": fallback}
|
||||
|
||||
|
||||
def message_timestamp(ts_value: str) -> str:
|
||||
@@ -203,17 +240,19 @@ def message_datetime(ts_value: str) -> Optional[datetime]:
|
||||
return datetime.fromtimestamp(ts_float).astimezone()
|
||||
|
||||
|
||||
def normalize_message(channel: str, raw: Dict[str, Any], user_map: Dict[str, str]) -> Optional[Dict[str, Any]]:
|
||||
def normalize_message(channel: str, raw: Dict[str, Any], user_map: Dict[str, Dict[str, str]]) -> Optional[Dict[str, Any]]:
|
||||
text = (raw.get("text") or "").strip()
|
||||
if not text:
|
||||
return None
|
||||
|
||||
subtype = raw.get("subtype", "")
|
||||
identity = resolve_user_identity(raw, user_map)
|
||||
record: Dict[str, Any] = {
|
||||
"source": "slack",
|
||||
"channel": channel,
|
||||
"timestamp": message_timestamp(str(raw.get("ts", ""))),
|
||||
"username": resolve_username(raw, user_map),
|
||||
"username": identity["display_name"],
|
||||
"slack_handle": identity["handle"],
|
||||
"message": text,
|
||||
"type": subtype or "message",
|
||||
"thread_ts": raw.get("thread_ts") or None,
|
||||
@@ -441,10 +480,40 @@ def build_summary(messages: List[Dict[str, Any]], channels: Sequence[str]) -> st
|
||||
user_counter = Counter(item["username"] for item in messages)
|
||||
jira_counter = Counter()
|
||||
year_counter = Counter()
|
||||
topic_counter = Counter()
|
||||
user_topic_counter: Dict[str, Counter] = {}
|
||||
user_channel_counter: Dict[str, Counter] = {}
|
||||
user_year_counter: Dict[str, Counter] = {}
|
||||
user_signal_counter = Counter()
|
||||
user_examples: Dict[str, List[str]] = {}
|
||||
user_handles: Dict[str, str] = {}
|
||||
|
||||
for item in messages:
|
||||
jira_counter.update(JIRA_RE.findall(item.get("message", "")))
|
||||
year_counter.update([message_year(item)])
|
||||
message = item.get("message", "")
|
||||
username = item.get("username", "")
|
||||
user_handles.setdefault(username, item.get("slack_handle", username))
|
||||
jira_counter.update(JIRA_RE.findall(message))
|
||||
year = message_year(item)
|
||||
year_counter.update([year])
|
||||
user_channel_counter.setdefault(username, Counter()).update([item["channel"]])
|
||||
user_year_counter.setdefault(username, Counter()).update([year])
|
||||
|
||||
topics = [name for name, pattern in TOPIC_PATTERNS.items() if pattern.search(message)]
|
||||
if topics:
|
||||
topic_counter.update(topics)
|
||||
user_signal_counter[username] += len(topics) + len(JIRA_RE.findall(message))
|
||||
user_topic_counter.setdefault(username, Counter()).update(topics)
|
||||
example = f"{item['timestamp']} {item['channel']}: {message[:140].replace(chr(10), ' ')}"
|
||||
user_examples.setdefault(username, [])
|
||||
if len(user_examples[username]) < 2 and example not in user_examples[username]:
|
||||
user_examples[username].append(example)
|
||||
|
||||
role_hits = 0
|
||||
for pattern in ROLE_HINT_PATTERNS.values():
|
||||
if pattern.search(message):
|
||||
role_hits += 1
|
||||
if role_hits:
|
||||
user_signal_counter[username] += role_hits
|
||||
|
||||
first_timestamp = messages[0]["timestamp"] if messages else "n/a"
|
||||
last_timestamp = messages[-1]["timestamp"] if messages else "n/a"
|
||||
@@ -477,6 +546,53 @@ def build_summary(messages: List[Dict[str, Any]], channels: Sequence[str]) -> st
|
||||
for year, count in year_counter.most_common():
|
||||
lines.append(f"- {year}: {count}")
|
||||
|
||||
lines.extend(["", "## Topic Signals"])
|
||||
if topic_counter:
|
||||
for topic, count in topic_counter.most_common():
|
||||
lines.append(f"- {topic}: {count}")
|
||||
else:
|
||||
lines.append("- No topic patterns matched")
|
||||
|
||||
lines.extend(["", "## People Worth Reviewing"])
|
||||
ranked_people = sorted(
|
||||
user_counter,
|
||||
key=lambda username: (
|
||||
user_signal_counter[username],
|
||||
len(user_channel_counter.get(username, {})),
|
||||
len(user_year_counter.get(username, {})),
|
||||
user_counter[username],
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
candidate_count = 0
|
||||
for username in ranked_people:
|
||||
handle = user_handles.get(username, username).lower()
|
||||
if handle in {"uslackbot", "internal trackit report", "jirabot", "geekbot"}:
|
||||
continue
|
||||
channels_seen = len(user_channel_counter.get(username, {}))
|
||||
years_seen = len(user_year_counter.get(username, {}))
|
||||
signal = user_signal_counter[username]
|
||||
if signal < 12 and user_counter[username] < 20:
|
||||
continue
|
||||
top_topics = ", ".join(
|
||||
topic for topic, _ in user_topic_counter.get(username, Counter()).most_common(3)
|
||||
) or "general project discussion"
|
||||
name_label = username if handle == username else f"{username} (Slack: {handle})"
|
||||
lines.append(
|
||||
f"- {name_label}: {user_counter[username]} messages, signal={signal}, channels={channels_seen}, years={years_seen}, topics={top_topics}"
|
||||
)
|
||||
for example in user_examples.get(username, []):
|
||||
lines.append(f" Evidence: {example}")
|
||||
candidate_count += 1
|
||||
if candidate_count >= 8:
|
||||
break
|
||||
|
||||
lines.extend(["", "## Import Guidance"])
|
||||
lines.append("- Create or update person files for repeated humans with multi-channel or multi-year involvement.")
|
||||
lines.append("- Prefer storing exact role only when the archive states it clearly; otherwise store relationship and collaboration pattern.")
|
||||
lines.append("- Promote repeated Jira/title/scope/approval patterns when they still clarify current project understanding.")
|
||||
lines.append("- Keep old status-only updates archive-only unless they alter current context.")
|
||||
|
||||
lines.extend(
|
||||
[
|
||||
"",
|
||||
|
||||
Reference in New Issue
Block a user