feat: Enhance Slack import and memory promotion rules to better capture multi-channel, multi-year contributions and improve user role identification

This commit is contained in:
2026-04-09 16:17:29 -06:00
parent 1d003fa242
commit 85ba522a1d
5 changed files with 154 additions and 16 deletions

View File

@@ -50,6 +50,16 @@ Instructions:
- durable role/person associations
- recurring architecture or debugging patterns
- past approvals or decisions that still matter
- create or update person files when the archive shows a human repeatedly contributing across channels, years, or high-signal technical/process discussions
- store people conservatively:
  - exact role only when explicitly supported by the archive
  - otherwise store collaboration pattern, communication style, and project relationship
- actively look for:
  - Jira IDs plus explicit titles, sizing, and scope changes
  - repeated architecture themes around XFlow, SwiftUI, REST, GraphQL, auth, and entry-point behavior
  - ownership or responsibility boundaries between framework and consuming app teams
  - recurring pipeline or dependency failures that shaped project work
  - named people who repeatedly drive approvals, technical framing, or debugging direction
- prioritize high-signal messages such as Jira references, approvals, scope changes, root-cause notes, story points, and persistent technical constraints
- favor messages that help reconstruct project history across multiple years, not just the newest ones
- avoid promoting outdated daily status unless it changes current understanding

View File

@@ -42,6 +42,7 @@ Examples:
- confirmed version, dependency, or rollout facts tied to current work
- corrections to previously stored project context
- repeated named people with stable roles or communication relevance
- repeated named people with multi-channel, multi-year, or high-signal technical/process involvement even when the exact formal role is still unknown
---
@@ -106,6 +107,12 @@ Use these files for:
- keeping named stakeholders recognizable across sessions
- storing stable communication or responsibility context per person
When the role is not explicit, store:
- where the person tends to appear
- what kinds of topics they influence
- how they affect approvals, scope, debugging, or communication
### `ai/context/decisions/*.md`
Use for explicit confirmed decisions with ongoing impact.

View File

@@ -75,6 +75,8 @@ Use these files when:
- a person's identity matters repeatedly
- a role becomes associated with a specific person
- a stakeholder's communication or approval patterns affect future work
- a human appears across multiple channels or years with repeated technical, process, or approval signal
- the archive makes the collaboration pattern clear even if the formal title is still unknown
Prefer:
@@ -82,6 +84,8 @@ Prefer:
- `index.md` for active roster
- one file per person for person-specific context
If confidence in a person's exact role is low, store their recurring relationship to the project instead of inventing a title.
---
## Do Not Promote

View File

@@ -51,6 +51,7 @@ For very large multi-year exports, the importer is designed to be selective by d
- preserves coverage across channels and years, not only recent history
- prefers recent messages plus older high-signal messages
- preserves strong Jira-linked messages even when they are old
- highlights repeated human participants with cross-channel, multi-year, or high-signal involvement so they can be added to people memory
- prioritizes Jira IDs, approvals, scope changes, root-cause notes, story points, and durable technical patterns
Override behavior if needed:

View File

@@ -49,6 +49,42 @@ HIGH_SIGNAL_PATTERNS = [
r"\bblocked?\b",
]
]
# Topic buckets used when summarizing an import: each key names a topic and
# maps to a case-insensitive regex; build_summary() counts a message toward a
# topic when that topic's pattern is found anywhere in the message text.
#
# Every alternation is wrapped in \b...\b, so each alternative must match a
# whole word or phrase. Word stems therefore need an explicit suffix (see
# "reproduc\w*" below); a bare stem followed by \b can never match.
TOPIC_PATTERNS = {
    "xflow_swiftui": re.compile(
        r"\b(xflow|swiftui|viewmaker|delegate|lifecycle|navigation|next button|markdown modal|validation)\b",
        re.IGNORECASE,
    ),
    "rest_graphql": re.compile(
        r"\b(rest|graphql|apollo|feature flag|transport)\b",
        re.IGNORECASE,
    ),
    "pipeline_ci": re.compile(
        r"\b(apex|apexkit|pipeline|ci|preview macro|analytics|build|archive|sampleapp)\b",
        re.IGNORECASE,
    ),
    "auth_repro": re.compile(
        # "reproduc\w*" covers reproduce / reproduced / reproduction /
        # reproducible; the bare stem "reproduc" could not match any of them
        # because the trailing \b demands a word boundary right after the "c".
        r"\b(auth|authenticated|non-auth|reproduc\w*|teenidentitycheck|dob|regression|external report)\b",
        re.IGNORECASE,
    ),
    "process_communication": re.compile(
        r"\b(approved|title|description|scope|points|jira|pr|wording|send this|manager)\b",
        re.IGNORECASE,
    ),
}
# Phrases that hint at the kind of role a message author plays. Each key labels
# a hint category and maps to a case-insensitive, whole-word/phrase regex.
# In build_summary() every pattern that matches a message adds one to that
# author's signal score; the key names themselves are only labels there.
ROLE_HINT_PATTERNS = {
    hint_name: re.compile(expression, flags=re.IGNORECASE)
    for hint_name, expression in (
        (
            "manager_or_lead",
            r"\b(approved|use this for the description|send it to|did you make the story|can start on this|estimate|points)\b",
        ),
        (
            "xflow_ios_engineer",
            r"\b(xflow|swiftui|viewmaker|delegate|lifecycle|navigation|validation|next button)\b",
        ),
        (
            "build_pipeline_engineer",
            r"\b(apex|apexkit|pipeline|ci|analytics|preview macro|archive|sampleapp|jenkins|sonarqube)\b",
        ),
    )
}
def parse_args() -> argparse.Namespace:
@@ -107,7 +143,7 @@ def parse_date(raw: str) -> Optional[datetime]:
return datetime.strptime(raw, "%Y-%m-%d")
def load_users(export_root: Path) -> Dict[str, str]:
def load_users(export_root: Path) -> Dict[str, Dict[str, str]]:
users_path = export_root / "users.json"
if not users_path.exists():
return {}
@@ -117,18 +153,17 @@ def load_users(export_root: Path) -> Dict[str, str]:
except json.JSONDecodeError:
return {}
user_map: Dict[str, str] = {}
user_map: Dict[str, Dict[str, str]] = {}
for user in payload:
user_id = user.get("id", "")
profile = user.get("profile", {}) or {}
name = (
user.get("name")
or profile.get("display_name")
or profile.get("real_name")
or user_id
)
handle = user.get("name") or user_id
display_name = profile.get("real_name") or profile.get("display_name") or handle
if user_id:
user_map[user_id] = name
user_map[user_id] = {
"handle": handle,
"display_name": display_name,
}
return user_map
@@ -178,13 +213,15 @@ def date_in_range(day_file: Path, since: Optional[datetime], until: Optional[dat
return True
def resolve_username(message: Dict[str, Any], user_map: Dict[str, str]) -> str:
def resolve_user_identity(message: Dict[str, Any], user_map: Dict[str, Dict[str, str]]) -> Dict[str, str]:
user_id = message.get("user", "")
if user_id and user_id in user_map:
return user_map[user_id]
if message.get("username"):
return str(message["username"])
return user_id or "unknown"
username = str(message["username"])
return {"handle": username, "display_name": username}
fallback = user_id or "unknown"
return {"handle": fallback, "display_name": fallback}
def message_timestamp(ts_value: str) -> str:
@@ -203,17 +240,19 @@ def message_datetime(ts_value: str) -> Optional[datetime]:
return datetime.fromtimestamp(ts_float).astimezone()
def normalize_message(channel: str, raw: Dict[str, Any], user_map: Dict[str, str]) -> Optional[Dict[str, Any]]:
def normalize_message(channel: str, raw: Dict[str, Any], user_map: Dict[str, Dict[str, str]]) -> Optional[Dict[str, Any]]:
text = (raw.get("text") or "").strip()
if not text:
return None
subtype = raw.get("subtype", "")
identity = resolve_user_identity(raw, user_map)
record: Dict[str, Any] = {
"source": "slack",
"channel": channel,
"timestamp": message_timestamp(str(raw.get("ts", ""))),
"username": resolve_username(raw, user_map),
"username": identity["display_name"],
"slack_handle": identity["handle"],
"message": text,
"type": subtype or "message",
"thread_ts": raw.get("thread_ts") or None,
@@ -441,10 +480,40 @@ def build_summary(messages: List[Dict[str, Any]], channels: Sequence[str]) -> st
user_counter = Counter(item["username"] for item in messages)
jira_counter = Counter()
year_counter = Counter()
topic_counter = Counter()
user_topic_counter: Dict[str, Counter] = {}
user_channel_counter: Dict[str, Counter] = {}
user_year_counter: Dict[str, Counter] = {}
user_signal_counter = Counter()
user_examples: Dict[str, List[str]] = {}
user_handles: Dict[str, str] = {}
for item in messages:
jira_counter.update(JIRA_RE.findall(item.get("message", "")))
year_counter.update([message_year(item)])
message = item.get("message", "")
username = item.get("username", "")
user_handles.setdefault(username, item.get("slack_handle", username))
jira_counter.update(JIRA_RE.findall(message))
year = message_year(item)
year_counter.update([year])
user_channel_counter.setdefault(username, Counter()).update([item["channel"]])
user_year_counter.setdefault(username, Counter()).update([year])
topics = [name for name, pattern in TOPIC_PATTERNS.items() if pattern.search(message)]
if topics:
topic_counter.update(topics)
user_signal_counter[username] += len(topics) + len(JIRA_RE.findall(message))
user_topic_counter.setdefault(username, Counter()).update(topics)
example = f"{item['timestamp']} {item['channel']}: {message[:140].replace(chr(10), ' ')}"
user_examples.setdefault(username, [])
if len(user_examples[username]) < 2 and example not in user_examples[username]:
user_examples[username].append(example)
role_hits = 0
for pattern in ROLE_HINT_PATTERNS.values():
if pattern.search(message):
role_hits += 1
if role_hits:
user_signal_counter[username] += role_hits
first_timestamp = messages[0]["timestamp"] if messages else "n/a"
last_timestamp = messages[-1]["timestamp"] if messages else "n/a"
@@ -477,6 +546,53 @@ def build_summary(messages: List[Dict[str, Any]], channels: Sequence[str]) -> st
for year, count in year_counter.most_common():
lines.append(f"- {year}: {count}")
lines.extend(["", "## Topic Signals"])
if topic_counter:
for topic, count in topic_counter.most_common():
lines.append(f"- {topic}: {count}")
else:
lines.append("- No topic patterns matched")
lines.extend(["", "## People Worth Reviewing"])
ranked_people = sorted(
user_counter,
key=lambda username: (
user_signal_counter[username],
len(user_channel_counter.get(username, {})),
len(user_year_counter.get(username, {})),
user_counter[username],
),
reverse=True,
)
candidate_count = 0
for username in ranked_people:
handle = user_handles.get(username, username).lower()
if handle in {"uslackbot", "internal trackit report", "jirabot", "geekbot"}:
continue
channels_seen = len(user_channel_counter.get(username, {}))
years_seen = len(user_year_counter.get(username, {}))
signal = user_signal_counter[username]
if signal < 12 and user_counter[username] < 20:
continue
top_topics = ", ".join(
topic for topic, _ in user_topic_counter.get(username, Counter()).most_common(3)
) or "general project discussion"
name_label = username if handle == username else f"{username} (Slack: {handle})"
lines.append(
f"- {name_label}: {user_counter[username]} messages, signal={signal}, channels={channels_seen}, years={years_seen}, topics={top_topics}"
)
for example in user_examples.get(username, []):
lines.append(f" Evidence: {example}")
candidate_count += 1
if candidate_count >= 8:
break
lines.extend(["", "## Import Guidance"])
lines.append("- Create or update person files for repeated humans with multi-channel or multi-year involvement.")
lines.append("- Prefer storing exact role only when the archive states it clearly; otherwise store relationship and collaboration pattern.")
lines.append("- Promote repeated Jira/title/scope/approval patterns when they still clarify current project understanding.")
lines.append("- Keep old status-only updates archive-only unless they alter current context.")
lines.extend(
[
"",