Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions backend/routers/developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import database.conversations as conversations_db
import database.dev_api_key as dev_api_key_db
import database.action_items as action_items_db
import database.users as users_db

from models.memories import MemoryCategory, Memory, MemoryDB
from models.conversation import (
Expand Down Expand Up @@ -679,6 +680,29 @@ class CreateConversationFromTranscriptRequest(BaseModel):
geolocation: Optional[Geolocation] = Field(default=None, description="Geolocation where conversation occurred")


def _add_speaker_names_to_segments(uid, conversations: list):
    """Add ``speaker_name`` to every transcript segment of the conversations.

    Segments are modified in place; nothing is returned. The name is chosen
    by the first rule that matches:

    * ``is_user`` is truthy -> ``'User'``
    * ``person_id`` matches a record returned by
      ``users_db.get_people_by_ids`` -> that record's ``name``
    * otherwise -> ``"Speaker <speaker_id>"`` (``speaker_id`` defaults to 0)

    Args:
        uid: Passed through to ``users_db.get_people_by_ids``.
        conversations: Conversation dicts. ``transcript_segments`` may be
            missing, empty or ``None``; such conversations are left untouched.
    """
    # Flatten once so collecting ids and naming walk the same list.
    # `or []` (rather than a .get default) also covers a key that is present
    # but None, which would otherwise fail with "NoneType is not iterable".
    segments = [
        seg
        for conv in conversations
        for seg in (conv.get('transcript_segments') or [])
    ]

    # One batched lookup for all conversations instead of one per conversation.
    person_ids = {seg['person_id'] for seg in segments if seg.get('person_id')}
    people_map = {}
    if person_ids:
        people_data = users_db.get_people_by_ids(uid, list(person_ids))
        people_map = {p['id']: p['name'] for p in people_data}

    for seg in segments:
        if seg.get('is_user'):
            seg['speaker_name'] = 'User'
        elif seg.get('person_id') and seg['person_id'] in people_map:
            seg['speaker_name'] = people_map[seg['person_id']]
        else:
            # Unknown or unmapped speaker: fall back to the diarization id.
            seg['speaker_name'] = f"Speaker {seg.get('speaker_id', 0)}"


Comment on lines +683 to +705
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is nearly identical to _add_speaker_names_to_payload() in backend/utils/webhooks.py (lines 35-53). The logic for mapping speaker names is duplicated in both places.

Consider extracting this common logic into a shared utility function (e.g., in backend/utils/ or backend/database/) that both files can import and use. This would:

  • Reduce code duplication
  • Make the logic easier to maintain and test
  • Ensure consistent behavior across both the webhook and API endpoints
Suggested change
def _add_speaker_names_to_segments(uid, conversations: list):
"""Add speaker_name to transcript segments based on person_id mappings."""
all_person_ids = set()
for conv in conversations:
for seg in conv.get('transcript_segments', []):
if seg.get('person_id'):
all_person_ids.add(seg['person_id'])
people_map = {}
if all_person_ids:
people_data = users_db.get_people_by_ids(uid, list(all_person_ids))
people_map = {p['id']: p['name'] for p in people_data}
for conv in conversations:
for seg in conv.get('transcript_segments', []):
if seg.get('is_user'):
seg['speaker_name'] = 'User'
elif seg.get('person_id') and seg['person_id'] in people_map:
seg['speaker_name'] = people_map[seg['person_id']]
else:
seg['speaker_name'] = f"Speaker {seg.get('speaker_id', 0)}"
def _annotate_speaker_names(uid, transcript_segments: list):
    """Add ``speaker_name`` to transcript segments based on person_id mappings.

    This helper operates on a flat list of segment dicts so it can be reused
    wherever transcript segments are available. Segments are modified in
    place; nothing is returned. The name is chosen by the first rule that
    matches:

    * ``is_user`` is truthy -> ``'User'``
    * ``person_id`` matches a record returned by
      ``users_db.get_people_by_ids`` -> that record's ``name``
    * otherwise -> ``"Speaker <speaker_id>"`` (``speaker_id`` defaults to 0)

    Args:
        uid: Passed through to ``users_db.get_people_by_ids``.
        transcript_segments: Segment dicts to annotate; empty or ``None`` is
            a no-op.
    """
    if not transcript_segments:
        # Nothing to annotate; also keeps a None argument from raising.
        return

    all_person_ids = {seg['person_id'] for seg in transcript_segments if seg.get('person_id')}
    people_map = {}
    if all_person_ids:
        # Single batched lookup for every distinct person referenced.
        people_data = users_db.get_people_by_ids(uid, list(all_person_ids))
        people_map = {p['id']: p['name'] for p in people_data}

    for seg in transcript_segments:
        if seg.get('is_user'):
            seg['speaker_name'] = 'User'
        elif seg.get('person_id') and seg['person_id'] in people_map:
            seg['speaker_name'] = people_map[seg['person_id']]
        else:
            # Unknown or unmapped speaker: fall back to the diarization id.
            seg['speaker_name'] = f"Speaker {seg.get('speaker_id', 0)}"
def _add_speaker_names_to_segments(uid, conversations: list):
    """Add ``speaker_name`` to transcript segments in a list of conversations.

    Segments from all conversations are flattened into one list and handed to
    ``_annotate_speaker_names`` in a single call. Conversations whose
    ``transcript_segments`` is missing, empty or ``None`` contribute nothing,
    and the helper is not called at all when no segments are found.

    Args:
        uid: Passed through to ``_annotate_speaker_names``.
        conversations: Conversation dicts that may carry a
            ``transcript_segments`` list.
    """
    # `or []` skips falsy values exactly as the original `if segments:` did.
    all_segments = [
        seg
        for conv in conversations
        for seg in (conv.get('transcript_segments') or [])
    ]
    if all_segments:
        _annotate_speaker_names(uid, all_segments)

Copilot uses AI. Check for mistakes.
@router.get("/v1/dev/user/conversations", response_model=List[Conversation], tags=["developer"])
def get_conversations(
start_date: Optional[datetime] = None,
Expand Down Expand Up @@ -717,6 +741,8 @@ def get_conversations(
if not include_transcript:
for conv in unlocked_conversations:
conv.pop('transcript_segments', None)
else:
_add_speaker_names_to_segments(uid, unlocked_conversations)
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new speaker name mapping functionality in the developer API lacks test coverage. Since the repository has established integration tests (e.g., backend/tests/integration/test_notifications_integration.py), consider adding tests to verify:

  • Speaker name resolution from person_id mappings works correctly in both single and multi-conversation scenarios
  • Correct fallback to "User" for user segments
  • Correct fallback to "Speaker {id}" format for unknown speakers
  • Behavior when include_transcript=True vs include_transcript=False
  • Handling of conversations with no transcript segments
  • Behavior when get_people_by_ids() returns empty or partial results

This would help ensure the feature works correctly and prevent regressions.

Copilot uses AI. Check for mistakes.

return unlocked_conversations

Expand Down Expand Up @@ -822,6 +848,8 @@ def get_conversation_endpoint(
# Remove transcript_segments if not requested
if not include_transcript:
conversation.pop('transcript_segments', None)
else:
_add_speaker_names_to_segments(uid, [conversation])

return conversation

Expand Down
23 changes: 23 additions & 0 deletions backend/utils/webhooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from models.conversation import Conversation
from models.users import WebhookType
import database.notifications as notification_db
import database.users as users_db
from utils.notifications import send_notification


Expand All @@ -31,6 +32,27 @@ def _json_serialize_datetime(obj: Any) -> Any:
return obj


def _add_speaker_names_to_payload(uid, payload: dict):
"""Add speaker_name to transcript segments in webhook payload."""
segments = payload.get('transcript_segments', [])
if not segments:
return

person_ids = [seg.get('person_id') for seg in segments if seg.get('person_id')]
people_map = {}
if person_ids:
people_data = users_db.get_people_by_ids(uid, list(set(person_ids)))
people_map = {p['id']: p['name'] for p in people_data}
Comment on lines +41 to +45
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The logic for collecting person_ids can be made more memory-efficient by building a set directly, rather than creating an intermediate list and then converting it to a set.

More importantly, this function _add_speaker_names_to_payload is nearly identical to _add_speaker_names_to_segments in backend/routers/developer.py. Code duplication increases maintenance overhead and can lead to inconsistencies. I strongly recommend extracting this logic into a single, shared utility function in a follow-up PR to improve maintainability. This approach aligns with our guideline that large-scale refactoring should be handled in a separate, dedicated pull request rather than being bundled with feature changes.

Suggested change
person_ids = [seg.get('person_id') for seg in segments if seg.get('person_id')]
people_map = {}
if person_ids:
people_data = users_db.get_people_by_ids(uid, list(set(person_ids)))
people_map = {p['id']: p['name'] for p in people_data}
person_ids = {seg['person_id'] for seg in segments if seg.get('person_id')}
people_map = {}
if person_ids:
people_data = users_db.get_people_by_ids(uid, list(person_ids))
people_map = {p['id']: p['name'] for p in people_data}
References
  1. Large-scale refactoring, such as converting synchronous I/O calls to asynchronous across multiple files, should be handled in a separate, dedicated pull request rather than being bundled with feature changes to maintain consistency with existing patterns.


for seg in segments:
if seg.get('is_user'):
seg['speaker_name'] = 'User'
elif seg.get('person_id') and seg['person_id'] in people_map:
seg['speaker_name'] = people_map[seg['person_id']]
else:
seg['speaker_name'] = f"Speaker {seg.get('speaker_id', 0)}"
Comment on lines +35 to +53
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is nearly identical to _add_speaker_names_to_segments() in backend/routers/developer.py (lines 683-703). The logic for mapping speaker names is duplicated in both places with only minor differences in the conversations iteration structure.

Consider extracting this common logic into a shared utility function (e.g., in backend/utils/ or backend/database/) that both files can import and use. This would:

  • Reduce code duplication
  • Make the logic easier to maintain and test
  • Ensure consistent behavior across both the webhook and API endpoints

Copilot uses AI. Check for mistakes.


def conversation_created_webhook(uid, memory: Conversation):
toggled = user_webhook_status_db(uid, WebhookType.memory_created)

Expand All @@ -41,6 +63,7 @@ def conversation_created_webhook(uid, memory: Conversation):
webhook_url += f'?uid={uid}'
try:
payload = memory.as_dict_cleaned_dates()
_add_speaker_names_to_payload(uid, payload)
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new speaker name mapping functionality in the webhook lacks test coverage. Since the repository has established integration tests (e.g., backend/tests/integration/test_notifications_integration.py), consider adding tests to verify:

  • Speaker name resolution from person_id mappings
  • Correct fallback to "User" for user segments
  • Correct fallback to "Speaker {id}" format for unknown speakers
  • Handling of conversations with no transcript segments
  • Behavior when get_people_by_ids() returns empty results

This would help ensure the feature works correctly and prevent regressions.

Copilot uses AI. Check for mistakes.
payload = _json_serialize_datetime(payload)
response = requests.post(
webhook_url,
Expand Down