-
Notifications
You must be signed in to change notification settings - Fork 9.5k
fix(chat): persist thread transcripts outside checkpoints #2385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
LittleChenLiya
wants to merge
4
commits into
bytedance:main
from
LittleChenLiya:fix/transcript-store
Closed
Changes from 2 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
c06118c
fix(chat): persist thread transcripts outside checkpoints
LittleChenLiya 86335f1
Merge remote-tracking branch 'origin/main' into fix-transcript-store-…
LittleChenLiya 842c21d
fix: address transcript merge review feedback
LittleChenLiya 81f272a
Merge main and resolve transcript conflicts
LittleChenLiya File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| """Canonical chat transcript storage. | ||
|
|
||
| The LangGraph checkpoint state is model context. It can be summarized, | ||
| trimmed, or otherwise rewritten by middlewares. The UI transcript needs a | ||
| separate durable record so conversation history survives context compression. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import json | ||
| import time | ||
| from typing import Any | ||
|
|
||
| from deerflow.runtime import serialize_lc_object | ||
|
|
||
# Store namespace under which one transcript item is kept per thread id.
TRANSCRIPTS_NS: tuple[str, ...] = (
    "thread_transcripts",
)

# ``additional_kwargs`` key that marks a message as a conversation summary;
# messages carrying it with the value ``True`` are kept out of the transcript.
_SUMMARY_MARKER_KEY = "deerflow_conversation_summary"

# Leading text used to recognise summary messages that carry no marker key.
_LEGACY_SUMMARY_PREFIX = "Here is a summary of the conversation to date:"
|
|
||
|
|
||
def _message_fingerprint(message: dict[str, Any]) -> str:
    """Build a deterministic identity string for a message that has no id.

    Only the ``type``, ``name``, ``tool_call_id`` and ``content`` fields take
    part in the fingerprint; a field missing from *message* counts as ``None``.
    The result is the JSON encoding of those four fields with sorted keys.
    """
    identity_fields = ("type", "name", "tool_call_id", "content")
    snapshot = {field: message.get(field) for field in identity_fields}
    # default=str keeps the dump from failing on values json cannot encode.
    return json.dumps(snapshot, sort_keys=True, default=str, ensure_ascii=False)
|
|
||
|
|
||
def _message_text(message: dict[str, Any]) -> str:
    """Return the plain text carried by *message*, stripped of outer whitespace.

    String content is returned as is (stripped). List content contributes the
    ``text`` of every dict item whose ``type`` is ``"text"``, joined with
    newlines. Any other content shape yields an empty string.
    """
    content = message.get("content")
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, list):
        return ""

    text_parts = [
        block["text"]
        for block in content
        if isinstance(block, dict)
        and block.get("type") == "text"
        and isinstance(block.get("text"), str)
    ]
    return "\n".join(text_parts).strip()
|
|
||
|
|
||
def _is_visible_transcript_message(message: dict[str, Any]) -> bool:
    """Tell whether a serialized message belongs in the UI transcript.

    A message is rejected when its ``additional_kwargs`` set ``hide_from_ui``
    or the summary marker to ``True``, or when it is a human message whose text
    starts with the legacy summary prefix. Otherwise it is accepted only if its
    ``type`` is ``"human"``, ``"ai"`` or ``"tool"``.
    """
    extras = message.get("additional_kwargs")
    flags = extras if isinstance(extras, dict) else {}

    # Either flag must be exactly ``True`` (not merely truthy) to hide a message.
    if any(flags.get(key) is True for key in ("hide_from_ui", _SUMMARY_MARKER_KEY)):
        return False

    message_type = message.get("type")

    # Summary messages written before the explicit marker existed can only be
    # recognised by the text they start with.
    if message_type == "human" and _message_text(message).startswith(_LEGACY_SUMMARY_PREFIX):
        return False

    return message_type in {"human", "ai", "tool"}
|
|
||
|
|
||
def normalize_transcript_messages(messages: list[Any] | tuple[Any, ...] | None) -> list[dict[str, Any]]:
    """Serialize *messages* and keep only the ones visible in the transcript.

    Each raw message goes through ``serialize_lc_object``; results that are not
    dicts, or that ``_is_visible_transcript_message`` rejects, are dropped.
    ``None`` or an empty sequence yields an empty list. Input order is kept.
    """
    # ``map`` is lazy, so each message is serialized right before it is checked.
    serialized = map(serialize_lc_object, messages or [])
    return [
        entry
        for entry in serialized
        if isinstance(entry, dict) and _is_visible_transcript_message(entry)
    ]
|
|
||
|
|
||
async def get_thread_transcript(store: Any, thread_id: str) -> list[dict[str, Any]]:
    """Fetch the stored transcript messages for *thread_id*.

    Returns an empty list when the Store has no item for the thread, when the
    item's value is not a dict, or when its ``messages`` entry is not a list.
    Otherwise the stored ``messages`` list itself is returned (not a copy).
    """
    record = await store.aget(TRANSCRIPTS_NS, thread_id)
    if record is None:
        return []

    payload = record.value
    if not isinstance(payload, dict):
        return []

    stored = payload.get("messages", [])
    if isinstance(stored, list):
        return stored
    return []
|
|
||
|
|
||
async def append_thread_transcript_messages(
    store: Any,
    thread_id: str,
    messages: list[Any] | tuple[Any, ...] | None,
) -> list[dict[str, Any]]:
    """Merge visible *messages* into the stored transcript of *thread_id*.

    Incoming messages are normalized first. For each one:

    * a message whose id is already in the transcript is skipped;
    * a message with an id whose fingerprint matches a stored message without
      an id replaces that stored message in place;
    * every other message is appended. This includes messages without an id,
      even when an identical id-less message is already stored.

    The transcript is written back to the Store only when something changed.
    Returns the resulting transcript list.
    """
    incoming = normalize_transcript_messages(messages)
    transcript = await get_thread_transcript(store, thread_id)
    if not incoming:
        return transcript

    # Index what is already stored: ids of identified messages, and the
    # position of each id-less message keyed by its fingerprint (on duplicate
    # fingerprints the last position wins).
    known_ids: set[str] = set()
    pending_by_fingerprint: dict[str, int] = {}
    for position, stored in enumerate(transcript):
        if not isinstance(stored, dict):
            continue
        stored_id = stored.get("id")
        if stored_id:
            known_ids.add(str(stored_id))
        else:
            pending_by_fingerprint[_message_fingerprint(stored)] = position

    dirty = False
    for candidate in incoming:
        candidate_id = candidate.get("id")
        id_key = str(candidate_id) if candidate_id else None
        fingerprint = _message_fingerprint(candidate)

        if id_key is not None and id_key in known_ids:
            continue

        if id_key is None:
            # Remember where the first id-less copy lives so a later identified
            # version of the same message can take its place.
            pending_by_fingerprint.setdefault(fingerprint, len(transcript))
            transcript.append(candidate)
            dirty = True
            continue

        # Identified message: take over the slot of its id-less twin if one
        # exists, otherwise it is new and goes at the end.
        slot = pending_by_fingerprint.pop(fingerprint, None)
        known_ids.add(id_key)
        if slot is None:
            transcript.append(candidate)
        else:
            transcript[slot] = candidate
        dirty = True

    if dirty:
        record = {
            "thread_id": thread_id,
            "messages": transcript,
            "updated_at": time.time(),
        }
        await store.aput(TRANSCRIPTS_NS, thread_id, record)

    return transcript
|
|
||
|
|
||
async def delete_thread_transcript(store: Any, thread_id: str) -> None:
    """Remove the transcript of *thread_id* from the Store.

    Does nothing when *store* has no ``adelete`` attribute (or it is ``None``).
    """
    remover = getattr(store, "adelete", None)
    if remover is not None:
        await remover(TRANSCRIPTS_NS, thread_id)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.