diff --git a/Callbacks | LangChain Reference.html b/Callbacks | LangChain Reference.html new file mode 100644 index 00000000..56574ea9 --- /dev/null +++ b/Callbacks | LangChain Reference.html @@ -0,0 +1,21384 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Callbacks | LangChain Reference + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+ +
+ + + + + + + + + + + + + +

Callbacks

+ +
+ + + +

+ BaseCallbackHandler + + +

+ + +
+

+ Bases: LLMManagerMixin, ChainManagerMixin, ToolManagerMixin, RetrieverManagerMixin, CallbackManagerMixin, RunManagerMixin

+ +
+ + + + + + + +
+

+              flowchart TD
+              langchain_core.callbacks.base.BaseCallbackHandler[BaseCallbackHandler]
+              langchain_core.callbacks.base.LLMManagerMixin[LLMManagerMixin]
+              langchain_core.callbacks.base.ChainManagerMixin[ChainManagerMixin]
+              langchain_core.callbacks.base.ToolManagerMixin[ToolManagerMixin]
+              langchain_core.callbacks.base.RetrieverManagerMixin[RetrieverManagerMixin]
+              langchain_core.callbacks.base.CallbackManagerMixin[CallbackManagerMixin]
+              langchain_core.callbacks.base.RunManagerMixin[RunManagerMixin]
+
+                              langchain_core.callbacks.base.LLMManagerMixin --> langchain_core.callbacks.base.BaseCallbackHandler
+                
+                langchain_core.callbacks.base.ChainManagerMixin --> langchain_core.callbacks.base.BaseCallbackHandler
+                
+                langchain_core.callbacks.base.ToolManagerMixin --> langchain_core.callbacks.base.BaseCallbackHandler
+                
+                langchain_core.callbacks.base.RetrieverManagerMixin --> langchain_core.callbacks.base.BaseCallbackHandler
+                
+                langchain_core.callbacks.base.CallbackManagerMixin --> langchain_core.callbacks.base.BaseCallbackHandler
+                
+                langchain_core.callbacks.base.RunManagerMixin --> langchain_core.callbacks.base.BaseCallbackHandler
+                
+
+
+              click langchain_core.callbacks.base.BaseCallbackHandler href "" "langchain_core.callbacks.base.BaseCallbackHandler"
+              click langchain_core.callbacks.base.LLMManagerMixin href "" "langchain_core.callbacks.base.LLMManagerMixin"
+              click langchain_core.callbacks.base.ChainManagerMixin href "" "langchain_core.callbacks.base.ChainManagerMixin"
+              click langchain_core.callbacks.base.ToolManagerMixin href "" "langchain_core.callbacks.base.ToolManagerMixin"
+              click langchain_core.callbacks.base.RetrieverManagerMixin href "" "langchain_core.callbacks.base.RetrieverManagerMixin"
+              click langchain_core.callbacks.base.CallbackManagerMixin href "" "langchain_core.callbacks.base.CallbackManagerMixin"
+              click langchain_core.callbacks.base.RunManagerMixin href "" "langchain_core.callbacks.base.RunManagerMixin"
+            
+ + + +

Base callback handler.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
METHODDESCRIPTION
on_text +
+

Run on an arbitrary text.

+
+
on_retry +
+

Run on a retry event.

+
+
on_custom_event +
+

Override to define a handler for a custom event.

+
+
on_llm_start +
+

Run when LLM starts running.

+
+
on_chat_model_start +
+

Run when a chat model starts running.

+
+
on_retriever_start +
+

Run when the Retriever starts running.

+
+
on_chain_start +
+

Run when a chain starts running.

+
+
on_tool_start +
+

Run when the tool starts running.

+
+
on_retriever_error +
+

Run when Retriever errors.

+
+
on_retriever_end +
+

Run when Retriever ends running.

+
+
on_tool_end +
+

Run when the tool ends running.

+
+
on_tool_error +
+

Run when tool errors.

+
+
on_chain_end +
+

Run when chain ends running.

+
+
on_chain_error +
+

Run when chain errors.

+
+
on_agent_action +
+

Run on agent action.

+
+
on_agent_finish +
+

Run on the agent end.

+
+
on_llm_new_token +
+

Run on new output token.

+
+
on_llm_end +
+

Run when LLM ends running.

+
+
on_llm_error +
+

Run when LLM errors.

+
+
+ + + + + +
+ + + + + + + +
+ + + +

+ raise_error + + + + class-attribute + instance-attribute + + +

+
raise_error: bool = False
+
+ +
+ +

Whether to raise an error if an exception occurs.

+ + +
+ +
+ +
+ + + +

+ run_inline + + + + class-attribute + instance-attribute + + +

+
run_inline: bool = False
+
+ +
+ +

Whether to run the callback inline.

+ + +
+ +
+ +
+ + + +

+ ignore_llm + + + + property + + +

+
ignore_llm: bool
+
+ +
+ +

Whether to ignore LLM callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_retry + + + + property + + +

+
ignore_retry: bool
+
+ +
+ +

Whether to ignore retry callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_chain + + + + property + + +

+
ignore_chain: bool
+
+ +
+ +

Whether to ignore chain callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_agent + + + + property + + +

+
ignore_agent: bool
+
+ +
+ +

Whether to ignore agent callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_retriever + + + + property + + +

+
ignore_retriever: bool
+
+ +
+ +

Whether to ignore retriever callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_chat_model + + + + property + + +

+
ignore_chat_model: bool
+
+ +
+ +

Whether to ignore chat model callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_custom_event + + + + property + + +

+
ignore_custom_event: bool
+
+ +
+ +

Ignore custom event.

+ + +
+ +
+ + + + +
+ +

+ on_text + + +

+
on_text(
+    text: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+) -> Any
+
+ +
+ +

Run on an arbitrary text.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ text + +
+

The text.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retry + + +

+
on_retry(
+    retry_state: RetryCallState,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on a retry event.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ retry_state + +
+

The retry state.

+
+

+ + TYPE: + RetryCallState + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_custom_event + + +

+
on_custom_event(
+    name: str,
+    data: Any,
+    *,
+    run_id: UUID,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Override to define a handler for a custom event.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ name + +
+

The name of the custom event.

+
+

+ + TYPE: + str + +

+
+ data + +
+

The data for the custom event.

+

Format will match the format specified by the user.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID + +

+
+ tags + +
+

The tags associated with the custom event (includes inherited tags).

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata associated with the custom event (includes inherited +metadata).

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ +
+ +
+ +
+ +

+ on_llm_start + + +

+
on_llm_start(
+    serialized: dict[str, Any],
+    prompts: list[str],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when LLM starts running.

+
+

Warning

+

This method is called for non-chat models (regular text completion LLMs). If +you're implementing a handler for a chat model, you should use +on_chat_model_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ prompts + +
+

The prompts.

+
+

+ + TYPE: + list[str] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chat_model_start + + +

+
on_chat_model_start(
+    serialized: dict[str, Any],
+    messages: list[list[BaseMessage]],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when a chat model starts running.

+
+

Warning

+

This method is called for chat models. If you're implementing a handler for +a non-chat model, you should use on_llm_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chat model.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ messages + +
+

The messages.

+
+

+ + TYPE: + list[list[BaseMessage]] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_start + + +

+
on_retriever_start(
+    serialized: dict[str, Any],
+    query: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when the Retriever starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized Retriever.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ query + +
+

The query.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_start + + +

+
on_chain_start(
+    serialized: dict[str, Any],
+    inputs: dict[str, Any],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when a chain starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_start + + +

+
on_tool_start(
+    serialized: dict[str, Any],
+    input_str: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    inputs: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when the tool starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ input_str + +
+

The input string.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_error + + +

+
on_retriever_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when Retriever errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_end + + +

+
on_retriever_end(
+    documents: Sequence[Document],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when Retriever ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ documents + +
+

The documents retrieved.

+
+

+ + TYPE: + Sequence[Document] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_end + + +

+
on_tool_end(
+    output: Any, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+) -> Any
+
+ +
+ +

Run when the tool ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ output + +
+

The output of the tool.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_error + + +

+
on_tool_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when tool errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_end + + +

+
on_chain_end(
+    outputs: dict[str, Any],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when chain ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ outputs + +
+

The outputs of the chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_error + + +

+
on_chain_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when chain errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_agent_action + + +

+
on_agent_action(
+    action: AgentAction,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on agent action.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ action + +
+

The agent action.

+
+

+ + TYPE: + AgentAction + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_agent_finish + + +

+
on_agent_finish(
+    finish: AgentFinish,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on the agent end.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ finish + +
+

The agent finish.

+
+

+ + TYPE: + AgentFinish + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_new_token + + +

+
on_llm_new_token(
+    token: str,
+    *,
+    chunk: GenerationChunk | ChatGenerationChunk | None = None,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on new output token.

+

Only available when streaming is enabled.

+

For both chat models and non-chat models (legacy text completion LLMs).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ token + +
+

The new token.

+
+

+ + TYPE: + str + +

+
+ chunk + +
+

The new generated chunk, containing content and other information.

+
+

+ + TYPE: + GenerationChunk | ChatGenerationChunk | None + + + DEFAULT: + None + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_end + + +

+
on_llm_end(
+    response: LLMResult,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when LLM ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ response + +
+

The response which was generated.

+
+

+ + TYPE: + LLMResult + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_error + + +

+
on_llm_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when LLM errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ + + +
+ +
+ +
+ +
+ + + +

+ AsyncCallbackHandler + + +

+ + +
+

+ Bases: BaseCallbackHandler

+ + + +

Base async callback handler.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
METHODDESCRIPTION
on_llm_start +
+

Run when the model starts running.

+
+
on_chat_model_start +
+

Run when a chat model starts running.

+
+
on_llm_new_token +
+

Run on new output token. Only available when streaming is enabled.

+
+
on_llm_end +
+

Run when the model ends running.

+
+
on_llm_error +
+

Run when LLM errors.

+
+
on_chain_start +
+

Run when a chain starts running.

+
+
on_chain_end +
+

Run when a chain ends running.

+
+
on_chain_error +
+

Run when chain errors.

+
+
on_tool_start +
+

Run when the tool starts running.

+
+
on_tool_end +
+

Run when the tool ends running.

+
+
on_tool_error +
+

Run when tool errors.

+
+
on_text +
+

Run on an arbitrary text.

+
+
on_retry +
+

Run on a retry event.

+
+
on_agent_action +
+

Run on agent action.

+
+
on_agent_finish +
+

Run on the agent end.

+
+
on_retriever_start +
+

Run on the retriever start.

+
+
on_retriever_end +
+

Run on the retriever end.

+
+
on_retriever_error +
+

Run on retriever error.

+
+
on_custom_event +
+

Override to define a handler for custom events.

+
+
+ + + + + +
+ + + + + + + +
+ + + +

+ raise_error + + + + class-attribute + instance-attribute + + +

+
raise_error: bool = False
+
+ +
+ +

Whether to raise an error if an exception occurs.

+ + +
+ +
+ +
+ + + +

+ run_inline + + + + class-attribute + instance-attribute + + +

+
run_inline: bool = False
+
+ +
+ +

Whether to run the callback inline.

+ + +
+ +
+ +
+ + + +

+ ignore_llm + + + + property + + +

+
ignore_llm: bool
+
+ +
+ +

Whether to ignore LLM callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_retry + + + + property + + +

+
ignore_retry: bool
+
+ +
+ +

Whether to ignore retry callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_chain + + + + property + + +

+
ignore_chain: bool
+
+ +
+ +

Whether to ignore chain callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_agent + + + + property + + +

+
ignore_agent: bool
+
+ +
+ +

Whether to ignore agent callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_retriever + + + + property + + +

+
ignore_retriever: bool
+
+ +
+ +

Whether to ignore retriever callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_chat_model + + + + property + + +

+
ignore_chat_model: bool
+
+ +
+ +

Whether to ignore chat model callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_custom_event + + + + property + + +

+
ignore_custom_event: bool
+
+ +
+ +

Ignore custom event.

+ + +
+ +
+ + + + +
+ +

+ on_llm_start + + + + async + + +

+
on_llm_start(
+    serialized: dict[str, Any],
+    prompts: list[str],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when the model starts running.

+
+

Warning

+

This method is called for non-chat models (regular text completion LLMs). If +you're implementing a handler for a chat model, you should use +on_chat_model_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ prompts + +
+

The prompts.

+
+

+ + TYPE: + list[str] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chat_model_start + + + + async + + +

+
on_chat_model_start(
+    serialized: dict[str, Any],
+    messages: list[list[BaseMessage]],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when a chat model starts running.

+
+

Warning

+

This method is called for chat models. If you're implementing a handler for +a non-chat model, you should use on_llm_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chat model.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ messages + +
+

The messages.

+
+

+ + TYPE: + list[list[BaseMessage]] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_new_token + + + + async + + +

+
on_llm_new_token(
+    token: str,
+    *,
+    chunk: GenerationChunk | ChatGenerationChunk | None = None,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run on new output token. Only available when streaming is enabled.

+

For both chat models and non-chat models (legacy text completion LLMs).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ token + +
+

The new token.

+
+

+ + TYPE: + str + +

+
+ chunk + +
+

The new generated chunk, containing content and other information.

+
+

+ + TYPE: + GenerationChunk | ChatGenerationChunk | None + + + DEFAULT: + None + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_end + + + + async + + +

+
on_llm_end(
+    response: LLMResult,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when the model ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ response + +
+

The response which was generated.

+
+

+ + TYPE: + LLMResult + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_error + + + + async + + +

+
on_llm_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when LLM errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
    +
  • response (LLMResult): The response which was generated before + the error occurred.
  • +
+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_start + + + + async + + +

+
on_chain_start(
+    serialized: dict[str, Any],
+    inputs: dict[str, Any],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when a chain starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_end + + + + async + + +

+
on_chain_end(
+    outputs: dict[str, Any],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when a chain ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ outputs + +
+

The outputs of the chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_error + + + + async + + +

+
on_chain_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when chain errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_start + + + + async + + +

+
on_tool_start(
+    serialized: dict[str, Any],
+    input_str: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    inputs: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when the tool starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized tool.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ input_str + +
+

The input string.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_end + + + + async + + +

+
on_tool_end(
+    output: Any,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when the tool ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ output + +
+

The output of the tool.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_error + + + + async + + +

+
on_tool_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run when tool errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_text + + + + async + + +

+
on_text(
+    text: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run on an arbitrary text.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ text + +
+

The text.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retry + + + + async + + +

+
on_retry(
+    retry_state: RetryCallState,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on a retry event.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ retry_state + +
+

The retry state.

+
+

+ + TYPE: + RetryCallState + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_agent_action + + + + async + + +

+
on_agent_action(
+    action: AgentAction,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run on agent action.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ action + +
+

The agent action.

+
+

+ + TYPE: + AgentAction + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_agent_finish + + + + async + + +

+
on_agent_finish(
+    finish: AgentFinish,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run on the agent end.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ finish + +
+

The agent finish.

+
+

+ + TYPE: + AgentFinish + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_start + + + + async + + +

+
on_retriever_start(
+    serialized: dict[str, Any],
+    query: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run on the retriever start.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized retriever.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ query + +
+

The query.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_end + + + + async + + +

+
on_retriever_end(
+    documents: Sequence[Document],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run on the retriever end.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ documents + +
+

The documents retrieved.

+
+

+ + TYPE: + Sequence[Document] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_error + + + + async + + +

+
on_retriever_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Run on retriever error.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_custom_event + + + + async + + +

+
on_custom_event(
+    name: str,
+    data: Any,
+    *,
+    run_id: UUID,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> None
+
+ +
+ +

Override to define a handler for custom events.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ name + +
+

The name of the custom event.

+
+

+ + TYPE: + str + +

+
+ data + +
+

The data for the custom event.

+

Format will match the format specified by the user.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID + +

+
+ tags + +
+

The tags associated with the custom event (includes inherited tags).

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata associated with the custom event (includes inherited +metadata).

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ +
+ +
+ + + +
+ +
+ +
+ +
+ + + +

+ BaseCallbackManager + + +

+ + +
+

+ Bases: CallbackManagerMixin

+ + + +

Base callback manager.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
METHODDESCRIPTION
on_llm_start +
+

Run when LLM starts running.

+
+
on_chat_model_start +
+

Run when a chat model starts running.

+
+
on_retriever_start +
+

Run when the Retriever starts running.

+
+
on_chain_start +
+

Run when a chain starts running.

+
+
on_tool_start +
+

Run when the tool starts running.

+
+
__init__ +
+

Initialize callback manager.

+
+
copy +
+

Return a copy of the callback manager.

+
+
merge +
+

Merge the callback manager with another callback manager.

+
+
add_handler +
+

Add a handler to the callback manager.

+
+
remove_handler +
+

Remove a handler from the callback manager.

+
+
set_handlers +
+

Set handlers as the only handlers on the callback manager.

+
+
set_handler +
+

Set handler as the only handler on the callback manager.

+
+
add_tags +
+

Add tags to the callback manager.

+
+
remove_tags +
+

Remove tags from the callback manager.

+
+
add_metadata +
+

Add metadata to the callback manager.

+
+
remove_metadata +
+

Remove metadata from the callback manager.

+
+
+ + + + + +
+ + + + + + + +
+ + + +

+ is_async + + + + property + + +

+
is_async: bool
+
+ +
+ +

Whether the callback manager is async.

+ + +
+ +
+ + + + +
+ +

+ on_llm_start + + +

+
on_llm_start(
+    serialized: dict[str, Any],
+    prompts: list[str],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when LLM starts running.

+
+

Warning

+

This method is called for non-chat models (regular text completion LLMs). If +you're implementing a handler for a chat model, you should use +on_chat_model_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ prompts + +
+

The prompts.

+
+

+ + TYPE: + list[str] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chat_model_start + + +

+
on_chat_model_start(
+    serialized: dict[str, Any],
+    messages: list[list[BaseMessage]],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when a chat model starts running.

+
+

Warning

+

This method is called for chat models. If you're implementing a handler for +a non-chat model, you should use on_llm_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chat model.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ messages + +
+

The messages.

+
+

+ + TYPE: + list[list[BaseMessage]] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_start + + +

+
on_retriever_start(
+    serialized: dict[str, Any],
+    query: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when the Retriever starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized Retriever.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ query + +
+

The query.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_start + + +

+
on_chain_start(
+    serialized: dict[str, Any],
+    inputs: dict[str, Any],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when a chain starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_start + + +

+
on_tool_start(
+    serialized: dict[str, Any],
+    input_str: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    inputs: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when the tool starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ input_str + +
+

The input string.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ __init__ + + +

+
__init__(
+    handlers: list[BaseCallbackHandler],
+    inheritable_handlers: list[BaseCallbackHandler] | None = None,
+    parent_run_id: UUID | None = None,
+    *,
+    tags: list[str] | None = None,
+    inheritable_tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    inheritable_metadata: dict[str, Any] | None = None,
+) -> None
+
+ +
+ +

Initialize callback manager.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handlers + +
+

The handlers.

+
+

+ + TYPE: + list[BaseCallbackHandler] + +

+
+ inheritable_handlers + +
+

The inheritable handlers.

+
+

+ + TYPE: + list[BaseCallbackHandler] | None + + + DEFAULT: + None + +

+
+ parent_run_id + +
+

The parent run ID.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ inheritable_tags + +
+

The inheritable tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ inheritable_metadata + +
+

The inheritable metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ +
+ +
+ +
+ +

+ copy + + +

+
copy() -> Self
+
+ +
+ +

Return a copy of the callback manager.

+ +
+ +
+ +
+ +

+ merge + + +

+
merge(other: BaseCallbackManager) -> Self
+
+ +
+ +

Merge the callback manager with another callback manager.

+

May be overwritten in subclasses.

+

Primarily used internally within merge_configs.

+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Self + + +
+

The merged callback manager of the same type as the current object.

+
+
+ + +
+ Example +
# Merging two callback managers`
+from langchain_core.callbacks.manager import (
+    CallbackManager,
+    trace_as_chain_group,
+)
+from langchain_core.callbacks.stdout import StdOutCallbackHandler
+
+manager = CallbackManager(handlers=[StdOutCallbackHandler()], tags=["tag2"])
+with trace_as_chain_group("My Group Name", tags=["tag1"]) as group_manager:
+    merged_manager = group_manager.merge(manager)
+    print(merged_manager.handlers)
+    # [
+    #    <langchain_core.callbacks.stdout.StdOutCallbackHandler object at ...>,
+    #    <langchain_core.callbacks.streaming_stdout.StreamingStdOutCallbackHandler object at ...>,
+    # ]
+
+    print(merged_manager.tags)
+    #    ['tag2', 'tag1']
+
+
+
+ +
+ +
+ +

+ add_handler + + +

+
add_handler(handler: BaseCallbackHandler, inherit: bool = True) -> None
+
+ +
+ +

Add a handler to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to add.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ inherit + +
+

Whether to inherit the handler.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_handler + + +

+
remove_handler(handler: BaseCallbackHandler) -> None
+
+ +
+ +

Remove a handler from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to remove.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ +
+ +
+ +
+ +

+ set_handlers + + +

+
set_handlers(handlers: list[BaseCallbackHandler], inherit: bool = True) -> None
+
+ +
+ +

Set handlers as the only handlers on the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handlers + +
+

The handlers to set.

+
+

+ + TYPE: + list[BaseCallbackHandler] + +

+
+ inherit + +
+

Whether to inherit the handlers.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ set_handler + + +

+
set_handler(handler: BaseCallbackHandler, inherit: bool = True) -> None
+
+ +
+ +

Set handler as the only handler on the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to set.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ inherit + +
+

Whether to inherit the handler.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ add_tags + + +

+
add_tags(tags: list[str], inherit: bool = True) -> None
+
+ +
+ +

Add tags to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ tags + +
+

The tags to add.

+
+

+ + TYPE: + list[str] + +

+
+ inherit + +
+

Whether to inherit the tags.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_tags + + +

+
remove_tags(tags: list[str]) -> None
+
+ +
+ +

Remove tags from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ tags + +
+

The tags to remove.

+
+

+ + TYPE: + list[str] + +

+
+ +
+ +
+ +
+ +

+ add_metadata + + +

+
add_metadata(metadata: dict[str, Any], inherit: bool = True) -> None
+
+ +
+ +

Add metadata to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ metadata + +
+

The metadata to add.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ inherit + +
+

Whether to inherit the metadata.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_metadata + + +

+
remove_metadata(keys: list[str]) -> None
+
+ +
+ +

Remove metadata from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ keys + +
+

The keys to remove.

+
+

+ + TYPE: + list[str] + +

+
+ +
+ +
+ + + +
+ +
+ +
+ +
+ + + +

+ CallbackManager + + +

+ + +
+

+ Bases: BaseCallbackManager

+ + + +

Callback manager for LangChain.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
METHODDESCRIPTION
on_llm_start +
+

Run when LLM starts running.

+
+
on_chat_model_start +
+

Run when chat model starts running.

+
+
on_chain_start +
+

Run when chain starts running.

+
+
on_tool_start +
+

Run when tool starts running.

+
+
on_retriever_start +
+

Run when the retriever starts running.

+
+
on_custom_event +
+

Dispatch an adhoc event to the handlers (async version).

+
+
configure +
+

Configure the callback manager.

+
+
__init__ +
+

Initialize callback manager.

+
+
copy +
+

Return a copy of the callback manager.

+
+
merge +
+

Merge the callback manager with another callback manager.

+
+
add_handler +
+

Add a handler to the callback manager.

+
+
remove_handler +
+

Remove a handler from the callback manager.

+
+
set_handlers +
+

Set handlers as the only handlers on the callback manager.

+
+
set_handler +
+

Set handler as the only handler on the callback manager.

+
+
add_tags +
+

Add tags to the callback manager.

+
+
remove_tags +
+

Remove tags from the callback manager.

+
+
add_metadata +
+

Add metadata to the callback manager.

+
+
remove_metadata +
+

Remove metadata from the callback manager.

+
+
+ + + + + +
+ + + + + + + +
+ + + +

+ is_async + + + + property + + +

+
is_async: bool
+
+ +
+ +

Whether the callback manager is async.

+ + +
+ +
+ + + + +
+ +

+ on_llm_start + + +

+
on_llm_start(
+    serialized: dict[str, Any],
+    prompts: list[str],
+    run_id: UUID | None = None,
+    **kwargs: Any,
+) -> list[CallbackManagerForLLMRun]
+
+ +
+ +

Run when LLM starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ prompts + +
+

The list of prompts.

+
+

+ + TYPE: + list[str] + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + list[CallbackManagerForLLMRun] + + +
+

A callback manager for each prompt as an LLM run.

+
+
+ +
+ +
+ +
+ +

+ on_chat_model_start + + +

+
on_chat_model_start(
+    serialized: dict[str, Any],
+    messages: list[list[BaseMessage]],
+    run_id: UUID | None = None,
+    **kwargs: Any,
+) -> list[CallbackManagerForLLMRun]
+
+ +
+ +

Run when chat model starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ messages + +
+

The list of messages.

+
+

+ + TYPE: + list[list[BaseMessage]] + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + list[CallbackManagerForLLMRun] + + +
+

A callback manager for each list of messages as an LLM run.

+
+
+ +
+ +
+ +
+ +

+ on_chain_start + + +

+
on_chain_start(
+    serialized: dict[str, Any] | None,
+    inputs: dict[str, Any] | Any,
+    run_id: UUID | None = None,
+    **kwargs: Any,
+) -> CallbackManagerForChainRun
+
+ +
+ +

Run when chain starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] | None + +

+
+ inputs + +
+

The inputs to the chain.

+
+

+ + TYPE: + dict[str, Any] | Any + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + CallbackManagerForChainRun + + +
+

The callback manager for the chain run.

+
+
+ +
+ +
+ +
+ +

+ on_tool_start + + +

+
on_tool_start(
+    serialized: dict[str, Any] | None,
+    input_str: str,
+    run_id: UUID | None = None,
+    parent_run_id: UUID | None = None,
+    inputs: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> CallbackManagerForToolRun
+
+ +
+ +

Run when tool starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

Serialized representation of the tool.

+
+

+ + TYPE: + dict[str, Any] | None + +

+
+ input_str + +
+

The input to the tool as a string.

+

Non-string inputs are cast to strings.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

ID for the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ inputs + +
+

The original input to the tool if provided.

+

Recommended for usage instead of input_str when the original input is +needed.

+

If provided, the inputs are expected to be formatted as a dict. The keys +will correspond to the named-arguments in the tool.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

The keyword arguments to pass to the event handler

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + CallbackManagerForToolRun + + +
+

The callback manager for the tool run.

+
+
+ +
+ +
+ +
+ +

+ on_retriever_start + + +

+
on_retriever_start(
+    serialized: dict[str, Any] | None,
+    query: str,
+    run_id: UUID | None = None,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> CallbackManagerForRetrieverRun
+
+ +
+ +

Run when the retriever starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized retriever.

+
+

+ + TYPE: + dict[str, Any] | None + +

+
+ query + +
+

The query.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + CallbackManagerForRetrieverRun + + +
+

The callback manager for the retriever run.

+
+
+ +
+ +
+ +
+ +

+ on_custom_event + + +

+
on_custom_event(
+    name: str, data: Any, run_id: UUID | None = None, **kwargs: Any
+) -> None
+
+ +
+ +

Dispatch an adhoc event to the handlers (async version).

+

This event should NOT be used in any internal LangChain code. The event is meant +specifically for users of the library to dispatch custom events that are +tailored to their application.

+ + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ name + +
+

The name of the adhoc event.

+
+

+ + TYPE: + str + +

+
+ data + +
+

The data for the adhoc event.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ + + + + + + + + + + + + + + +
RAISESDESCRIPTION
+ + ValueError + + +
+

If additional keyword arguments are passed.

+
+
+ +
+ +
+ +
+ +

+ configure + + + + classmethod + + +

+
configure(
+    inheritable_callbacks: Callbacks = None,
+    local_callbacks: Callbacks = None,
+    verbose: bool = False,
+    inheritable_tags: list[str] | None = None,
+    local_tags: list[str] | None = None,
+    inheritable_metadata: dict[str, Any] | None = None,
+    local_metadata: dict[str, Any] | None = None,
+) -> CallbackManager
+
+ +
+ +

Configure the callback manager.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ inheritable_callbacks + +
+

The inheritable callbacks.

+
+

+ + TYPE: + Callbacks + + + DEFAULT: + None + +

+
+ local_callbacks + +
+

The local callbacks.

+
+

+ + TYPE: + Callbacks + + + DEFAULT: + None + +

+
+ verbose + +
+

Whether to enable verbose mode.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ inheritable_tags + +
+

The inheritable tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ local_tags + +
+

The local tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ inheritable_metadata + +
+

The inheritable metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ local_metadata + +
+

The local metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + CallbackManager + + +
+

The configured callback manager.

+
+
+ +
+ +
+ +
+ +

+ __init__ + + +

+
__init__(
+    handlers: list[BaseCallbackHandler],
+    inheritable_handlers: list[BaseCallbackHandler] | None = None,
+    parent_run_id: UUID | None = None,
+    *,
+    tags: list[str] | None = None,
+    inheritable_tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    inheritable_metadata: dict[str, Any] | None = None,
+) -> None
+
+ +
+ +

Initialize callback manager.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handlers + +
+

The handlers.

+
+

+ + TYPE: + list[BaseCallbackHandler] + +

+
+ inheritable_handlers + +
+

The inheritable handlers.

+
+

+ + TYPE: + list[BaseCallbackHandler] | None + + + DEFAULT: + None + +

+
+ parent_run_id + +
+

The parent run ID.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ inheritable_tags + +
+

The inheritable tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ inheritable_metadata + +
+

The inheritable metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ +
+ +
+ +
+ +

+ copy + + +

+
copy() -> Self
+
+ +
+ +

Return a copy of the callback manager.

+ +
+ +
+ +
+ +

+ merge + + +

+
merge(other: BaseCallbackManager) -> Self
+
+ +
+ +

Merge the callback manager with another callback manager.

+

May be overwritten in subclasses.

+

Primarily used internally within merge_configs.

+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Self + + +
+

The merged callback manager of the same type as the current object.

+
+
+ + +
+ Example +
# Merging two callback managers
+from langchain_core.callbacks.manager import (
+    CallbackManager,
+    trace_as_chain_group,
+)
+from langchain_core.callbacks.stdout import StdOutCallbackHandler
+
+manager = CallbackManager(handlers=[StdOutCallbackHandler()], tags=["tag2"])
+with trace_as_chain_group("My Group Name", tags=["tag1"]) as group_manager:
+    merged_manager = group_manager.merge(manager)
+    print(merged_manager.handlers)
+    # [
+    #    <langchain_core.callbacks.stdout.StdOutCallbackHandler object at ...>,
+    #    <langchain_core.callbacks.streaming_stdout.StreamingStdOutCallbackHandler object at ...>,
+    # ]
+
+    print(merged_manager.tags)
+    #    ['tag2', 'tag1']
+
+
+
+ +
+ +
+ +

+ add_handler + + +

+
add_handler(handler: BaseCallbackHandler, inherit: bool = True) -> None
+
+ +
+ +

Add a handler to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to add.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ inherit + +
+

Whether to inherit the handler.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_handler + + +

+
remove_handler(handler: BaseCallbackHandler) -> None
+
+ +
+ +

Remove a handler from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to remove.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ +
+ +
+ +
+ +

+ set_handlers + + +

+
set_handlers(handlers: list[BaseCallbackHandler], inherit: bool = True) -> None
+
+ +
+ +

Set handlers as the only handlers on the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handlers + +
+

The handlers to set.

+
+

+ + TYPE: + list[BaseCallbackHandler] + +

+
+ inherit + +
+

Whether to inherit the handlers.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ set_handler + + +

+
set_handler(handler: BaseCallbackHandler, inherit: bool = True) -> None
+
+ +
+ +

Set handler as the only handler on the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to set.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ inherit + +
+

Whether to inherit the handler.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ add_tags + + +

+
add_tags(tags: list[str], inherit: bool = True) -> None
+
+ +
+ +

Add tags to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ tags + +
+

The tags to add.

+
+

+ + TYPE: + list[str] + +

+
+ inherit + +
+

Whether to inherit the tags.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_tags + + +

+
remove_tags(tags: list[str]) -> None
+
+ +
+ +

Remove tags from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ tags + +
+

The tags to remove.

+
+

+ + TYPE: + list[str] + +

+
+ +
+ +
+ +
+ +

+ add_metadata + + +

+
add_metadata(metadata: dict[str, Any], inherit: bool = True) -> None
+
+ +
+ +

Add metadata to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ metadata + +
+

The metadata to add.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ inherit + +
+

Whether to inherit the metadata.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_metadata + + +

+
remove_metadata(keys: list[str]) -> None
+
+ +
+ +

Remove metadata from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ keys + +
+

The keys to remove.

+
+

+ + TYPE: + list[str] + +

+
+ +
+ +
+ + + +
+ +
+ +
+ +
+ + + +

+ AsyncCallbackManager + + +

+ + +
+

+ Bases: BaseCallbackManager

+ + + +

Async callback manager that handles callbacks from LangChain.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
METHODDESCRIPTION
on_llm_start +
+

Run when LLM starts running.

+
+
on_chat_model_start +
+

Async run when LLM starts running.

+
+
on_chain_start +
+

Async run when chain starts running.

+
+
on_tool_start +
+

Run when the tool starts running.

+
+
on_custom_event +
+

Dispatch an adhoc event to the handlers (async version).

+
+
on_retriever_start +
+

Run when the retriever starts running.

+
+
configure +
+

Configure the async callback manager.

+
+
__init__ +
+

Initialize callback manager.

+
+
copy +
+

Return a copy of the callback manager.

+
+
merge +
+

Merge the callback manager with another callback manager.

+
+
add_handler +
+

Add a handler to the callback manager.

+
+
remove_handler +
+

Remove a handler from the callback manager.

+
+
set_handlers +
+

Set handlers as the only handlers on the callback manager.

+
+
set_handler +
+

Set handler as the only handler on the callback manager.

+
+
add_tags +
+

Add tags to the callback manager.

+
+
remove_tags +
+

Remove tags from the callback manager.

+
+
add_metadata +
+

Add metadata to the callback manager.

+
+
remove_metadata +
+

Remove metadata from the callback manager.

+
+
+ + + + + +
+ + + + + + + +
+ + + +

+ is_async + + + + property + + +

+
is_async: bool
+
+ +
+ +

Return whether the handler is async.

+ + +
+ +
+ + + + +
+ +

+ on_llm_start + + + + async + + +

+
on_llm_start(
+    serialized: dict[str, Any],
+    prompts: list[str],
+    run_id: UUID | None = None,
+    **kwargs: Any,
+) -> list[AsyncCallbackManagerForLLMRun]
+
+ +
+ +

Run when LLM starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ prompts + +
+

The list of prompts.

+
+

+ + TYPE: + list[str] + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + list[AsyncCallbackManagerForLLMRun] + + +
+

The list of async callback managers, one for each LLM run corresponding to

+
+
+ + list[AsyncCallbackManagerForLLMRun] + + +
+

each prompt.

+
+
+ +
+ +
+ +
+ +

+ on_chat_model_start + + + + async + + +

+
on_chat_model_start(
+    serialized: dict[str, Any],
+    messages: list[list[BaseMessage]],
+    run_id: UUID | None = None,
+    **kwargs: Any,
+) -> list[AsyncCallbackManagerForLLMRun]
+
+ +
+ +

Async run when LLM starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ messages + +
+

The list of messages.

+
+

+ + TYPE: + list[list[BaseMessage]] + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + list[AsyncCallbackManagerForLLMRun] + + +
+

The list of async callback managers, one for each LLM run corresponding to

+
+
+ + list[AsyncCallbackManagerForLLMRun] + + +
+

each inner message list.

+
+
+ +
+ +
+ +
+ +

+ on_chain_start + + + + async + + +

+
on_chain_start(
+    serialized: dict[str, Any] | None,
+    inputs: dict[str, Any] | Any,
+    run_id: UUID | None = None,
+    **kwargs: Any,
+) -> AsyncCallbackManagerForChainRun
+
+ +
+ +

Async run when chain starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] | None + +

+
+ inputs + +
+

The inputs to the chain.

+
+

+ + TYPE: + dict[str, Any] | Any + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + AsyncCallbackManagerForChainRun + + +
+

The async callback manager for the chain run.

+
+
+ +
+ +
+ +
+ +

+ on_tool_start + + + + async + + +

+
on_tool_start(
+    serialized: dict[str, Any] | None,
+    input_str: str,
+    run_id: UUID | None = None,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> AsyncCallbackManagerForToolRun
+
+ +
+ +

Run when the tool starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized tool.

+
+

+ + TYPE: + dict[str, Any] | None + +

+
+ input_str + +
+

The input to the tool.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + AsyncCallbackManagerForToolRun + + +
+

The async callback manager for the tool run.

+
+
+ +
+ +
+ +
+ +

+ on_custom_event + + + + async + + +

+
on_custom_event(
+    name: str, data: Any, run_id: UUID | None = None, **kwargs: Any
+) -> None
+
+ +
+ +

Dispatch an adhoc event to the handlers (async version).

+

This event should NOT be used in any internal LangChain code. The event is meant +specifically for users of the library to dispatch custom events that are +tailored to their application.

+ + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ name + +
+

The name of the adhoc event.

+
+

+ + TYPE: + str + +

+
+ data + +
+

The data for the adhoc event.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ + + + + + + + + + + + + + + +
RAISESDESCRIPTION
+ + ValueError + + +
+

If additional keyword arguments are passed.

+
+
+ +
+ +
+ +
+ +

+ on_retriever_start + + + + async + + +

+
on_retriever_start(
+    serialized: dict[str, Any] | None,
+    query: str,
+    run_id: UUID | None = None,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> AsyncCallbackManagerForRetrieverRun
+
+ +
+ +

Run when the retriever starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized retriever.

+
+

+ + TYPE: + dict[str, Any] | None + +

+
+ query + +
+

The query.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + AsyncCallbackManagerForRetrieverRun + + +
+

The async callback manager for the retriever run.

+
+
+ +
+ +
+ +
+ +

+ configure + + + + classmethod + + +

+
configure(
+    inheritable_callbacks: Callbacks = None,
+    local_callbacks: Callbacks = None,
+    verbose: bool = False,
+    inheritable_tags: list[str] | None = None,
+    local_tags: list[str] | None = None,
+    inheritable_metadata: dict[str, Any] | None = None,
+    local_metadata: dict[str, Any] | None = None,
+) -> AsyncCallbackManager
+
+ +
+ +

Configure the async callback manager.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ inheritable_callbacks + +
+

The inheritable callbacks.

+
+

+ + TYPE: + Callbacks + + + DEFAULT: + None + +

+
+ local_callbacks + +
+

The local callbacks.

+
+

+ + TYPE: + Callbacks + + + DEFAULT: + None + +

+
+ verbose + +
+

Whether to enable verbose mode.

+
+

+ + TYPE: + bool + + + DEFAULT: + False + +

+
+ inheritable_tags + +
+

The inheritable tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ local_tags + +
+

The local tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ inheritable_metadata + +
+

The inheritable metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ local_metadata + +
+

The local metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + AsyncCallbackManager + + +
+

The configured async callback manager.

+
+
+ +
+ +
+ +
+ +

+ __init__ + + +

+
__init__(
+    handlers: list[BaseCallbackHandler],
+    inheritable_handlers: list[BaseCallbackHandler] | None = None,
+    parent_run_id: UUID | None = None,
+    *,
+    tags: list[str] | None = None,
+    inheritable_tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    inheritable_metadata: dict[str, Any] | None = None,
+) -> None
+
+ +
+ +

Initialize callback manager.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handlers + +
+

The handlers.

+
+

+ + TYPE: + list[BaseCallbackHandler] + +

+
+ inheritable_handlers + +
+

The inheritable handlers.

+
+

+ + TYPE: + list[BaseCallbackHandler] | None + + + DEFAULT: + None + +

+
+ parent_run_id + +
+

The parent run ID.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ inheritable_tags + +
+

The inheritable tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ inheritable_metadata + +
+

The inheritable metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ +
+ +
+ +
+ +

+ copy + + +

+
copy() -> Self
+
+ +
+ +

Return a copy of the callback manager.

+ +
+ +
+ +
+ +

+ merge + + +

+
merge(other: BaseCallbackManager) -> Self
+
+ +
+ +

Merge the callback manager with another callback manager.

+

May be overwritten in subclasses.

+

Primarily used internally within merge_configs.

+ + + + + + + + + + + + + + + +
RETURNSDESCRIPTION
+ + Self + + +
+

The merged callback manager of the same type as the current object.

+
+
+ + +
+ Example +
# Merging two callback managers
+from langchain_core.callbacks.manager import (
+    CallbackManager,
+    trace_as_chain_group,
+)
+from langchain_core.callbacks.stdout import StdOutCallbackHandler
+
+manager = CallbackManager(handlers=[StdOutCallbackHandler()], tags=["tag2"])
+with trace_as_chain_group("My Group Name", tags=["tag1"]) as group_manager:
+    merged_manager = group_manager.merge(manager)
+    print(merged_manager.handlers)
+    # [
+    #    <langchain_core.callbacks.stdout.StdOutCallbackHandler object at ...>,
+    #    <langchain_core.callbacks.streaming_stdout.StreamingStdOutCallbackHandler object at ...>,
+    # ]
+
+    print(merged_manager.tags)
+    #    ['tag2', 'tag1']
+
+
+
+ +
+ +
+ +

+ add_handler + + +

+
add_handler(handler: BaseCallbackHandler, inherit: bool = True) -> None
+
+ +
+ +

Add a handler to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to add.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ inherit + +
+

Whether to inherit the handler.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_handler + + +

+
remove_handler(handler: BaseCallbackHandler) -> None
+
+ +
+ +

Remove a handler from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to remove.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ +
+ +
+ +
+ +

+ set_handlers + + +

+
set_handlers(handlers: list[BaseCallbackHandler], inherit: bool = True) -> None
+
+ +
+ +

Set handlers as the only handlers on the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handlers + +
+

The handlers to set.

+
+

+ + TYPE: + list[BaseCallbackHandler] + +

+
+ inherit + +
+

Whether to inherit the handlers.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ set_handler + + +

+
set_handler(handler: BaseCallbackHandler, inherit: bool = True) -> None
+
+ +
+ +

Set handler as the only handler on the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ handler + +
+

The handler to set.

+
+

+ + TYPE: + BaseCallbackHandler + +

+
+ inherit + +
+

Whether to inherit the handler.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ add_tags + + +

+
add_tags(tags: list[str], inherit: bool = True) -> None
+
+ +
+ +

Add tags to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ tags + +
+

The tags to add.

+
+

+ + TYPE: + list[str] + +

+
+ inherit + +
+

Whether to inherit the tags.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_tags + + +

+
remove_tags(tags: list[str]) -> None
+
+ +
+ +

Remove tags from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ tags + +
+

The tags to remove.

+
+

+ + TYPE: + list[str] + +

+
+ +
+ +
+ +
+ +

+ add_metadata + + +

+
add_metadata(metadata: dict[str, Any], inherit: bool = True) -> None
+
+ +
+ +

Add metadata to the callback manager.

+ + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ metadata + +
+

The metadata to add.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ inherit + +
+

Whether to inherit the metadata.

+
+

+ + TYPE: + bool + + + DEFAULT: + True + +

+
+ +
+ +
+ +
+ +

+ remove_metadata + + +

+
remove_metadata(keys: list[str]) -> None
+
+ +
+ +

Remove metadata from the callback manager.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ keys + +
+

The keys to remove.

+
+

+ + TYPE: + list[str] + +

+
+ +
+ +
+ + + +
+ +
+ +
+ +
+ + + +

+ UsageMetadataCallbackHandler + + +

+ + +
+

+ Bases: BaseCallbackHandler

+ + + +

Callback Handler that tracks AIMessage.usage_metadata.

+ + +
+ Example +
from langchain.chat_models import init_chat_model
+from langchain_core.callbacks import UsageMetadataCallbackHandler
+
+llm_1 = init_chat_model(model="openai:gpt-4o-mini")
+llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
+
+callback = UsageMetadataCallbackHandler()
+result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]})
+result_2 = llm_2.invoke("Hello", config={"callbacks": [callback]})
+callback.usage_metadata
+
+
{'gpt-4o-mini-2024-07-18': {'input_tokens': 8,
+  'output_tokens': 10,
+  'total_tokens': 18,
+  'input_token_details': {'audio': 0, 'cache_read': 0},
+  'output_token_details': {'audio': 0, 'reasoning': 0}},
+ 'claude-3-5-haiku-20241022': {'input_tokens': 8,
+  'output_tokens': 21,
+  'total_tokens': 29,
+  'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
+
+
+

Added in langchain-core 0.3.49

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
METHODDESCRIPTION
__init__ +
+

Initialize the UsageMetadataCallbackHandler.

+
+
on_llm_end +
+

Collect token usage.

+
+
on_text +
+

Run on an arbitrary text.

+
+
on_retry +
+

Run on a retry event.

+
+
on_custom_event +
+

Override to define a handler for a custom event.

+
+
on_llm_start +
+

Run when LLM starts running.

+
+
on_chat_model_start +
+

Run when a chat model starts running.

+
+
on_retriever_start +
+

Run when the Retriever starts running.

+
+
on_chain_start +
+

Run when a chain starts running.

+
+
on_tool_start +
+

Run when the tool starts running.

+
+
on_retriever_error +
+

Run when Retriever errors.

+
+
on_retriever_end +
+

Run when Retriever ends running.

+
+
on_tool_end +
+

Run when the tool ends running.

+
+
on_tool_error +
+

Run when tool errors.

+
+
on_chain_end +
+

Run when chain ends running.

+
+
on_chain_error +
+

Run when chain errors.

+
+
on_agent_action +
+

Run on agent action.

+
+
on_agent_finish +
+

Run on the agent end.

+
+
on_llm_new_token +
+

Run on new output token.

+
+
on_llm_error +
+

Run when LLM errors.

+
+
+ + + + + +
+ + + + + + + +
+ + + +

+ raise_error + + + + class-attribute + instance-attribute + + +

+
raise_error: bool = False
+
+ +
+ +

Whether to raise an error if an exception occurs.

+ + +
+ +
+ +
+ + + +

+ run_inline + + + + class-attribute + instance-attribute + + +

+
run_inline: bool = False
+
+ +
+ +

Whether to run the callback inline.

+ + +
+ +
+ +
+ + + +

+ ignore_llm + + + + property + + +

+
ignore_llm: bool
+
+ +
+ +

Whether to ignore LLM callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_retry + + + + property + + +

+
ignore_retry: bool
+
+ +
+ +

Whether to ignore retry callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_chain + + + + property + + +

+
ignore_chain: bool
+
+ +
+ +

Whether to ignore chain callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_agent + + + + property + + +

+
ignore_agent: bool
+
+ +
+ +

Whether to ignore agent callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_retriever + + + + property + + +

+
ignore_retriever: bool
+
+ +
+ +

Whether to ignore retriever callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_chat_model + + + + property + + +

+
ignore_chat_model: bool
+
+ +
+ +

Whether to ignore chat model callbacks.

+ + +
+ +
+ +
+ + + +

+ ignore_custom_event + + + + property + + +

+
ignore_custom_event: bool
+
+ +
+ +

Ignore custom event.

+ + +
+ +
+ + + + +
+ +

+ __init__ + + +

+
__init__() -> None
+
+ +
+ +

Initialize the UsageMetadataCallbackHandler.

+ +
+ +
+ +
+ +

+ on_llm_end + + +

+
on_llm_end(response: LLMResult, **kwargs: Any) -> None
+
+ +
+ +

Collect token usage.

+ +
+ +
+ +
+ +

+ on_text + + +

+
on_text(
+    text: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+) -> Any
+
+ +
+ +

Run on an arbitrary text.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ text + +
+

The text.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retry + + +

+
on_retry(
+    retry_state: RetryCallState,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on a retry event.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ retry_state + +
+

The retry state.

+
+

+ + TYPE: + RetryCallState + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_custom_event + + +

+
on_custom_event(
+    name: str,
+    data: Any,
+    *,
+    run_id: UUID,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Override to define a handler for a custom event.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ name + +
+

The name of the custom event.

+
+

+ + TYPE: + str + +

+
+ data + +
+

The data for the custom event.

+

Format will match the format specified by the user.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the run.

+
+

+ + TYPE: + UUID + +

+
+ tags + +
+

The tags associated with the custom event (includes inherited tags).

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata associated with the custom event (includes inherited +metadata).

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ +
+ +
+ +
+ +

+ on_llm_start + + +

+
on_llm_start(
+    serialized: dict[str, Any],
+    prompts: list[str],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when LLM starts running.

+
+

Warning

+

This method is called for non-chat models (regular text completion LLMs). If +you're implementing a handler for a chat model, you should use +on_chat_model_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized LLM.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ prompts + +
+

The prompts.

+
+

+ + TYPE: + list[str] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chat_model_start + + +

+
on_chat_model_start(
+    serialized: dict[str, Any],
+    messages: list[list[BaseMessage]],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when a chat model starts running.

+
+

Warning

+

This method is called for chat models. If you're implementing a handler for +a non-chat model, you should use on_llm_start instead.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chat model.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ messages + +
+

The messages.

+
+

+ + TYPE: + list[list[BaseMessage]] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_start + + +

+
on_retriever_start(
+    serialized: dict[str, Any],
+    query: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when the Retriever starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized Retriever.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ query + +
+

The query.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_start + + +

+
on_chain_start(
+    serialized: dict[str, Any],
+    inputs: dict[str, Any],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when a chain starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_start + + +

+
on_tool_start(
+    serialized: dict[str, Any],
+    input_str: str,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    metadata: dict[str, Any] | None = None,
+    inputs: dict[str, Any] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when the tool starts running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ serialized + +
+

The serialized chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ input_str + +
+

The input string.

+
+

+ + TYPE: + str + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ metadata + +
+

The metadata.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ inputs + +
+

The inputs.

+
+

+ + TYPE: + dict[str, Any] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_error + + +

+
on_retriever_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when Retriever errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_retriever_end + + +

+
on_retriever_end(
+    documents: Sequence[Document],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when Retriever ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ documents + +
+

The documents retrieved.

+
+

+ + TYPE: + Sequence[Document] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_end + + +

+
on_tool_end(
+    output: Any, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+) -> Any
+
+ +
+ +

Run when the tool ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ output + +
+

The output of the tool.

+
+

+ + TYPE: + Any + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_tool_error + + +

+
on_tool_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when tool errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_end + + +

+
on_chain_end(
+    outputs: dict[str, Any],
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when chain ends running.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ outputs + +
+

The outputs of the chain.

+
+

+ + TYPE: + dict[str, Any] + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_chain_error + + +

+
on_chain_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when chain errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_agent_action + + +

+
on_agent_action(
+    action: AgentAction,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on agent action.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ action + +
+

The agent action.

+
+

+ + TYPE: + AgentAction + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_agent_finish + + +

+
on_agent_finish(
+    finish: AgentFinish,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on the agent end.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ finish + +
+

The agent finish.

+
+

+ + TYPE: + AgentFinish + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_new_token + + +

+
on_llm_new_token(
+    token: str,
+    *,
+    chunk: GenerationChunk | ChatGenerationChunk | None = None,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run on new output token.

+

Only available when streaming is enabled.

+

For both chat models and non-chat models (legacy text completion LLMs).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ token + +
+

The new token.

+
+

+ + TYPE: + str + +

+
+ chunk + +
+

The new generated chunk, containing content and other information.

+
+

+ + TYPE: + GenerationChunk | ChatGenerationChunk | None + + + DEFAULT: + None + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ +
+ +

+ on_llm_error + + +

+
on_llm_error(
+    error: BaseException,
+    *,
+    run_id: UUID,
+    parent_run_id: UUID | None = None,
+    tags: list[str] | None = None,
+    **kwargs: Any,
+) -> Any
+
+ +
+ +

Run when LLM errors.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ error + +
+

The error that occurred.

+
+

+ + TYPE: + BaseException + +

+
+ run_id + +
+

The ID of the current run.

+
+

+ + TYPE: + UUID + +

+
+ parent_run_id + +
+

The ID of the parent run.

+
+

+ + TYPE: + UUID | None + + + DEFAULT: + None + +

+
+ tags + +
+

The tags.

+
+

+ + TYPE: + list[str] | None + + + DEFAULT: + None + +

+
+ **kwargs + +
+

Additional keyword arguments.

+
+

+ + TYPE: + Any + + + DEFAULT: + {} + +

+
+ +
+ +
+ + + +
+ +
+ +
+ +
+ +

+ get_usage_metadata_callback + + +

+
get_usage_metadata_callback(
+    name: str = "usage_metadata_callback",
+) -> Generator[UsageMetadataCallbackHandler, None, None]
+
+ +
+ +

Get usage metadata callback.

+

Get context manager for tracking usage metadata across chat model calls using AIMessage.usage_metadata.

+ + + + + + + + + + + + + + + +
PARAMETERDESCRIPTION
+ name + +
+

The name of the context variable.

+
+

+ + TYPE: + str + + + DEFAULT: + 'usage_metadata_callback' + +

+
+ + + + + + + + + + + + + + + +
YIELDSDESCRIPTION
+ + UsageMetadataCallbackHandler + + +
+

The usage metadata callback.

+
+
+ + +
+ Example +
from langchain.chat_models import init_chat_model
+from langchain_core.callbacks import get_usage_metadata_callback
+
+llm_1 = init_chat_model(model="openai:gpt-4o-mini")
+llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
+
+with get_usage_metadata_callback() as cb:
+    llm_1.invoke("Hello")
+    llm_2.invoke("Hello")
+    print(cb.usage_metadata)
+
+
{
+    "gpt-4o-mini-2024-07-18": {
+        "input_tokens": 8,
+        "output_tokens": 10,
+        "total_tokens": 18,
+        "input_token_details": {"audio": 0, "cache_read": 0},
+        "output_token_details": {"audio": 0, "reasoning": 0},
+    },
+    "claude-3-5-haiku-20241022": {
+        "input_tokens": 8,
+        "output_tokens": 21,
+        "total_tokens": 29,
+        "input_token_details": {"cache_read": 0, "cache_creation": 0},
+    },
+}
+
+
+

Added in langchain-core 0.3.49

+
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + + + +
+ + + +
+ + + +
+
+
+
+ +
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/LANGCHAIN.md b/LANGCHAIN.md new file mode 100644 index 00000000..29d8da09 --- /dev/null +++ b/LANGCHAIN.md @@ -0,0 +1,1419 @@ +> ## Documentation Index +> Fetch the complete documentation index at: https://docs.langchain.com/llms.txt +> Use this file to discover all available pages before exploring further. + +# Memory + +AI applications need [memory](/oss/python/concepts/memory) to share context across multiple interactions. In LangGraph, you can add two types of memory: + +* [Add short-term memory](#add-short-term-memory) as a part of your agent's [state](/oss/python/langgraph/graph-api#state) to enable multi-turn conversations. +* [Add long-term memory](#add-long-term-memory) to store user-specific or application-level data across sessions. + +## Add short-term memory + +**Short-term** memory (thread-level [persistence](/oss/python/langgraph/persistence)) enables agents to track multi-turn conversations. To add short-term memory: + +```python theme={null} +from langgraph.checkpoint.memory import InMemorySaver # [!code highlight] +from langgraph.graph import StateGraph + +checkpointer = InMemorySaver() # [!code highlight] + +builder = StateGraph(...) +graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + +graph.invoke( + {"messages": [{"role": "user", "content": "hi! i am Bob"}]}, + {"configurable": {"thread_id": "1"}}, # [!code highlight] +) +``` + +### Use in production + +In production, use a checkpointer backed by a database: + +```python theme={null} +from langgraph.checkpoint.postgres import PostgresSaver + +DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable" +with PostgresSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + builder = StateGraph(...) 
+ graph = builder.compile(checkpointer=checkpointer) # [!code highlight] +``` + + + ``` + pip install -U "psycopg[binary,pool]" langgraph langgraph-checkpoint-postgres + ``` + + + You need to call `checkpointer.setup()` the first time you're using Postgres checkpointer + + + + + ```python theme={null} + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.postgres import PostgresSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable" + with PostgresSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + # checkpointer.setup() + + def call_model(state: MessagesState): + response = model.invoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "hi! 
I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + + ```python theme={null} + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable" + async with AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + # await checkpointer.setup() + + async def call_model(state: MessagesState): + response = await model.ainvoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "hi! I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + + + + ``` + pip install -U pymongo langgraph langgraph-checkpoint-mongodb + ``` + + + **Setup** + To use the [MongoDB checkpointer](https://pypi.org/project/langgraph-checkpoint-mongodb/), you will need a MongoDB cluster. Follow [this guide](https://www.mongodb.com/docs/guides/atlas/cluster/) to create a cluster if you don't already have one. 
+ + + + + ```python theme={null} + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.mongodb import MongoDBSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "localhost:27017" + with MongoDBSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + + def call_model(state: MessagesState): + response = model.invoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "hi! I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + + ```python theme={null} + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.mongodb.aio import AsyncMongoDBSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "localhost:27017" + async with AsyncMongoDBSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + + async def call_model(state: MessagesState): + response = await model.ainvoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + 
async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "hi! I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + + + + ``` + pip install -U langgraph langgraph-checkpoint-redis + ``` + + + You need to call `checkpointer.setup()` the first time you're using Redis checkpointer. + + + + + ```python theme={null} + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.redis import RedisSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "redis://localhost:6379" + with RedisSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + # checkpointer.setup() + + def call_model(state: MessagesState): + response = model.invoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "hi! 
I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + + ```python theme={null} + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.redis.aio import AsyncRedisSaver # [!code highlight] + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + DB_URI = "redis://localhost:6379" + async with AsyncRedisSaver.from_conn_string(DB_URI) as checkpointer: # [!code highlight] + # await checkpointer.asetup() + + async def call_model(state: MessagesState): + response = await model.ainvoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile(checkpointer=checkpointer) # [!code highlight] + + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "hi! I'm bob"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, # [!code highlight] + stream_mode="values" + ): + chunk["messages"][-1].pretty_print() + ``` + + + + +### Use in subgraphs + +If your graph contains [subgraphs](/oss/python/langgraph/use-subgraphs), you only need to provide the checkpointer when compiling the parent graph. LangGraph will automatically propagate the checkpointer to the child subgraphs. 
+ +```python theme={null} +from langgraph.graph import START, StateGraph +from langgraph.checkpoint.memory import InMemorySaver +from typing import TypedDict + +class State(TypedDict): + foo: str + +# Subgraph + +def subgraph_node_1(state: State): + return {"foo": state["foo"] + "bar"} + +subgraph_builder = StateGraph(State) +subgraph_builder.add_node(subgraph_node_1) +subgraph_builder.add_edge(START, "subgraph_node_1") +subgraph = subgraph_builder.compile() # [!code highlight] + +# Parent graph + +builder = StateGraph(State) +builder.add_node("node_1", subgraph) # [!code highlight] +builder.add_edge(START, "node_1") + +checkpointer = InMemorySaver() +graph = builder.compile(checkpointer=checkpointer) # [!code highlight] +``` + +If you want the subgraph to have its own memory, you can compile it with the appropriate checkpointer option. This is useful in [multi-agent](/oss/python/langchain/multi-agent) systems, if you want agents to keep track of their internal message histories. + +```python theme={null} +subgraph_builder = StateGraph(...) +subgraph = subgraph_builder.compile(checkpointer=True) # [!code highlight] +``` + +## Add long-term memory + +Use long-term memory to store user-specific or application-specific data across conversations. + +```python theme={null} +from langgraph.store.memory import InMemoryStore # [!code highlight] +from langgraph.graph import StateGraph + +store = InMemoryStore() # [!code highlight] + +builder = StateGraph(...) +graph = builder.compile(store=store) # [!code highlight] +``` + +### Access the store inside nodes + +Once you compile a graph with a store, LangGraph automatically injects the store into your node functions. The recommended way to access the store is through the `Runtime` object. 
+ +```python theme={null} +from dataclasses import dataclass +from langgraph.runtime import Runtime +from langgraph.graph import StateGraph, MessagesState, START +import uuid + +@dataclass +class Context: + user_id: str + +async def call_model(state: MessagesState, runtime: Runtime[Context]): # [!code highlight] + user_id = runtime.context.user_id # [!code highlight] + namespace = (user_id, "memories") + + # Search for relevant memories + memories = await runtime.store.asearch( # [!code highlight] + namespace, query=state["messages"][-1].content, limit=3 + ) + info = "\n".join([d.value["data"] for d in memories]) + + # ... Use memories in model call + + # Store a new memory + await runtime.store.aput( # [!code highlight] + namespace, str(uuid.uuid4()), {"data": "User prefers dark mode"} + ) + +builder = StateGraph(MessagesState, context_schema=Context) # [!code highlight] +builder.add_node(call_model) +builder.add_edge(START, "call_model") +graph = builder.compile(store=store) + +# Pass context at invocation time +graph.invoke( + {"messages": [{"role": "user", "content": "hi"}]}, + {"configurable": {"thread_id": "1"}}, + context=Context(user_id="1"), # [!code highlight] +) +``` + +### Use in production + +In production, use a store backed by a database: + +```python theme={null} +from langgraph.store.postgres import PostgresStore + +DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable" +with PostgresStore.from_conn_string(DB_URI) as store: # [!code highlight] + builder = StateGraph(...) 
+ graph = builder.compile(store=store) # [!code highlight] +``` + + + ``` + pip install -U "psycopg[binary,pool]" langgraph langgraph-checkpoint-postgres + ``` + + + You need to call `store.setup()` the first time you're using Postgres store + + + + + ```python theme={null} + from dataclasses import dataclass + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver + from langgraph.store.postgres.aio import AsyncPostgresStore # [!code highlight] + from langgraph.runtime import Runtime # [!code highlight] + import uuid + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + @dataclass + class Context: + user_id: str + + async def call_model( # [!code highlight] + state: MessagesState, + runtime: Runtime[Context], # [!code highlight] + ): + user_id = runtime.context.user_id # [!code highlight] + namespace = ("memories", user_id) + memories = await runtime.store.asearch(namespace, query=str(state["messages"][-1].content)) # [!code highlight] + info = "\n".join([d.value["data"] for d in memories]) + system_msg = f"You are a helpful assistant talking to the user. 
User info: {info}" + + # Store new memories if the user asks the model to remember + last_message = state["messages"][-1] + if "remember" in last_message.content.lower(): + memory = "User name is Bob" + await runtime.store.aput(namespace, str(uuid.uuid4()), {"data": memory}) # [!code highlight] + + response = await model.ainvoke( + [{"role": "system", "content": system_msg}] + state["messages"] + ) + return {"messages": response} + + DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable" + + async with ( + AsyncPostgresStore.from_conn_string(DB_URI) as store, # [!code highlight] + AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer, + ): + # await store.setup() + # await checkpointer.setup() + + builder = StateGraph(MessagesState, context_schema=Context) # [!code highlight] + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile( + checkpointer=checkpointer, + store=store, # [!code highlight] + ) + + config = {"configurable": {"thread_id": "1"}} + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "Hi! 
Remember: my name is Bob"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + + config = {"configurable": {"thread_id": "2"}} + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "what is my name?"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + ``` + + + + ```python theme={null} + from dataclasses import dataclass + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.postgres import PostgresSaver + from langgraph.store.postgres import PostgresStore # [!code highlight] + from langgraph.runtime import Runtime # [!code highlight] + import uuid + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + @dataclass + class Context: + user_id: str + + def call_model( # [!code highlight] + state: MessagesState, + runtime: Runtime[Context], # [!code highlight] + ): + user_id = runtime.context.user_id # [!code highlight] + namespace = ("memories", user_id) + memories = runtime.store.search(namespace, query=str(state["messages"][-1].content)) # [!code highlight] + info = "\n".join([d.value["data"] for d in memories]) + system_msg = f"You are a helpful assistant talking to the user. 
User info: {info}" + + # Store new memories if the user asks the model to remember + last_message = state["messages"][-1] + if "remember" in last_message.content.lower(): + memory = "User name is Bob" + runtime.store.put(namespace, str(uuid.uuid4()), {"data": memory}) # [!code highlight] + + response = model.invoke( + [{"role": "system", "content": system_msg}] + state["messages"] + ) + return {"messages": response} + + DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable" + + with ( + PostgresStore.from_conn_string(DB_URI) as store, # [!code highlight] + PostgresSaver.from_conn_string(DB_URI) as checkpointer, + ): + # store.setup() + # checkpointer.setup() + + builder = StateGraph(MessagesState, context_schema=Context) # [!code highlight] + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile( + checkpointer=checkpointer, + store=store, # [!code highlight] + ) + + config = {"configurable": {"thread_id": "1"}} + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "Hi! Remember: my name is Bob"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + + config = {"configurable": {"thread_id": "2"}} + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "what is my name?"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + ``` + + + + + + ``` + pip install -U langgraph langgraph-checkpoint-redis + ``` + + + You need to call `store.setup()` the first time you're using [Redis store](https://pypi.org/project/langgraph-checkpoint-redis/). 
+ + + + + ```python theme={null} + from dataclasses import dataclass + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.redis.aio import AsyncRedisSaver + from langgraph.store.redis.aio import AsyncRedisStore # [!code highlight] + from langgraph.runtime import Runtime # [!code highlight] + import uuid + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + @dataclass + class Context: + user_id: str + + async def call_model( # [!code highlight] + state: MessagesState, + runtime: Runtime[Context], # [!code highlight] + ): + user_id = runtime.context.user_id # [!code highlight] + namespace = ("memories", user_id) + memories = await runtime.store.asearch(namespace, query=str(state["messages"][-1].content)) # [!code highlight] + info = "\n".join([d.value["data"] for d in memories]) + system_msg = f"You are a helpful assistant talking to the user. User info: {info}" + + # Store new memories if the user asks the model to remember + last_message = state["messages"][-1] + if "remember" in last_message.content.lower(): + memory = "User name is Bob" + await runtime.store.aput(namespace, str(uuid.uuid4()), {"data": memory}) # [!code highlight] + + response = await model.ainvoke( + [{"role": "system", "content": system_msg}] + state["messages"] + ) + return {"messages": response} + + DB_URI = "redis://localhost:6379" + + async with ( + AsyncRedisStore.from_conn_string(DB_URI) as store, # [!code highlight] + AsyncRedisSaver.from_conn_string(DB_URI) as checkpointer, + ): + # await store.setup() + # await checkpointer.asetup() + + builder = StateGraph(MessagesState, context_schema=Context) # [!code highlight] + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile( + checkpointer=checkpointer, + store=store, # [!code highlight] + ) + + config = {"configurable": {"thread_id": "1"}} + async for chunk in graph.astream( + {"messages": 
[{"role": "user", "content": "Hi! Remember: my name is Bob"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + + config = {"configurable": {"thread_id": "2"}} + async for chunk in graph.astream( + {"messages": [{"role": "user", "content": "what is my name?"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + ``` + + + + ```python theme={null} + from dataclasses import dataclass + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, MessagesState, START + from langgraph.checkpoint.redis import RedisSaver + from langgraph.store.redis import RedisStore # [!code highlight] + from langgraph.runtime import Runtime # [!code highlight] + import uuid + + model = init_chat_model(model="claude-haiku-4-5-20251001") + + @dataclass + class Context: + user_id: str + + def call_model( # [!code highlight] + state: MessagesState, + runtime: Runtime[Context], # [!code highlight] + ): + user_id = runtime.context.user_id # [!code highlight] + namespace = ("memories", user_id) + memories = runtime.store.search(namespace, query=str(state["messages"][-1].content)) # [!code highlight] + info = "\n".join([d.value["data"] for d in memories]) + system_msg = f"You are a helpful assistant talking to the user. 
User info: {info}" + + # Store new memories if the user asks the model to remember + last_message = state["messages"][-1] + if "remember" in last_message.content.lower(): + memory = "User name is Bob" + runtime.store.put(namespace, str(uuid.uuid4()), {"data": memory}) # [!code highlight] + + response = model.invoke( + [{"role": "system", "content": system_msg}] + state["messages"] + ) + return {"messages": response} + + DB_URI = "redis://localhost:6379" + + with ( + RedisStore.from_conn_string(DB_URI) as store, # [!code highlight] + RedisSaver.from_conn_string(DB_URI) as checkpointer, + ): + store.setup() + checkpointer.setup() + + builder = StateGraph(MessagesState, context_schema=Context) # [!code highlight] + builder.add_node(call_model) + builder.add_edge(START, "call_model") + + graph = builder.compile( + checkpointer=checkpointer, + store=store, # [!code highlight] + ) + + config = {"configurable": {"thread_id": "1"}} + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "Hi! Remember: my name is Bob"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + + config = {"configurable": {"thread_id": "2"}} + for chunk in graph.stream( + {"messages": [{"role": "user", "content": "what is my name?"}]}, + config, + stream_mode="values", + context=Context(user_id="1"), # [!code highlight] + ): + chunk["messages"][-1].pretty_print() + ``` + + + + +### Use semantic search + +Enable semantic search in your graph's memory store to let graph agents search for items in the store by semantic similarity. 
+ +```python theme={null} +from langchain.embeddings import init_embeddings +from langgraph.store.memory import InMemoryStore + +# Create store with semantic search enabled +embeddings = init_embeddings("openai:text-embedding-3-small") +store = InMemoryStore( + index={ + "embed": embeddings, + "dims": 1536, + } +) + +store.put(("user_123", "memories"), "1", {"text": "I love pizza"}) +store.put(("user_123", "memories"), "2", {"text": "I am a plumber"}) + +items = store.search( + ("user_123", "memories"), query="I'm hungry", limit=1 +) +``` + + + ```python theme={null} + + from langchain.embeddings import init_embeddings + from langchain.chat_models import init_chat_model + from langgraph.store.memory import InMemoryStore + from langgraph.graph import START, MessagesState, StateGraph + from langgraph.runtime import Runtime # [!code highlight] + + model = init_chat_model("gpt-4.1-mini") + + # Create store with semantic search enabled + embeddings = init_embeddings("openai:text-embedding-3-small") + store = InMemoryStore( + index={ + "embed": embeddings, + "dims": 1536, + } + ) + + store.put(("user_123", "memories"), "1", {"text": "I love pizza"}) + store.put(("user_123", "memories"), "2", {"text": "I am a plumber"}) + + async def chat(state: MessagesState, runtime: Runtime): # [!code highlight] + # Search based on user's last message + items = await runtime.store.asearch( # [!code highlight] + ("user_123", "memories"), query=state["messages"][-1].content, limit=2 + ) + memories = "\n".join(item.value["text"] for item in items) + memories = f"## Memories of user\n{memories}" if memories else "" + response = await model.ainvoke( + [ + {"role": "system", "content": f"You are a helpful assistant.\n{memories}"}, + *state["messages"], + ] + ) + return {"messages": [response]} + + + builder = StateGraph(MessagesState) + builder.add_node(chat) + builder.add_edge(START, "chat") + graph = builder.compile(store=store) + + async for message, metadata in graph.astream( + 
input={"messages": [{"role": "user", "content": "I'm hungry"}]}, + stream_mode="messages", + ): + print(message.content, end="") + ``` + + +## Manage short-term memory + +With [short-term memory](#add-short-term-memory) enabled, long conversations can exceed the LLM's context window. Common solutions are: + +* [Trim messages](#trim-messages): Remove first or last N messages (before calling LLM) +* [Delete messages](#delete-messages) from LangGraph state permanently +* [Summarize messages](#summarize-messages): Summarize earlier messages in the history and replace them with a summary +* [Manage checkpoints](#manage-checkpoints) to store and retrieve message history +* Custom strategies (e.g., message filtering, etc.) + +This allows the agent to keep track of the conversation without exceeding the LLM's context window. + +### Trim messages + +Most LLMs have a maximum supported context window (denominated in tokens). One way to decide when to truncate messages is to count the tokens in the message history and truncate whenever it approaches that limit. If you're using LangChain, you can use the trim messages utility and specify the number of tokens to keep from the list, as well as the `strategy` (e.g., keep the last `max_tokens`) to use for handling the boundary. 
+ +To trim message history, use the [`trim_messages`](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.utils.trim_messages.html) function: + +```python theme={null} +from langchain_core.messages.utils import ( # [!code highlight] + trim_messages, # [!code highlight] + count_tokens_approximately # [!code highlight] +) # [!code highlight] + +def call_model(state: MessagesState): + messages = trim_messages( # [!code highlight] + state["messages"], + strategy="last", + token_counter=count_tokens_approximately, + max_tokens=128, + start_on="human", + end_on=("human", "tool"), + ) + response = model.invoke(messages) + return {"messages": [response]} + +builder = StateGraph(MessagesState) +builder.add_node(call_model) +... +``` + + + ```python theme={null} + from langchain_core.messages.utils import ( + trim_messages, # [!code highlight] + count_tokens_approximately # [!code highlight] + ) + from langchain.chat_models import init_chat_model + from langgraph.graph import StateGraph, START, MessagesState + from langgraph.checkpoint.memory import InMemorySaver + + model = init_chat_model("claude-sonnet-4-5-20250929") + summarization_model = model.bind(max_tokens=128) + + def call_model(state: MessagesState): + messages = trim_messages( # [!code highlight] + state["messages"], + strategy="last", + token_counter=count_tokens_approximately, + max_tokens=128, + start_on="human", + end_on=("human", "tool"), + ) + response = model.invoke(messages) + return {"messages": [response]} + + checkpointer = InMemorySaver() + builder = StateGraph(MessagesState) + builder.add_node(call_model) + builder.add_edge(START, "call_model") + graph = builder.compile(checkpointer=checkpointer) + + config = {"configurable": {"thread_id": "1"}} + graph.invoke({"messages": "hi, my name is bob"}, config) + graph.invoke({"messages": "write a short poem about cats"}, config) + graph.invoke({"messages": "now do the same but for dogs"}, config) + final_response = graph.invoke({"messages": "what's my name?"}, config) + + 
final_response["messages"][-1].pretty_print() + ``` + + ``` + ================================== Ai Message ================================== + + Your name is Bob, as you mentioned when you first introduced yourself. + ``` + + +### Delete messages + +You can delete messages from the graph state to manage the message history. This is useful when you want to remove specific messages or clear the entire message history. + +To delete messages from the graph state, you can use the `RemoveMessage`. For `RemoveMessage` to work, you need to use a state key with [`add_messages`](https://reference.langchain.com/python/langgraph/graphs/#langgraph.graph.message.add_messages) [reducer](/oss/python/langgraph/graph-api#reducers), like [`MessagesState`](/oss/python/langgraph/graph-api#messagesstate). + +To remove specific messages: + +```python theme={null} +from langchain.messages import RemoveMessage # [!code highlight] + +def delete_messages(state): + messages = state["messages"] + if len(messages) > 2: + # remove the earliest two messages + return {"messages": [RemoveMessage(id=m.id) for m in messages[:2]]} # [!code highlight] +``` + +To remove **all** messages: + +```python theme={null} +from langgraph.graph.message import REMOVE_ALL_MESSAGES # [!code highlight] + +def delete_messages(state): + return {"messages": [RemoveMessage(id=REMOVE_ALL_MESSAGES)]} # [!code highlight] +``` + + + When deleting messages, **make sure** that the resulting message history is valid. Check the limitations of the LLM provider you're using. For example: + + * Some providers expect message history to start with a `user` message + * Most providers require `assistant` messages with tool calls to be followed by corresponding `tool` result messages. 
+ + + + ```python theme={null} + from langchain.messages import RemoveMessage # [!code highlight] + + def delete_messages(state): + messages = state["messages"] + if len(messages) > 2: + # remove the earliest two messages + return {"messages": [RemoveMessage(id=m.id) for m in messages[:2]]} # [!code highlight] + + def call_model(state: MessagesState): + response = model.invoke(state["messages"]) + return {"messages": response} + + builder = StateGraph(MessagesState) + builder.add_sequence([call_model, delete_messages]) + builder.add_edge(START, "call_model") + + checkpointer = InMemorySaver() + app = builder.compile(checkpointer=checkpointer) + + for event in app.stream( + {"messages": [{"role": "user", "content": "hi! I'm bob"}]}, + config, + stream_mode="values" + ): + print([(message.type, message.content) for message in event["messages"]]) + + for event in app.stream( + {"messages": [{"role": "user", "content": "what's my name?"}]}, + config, + stream_mode="values" + ): + print([(message.type, message.content) for message in event["messages"]]) + ``` + + ``` + [('human', "hi! I'm bob")] + [('human', "hi! I'm bob"), ('ai', 'Hi Bob! How are you doing today? Is there anything I can help you with?')] + [('human', "hi! I'm bob"), ('ai', 'Hi Bob! How are you doing today? Is there anything I can help you with?'), ('human', "what's my name?")] + [('human', "hi! I'm bob"), ('ai', 'Hi Bob! How are you doing today? Is there anything I can help you with?'), ('human', "what's my name?"), ('ai', 'Your name is Bob.')] + [('human', "what's my name?"), ('ai', 'Your name is Bob.')] + ``` + + +### Summarize messages + +The problem with trimming or removing messages, as shown above, is that you may lose information from culling of the message queue. Because of this, some applications benefit from a more sophisticated approach of summarizing the message history using a chat model. + +Summary + +Prompting and orchestration logic can be used to summarize the message history. 
For example, in LangGraph you can extend the [`MessagesState`](/oss/python/langgraph/graph-api#working-with-messages-in-graph-state) to include a `summary` key: + +```python theme={null} +from langgraph.graph import MessagesState +class State(MessagesState): + summary: str +``` + +Then, you can generate a summary of the chat history, using any existing summary as context for the next summary. This `summarize_conversation` node can be called after some number of messages have accumulated in the `messages` state key. + +```python theme={null} +def summarize_conversation(state: State): + + # First, we get any existing summary + summary = state.get("summary", "") + + # Create our summarization prompt + if summary: + + # A summary already exists + summary_message = ( + f"This is a summary of the conversation to date: {summary}\n\n" + "Extend the summary by taking into account the new messages above:" + ) + + else: + summary_message = "Create a summary of the conversation above:" + + # Add prompt to our history + messages = state["messages"] + [HumanMessage(content=summary_message)] + response = model.invoke(messages) + + # Delete all but the 2 most recent messages + delete_messages = [RemoveMessage(id=m.id) for m in state["messages"][:-2]] + return {"summary": response.content, "messages": delete_messages} +``` + + + ```python theme={null} + from typing import Any, TypedDict + + from langchain.chat_models import init_chat_model + from langchain.messages import AnyMessage + from langchain_core.messages.utils import count_tokens_approximately + from langgraph.graph import StateGraph, START, MessagesState + from langgraph.checkpoint.memory import InMemorySaver + from langmem.short_term import SummarizationNode, RunningSummary # [!code highlight] + + model = init_chat_model("claude-sonnet-4-5-20250929") + summarization_model = model.bind(max_tokens=128) + + class State(MessagesState): + context: dict[str, RunningSummary] # [!code highlight] + + class 
LLMInputState(TypedDict): # [!code highlight] + summarized_messages: list[AnyMessage] + context: dict[str, RunningSummary] + + summarization_node = SummarizationNode( # [!code highlight] + token_counter=count_tokens_approximately, + model=summarization_model, + max_tokens=256, + max_tokens_before_summary=256, + max_summary_tokens=128, + ) + + def call_model(state: LLMInputState): # [!code highlight] + response = model.invoke(state["summarized_messages"]) + return {"messages": [response]} + + checkpointer = InMemorySaver() + builder = StateGraph(State) + builder.add_node(call_model) + builder.add_node("summarize", summarization_node) # [!code highlight] + builder.add_edge(START, "summarize") + builder.add_edge("summarize", "call_model") + graph = builder.compile(checkpointer=checkpointer) + + # Invoke the graph + config = {"configurable": {"thread_id": "1"}} + graph.invoke({"messages": "hi, my name is bob"}, config) + graph.invoke({"messages": "write a short poem about cats"}, config) + graph.invoke({"messages": "now do the same but for dogs"}, config) + final_response = graph.invoke({"messages": "what's my name?"}, config) + + final_response["messages"][-1].pretty_print() + print("\nSummary:", final_response["context"]["running_summary"].summary) + ``` + + 1. We will keep track of our running summary in the `context` field + + (expected by the `SummarizationNode`). + + 1. Define private state that will be used only for filtering + + the inputs to `call_model` node. + + 1. We're passing a private input state here to isolate the messages returned by the summarization node + + ``` + ================================== Ai Message ================================== + + From our conversation, I can see that you introduced yourself as Bob. That's the name you shared with me when we began talking. + + Summary: In this conversation, I was introduced to Bob, who then asked me to write a poem about cats. 
I composed a poem titled "The Mystery of Cats" that captured cats' graceful movements, independent nature, and their special relationship with humans. Bob then requested a similar poem about dogs, so I wrote "The Joy of Dogs," which highlighted dogs' loyalty, enthusiasm, and loving companionship. Both poems were written in a similar style but emphasized the distinct characteristics that make each pet special. + ``` + + +### Manage checkpoints + +You can view and delete the information stored by the checkpointer. + + + +#### View thread state + + + + ```python theme={null} + config = { + "configurable": { + "thread_id": "1", # [!code highlight] + # optionally provide an ID for a specific checkpoint, + # otherwise the latest checkpoint is shown + # "checkpoint_id": "1f029ca3-1f5b-6704-8004-820c16b69a5a" # [!code highlight] + + } + } + graph.get_state(config) # [!code highlight] + ``` + + ``` + StateSnapshot( + values={'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! 
How are you doing today?'), HumanMessage(content="what's my name?"), AIMessage(content='Your name is Bob.')]}, next=(), + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1f5b-6704-8004-820c16b69a5a'}}, + metadata={ + 'source': 'loop', + 'writes': {'call_model': {'messages': AIMessage(content='Your name is Bob.')}}, + 'step': 4, + 'parents': {}, + 'thread_id': '1' + }, + created_at='2025-05-05T16:01:24.680462+00:00', + parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1790-6b0a-8003-baf965b6a38f'}}, + tasks=(), + interrupts=() + ) + ``` + + + + ```python theme={null} + config = { + "configurable": { + "thread_id": "1", # [!code highlight] + # optionally provide an ID for a specific checkpoint, + # otherwise the latest checkpoint is shown + # "checkpoint_id": "1f029ca3-1f5b-6704-8004-820c16b69a5a" # [!code highlight] + + } + } + checkpointer.get_tuple(config) # [!code highlight] + ``` + + ``` + CheckpointTuple( + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1f5b-6704-8004-820c16b69a5a'}}, + checkpoint={ + 'v': 3, + 'ts': '2025-05-05T16:01:24.680462+00:00', + 'id': '1f029ca3-1f5b-6704-8004-820c16b69a5a', + 'channel_versions': {'__start__': '00000000000000000000000000000005.0.5290678567601859', 'messages': '00000000000000000000000000000006.0.3205149138784782', 'branch:to:call_model': '00000000000000000000000000000006.0.14611156755133758'}, 'versions_seen': {'__input__': {}, '__start__': {'__start__': '00000000000000000000000000000004.0.5736472536395331'}, 'call_model': {'branch:to:call_model': '00000000000000000000000000000005.0.1410174088651449'}}, + 'channel_values': {'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! 
How are you doing today?), HumanMessage(content="what's my name?"), AIMessage(content='Your name is Bob.')]}, + }, + metadata={ + 'source': 'loop', + 'writes': {'call_model': {'messages': AIMessage(content='Your name is Bob.')}}, + 'step': 4, + 'parents': {}, + 'thread_id': '1' + }, + parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1790-6b0a-8003-baf965b6a38f'}}, + pending_writes=[] + ) + ``` + + + + + +#### View the history of the thread + + + + ```python theme={null} + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + list(graph.get_state_history(config)) # [!code highlight] + ``` + + ``` + [ + StateSnapshot( + values={'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? Is there anything I can help you with?'), HumanMessage(content="what's my name?"), AIMessage(content='Your name is Bob.')]}, + next=(), + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1f5b-6704-8004-820c16b69a5a'}}, + metadata={'source': 'loop', 'writes': {'call_model': {'messages': AIMessage(content='Your name is Bob.')}}, 'step': 4, 'parents': {}, 'thread_id': '1'}, + created_at='2025-05-05T16:01:24.680462+00:00', + parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1790-6b0a-8003-baf965b6a38f'}}, + tasks=(), + interrupts=() + ), + StateSnapshot( + values={'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? 
Is there anything I can help you with?'), HumanMessage(content="what's my name?")]}, + next=('call_model',), + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1790-6b0a-8003-baf965b6a38f'}}, + metadata={'source': 'loop', 'writes': None, 'step': 3, 'parents': {}, 'thread_id': '1'}, + created_at='2025-05-05T16:01:23.863421+00:00', + parent_config={...} + tasks=(PregelTask(id='8ab4155e-6b15-b885-9ce5-bed69a2c305c', name='call_model', path=('__pregel_pull', 'call_model'), error=None, interrupts=(), state=None, result={'messages': AIMessage(content='Your name is Bob.')}),), + interrupts=() + ), + StateSnapshot( + values={'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? Is there anything I can help you with?')]}, + next=('__start__',), + config={...}, + metadata={'source': 'input', 'writes': {'__start__': {'messages': [{'role': 'user', 'content': "what's my name?"}]}}, 'step': 2, 'parents': {}, 'thread_id': '1'}, + created_at='2025-05-05T16:01:23.863173+00:00', + parent_config={...} + tasks=(PregelTask(id='24ba39d6-6db1-4c9b-f4c5-682aeaf38dcd', name='__start__', path=('__pregel_pull', '__start__'), error=None, interrupts=(), state=None, result={'messages': [{'role': 'user', 'content': "what's my name?"}]}),), + interrupts=() + ), + StateSnapshot( + values={'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? Is there anything I can help you with?')]}, + next=(), + config={...}, + metadata={'source': 'loop', 'writes': {'call_model': {'messages': AIMessage(content='Hi Bob! How are you doing today? Is there anything I can help you with?')}}, 'step': 1, 'parents': {}, 'thread_id': '1'}, + created_at='2025-05-05T16:01:23.862295+00:00', + parent_config={...} + tasks=(), + interrupts=() + ), + StateSnapshot( + values={'messages': [HumanMessage(content="hi! 
I'm bob")]}, + next=('call_model',), + config={...}, + metadata={'source': 'loop', 'writes': None, 'step': 0, 'parents': {}, 'thread_id': '1'}, + created_at='2025-05-05T16:01:22.278960+00:00', + parent_config={...} + tasks=(PregelTask(id='8cbd75e0-3720-b056-04f7-71ac805140a0', name='call_model', path=('__pregel_pull', 'call_model'), error=None, interrupts=(), state=None, result={'messages': AIMessage(content='Hi Bob! How are you doing today? Is there anything I can help you with?')}),), + interrupts=() + ), + StateSnapshot( + values={'messages': []}, + next=('__start__',), + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-0870-6ce2-bfff-1f3f14c3e565'}}, + metadata={'source': 'input', 'writes': {'__start__': {'messages': [{'role': 'user', 'content': "hi! I'm bob"}]}}, 'step': -1, 'parents': {}, 'thread_id': '1'}, + created_at='2025-05-05T16:01:22.277497+00:00', + parent_config=None, + tasks=(PregelTask(id='d458367b-8265-812c-18e2-33001d199ce6', name='__start__', path=('__pregel_pull', '__start__'), error=None, interrupts=(), state=None, result={'messages': [{'role': 'user', 'content': "hi! 
I'm bob"}]}),), + interrupts=() + ) + ] + ``` + + + + ```python theme={null} + config = { + "configurable": { + "thread_id": "1" # [!code highlight] + } + } + list(checkpointer.list(config)) # [!code highlight] + ``` + + ``` + [ + CheckpointTuple( + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1f5b-6704-8004-820c16b69a5a'}}, + checkpoint={ + 'v': 3, + 'ts': '2025-05-05T16:01:24.680462+00:00', + 'id': '1f029ca3-1f5b-6704-8004-820c16b69a5a', + 'channel_versions': {'__start__': '00000000000000000000000000000005.0.5290678567601859', 'messages': '00000000000000000000000000000006.0.3205149138784782', 'branch:to:call_model': '00000000000000000000000000000006.0.14611156755133758'}, + 'versions_seen': {'__input__': {}, '__start__': {'__start__': '00000000000000000000000000000004.0.5736472536395331'}, 'call_model': {'branch:to:call_model': '00000000000000000000000000000005.0.1410174088651449'}}, + 'channel_values': {'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? 
Is there anything I can help you with?'), HumanMessage(content="what's my name?"), AIMessage(content='Your name is Bob.')]}, + }, + metadata={'source': 'loop', 'writes': {'call_model': {'messages': AIMessage(content='Your name is Bob.')}}, 'step': 4, 'parents': {}, 'thread_id': '1'}, + parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1790-6b0a-8003-baf965b6a38f'}}, + pending_writes=[] + ), + CheckpointTuple( + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-1790-6b0a-8003-baf965b6a38f'}}, + checkpoint={ + 'v': 3, + 'ts': '2025-05-05T16:01:23.863421+00:00', + 'id': '1f029ca3-1790-6b0a-8003-baf965b6a38f', + 'channel_versions': {'__start__': '00000000000000000000000000000005.0.5290678567601859', 'messages': '00000000000000000000000000000006.0.3205149138784782', 'branch:to:call_model': '00000000000000000000000000000006.0.14611156755133758'}, + 'versions_seen': {'__input__': {}, '__start__': {'__start__': '00000000000000000000000000000004.0.5736472536395331'}, 'call_model': {'branch:to:call_model': '00000000000000000000000000000005.0.1410174088651449'}}, + 'channel_values': {'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? 
Is there anything I can help you with?'), HumanMessage(content="what's my name?")], 'branch:to:call_model': None} + }, + metadata={'source': 'loop', 'writes': None, 'step': 3, 'parents': {}, 'thread_id': '1'}, + parent_config={...}, + pending_writes=[('8ab4155e-6b15-b885-9ce5-bed69a2c305c', 'messages', AIMessage(content='Your name is Bob.'))] + ), + CheckpointTuple( + config={...}, + checkpoint={ + 'v': 3, + 'ts': '2025-05-05T16:01:23.863173+00:00', + 'id': '1f029ca3-1790-616e-8002-9e021694a0cd', + 'channel_versions': {'__start__': '00000000000000000000000000000004.0.5736472536395331', 'messages': '00000000000000000000000000000003.0.7056767754077798', 'branch:to:call_model': '00000000000000000000000000000003.0.22059023329132854'}, + 'versions_seen': {'__input__': {}, '__start__': {'__start__': '00000000000000000000000000000001.0.7040775356287469'}, 'call_model': {'branch:to:call_model': '00000000000000000000000000000002.0.9300422176788571'}}, + 'channel_values': {'__start__': {'messages': [{'role': 'user', 'content': "what's my name?"}]}, 'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? 
Is there anything I can help you with?')]} + }, + metadata={'source': 'input', 'writes': {'__start__': {'messages': [{'role': 'user', 'content': "what's my name?"}]}}, 'step': 2, 'parents': {}, 'thread_id': '1'}, + parent_config={...}, + pending_writes=[('24ba39d6-6db1-4c9b-f4c5-682aeaf38dcd', 'messages', [{'role': 'user', 'content': "what's my name?"}]), ('24ba39d6-6db1-4c9b-f4c5-682aeaf38dcd', 'branch:to:call_model', None)] + ), + CheckpointTuple( + config={...}, + checkpoint={ + 'v': 3, + 'ts': '2025-05-05T16:01:23.862295+00:00', + 'id': '1f029ca3-178d-6f54-8001-d7b180db0c89', + 'channel_versions': {'__start__': '00000000000000000000000000000002.0.18673090920108737', 'messages': '00000000000000000000000000000003.0.7056767754077798', 'branch:to:call_model': '00000000000000000000000000000003.0.22059023329132854'}, + 'versions_seen': {'__input__': {}, '__start__': {'__start__': '00000000000000000000000000000001.0.7040775356287469'}, 'call_model': {'branch:to:call_model': '00000000000000000000000000000002.0.9300422176788571'}}, + 'channel_values': {'messages': [HumanMessage(content="hi! I'm bob"), AIMessage(content='Hi Bob! How are you doing today? Is there anything I can help you with?')]} + }, + metadata={'source': 'loop', 'writes': {'call_model': {'messages': AIMessage(content='Hi Bob! How are you doing today? 
Is there anything I can help you with?')}}, 'step': 1, 'parents': {}, 'thread_id': '1'}, + parent_config={...}, + pending_writes=[] + ), + CheckpointTuple( + config={...}, + checkpoint={ + 'v': 3, + 'ts': '2025-05-05T16:01:22.278960+00:00', + 'id': '1f029ca3-0874-6612-8000-339f2abc83b1', + 'channel_versions': {'__start__': '00000000000000000000000000000002.0.18673090920108737', 'messages': '00000000000000000000000000000002.0.30296526818059655', 'branch:to:call_model': '00000000000000000000000000000002.0.9300422176788571'}, + 'versions_seen': {'__input__': {}, '__start__': {'__start__': '00000000000000000000000000000001.0.7040775356287469'}}, + 'channel_values': {'messages': [HumanMessage(content="hi! I'm bob")], 'branch:to:call_model': None} + }, + metadata={'source': 'loop', 'writes': None, 'step': 0, 'parents': {}, 'thread_id': '1'}, + parent_config={...}, + pending_writes=[('8cbd75e0-3720-b056-04f7-71ac805140a0', 'messages', AIMessage(content='Hi Bob! How are you doing today? Is there anything I can help you with?'))] + ), + CheckpointTuple( + config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f029ca3-0870-6ce2-bfff-1f3f14c3e565'}}, + checkpoint={ + 'v': 3, + 'ts': '2025-05-05T16:01:22.277497+00:00', + 'id': '1f029ca3-0870-6ce2-bfff-1f3f14c3e565', + 'channel_versions': {'__start__': '00000000000000000000000000000001.0.7040775356287469'}, + 'versions_seen': {'__input__': {}}, + 'channel_values': {'__start__': {'messages': [{'role': 'user', 'content': "hi! I'm bob"}]}} + }, + metadata={'source': 'input', 'writes': {'__start__': {'messages': [{'role': 'user', 'content': "hi! I'm bob"}]}}, 'step': -1, 'parents': {}, 'thread_id': '1'}, + parent_config=None, + pending_writes=[('d458367b-8265-812c-18e2-33001d199ce6', 'messages', [{'role': 'user', 'content': "hi! 
I'm bob"}]), ('d458367b-8265-812c-18e2-33001d199ce6', 'branch:to:call_model', None)] + ) + ] + ``` + + + +#### Delete all checkpoints for a thread + +```python theme={null} +thread_id = "1" +checkpointer.delete_thread(thread_id) +``` + +## Database management + +If you are using any database-backed persistence implementation (such as Postgres or Redis) to store short and/or long-term memory, you will need to run migrations to set up the required schema before you can use it with your database. + +By convention, most database-specific libraries define a `setup()` method on the checkpointer or store instance that runs the required migrations. However, you should check with your specific implementation of [`BaseCheckpointSaver`](https://reference.langchain.com/python/langgraph/checkpoints/#langgraph.checkpoint.base.BaseCheckpointSaver) or [`BaseStore`](https://reference.langchain.com/python/langgraph/store/#langgraph.store.base.BaseStore) to confirm the exact method name and usage. + +We recommend running migrations as a dedicated deployment step, or you can ensure they're run as part of server startup. + +*** + + + [Edit this page on GitHub](https://github.com/langchain-ai/docs/edit/main/src/oss/langgraph/add-memory.mdx) or [file an issue](https://github.com/langchain-ai/docs/issues/new/choose). + + + + [Connect these docs](/use-these-docs) to Claude, VSCode, and more via MCP for real-time answers. + diff --git a/LLAMAINDEXCALLBACK.md b/LLAMAINDEXCALLBACK.md new file mode 100644 index 00000000..9e917be5 --- /dev/null +++ b/LLAMAINDEXCALLBACK.md @@ -0,0 +1,37 @@ +## Concept + +LlamaIndex provides callbacks to help debug, track, and trace the inner workings of the library. +Using the callback manager, as many callbacks as needed can be added. + +In addition to logging data related to events, you can also track the duration and number of occurrences +of each event. 
+
+Furthermore, a trace map of events is also recorded, and callbacks can use this data
+however they want. For example, the `LlamaDebugHandler` will, by default, print the trace of events
+after most operations.
+
+**Callback Event Types**
+While each callback may not leverage each event type, the following events are available to be tracked:
+
+- `CHUNKING` -> Logs for the before and after of text splitting.
+- `NODE_PARSING` -> Logs for the documents and the nodes that they are parsed into.
+- `EMBEDDING` -> Logs for the number of texts embedded.
+- `LLM` -> Logs for the template and response of LLM calls.
+- `QUERY` -> Keeps track of the start and end of each query.
+- `RETRIEVE` -> Logs for the nodes retrieved for a query.
+- `SYNTHESIZE` -> Logs for the result for synthesize calls.
+- `TREE` -> Logs for the summary and level of summaries generated.
+- `SUB_QUESTION` -> Log for a generated sub question and answer.
+
+You can implement your own callback to track and trace these events, or use an existing callback.
+
+## Modules
+
+Currently supported callbacks are as follows:
+
+- [TokenCountingHandler](/python/examples/observability/tokencountinghandler) -> Flexible token counting for prompt, completion, and embedding token usage. See [the migration details](/python/framework/module_guides/observability/callbacks/token_counting_migration)
+- [LlamaDebugHandler](/python/examples/observability/llamadebughandler) -> Basic tracking and tracing for events. Example usage can be found in the notebook below.
+- [WandbCallbackHandler](/python/examples/observability/wandbcallbackhandler) -> Tracking of events and traces using the Wandb Prompts frontend. More details are in the notebook below or at [Wandb](https://docs.wandb.ai/guides/prompts/quickstart)
+- [AimCallback](/python/examples/observability/aimcallback) -> Tracking of LLM inputs and outputs. Example usage can be found in the notebook below. 
+- [OpenInferenceCallbackHandler](/python/examples/observability/openinferencecallback) -> Tracking of AI model inferences. Example usage can be found in the notebook below. +- [OpenAIFineTuningHandler](https://github.com/jerryjliu/llama_index/blob/main/experimental/openai_fine_tuning/openai_fine_tuning.ipynb) -> Records all LLM inputs and outputs. Then, provides a function `save_finetuning_events()` to save inputs and outputs in a format suitable for fine-tuning with OpenAI. \ No newline at end of file diff --git a/LLAMAINDEXINSTRUMENT.md b/LLAMAINDEXINSTRUMENT.md new file mode 100644 index 00000000..9a6d6484 --- /dev/null +++ b/LLAMAINDEXINSTRUMENT.md @@ -0,0 +1,939 @@ +LlamaIndex provides **one-click observability** 🔭 to allow you to build principled LLM applications in a production setting. + +A key requirement for principled development of LLM applications over your data (RAG systems, agents) is being able to observe, debug, and evaluate +your system - both as a whole and for each component. + +This feature allows you to seamlessly integrate the LlamaIndex library with powerful observability/evaluation tools offered by our partners. +Configure a variable once, and you'll be able to do things like the following: + +- View LLM/prompt inputs/outputs +- Ensure that the outputs of any component (LLMs, embeddings) are performing as expected +- View call traces for both indexing and querying + +Each provider has similarities and differences. Take a look below for the full set of guides for each one! + +**NOTE:** + +Observability is now being handled via the [`instrumentation` module](/python/framework/module_guides/observability/instrumentation) (available in v0.10.20 and later.) + +A lot of the tooling and integrations mentioned in this page use our legacy `CallbackManager` or don't use `set_global_handler`. We've marked these integrations as such! 
+ +## Usage Pattern + +To toggle, you will generally just need to do the following: + +```python +from llama_index.core import set_global_handler + +# general usage +set_global_handler("", **kwargs) +``` + +Note that all `kwargs` to `set_global_handler` are passed to the underlying callback handler. + +And that's it! Executions will get seamlessly piped to downstream service and you'll be able to access features such as viewing execution traces of your application. + +## Integrations + +### OpenTelemetry + +[OpenTelemetry](https://opentelemetry.io) is a widely used open-source service for tracing and observability, with numerous backend integrations (such as Jaeger, Zipkin or Prometheus). + +Our OpenTelemetry integration traces all the events produced by pieces of LlamaIndex code, including LLMs, Agents, RAG pipeline components and many more: everything you would get out with LlamaIndex native instrumentation you can export in OpenTelemetry format! + +You can install the library with: + +```bash +pip install llama-index-observability-otel +``` + +And can use it in your code with the default settings, as in this example with a RAG pipeline: + +```python +from llama_index.observability.otel import LlamaIndexOpenTelemetry +from llama_index.core import SimpleDirectoryReader, VectorStoreIndex +from llama_index.llms.openai import OpenAI +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.core import Settings + +# initialize the instrumentation object +instrumentor = LlamaIndexOpenTelemetry() + +if __name__ == "__main__": + embed_model = OpenAIEmbedding(model_name="text-embedding-3-small") + llm = OpenAI(model="gpt-4.1-mini") + + # start listening! 
+ instrumentor.start_registering() + + # register events + documents = SimpleDirectoryReader( + input_dir="./data/paul_graham/" + ).load_data() + + index = VectorStoreIndex.from_documents(documents, embed_model=embed_model) + query_engine = index.as_query_engine(llm=llm) + + query_result_one = query_engine.query("Who is Paul?") + query_result_two = query_engine.query("What did Paul do?") +``` + +Or you can use a more complex and customized set-up, such as in the following example: + +```python +import json +from pydantic import BaseModel, Field +from typing import List + +from llama_index.observability.otel import LlamaIndexOpenTelemetry +from opentelemetry.exporter.otlp.proto.http.trace_exporter import ( + OTLPSpanExporter, +) + +# define a custom span exporter +span_exporter = OTLPSpanExporter("http://0.0.0.0:4318/v1/traces") + +# initialize the instrumentation object +instrumentor = LlamaIndexOpenTelemetry( + service_name_or_resource="my.test.service.1", + span_exporter=span_exporter, + debug=True, +) + + +if __name__ == "__main__": + instrumentor.start_registering() + # ... your code here +``` + +We also have a [demo repository](https://github.com/run-llama/agents-observability-demo) where we show how to trace agentic workflows and pipe the registered traces into a Postgres database. + +### LlamaTrace (Hosted Arize Phoenix) + +We've partnered with Arize on [LlamaTrace](https://llamatrace.com/), a hosted tracing, observability, and evaluation platform that works natively with LlamaIndex open-source users and has integrations with LlamaCloud. + +This is built upon the open-source Arize [Phoenix](https://github.com/Arize-ai/phoenix) project. Phoenix provides a notebook-first experience for monitoring your models and LLM Applications by providing: + +- LLM Traces - Trace through the execution of your LLM Application to understand the internals of your LLM Application and to troubleshoot problems related to things like retrieval and tool execution. 
+- LLM Evals - Leverage the power of large language models to evaluate your generative model or application's relevance, toxicity, and more. + +#### Usage Pattern + +To install the integration package, do `pip install -U llama-index-callbacks-arize-phoenix`. + +Then create an account on LlamaTrace: https://llamatrace.com/login. Create an API key and put it in the `PHOENIX_API_KEY` variable below. + +Then run the following code: + +```python +# Phoenix can display in real time the traces automatically +# collected from your LlamaIndex application. +# Run all of your LlamaIndex applications as usual and traces +# will be collected and displayed in Phoenix. + +# setup Arize Phoenix for logging/observability +import llama_index.core +import os + +PHOENIX_API_KEY = "" +os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}" +llama_index.core.set_global_handler( + "arize_phoenix", endpoint="https://llamatrace.com/v1/traces" +) + +... +``` + +#### Guides + +- [LlamaCloud Agent with LlamaTrace](https://github.com/run-llama/llamacloud-demo/blob/main/examples/tracing/llamacloud_tracing_phoenix.ipynb) + +![](./../../_static/integrations/arize_phoenix.png) + +### SigNoz + +[SigNoz](https://signoz.io/) is an open source observability framework. It is built natively off of OpenTelemetry, offers traces, logs, and metrics all in one pane, and has both self hosted and cloud deployment options. By using SigNoz with LlamaIndex, you can view detailed traces of all RAG and Agent workflows, while keeping track of important metrics like token usage, latency, error rates, LLM model distribution and much more. 
+ +#### Usage Pattern + +Install the following dependencies: + +```bash +pip install \ + opentelemetry-distro \ + opentelemetry-exporter-otlp \ + opentelemetry-instrumentation-httpx \ + opentelemetry-instrumentation-system-metrics \ + llama-index \ + openinference-instrumentation-llama-index +``` + +Next, add automatic instrumentation: + +```bash +opentelemetry-bootstrap --action=install +``` + +Then, run your LlamaIndex application with auto-instrumentation: + +```bash +OTEL_RESOURCE_ATTRIBUTES="service.name=" \ +OTEL_EXPORTER_OTLP_ENDPOINT="https://ingest..signoz.cloud:443" \ +OTEL_EXPORTER_OTLP_HEADERS="signoz-ingestion-key=" \ +OTEL_EXPORTER_OTLP_PROTOCOL=grpc \ +OTEL_TRACES_EXPORTER=otlp \ +OTEL_METRICS_EXPORTER=otlp \ +OTEL_LOGS_EXPORTER=otlp \ +OTEL_PYTHON_LOG_CORRELATION=true \ +OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true \ +opentelemetry-instrument +``` + +- `` is the name of your service +- Set the `` to match your SigNoz Cloud [region](https://signoz.io/docs/ingestion/signoz-cloud/overview/#endpoint) +- Replace `` with your SigNoz [ingestion key](https://signoz.io/docs/ingestion/signoz-cloud/keys/) +- Replace `` with the actual command you would use to run your application. For example: `python main.py` + +> 📌 Note: Using self-hosted SigNoz? Most steps are identical. To adapt this guide, update the endpoint and remove the ingestion key header as shown in [Cloud → Self-Hosted](https://signoz.io/docs/ingestion/cloud-vs-self-hosted/#cloud-to-self-hosted). + +You will now be able to see any traces, logs, and metrics that are automatically or manually exported by your LlamaIndex application usage. 
+ +![LlamaIndex Trace View](https://signoz.io/img/docs/llm/llamaindex/llamaindex-detailed-trace-view.webp) + +#### Example Guides + +- [SigNoz LlamaIndex Integration Docs](https://signoz.io/docs/llamaindex-observability/) +- [SigNoz LLamaIndex Q&A RAG Demo](https://github.com/SigNoz/llamaindex-rag-opentelemetry-demo) + +### Weights and Biases (W&B) Weave + +[W&B Weave](https://weave-docs.wandb.ai/) is a framework for tracking, experimenting with, evaluating, deploying, and improving LLM applications. Designed for scalability and flexibility, Weave supports every stage of your application development workflow. + +#### Usage Pattern + +The integration leverages LlamaIndex's [`instrumentation` module](/python/framework/module_guides/observability/instrumentation) to register spans/events as Weave calls. By default, Weave automatically patches and tracks calls to [common LLM libraries and frameworks](https://weave-docs.wandb.ai/guides/integrations/). + +Install the `weave` library: + +```bash +pip install weave +``` + +Get a W&B API Key: + +If you don't already have a W&B account, create one by visiting [https://wandb.ai](https://wandb.ai) and copy your API key from [https://wandb.ai/authorize](https://wandb.ai/authorize). When prompted to authenticate, enter the API key. + +```python +import weave +from llama_index.llms.openai import OpenAI + +# Initialize Weave with your project name +weave.init("llamaindex-demo") + +# All LlamaIndex operations are now automatically traced +llm = OpenAI(model="gpt-4o-mini") +response = llm.complete("William Shakespeare is ") +print(response) +``` + +![weave quickstart](./../../_static/integrations/weave/weave_quickstart.png) + +Traces include execution time, token usage, cost, inputs/outputs, errors, nested operations, and streaming data. If you are new to Weave tracing, learn more about how to navigate it [here](https://weave-docs.wandb.ai/guides/tracking/trace-tree). 
+
+If you have a custom function which is not traced, decorate it with [`@weave.op()`](https://weave-docs.wandb.ai/guides/tracking/ops).
+
+You can also control the patching behavior using the `autopatch_settings` argument in `weave.init`. For example if you don't want to trace a library/framework you can turn it off like this:
+
+```python
+weave.init(..., autopatch_settings={"openai": {"enabled": False}})
+```
+
+No additional LlamaIndex configuration is required; tracing begins once `weave.init()` is called.
+
+#### Guides
+
+The integration with LlamaIndex supports almost every component of LlamaIndex -- streaming/async, completions, chat, tool calling, agents, workflows, and RAG support. Learn more about them in the official [W&B Weave × LlamaIndex](https://weave-docs.wandb.ai/guides/integrations/llamaindex) documentation.
+
+### MLflow
+
+[MLflow](https://mlflow.org/docs/latest/llms/tracing/index.html) is an open-source MLOps/LLMOps platform that focuses on the full lifecycle for machine learning projects, ensuring that each phase is manageable, traceable, and reproducible.
+**MLflow Tracing** is an OpenTelemetry-based tracing capability and supports one-click instrumentation for LlamaIndex applications.
+
+#### Usage Pattern
+
+Since MLflow is open-source, you can start using it without any account creation or API key setup. Jump straight into the code after installing the MLflow package!
+
+```python
+import mlflow
+
+mlflow.llama_index.autolog() # Enable mlflow tracing
+```
+
+![](./../../_static/integrations/mlflow/mlflow.gif)
+
+#### Guides
+
+MLflow LlamaIndex integration also provides experiment tracking, evaluation, dependency management, and more. Check out the [MLflow documentation](https://mlflow.org/docs/latest/llms/llama-index/index.html) for more details.
+
+#### Support Table
+
+MLflow Tracing supports the full range of LlamaIndex features. 
Some new features like [AgentWorkflow](https://www.llamaindex.ai/blog/introducing-agentworkflow-a-powerful-system-for-building-ai-agent-systems) requires MLflow >= 2.18.0. + +| Streaming | Async | Engine | Agents | Workflow | AgentWorkflow | +| --------- | ----- | ------ | ------ | ------------ | ------------- | +| ✅ | ✅ | ✅ | ✅ | ✅ (>= 2.18) | ✅ (>= 2.18) | + +### OpenLLMetry + +[OpenLLMetry](https://github.com/traceloop/openllmetry) is an open-source project based on OpenTelemetry for tracing and monitoring +LLM applications. It connects to [all major observability platforms](https://www.traceloop.com/docs/openllmetry/integrations/introduction) and installs in minutes. + +#### Usage Pattern + +```python +from traceloop.sdk import Traceloop + +Traceloop.init() +``` + +#### Guides + +- [OpenLLMetry](/python/examples/observability/openllmetry) + +![](./../../_static/integrations/openllmetry.png) + +### Arize Phoenix (local) + +You can also choose to use a **local** instance of Phoenix through the open-source project. + +In this case you don't need to create an account on LlamaTrace or set an API key for Phoenix. The phoenix server will launch locally. + +#### Usage Pattern + +To install the integration package, do `pip install -U llama-index-callbacks-arize-phoenix`. + +Then run the following code: + +```python +# Phoenix can display in real time the traces automatically +# collected from your LlamaIndex application. +# Run all of your LlamaIndex applications as usual and traces +# will be collected and displayed in Phoenix. + +import phoenix as px + +# Look for a URL in the output to open the App in a browser. +px.launch_app() +# The App is initially empty, but as you proceed with the steps below, +# traces will appear automatically as your LlamaIndex application runs. + +import llama_index.core + +llama_index.core.set_global_handler("arize_phoenix") +... 
+``` + +#### Example Guides + +- [Auto-Retrieval Guide with Pinecone and Arize Phoenix](https://docs.llamaindex.ai/en/latest/examples/vector_stores/pinecone_auto_retriever/?h=phoenix) +- [Arize Phoenix Tracing Tutorial](https://colab.research.google.com/github/Arize-ai/phoenix/blob/main/tutorials/tracing/llama_index_tracing_tutorial.ipynb) + +### Langfuse 🪢 + +[Langfuse](https://langfuse.com/docs) is an open source LLM engineering platform to help teams collaboratively debug, analyze and iterate on their LLM Applications. With the Langfuse integration, you can track and monitor performance, traces, and metrics of your LlamaIndex application. Detailed [traces](https://langfuse.com/docs/tracing) of the context augmentation and the LLM querying processes are captured and can be inspected directly in the Langfuse UI. + +#### Usage Pattern + +Make sure you have both `llama-index` and `langfuse` installed. + +```bash +pip install llama-index langfuse openinference-instrumentation-llama-index +``` + +Next, set up your Langfuse API keys. You can get these keys by signing up for a free [Langfuse Cloud](https://cloud.langfuse.com/) account or by [self-hosting Langfuse](https://langfuse.com/self-hosting). These environment variables are essential for the Langfuse client to authenticate and send data to your Langfuse project. + +```python +import os + +# Get keys for your project from the project settings page: https://cloud.langfuse.com + +os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..." +os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..." +os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" # 🇪🇺 EU region +# os.environ["LANGFUSE_HOST"] = "https://us.cloud.langfuse.com" # 🇺🇸 US region +``` + +With the environment variables set, we can now initialize the Langfuse client. `get_client()` initializes the Langfuse client using the credentials provided in the environment variables. 
+ +```python +from langfuse import get_client + +langfuse = get_client() + +# Verify connection +if langfuse.auth_check(): + print("Langfuse client is authenticated and ready!") +else: + print("Authentication failed. Please check your credentials and host.") +``` + +Now, we initialize the [OpenInference LlamaIndex instrumentation](https://docs.arize.com/phoenix/tracing/integrations-tracing/llamaindex). This third-party instrumentation automatically captures LlamaIndex operations and exports OpenTelemetry (OTel) spans to Langfuse. + +```python +from openinference.instrumentation.llama_index import LlamaIndexInstrumentor + +# Initialize LlamaIndex instrumentation +LlamaIndexInstrumentor().instrument() +``` + +You can now see the logs of your LlamaIndex application in Langfuse: + +[LlamaIndex example trace](https://langfuse.com/images/cookbook/integration-llamaindex-workflows/llamaindex-trace.gif) + +_[Example trace link in Langfuse](https://cloud.langfuse.com/project/cloramnkj0002jz088vzn1ja4/traces/6f554d6b-a2bc-4fba-904f-aa54de2897ca?display=preview)_ + +#### Example Guides + +- [Langfuse Documentation](https://langfuse.com/docs/integrations/llama-index/get-started) +- [Tracing LlamaIndex Agents](https://langfuse.com/docs/integrations/llama-index/workflows) + +### Literal AI + +[Literal AI](https://literalai.com/) is the go-to LLM evaluation and observability solution, enabling engineering and product teams to ship LLM applications reliably, faster and at scale. This is possible through a collaborative development cycle involving prompt engineering, LLM observability, LLM evaluation and LLM monitoring. Conversation Threads and Agent Runs can be automatically logged on Literal AI. + +The simplest way to get started and try out Literal AI is to signup on our [cloud instance](https://cloud.getliteral.ai/). +You can then navigate to **Settings**, grab your API key, and start logging! 
+ +#### Usage Pattern + +- Install the Literal AI Python SDK with `pip install literalai` +- On your Literal AI project, go to **Settings** and grab your API key +- If you are using a self-hosted instance of Literal AI, also make note of its base URL + +Then add the following lines to your application code: + +```python +from llama_index.core import set_global_handler + +# You should provide your Literal AI API key and base url using the following environment variables: +# LITERAL_API_KEY, LITERAL_API_URL +set_global_handler("literalai") +``` + +#### Example Guides + +- [Literal AI integration with Llama Index](https://docs.getliteral.ai/integrations/llama-index) +- [Build a Q&A application with LLamaIndex and monitor it with Literal AI](https://github.com/Chainlit/literal-cookbook/blob/main/python/llamaindex-integration) + +### Comet Opik + +[Opik](https://www.comet.com/docs/opik/?utm_source=llama-index&utm_medium=docs&utm_campaign=opik&utm_content=home_page) is an open-source end-to-end LLM Evaluation Platform built by Comet. + +To get started, simply sign up for an account on [Comet](https://www.comet.com/signup?from=llm&utm_medium=github&utm_source=llama-index&utm_campaign=opik) and grab your API key. + +#### Usage Pattern + +- Install the Opik Python SDK with `pip install opik` +- In Opik, get your API key from the user menu. +- If you are using a self-hosted instance of Opik, also make note of its base URL. + +You can configure Opik using the environment variables `OPIK_API_KEY`, `OPIK_WORKSPACE` and `OPIK_URL_OVERRIDE` if you are using a [self-hosted instance](https://www.comet.com/docs/opik/self-host/self_hosting_opik). 
You can set these by calling: + +```bash +export OPIK_API_KEY="" +export OPIK_WORKSPACE="" + +# Optional +#export OPIK_URL_OVERRIDE="" +``` + +You can now use the Opik integration with LlamaIndex by setting the global handler: + +```python +from llama_index.core import Document, VectorStoreIndex, set_global_handler + +# You should provide your OPIK API key and Workspace using the following environment variables: +# OPIK_API_KEY, OPIK_WORKSPACE +set_global_handler( + "opik", +) + +# This example uses OpenAI by default so don't forget to set an OPENAI_API_KEY +index = VectorStoreIndex.from_documents([Document.example()]) +query_engine = index.as_query_engine() + +questions = [ + "Tell me about LLMs", + "How do you fine-tune a neural network ?", + "What is RAG ?", +] + +for question in questions: + print(f"> \033[92m{question}\033[0m") + response = query_engine.query(question) + print(response) +``` + +You will see the following traces in Opik: + +![Opik integration with LlamaIndex](./../../_static/integrations/opik.png) + +#### Example Guides + +- [Llama-index + Opik documentation page](https://www.comet.com/docs/opik/tracing/integrations/llama_index?utm_source=llamaindex&utm_medium=docs&utm_campaign=opik) +- [Llama-index integration cookbook](https://www.comet.com/docs/opik/cookbook/llama-index?utm_source=llama-index&utm_medium=docs&utm_campaign=opik) + +### Argilla + +[Argilla](https://github.com/argilla-io/argilla) is a collaboration tool for AI engineers and domain experts who need to build high-quality datasets for their projects. + +To get started, you need to deploy the Argilla server. If you have not done so, you can easily deploy it following this [guide](https://docs.argilla.io/latest/getting_started/quickstart/). + +#### Usage Pattern + +- Install the Argilla LlamaIndex integration package with `pip install argilla-llama-index` +- Initialize the ArgillaHandler. 
The `<api_key>` is in the `My Settings` page of your Argilla Space but make sure you are logged in with the `owner` account you used to create the Space. The `<api_url>` is the URL shown in your browser. +- Add the ArgillaHandler to the dispatcher. + +```python +from llama_index.core.instrumentation import get_dispatcher +from argilla_llama_index import ArgillaHandler + +argilla_handler = ArgillaHandler( + dataset_name="query_llama_index", + api_url="http://localhost:6900", + api_key="argilla.apikey", + number_of_retrievals=2, +) +root_dispatcher = get_dispatcher() +root_dispatcher.add_span_handler(argilla_handler) +root_dispatcher.add_event_handler(argilla_handler) +``` + +#### Example Guides + +- [Getting started with Argilla's LlamaIndex Integration](https://github.com/argilla-io/argilla-llama-index/blob/main/docs/tutorials/getting_started.ipynb) +- [Other example tutorials](https://github.com/argilla-io/argilla-llama-index/tree/main/docs/tutorials) + +![Argilla integration with LlamaIndex](./../../_static/integrations/argilla.png) + +### Agenta + +[Agenta](https://agenta.ai) is an **open-source** LLMOps platform that helps developers and product teams build robust AI applications powered by LLMs. It offers all the tools for **observability**, **prompt management and engineering**, and **LLM evaluation**. 
+ +#### Usage Pattern + +Install the necessary dependencies for the integration: + +```bash +pip install agenta llama-index openinference-instrumentation-llama-index +``` + +Set up your API credentials and initialize Agenta: + +```python +import os +import agenta as ag +from openinference.instrumentation.llama_index import LlamaIndexInstrumentor + +# Set your Agenta credentials +os.environ["AGENTA_API_KEY"] = "your_agenta_api_key" +os.environ[ + "AGENTA_HOST" +] = "https://cloud.agenta.ai" # Use your self-hosted URL if applicable + +# Initialize Agenta SDK +ag.init() + +# Enable LlamaIndex instrumentation +LlamaIndexInstrumentor().instrument() +``` + +Build your instrumented application: + +```python +@ag.instrument() +def document_search_app(user_query: str): + """ + Document search application using LlamaIndex. + Loads documents, builds a searchable index, and answers user queries. + """ + # Load documents from local directory + docs = SimpleDirectoryReader("data").load_data() + + # Build vector search index + search_index = VectorStoreIndex.from_documents(docs) + + # Initialize query processor + query_processor = search_index.as_query_engine() + + # Process user query + answer = query_processor.query(user_query) + + return answer +``` + +Once this is set up, Agenta will automatically capture all execution steps. You can then view the traces in Agenta to debug your application, link them to specific configurations and prompts, evaluate their performance, query the data, and monitor key metrics. 
+ +![Agenta integration with LlamaIndex](./../../_static/integrations/agenta.png) + +#### Example Guides + +- [Documentation Observability for LlamaIndex with Agenta](https://docs.agenta.ai/observability/integrations/llamaindex) +- [Notebook Observability for LlamaIndex with Agenta](https://github.com/agenta-ai/agenta/blob/main/examples/jupyter/integrations/observability-openinference-llamaindex.ipynb) + +### Deepeval + +[DeepEval (by Confident AI)](https://github.com/confident-ai/deepeval) is an open-source evaluation framework for LLM applications. As you "unit test" your LLM app using the 14+ default metrics DeepEval currently offers (summarization, hallucination, answer relevancy, faithfulness, RAGAS, etc.), you can debug failing test cases through this tracing integration with LlamaIndex, or debug unsatisfactory evaluations in **production** through DeepEval's hosted evaluation platform, [Confident AI](https://documentation.confident-ai.com/docs), that runs referenceless evaluations in production. + +#### Usage Pattern + +```bash +pip install -U deepeval llama-index +``` + +```python +import deepeval +from deepeval.integrations.llama_index import instrument_llama_index + +import llama_index.core.instrumentation as instrument + +# Login +deepeval.login("") + +# Let DeepEval collect traces +instrument_llama_index(instrument.get_dispatcher()) +``` + +![tracing](https://confident-bucket.s3.us-east-1.amazonaws.com/llama-index%3Atrace.gif) + +#### Guides + +- [Evaluate Llama Index Agents](https://deepeval.com/integrations/frameworks/llamaindex) +- [Tracing Llama Index Agents](https://documentation.confident-ai.com/docs/llm-tracing/integrations/llamaindex) + +### Maxim AI + +[Maxim AI](https://www.getmaxim.ai/) is an Agent Simulation, Evaluation & Observability platform that helps developers build, monitor, and improve their LLM applications. 
The Maxim integration with LlamaIndex provides comprehensive tracing, monitoring, and evaluation capabilities for your RAG systems, agents, and other LLM workflows. + +#### Usage Pattern + +Install the required packages: + +```bash +pip install maxim-py +``` + +Set up your environment variables: + +```python +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Get environment variables +MAXIM_API_KEY = os.getenv("MAXIM_API_KEY") +MAXIM_LOG_REPO_ID = os.getenv("MAXIM_LOG_REPO_ID") + +# Verify required environment variables are set +if not MAXIM_API_KEY: + raise ValueError("MAXIM_API_KEY environment variable is required") +if not MAXIM_LOG_REPO_ID: + raise ValueError("MAXIM_LOG_REPO_ID environment variable is required") +``` + +Initialize Maxim and instrument LlamaIndex: + +```python +from maxim import Config, Maxim +from maxim.logger import LoggerConfig +from maxim.logger.llamaindex import instrument_llamaindex + +# Initialize Maxim logger +maxim = Maxim(Config(api_key=os.getenv("MAXIM_API_KEY"))) +logger = maxim.logger(LoggerConfig(id=os.getenv("MAXIM_LOG_REPO_ID"))) + +# Instrument LlamaIndex with Maxim observability +# Set debug=True to see detailed logs during development +instrument_llamaindex(logger, debug=True) + +print("✅ Maxim instrumentation enabled for LlamaIndex") +``` + +Now your LlamaIndex applications will automatically send traces to Maxim: + +```python +from llama_index.core.agent import FunctionAgent +from llama_index.core.tools import FunctionTool +from llama_index.llms.openai import OpenAI + + +# Define tools and create agent +def add_numbers(a: float, b: float) -> float: + """Add two numbers together.""" + return a + b + + +add_tool = FunctionTool.from_defaults(fn=add_numbers) +llm = OpenAI(model="gpt-4o-mini", temperature=0) + +agent = FunctionAgent( + tools=[add_tool], + llm=llm, + verbose=True, + system_prompt="You are a helpful calculator assistant.", +) + +# This will be automatically 
logged by Maxim instrumentation +import asyncio + +response = asyncio.run(agent.run("What is 15 + 25?")) +print(f"Response: {response}") +``` + +#### Guides + +- [Maxim Instrumentation Cookbook](/python/examples/observability/maxim-instrumentation) +- [Maxim AI Documentation](https://www.getmaxim.ai/docs/sdk/python/integrations/llamaindex/llamaindex) + +![tracing](https://cdn.getmaxim.ai/public/images/llamaindex.gif) + +## Other Partner `One-Click` Integrations (Legacy Modules) + +These partner integrations use our legacy `CallbackManager` or third-party calls. + +### Langfuse + +This integration is deprecated. We recommend using the new instrumentation-based integration with Langfuse as described [here](https://langfuse.com/docs/integrations/llama-index/get-started). + +#### Usage Pattern + +```python +from llama_index.core import set_global_handler + +# Make sure you've installed the 'llama-index-callbacks-langfuse' integration package. + +# NOTE: Set your environment variables 'LANGFUSE_SECRET_KEY', 'LANGFUSE_PUBLIC_KEY' and 'LANGFUSE_HOST' +# as shown in your langfuse.com project settings. + +set_global_handler("langfuse") +``` + +#### Guides + +- [Langfuse Callback Handler](/python/examples/observability/langfusecallbackhandler) +- [Langfuse Tracing with PostHog](/python/examples/observability/langfusemistralposthog) + +![langfuse-tracing](https://static.langfuse.com/llamaindex-langfuse-docs.gif) + +### OpenInference + +[OpenInference](https://github.com/Arize-ai/open-inference-spec) is an open standard for capturing and storing AI model inferences. It enables experimentation, visualization, and evaluation of LLM applications using LLM observability solutions such as [Phoenix](https://github.com/Arize-ai/phoenix). 
+ +#### Usage Pattern + +```python +import llama_index.core + +llama_index.core.set_global_handler("openinference") + +# NOTE: No need to do the following +from llama_index.callbacks.openinference import OpenInferenceCallbackHandler +from llama_index.core.callbacks import CallbackManager +from llama_index.core import Settings + +# callback_handler = OpenInferenceCallbackHandler() +# Settings.callback_manager = CallbackManager([callback_handler]) + +# Run your LlamaIndex application here... +for query in queries: + query_engine.query(query) + +# View your LLM app data as a dataframe in OpenInference format. +from llama_index.core.callbacks.open_inference_callback import as_dataframe + +query_data_buffer = llama_index.core.global_handler.flush_query_data_buffer() +query_dataframe = as_dataframe(query_data_buffer) +``` + +**NOTE**: To unlock capabilities of Phoenix, you will need to define additional steps to feed in query/ context dataframes. See below! + +#### Guides + +- [OpenInference Callback Handler](/python/examples/observability/openinferencecallback) +- [Evaluating Search and Retrieval with Arize Phoenix](https://colab.research.google.com/github/Arize-ai/phoenix/blob/main/tutorials/llama_index_search_and_retrieval_tutorial.ipynb) + +### TruEra TruLens + +TruLens allows users to instrument/evaluate LlamaIndex applications, through features such as feedback functions and tracing. 
+ +#### Usage Pattern + Guides + +```python +# use trulens +from trulens_eval import TruLlama + +tru_query_engine = TruLlama(query_engine) + +# query +tru_query_engine.query("What did the author do growing up?") +``` + +![](./../../_static/integrations/trulens.png) + +#### Guides + +- [Trulens Guide](/python/framework/community/integrations/trulens) +- [Quickstart Guide with LlamaIndex + TruLens](https://github.com/truera/trulens/blob/trulens-eval-0.20.3/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb) +- [Google Colab](https://colab.research.google.com/github/truera/trulens/blob/trulens-eval-0.20.3/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb) + +### HoneyHive + +HoneyHive allows users to trace the execution flow of any LLM workflow. Users can then debug and analyze their traces, or customize feedback on specific trace events to create evaluation or fine-tuning datasets from production. + +#### Usage Pattern + +```python +from llama_index.core import set_global_handler + +set_global_handler( + "honeyhive", + project="My HoneyHive Project", + name="My LLM Workflow Name", + api_key="MY HONEYHIVE API KEY", +) + +# NOTE: No need to do the following +from llama_index.core.callbacks import CallbackManager + +# from honeyhive.utils.llamaindex_tracer import HoneyHiveLlamaIndexTracer +from llama_index.core import Settings + +# hh_tracer = HoneyHiveLlamaIndexTracer( +# project="My HoneyHive Project", +# name="My LLM Workflow Name", +# api_key="MY HONEYHIVE API KEY", +# ) +# Settings.callback_manager = CallbackManager([hh_tracer]) +``` + +![](./../../_static/integrations/honeyhive.png) +![](./../../_static/integrations/perfetto.png) +_Use Perfetto to debug and analyze your HoneyHive traces_ + +#### Guides + +- [HoneyHive Callback Handler](/python/examples/observability/honeyhivellamaindextracer) + +### PromptLayer + +PromptLayer allows you to track analytics across LLM calls, tagging, analyzing, and evaluating prompts for various use-cases. 
Use it with LlamaIndex to track the performance of your RAG prompts and more. + +#### Usage Pattern + +```python +import os + +os.environ["PROMPTLAYER_API_KEY"] = "pl_7db888a22d8171fb58aab3738aa525a7" + +from llama_index.core import set_global_handler + +# pl_tags are optional, to help you organize your prompts and apps +set_global_handler("promptlayer", pl_tags=["paul graham", "essay"]) +``` + +#### Guides + +- [PromptLayer](/python/examples/observability/promptlayerhandler) + +### Langtrace + +[Langtrace](https://github.com/Scale3-Labs/langtrace) is a robust open-source tool that supports OpenTelemetry and is designed to trace, evaluate, and manage LLM applications seamlessly. Langtrace integrates directly with LlamaIndex, offering detailed, real-time insights into performance metrics such as accuracy, evaluations, and latency. + +#### Install + +```shell +pip install langtrace-python-sdk +``` + +#### Usage Pattern + +```python +from langtrace_python_sdk import ( + langtrace, +) # Must precede any llm module imports + +langtrace.init(api_key="") +``` + +#### Guides + +- [Langtrace](https://docs.langtrace.ai/supported-integrations/llm-frameworks/llamaindex) + +### OpenLIT + +[OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native GenAI and LLM Application Observability tool. It's designed to make the integration process of observability into GenAI projects with just a single line of code. OpenLIT provides OpenTelemetry Auto instrumentation for various LLMs, VectorDBs and Frameworks like LlamaIndex. OpenLIT provides insights into your LLM Applications performance, tracing of requests, overview metrics on usage like costs, tokens and a lot more. 
+ +#### Install + +```shell +pip install openlit +``` + +#### Usage Pattern + +```python +import openlit + +openlit.init() +``` + +#### Guides + +- [OpenLIT's Official Documentation](https://docs.openlit.io/latest/integrations/llama-index) + +### AgentOps + +[AgentOps](https://github.com/AgentOps-AI/agentops) helps developers build, evaluate, +and monitor AI agents. AgentOps will help build agents from prototype to production, +enabling agent monitoring, LLM cost tracking, benchmarking, and more. + +#### Install + +```shell +pip install llama-index-instrumentation-agentops +``` + +#### Usage Pattern + +```python +from llama_index.core import set_global_handler + +# NOTE: Feel free to set your AgentOps environment variables (e.g., 'AGENTOPS_API_KEY') +# as outlined in the AgentOps documentation, or pass the equivalent keyword arguments +# anticipated by AgentOps' AOClient as **eval_params in set_global_handler. + +set_global_handler("agentops") +``` + +### Simple (LLM Inputs/Outputs) + +This simple observability tool prints every LLM input/output pair to the terminal. Most useful for when you need to quickly enable debug logging on your LLM application. + +#### Usage Pattern + +```python +import llama_index.core + +llama_index.core.set_global_handler("simple") +``` + +#### Guides + +- [MLflow](https://mlflow.org/docs/latest/llms/llama-index/index.html) + +## More observability + +- [Callbacks Guide](/python/framework/module_guides/observability/callbacks) \ No newline at end of file diff --git a/OVERVIEW.md b/OVERVIEW.md new file mode 100644 index 00000000..f1b9124b --- /dev/null +++ b/OVERVIEW.md @@ -0,0 +1,553 @@ +# AgentAdapter Message Accumulation & Tracing: Investigation Overview + +## 1. Status Quo: How Each Framework Handles Messages Across Invocations + +### smolagents + +**Source of truth**: `agent.memory.steps` (via `write_memory_to_messages()`) + +**Key finding**: By default, smolagents **resets memory on every `.run()` call**. 
+ +```python +# smolagents/agents.py - MultiStepAgent.run() +def run(self, task, stream=False, reset=True, ...): # reset=True is the DEFAULT + if reset: + self.memory.reset() # Clears all steps + self.monitor.reset() +``` + +The MASEval adapter calls `self.agent.run(query)` without specifying `reset=False`, meaning: +- Each invocation starts with a **clean memory** +- `get_messages()` (which calls `write_memory_to_messages()`) returns **only the current run's messages** +- Messages from previous invocations are **lost** from the agent's perspective + +However, smolagents *can* accumulate if `reset=False` is passed. This is used in multi-turn scenarios like GradioUI. + +**What smolagents does well for tracing**: Each `ActionStep` in memory has a `model_input_messages` field that captures **exactly what the LLM saw** when generating that step's response. This is the gold standard for answering "what context did the agent have?" + +**Documentation sources**: [smolagents agents.py source](https://github.com/huggingface/smolagents), `AgentMemory.reset()` clears `self.steps = []`. + +--- + +### LangGraph + +**Source of truth depends on mode**: +- Stateless (no checkpointer): `_last_result` cache in adapter +- Stateful (with checkpointer + thread_id): `graph.get_state(config).values["messages"]` + +**Key finding**: LangGraph has **two completely different behaviors** depending on configuration. 
+ +#### Stateless Mode (current default in adapter) + +```python +# maseval langgraph adapter _run_agent(): +initial_state = {"messages": [HumanMessage(content=query)]} +result = self.agent.invoke(initial_state) +self._last_result = result # Overwritten each call +``` + +- Each `invoke()` creates a **fresh state** with only the new user message +- The result is cached in `_last_result`, which is **overwritten** on the next call +- `get_messages()` returns **only the last invocation's messages** +- Previous invocations' messages are **lost** + +#### Stateful Mode (with checkpointer + thread_id) + +When a checkpointer and `thread_id` are configured, LangGraph **accumulates messages across invocations** via its checkpoint system. The `add_messages` reducer in `MessagesState` appends new messages to the checkpointed state. + +```python +# From LANGCHAIN.md documentation: +checkpointer = InMemorySaver() +graph = builder.compile(checkpointer=checkpointer) +config = {"configurable": {"thread_id": "1"}} + +# First call +graph.invoke({"messages": [{"role": "user", "content": "hi! I'm bob"}]}, config) +# Second call - messages from first call are still in checkpoint state +graph.invoke({"messages": [{"role": "user", "content": "what's my name?"}]}, config) +# Agent remembers "bob" because all messages are accumulated in checkpoint +``` + +In this mode, `get_state(config).values["messages"]` returns **ALL messages from all invocations**, with no boundary markers between invocations. The `get_state_history(config)` API provides step-by-step checkpoint snapshots, which is LangGraph's mechanism for seeing intermediate states. + +**Important caveat**: Accumulation depends on using `MessagesState` (which has the `add_messages` reducer annotation). A plain `TypedDict` with `messages: list` won't automatically accumulate; behavior depends on node implementation. + +**Documentation source**: `LANGCHAIN.md` (local), LangGraph memory documentation. 
+ +--- + +### LlamaIndex + +**Source of truth**: Per-run `Context` store (no persistent state on the workflow object) + +**Key finding**: LlamaIndex AgentWorkflow is **stateless between runs by default**. There is **no `.memory` attribute** on `AgentWorkflow` itself. + +```python +# LlamaIndex AgentWorkflow.run(): +def run(self, user_msg=None, chat_history=None, memory=None, ctx=None, ...): + if ctx is not None and ctx.is_running: + return super().run(ctx=ctx, **kwargs) + else: + # Creates fresh Context internally when ctx=None + start_event = start_event or AgentWorkflowStartEvent(...) + return super().run(start_event=start_event, ctx=ctx) +``` + +The MASEval adapter does NOT pass `ctx` between calls: +```python +# maseval llamaindex adapter _run_agent_sync(): +async def run_async(): + handler = self.agent.run(user_msg=query) # No ctx passed + result = await handler + return result +``` + +- Each `.run()` creates a **fresh Context** with a **fresh `ChatMemoryBuffer`** +- `handler.ctx` (which contains the memory) is **discarded** after each call +- The adapter's `get_messages()` tries `self.agent.memory` (which doesn't exist on AgentWorkflow), then falls back to `_message_cache` +- `_message_cache` is **overwritten** each run via `_extract_messages_from_result()` +- Previous invocations' messages are **lost** + +**To persist state**, you must explicitly pass `ctx=handler.ctx` or create a shared `Context` object. + +**Documentation sources**: [LlamaIndex Agent State docs](https://docs.llamaindex.ai/en/stable/understanding/agent/state/), [LlamaIndex Agent Memory docs](https://developers.llamaindex.ai/python/framework/module_guides/deploying/agents/memory/), [AgentWorkflow source](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/agent/workflow/multi_agent_workflow.py). + +--- + +## 2. Summary Table: Current Behavior + +| Framework | Default Accumulation | Can Accumulate? 
| MASEval Adapter Behavior | `get_messages()` Returns | +|-------------|---------------------|-----------------|--------------------------|--------------------------| +| smolagents | No (`reset=True`) | Yes (`reset=False`) | Per-run only (resets each time) | Current run's messages only | +| LangGraph (stateless) | No | No (unless checkpointer added) | Per-run only (`_last_result` overwritten) | Last run's messages only | +| LangGraph (stateful) | Yes (via checkpoint) | Yes | Accumulated (via `get_state`) | All messages, no invocation boundaries | +| LlamaIndex | No (fresh Context) | Yes (pass `ctx`) | Per-run only (cache overwritten) | Last run's messages only | + +**Conclusion**: Your intuition was partially correct. smolagents *can* keep track of messages internally (via `reset=False`), but the MASEval adapter doesn't use this. Currently, **all three adapters effectively reset messages per invocation** in the MASEval integration (unless LangGraph is configured with a checkpointer). + +--- + +## 3. The Tracing Problem + +### Your Scenario + +Single agent, two invocations, 4 message pairs each: + +``` +Invocation 1: Invocation 2: + msg_in_1 msg_in_5 + msg_out_1 msg_out_5 + msg_in_2 msg_in_6 + msg_out_2 msg_out_6 + msg_in_3 msg_in_7 + msg_out_3 msg_out_7 + msg_in_4 msg_in_8 + msg_out_4 msg_out_8 +``` + +### Problem 1: "How do we tell whether `msg_out_7` had only `msg_in_7` or all previous messages available?" + +**Current state**: We can't. The traces don't capture what the LLM actually *saw* when generating each response. + +- **smolagents** actually *does* have this data in `ActionStep.model_input_messages` (what the LLM saw for each step). This is captured in `adapter.logs` already. But it's a smolagents-specific feature. +- **LangGraph stateless**: The LLM only saw messages from the current invocation (the graph node receives the state, which only contains what was passed in). 
+- **LangGraph stateful**: The LLM saw ALL accumulated messages (from checkpoint + new input). But we don't capture a snapshot of "what the LLM saw at step N." +- **LlamaIndex**: The LLM only saw messages from the current invocation (fresh ChatMemoryBuffer each time). No record of what was visible. + +**What we need**: A `model_input_messages` field (like smolagents has) for every adapter, capturing the actual LLM input at each step. + +### Problem 2: "How do we see when invocations happened? Post-hoc instead of just one long list of runs?" + +**Current state**: No invocation boundaries are recorded. + +- `get_messages()` returns either a single run's messages (smolagents, LlamaIndex, stateless LangGraph) or a flat accumulated list (stateful LangGraph) with no boundary markers. +- `adapter.logs` (for LangGraph/LlamaIndex) has one entry per `run()` call with timestamps, but this is separate from the message history. +- smolagents `logs` property generates entries from `memory.steps`, which reset each run. + +**The gap**: There's no unified structure that links "these messages belong to invocation N" with "the invocation started/ended at time T." + +### Problem 3: "Some agents persist state across runs or not. Traces should be collected permanently." + +**Current state**: This is the core tension. + +- **Agent execution**: Should respect the framework's memory behavior (reset vs. accumulate). This affects agent functioning and scientific integrity. +- **Tracing/evaluation**: Should capture everything permanently across all invocations, regardless of whether the agent "forgot" previous messages. + +Currently, `gather_traces()` is called (presumably) at the end, and returns whatever `get_messages()` returns at that moment. For frameworks that reset, this means **only the last run's messages are captured in traces**. Everything before the last run is lost. + +--- + +## 4. Analysis: What Needs to Change + +### Core Issue + +The adapter conflates two concerns: +1. 
**Agent memory management** (what the agent sees during execution) +2. **Trace collection** (what the evaluator needs to see post-hoc) + +These should be decoupled. The adapter should: +- Let the agent manage its own memory (respecting `reset=True/False`, checkpointers, context, etc.) +- Independently accumulate a complete trace of ALL messages across ALL invocations + +### What a Solution Needs + +1. **Invocation-scoped trace capture**: After each `run()`, capture a snapshot of `get_messages()` tagged with an invocation ID/index and timestamp. This builds an ordered list of `(invocation_id, timestamp, messages)` tuples. + +2. **Cumulative trace storage**: A list-of-invocations structure rather than a flat list of messages: + ```python + traces = [ + {"invocation": 0, "timestamp": "...", "messages": [msg_in_1, msg_out_1, ...]}, + {"invocation": 1, "timestamp": "...", "messages": [msg_in_5, msg_out_5, ...]}, + ] + ``` + +3. **Context visibility metadata**: Record whether the agent had access to prior invocations' messages: + ```python + { + "invocation": 1, + "had_prior_context": True, # agent saw msgs from invocation 0 + "messages": [...], # messages in this invocation + "agent_visible_messages": [...], # ALL messages the agent could see (including prior) + } + ``` + +4. **Framework-agnostic implementation**: This should live in the base `AgentAdapter` class (in `run()`) so all adapters automatically get it, without requiring framework-specific code. + +### Proposed Approach (sketch) + +The base `AgentAdapter.run()` method already wraps `_run_agent()`. 
It could be extended to: + +```python +# In AgentAdapter base class +def run(self, query: str) -> Any: + for cb in self.callbacks: + cb.on_run_start(self) + + invocation_start = datetime.now().isoformat() + result = self._run_agent(query) + + # Capture trace snapshot AFTER run (agent's memory is populated) + messages_snapshot = self.get_messages().to_list() + self._invocation_traces.append({ + "invocation": len(self._invocation_traces), + "started_at": invocation_start, + "completed_at": datetime.now().isoformat(), + "query": query, + "messages": messages_snapshot, + }) + + for cb in self.callbacks: + cb.on_run_end(self, result) + + return result +``` + +Then `gather_traces()` would include `self._invocation_traces` as a structured field. + +### Design Questions to Resolve + +1. **Should `get_messages()` return per-invocation or cumulative messages?** + - Per-invocation: matches current behavior for most frameworks, but loses accumulated state for stateful LangGraph + - Cumulative: would need adapter-level accumulation for stateless frameworks, conflating adapter and agent state + - Recommendation: keep `get_messages()` reflecting the agent's actual state (per-invocation for stateless, cumulative for stateful). Add a separate `get_all_traces()` or `get_invocation_history()` for the full trace. + +2. **Should the adapter control the agent's memory behavior?** + - For smolagents: Should the adapter pass `reset=False`? This changes agent behavior. + - For LlamaIndex: Should the adapter store and pass `ctx` between runs? This changes agent behavior. + - Recommendation: **No.** The adapter should not override the user's configuration. If a benchmark needs multi-turn persistence, the user should configure the agent accordingly (pass `reset=False`, add a checkpointer, create a shared Context). The adapter should trace whatever happens, not control it. + +3. 
**Where should trace accumulation live?** + - Option A: In base `AgentAdapter.run()` (automatic for all adapters) + - Option B: In a separate `TracingMiddleware` or callback + - Recommendation: Option A is simplest and ensures no adapter can forget to do it. + +4. **What about the `logs` property?** + - LangGraph and LlamaIndex append to `self.logs` (a list) across runs, so logs DO accumulate + - smolagents overrides `logs` as a property reading from `agent.memory.steps`, which resets each run + - This inconsistency means smolagents loses per-step logs from prior invocations + - This should be fixed: smolagents `logs` should also accumulate + +--- + +## 5. Summary of Findings + +| Question | Answer | +|----------|--------| +| Do smolagents messages accumulate? | **Not by default** (`reset=True`). Can accumulate with `reset=False`. MASEval adapter uses default (resets). | +| Do LangGraph messages accumulate? | **Only with checkpointer + thread_id**. Without checkpointer (current default), no. | +| Do LlamaIndex messages accumulate? | **Not by default**. Each `.run()` gets fresh Context. Must pass `ctx` to accumulate. | +| Can we tell what context an agent had? | **Only for smolagents** (via `model_input_messages`). Not for LangGraph or LlamaIndex. | +| Can we see invocation boundaries? | **No.** No framework or adapter marks where one invocation ends and another begins. | +| Are traces collected permanently? | **No.** smolagents loses steps on reset. LangGraph/LlamaIndex overwrite `_last_result`/`_message_cache`. Only `self.logs` accumulates (and smolagents `logs` property doesn't). | + +### The Fundamental Issue + +The current design assumes `get_messages()` is the primary trace mechanism. But `get_messages()` reflects the **agent's current state** (which may have been reset), not the **evaluation's trace needs** (which require a permanent, structured record of everything). + +Tracing and agent memory are different concerns that need different data structures. 
+ +--- + +## 6. The Internal Agent-to-Agent Call Problem + +### The Scenario + +Consider a multi-agent benchmark with 4 agents, each wrapped in an `AgentAdapter`: + +``` +Benchmark registers: + AgentAdapter(agent_1, "agent_1") + AgentAdapter(agent_2, "agent_2") + AgentAdapter(agent_3, "agent_3") + AgentAdapter(agent_4, "agent_4") + +But at runtime, the FRAMEWORK orchestrates agents directly: + benchmark calls adapter_1.run("task") + → agent_1 internally calls agent_2.run(...) ← bypasses AgentAdapter + → agent_2 resets memory, executes, produces messages + → agent_1 internally calls agent_3.run(...) ← bypasses AgentAdapter + → agent_3 executes + → agent_1 internally calls agent_2.run(...) AGAIN ← bypasses AgentAdapter + → agent_2 resets memory AGAIN — first run's messages are GONE + → ... + +When benchmark finally calls gather_traces(): + adapter_2.get_messages() → only sees agent_2's LAST run + adapter_2.logs → smolagents: only last run; LangGraph/LlamaIndex: never populated + (because adapter.run() was never called) +``` + +### Why This Is Worse Than the Multi-Invocation Problem + +The multi-invocation problem (Section 3) assumed the benchmark calls `adapter.run()` each time. At least then, the adapter's `run()` method executes and *could* capture traces. + +Here, the adapter's `run()` is **never called** for agents 2-4. The adapter is a dead wrapper — the framework bypasses it entirely. This means: +- No callbacks fire (`on_run_start`/`on_run_end`) +- No logs are created (LangGraph/LlamaIndex append to `self.logs` in `_run_agent()`) +- `get_messages()` shows whatever the agent's memory happens to contain at the moment `gather_traces()` is called +- If the agent reset memory N times during execution, N-1 runs are lost forever + +### What Each Framework's Internal Agent-to-Agent Calling Looks Like + +**smolagents managed agents**: The parent agent calls sub-agents via `execute_tool_call()`. Sub-agents run their full loop internally. 
+ +**LangGraph multi-agent**: Agents are subgraphs or nodes that invoke each other as graph steps. Execution flows through the graph runtime. + +**LlamaIndex AgentWorkflow**: Multiple agents hand off to each other within the workflow. Execution flows through the workflow runtime. + +--- + +## 7. Available Hook Mechanisms (Verified from Documentation) + +### smolagents: `step_callbacks` + +**Source**: [smolagents/agents.py v1.24.0](https://github.com/huggingface/smolagents/blob/v1.24.0/src/smolagents/agents.py) + +```python +# Constructor parameter: +step_callbacks: list[Callable] | dict[Type[MemoryStep], Callable | list[Callable]] | None + +# List format: callbacks registered for ActionStep only (backward compat) +# Dict format: callbacks mapped to specific step types +``` + +**When it fires**: In `_finalize_step()` after each ActionStep, PlanningStep, or FinalAnswerStep completes. + +**What it receives**: `(memory_step, agent=self)` — the completed step object containing: +- `step_number`, `model_input_messages`, `model_output_message` +- `tool_calls`, `observations`, `action_output` +- `timing`, `token_usage`, `error` + +**Critical limitation**: Step callbacks do **NOT** fire for managed agents' internal steps. Only the parent agent's callbacks fire. The managed agent's results are returned as tool call output to the parent, but the managed agent's internal steps are invisible. + +**Implication for MASEval**: To capture managed agent steps, we'd need to install `step_callbacks` on EACH managed agent individually, not just the top-level agent. + +### LangGraph/LangChain: `BaseCallbackHandler` + +**Source**: LangChain Reference — Callbacks (local file `Callbacks | LangChain Reference.html`) + +```python +class BaseCallbackHandler: + def on_llm_start(self, serialized, prompts, *, run_id, parent_run_id, tags, metadata, **kwargs): ... + def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs): ... 
+ def on_chat_model_start(self, serialized, messages, *, run_id, parent_run_id, **kwargs): ... + def on_tool_start(self, serialized, input_str, *, run_id, parent_run_id, **kwargs): ... + def on_tool_end(self, output, *, run_id, parent_run_id, **kwargs): ... + def on_chain_start(self, serialized, inputs, *, run_id, parent_run_id, **kwargs): ... + def on_chain_end(self, outputs, *, run_id, parent_run_id, **kwargs): ... + def on_agent_action(self, action, *, run_id, parent_run_id, **kwargs): ... + def on_agent_finish(self, finish, *, run_id, parent_run_id, **kwargs): ... +``` + +**Key feature**: Every callback receives `run_id` and `parent_run_id`, enabling reconstruction of the full call tree. + +**How to attach**: Pass via `config={"callbacks": [handler]}` at invoke time. Callbacks propagate through chains and subgraphs. + +**Propagation**: Callbacks DO propagate to subgraphs and nested chains. When a parent graph invokes a subgraph, the parent's callbacks fire for the subgraph's events too. + +**Implication for MASEval**: A single `BaseCallbackHandler` installed at the top-level graph would capture ALL events across all sub-agents. The `parent_run_id` chain gives us the execution tree for free. + +### LlamaIndex: Instrumentation Module + Legacy CallbackManager + +**Source**: `LLAMAINDEXINSTRUMENT.md`, `LLAMAINDEXCALLBACK.md` (local files) + +**New system (v0.10.20+)**: `instrumentation` module with span handlers and event handlers. +```python +from llama_index.core.instrumentation import get_dispatcher + +root_dispatcher = get_dispatcher() +root_dispatcher.add_span_handler(my_handler) +root_dispatcher.add_event_handler(my_handler) +``` + +**Legacy system**: `CallbackManager` with event types: +- `LLM`, `EMBEDDING`, `QUERY`, `RETRIEVE`, `SYNTHESIZE`, `TOOL`, etc. + +**OpenTelemetry native**: `llama-index-observability-otel` package provides direct OTel export. 
+```python +from llama_index.observability.otel import LlamaIndexOpenTelemetry +instrumentor = LlamaIndexOpenTelemetry() +instrumentor.start_registering() +``` + +**Global handler**: `set_global_handler("simple")` or `set_global_handler("arize_phoenix")` etc. — catches ALL LlamaIndex operations globally, including within AgentWorkflow sub-agents. + +**Implication for MASEval**: The global handler / dispatcher approach captures everything across all agents in the workflow. This is the most "automatic" of the three frameworks. + +--- + +## 8. The OpenTelemetry Question + +All three frameworks are converging on OpenTelemetry: +- **LlamaIndex**: Native OTel support via `llama-index-observability-otel` +- **LangChain**: OTel integrations via OpenLLMetry, Langfuse, etc. +- **smolagents**: Has structured logging that could be mapped to OTel spans + +### Pros of an OTel-based approach for MASEval + +1. **Unified standard**: One tracing format across all frameworks +2. **Built-in structure**: Spans with parent-child relationships, trace IDs, timestamps +3. **Invocation boundaries**: Each `.run()` could be a parent span; each LLM call a child span +4. **Call tree reconstruction**: `parent_run_id` / parent span gives us the agent-to-agent call graph +5. **Rich ecosystem**: Can export to Jaeger, Phoenix, Grafana, etc. for visualization +6. **No framework-specific code**: Instrumentation libraries already exist + +### Cons of an OTel-based approach + +1. **Heavy dependency**: `opentelemetry-sdk` + framework-specific instrumentation packages +2. **Abstraction mismatch**: OTel is designed for general observability, not eval-specific message tracing. MASEval needs "what messages did the LLM see?" — OTel gives "how long did the span take?" +3. **Data extraction**: Getting message content OUT of OTel spans back into MASEval's `MessageHistory` format requires parsing span attributes +4. **Setup complexity**: Requires configuring exporters, processors, etc. +5. 
**Not all frameworks are equal**: smolagents' OTel story is weaker than LlamaIndex's + +### Recommendation: Hybrid Approach + +Use **framework-native hooks** (not OTel) as the primary mechanism, with optional OTel export: + +1. **Primary**: Install framework-specific callbacks/hooks that write to the adapter's trace buffer in MASEval's native format. This gives us exactly the data we need (messages, context, invocation boundaries) without extra dependencies. + +2. **Optional**: For users who want OTel observability, document how to also attach OTel instrumentation alongside MASEval's hooks. They're not mutually exclusive. + +--- + +## 9. Recommended Architecture + +### The Hook Pattern + +Each adapter installs a framework-specific hook on the agent at initialization time. The hook writes to a persistent trace buffer on the adapter that survives memory resets. + +``` +AgentAdapter + ├── agent (the wrapped framework agent) + ├── _trace_buffer: List[TraceEvent] ← permanent, never reset + └── _hook (framework-specific) + └── on_step/on_llm/etc → appends to _trace_buffer + +When agent runs (even internally, bypassing adapter): + hook fires → TraceEvent written to _trace_buffer + +gather_traces(): + returns _trace_buffer contents (complete, structured, with boundaries) +``` + +### Framework-Specific Hook Installation + +**smolagents**: +```python +class SmolAgentAdapter(AgentAdapter): + def __init__(self, agent_instance, name, ...): + super().__init__(agent_instance, name, ...) 
+ # Install step_callback on the agent itself + self._install_trace_hook() + # Also install on all managed agents + for managed in getattr(agent_instance, 'managed_agents', {}).values(): + self._install_trace_hook_on(managed) + + def _install_trace_hook(self): + # Register callback that writes to self._trace_buffer + def trace_callback(step, agent): + self._trace_buffer.append({ + "agent_name": agent.name, + "step": step.dict(), + "model_input_messages": step.model_input_messages, + "timestamp": step.timing.end_time if step.timing else None, + }) + self.agent.step_callbacks.register(ActionStep, trace_callback) + self.agent.step_callbacks.register(PlanningStep, trace_callback) +``` + +**LangGraph**: +```python +class LangGraphAgentAdapter(AgentAdapter): + def __init__(self, agent_instance, name, ..., config=None): + super().__init__(agent_instance, name, ...) + # Create callback handler that writes to trace buffer + self._trace_handler = MASEvalLangChainHandler(self._trace_buffer) + # Inject into config so it propagates to all subgraphs + if config: + config.setdefault("callbacks", []).append(self._trace_handler) +``` + +**LlamaIndex**: +```python +class LlamaIndexAgentAdapter(AgentAdapter): + def __init__(self, agent_instance, name, ...): + super().__init__(agent_instance, name, ...) 
+ # Use LlamaIndex's global dispatcher or instrumentation + # This captures ALL events across all agents in the workflow + dispatcher = get_dispatcher() + self._trace_handler = MASEvalLlamaIndexHandler(self._trace_buffer) + dispatcher.add_span_handler(self._trace_handler) + dispatcher.add_event_handler(self._trace_handler) +``` + +### What The Trace Buffer Would Contain + +```python +_trace_buffer = [ + # Invocation boundary + {"type": "invocation_start", "agent": "agent_1", "query": "task", "timestamp": "..."}, + + # LLM call with full context (what the LLM saw) + {"type": "llm_call", "agent": "agent_1", "input_messages": [...], "output": "...", + "tokens": {"input": 50, "output": 30}, "timestamp": "..."}, + + # Sub-agent call + {"type": "invocation_start", "agent": "agent_2", "query": "subtask", "parent_agent": "agent_1", "timestamp": "..."}, + {"type": "llm_call", "agent": "agent_2", "input_messages": [...], "output": "...", "timestamp": "..."}, + {"type": "invocation_end", "agent": "agent_2", "result": "...", "timestamp": "..."}, + + # More of agent_1 + {"type": "llm_call", "agent": "agent_1", "input_messages": [...], "output": "...", "timestamp": "..."}, + {"type": "invocation_end", "agent": "agent_1", "result": "...", "timestamp": "..."}, +] +``` + +This structure answers ALL three original questions: +1. **What context did the agent have?** → `input_messages` on each `llm_call` event +2. **Where do invocations start/end?** → `invocation_start` / `invocation_end` events +3. **Permanent trace collection?** → `_trace_buffer` is never reset, captures everything + +### Key Design Principle + +**The adapter should NOT change the agent's behavior.** The hooks are read-only observers. Whether the agent resets memory, uses checkpointers, or persists context is the user's choice. The hooks just record what happens. 
diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 00000000..87361980 --- /dev/null +++ b/PLAN.md @@ -0,0 +1,692 @@ +# Plan: Fix AgentAdapter Tracing & Message Accumulation + +## Context + +The OVERVIEW.md investigation identified four problems with MASEval's AgentAdapter tracing: + +1. **No trace accumulation across invocations** — All adapters lose messages from prior `run()` calls +2. **No invocation boundaries** — No way to tell post-hoc which messages belong to which invocation +3. **smolagents `logs` doesn't accumulate** — The `logs` property reads from `agent.memory.steps` which resets each run (LangGraph, LlamaIndex, and CAMEL all accumulate properly) +4. **Internal agent-to-agent calls bypass adapters** — Framework orchestrators call sub-agents directly; adapter.run() is never called + +This plan addresses all four problems: +- **Phase 1** (sections 1.1–1.6): Base adapter invocation tracing + smolagents logs fix +- **Phase 2** (sections 2.0–2.4): Framework-specific hooks to capture internal agent-to-agent calls + +--- + +## Phase 1: Base Adapter Invocation Tracing + +### 1.1 Changes to `maseval/core/agent.py` + +**Add `_invocation_traces` to `__init__`:** + +```python +def __init__(self, agent_instance, name, callbacks=None): + self.agent = agent_instance + self.name = name + self.callbacks = callbacks or [] + self.messages = None + self.logs = [] + self._invocation_traces = [] # NEW: permanent trace buffer +``` + +**Modify `run()` to capture invocation snapshots:** + +After `_run_agent()` completes (and before `on_run_end` callbacks), snapshot the current messages and metadata into `_invocation_traces`. This happens whether or not the agent reset its memory — we capture whatever `get_messages()` returns at that moment. 
+ +```python +def run(self, query: str) -> Any: + for cb in self.callbacks: + cb.on_run_start(self) + + invocation_start = datetime.now().isoformat() + result = self._run_agent(query) + + # Capture invocation trace snapshot + try: + messages_snapshot = self.get_messages().to_list() + except Exception: + messages_snapshot = [] + + self._invocation_traces.append({ + "invocation": len(self._invocation_traces), + "started_at": invocation_start, + "completed_at": datetime.now().isoformat(), + "query": query, + "messages": messages_snapshot, + "status": "success", + }) + + for cb in self.callbacks: + cb.on_run_end(self, result) + + return result +``` + +**Handle errors** — if `_run_agent()` raises, still record the invocation (with status "error") before re-raising: + +```python +def run(self, query: str) -> Any: + for cb in self.callbacks: + cb.on_run_start(self) + + invocation_start = datetime.now().isoformat() + try: + result = self._run_agent(query) + except Exception as e: + # Record failed invocation + self._invocation_traces.append({ + "invocation": len(self._invocation_traces), + "started_at": invocation_start, + "completed_at": datetime.now().isoformat(), + "query": query, + "messages": [], + "status": "error", + "error": str(e), + "error_type": type(e).__name__, + }) + raise + + # Capture successful invocation + try: + messages_snapshot = self.get_messages().to_list() + except Exception: + messages_snapshot = [] + + self._invocation_traces.append({ + "invocation": len(self._invocation_traces), + "started_at": invocation_start, + "completed_at": datetime.now().isoformat(), + "query": query, + "messages": messages_snapshot, + "status": "success", + }) + + for cb in self.callbacks: + cb.on_run_end(self, result) + + return result +``` + +**Update `gather_traces()` to include invocation traces:** + +```python +def gather_traces(self): + history = self.get_messages() + return { + **super().gather_traces(), + "name": self.name, + "agent_type": 
type(self.agent).__name__, + "message_count": len(history), + "messages": history.to_list() if history else [], + "callbacks": [type(cb).__name__ for cb in self.callbacks], + "logs": self.logs, + "invocation_traces": self._invocation_traces, # NEW + } +``` + +**Import needed:** Add `from datetime import datetime` to the top of `agent.py`. + +### 1.2 Changes to `maseval/interface/agents/smolagents.py` + +**Problem:** The `logs` property reads dynamically from `self.agent.memory.steps`, which resets on each `run()` call. Steps from previous invocations are lost. + +**Fix:** Add `_accumulated_logs` list. After each `_run_agent()` call, snapshot the current memory steps into `_accumulated_logs`. Change the `logs` property to return `_accumulated_logs`. + +```python +def __init__(self, agent_instance, name, callbacks=None): + # Still skip super().__init__() to avoid self.logs = [] conflicting with property + self.agent = agent_instance + self.name = name + self.callbacks = callbacks or [] + self.messages = None + self._accumulated_logs = [] + self._invocation_traces = [] # Must initialize since we skip super().__init__() +``` + +Refactor the current `logs` property body into a helper method `_extract_current_logs()`: + +```python +def _extract_current_logs(self) -> List[Dict[str, Any]]: + """Extract logs from the agent's current memory state.""" + # (move existing logs property body here, unchanged) + ... 
+ +@property +def logs(self) -> List[Dict[str, Any]]: + """Return accumulated logs from all invocations.""" + return self._accumulated_logs +``` + +In `_run_agent()`, after calling `self.agent.run(query)`, snapshot: + +```python +def _run_agent(self, query: str) -> str: + _check_smolagents_installed() + final_answer = self.agent.run(query) + + # Snapshot current memory steps into accumulated logs + current_logs = self._extract_current_logs() + self._accumulated_logs.extend(current_logs) + + return final_answer +``` + +### 1.3 Changes to `maseval/interface/agents/camel.py` + +**Problem:** CAMEL also skips `super().__init__()`, so `_invocation_traces` won't be initialized. + +**Fix:** Add `self._invocation_traces = []` in CamelAgentAdapter's `__init__`: + +```python +def __init__(self, agent_instance, name, callbacks=None): + self.agent = agent_instance + self.name = name + self.callbacks = callbacks or [] + self.messages = None + self._responses = [] + self._errors = [] + self._invocation_traces = [] # NEW: must initialize since we skip super().__init__() +``` + +### 1.4 Changes to `tests/conftest.py` (DummyAgentAdapter) + +No changes needed — DummyAgentAdapter calls `super().__init__()` indirectly (through `AgentAdapter.__init__`), so `_invocation_traces` will be initialized automatically. + +--- + +## Phase 1: Testing Strategy + +### 1.5 Contract test changes (`tests/test_contract/test_agent_adapter_contract.py`) + +**Tighten `test_adapter_logs_accumulate_across_runs`:** +Currently says "we accept both behaviors as long as logs are populated." Change to **require accumulation** — logs from run 1 must still be present after run 2. 
+ +```python +def test_adapter_logs_accumulate_across_runs(self, framework): + """Test that logs accumulate across multiple runs.""" + mock_llm = MockLLM(responses=["First response", "Second response"]) + agent = create_agent_for_framework(framework, mock_llm) + adapter = create_adapter_for_framework(framework, agent) + + adapter.run("First query") + logs_after_first = len(adapter.logs) + assert logs_after_first > 0 + + adapter.run("Second query") + logs_after_second = len(adapter.logs) + + # Logs MUST accumulate (not reset) + assert logs_after_second > logs_after_first +``` + +**Add new contract tests for invocation traces:** + +```python +def test_adapter_invocation_traces_populated(self, framework): + """Test that _invocation_traces is populated after run().""" + ... + adapter.run("Test query") + assert len(adapter._invocation_traces) == 1 + trace = adapter._invocation_traces[0] + assert trace["invocation"] == 0 + assert trace["query"] == "Test query" + assert trace["status"] == "success" + assert "started_at" in trace + assert "completed_at" in trace + assert isinstance(trace["messages"], list) + +def test_adapter_invocation_traces_accumulate(self, framework): + """Test that invocation traces accumulate across multiple runs.""" + ... + adapter.run("First query") + adapter.run("Second query") + assert len(adapter._invocation_traces) == 2 + assert adapter._invocation_traces[0]["query"] == "First query" + assert adapter._invocation_traces[1]["query"] == "Second query" + assert adapter._invocation_traces[0]["invocation"] == 0 + assert adapter._invocation_traces[1]["invocation"] == 1 + +def test_adapter_invocation_traces_in_gather_traces(self, framework): + """Test that gather_traces() includes invocation_traces.""" + ... 
+ adapter.run("Test query") + traces = adapter.gather_traces() + assert "invocation_traces" in traces + assert len(traces["invocation_traces"]) == 1 + +def test_adapter_invocation_traces_on_error(self, framework): + """Test that invocation traces are recorded even when run fails.""" + # Use a framework-specific setup that causes an error + # (may need to be a separate non-parametrized test or handle frameworks individually) +``` + +### 1.6 Framework-specific integration test changes + +**smolagents** (`test_smolagents_integration.py`): + +Existing tests that directly manipulate `agent.memory.steps` and then check `adapter.logs` will need updating because `logs` now returns `_accumulated_logs` (populated via `_run_agent()`, not dynamically from memory). + +- `test_smolagents_adapter_logs_property` — Change to call `adapter._extract_current_logs()` instead of `adapter.logs` (tests the conversion logic itself) +- `test_smolagents_adapter_logs_with_errors` — Same: use `_extract_current_logs()` +- `test_smolagents_adapter_logs_empty_when_no_steps` — Same: use `_extract_current_logs()` +- **Add new test:** `test_smolagents_logs_accumulate_across_runs` — Run agent twice with `FakeSmolagentsModel`, verify `adapter.logs` has entries from both runs +- **Add new test:** `test_smolagents_invocation_traces` — Run agent, verify `adapter._invocation_traces` is populated + +**LangGraph** (`test_langgraph_integration.py`): +- **Add:** Test that `_invocation_traces` captures per-invocation messages correctly +- **Add:** Test that `_invocation_traces[0]["messages"]` has messages from only run 1 + +**LlamaIndex** (`test_llamaindex_integration.py`): +- **Add:** Same as LangGraph — verify invocation traces per-invocation + +--- + +## Phase 2: Framework-Specific Hooks + +**Goal:** Capture execution events even when internal agent-to-agent calls bypass `adapter.run()`. 
Each framework provides a hook mechanism; we install read-only observers that write into a `_trace_buffer` on the adapter. + +### 2.0 Changes to `maseval/core/agent.py` + +**Add `_trace_buffer` to `__init__`:** + +```python +def __init__(self, agent_instance, name, callbacks=None): + self.agent = agent_instance + self.name = name + self.callbacks = callbacks or [] + self.messages = None + self.logs = [] + self._invocation_traces = [] # Phase 1 + self._trace_buffer = [] # Phase 2: framework hook events +``` + +**Update `gather_traces()` to include trace buffer:** + +```python +def gather_traces(self): + history = self.get_messages() + return { + **super().gather_traces(), + "name": self.name, + "agent_type": type(self.agent).__name__, + "message_count": len(history), + "messages": history.to_list() if history else [], + "callbacks": [type(cb).__name__ for cb in self.callbacks], + "logs": self.logs, + "invocation_traces": self._invocation_traces, + "trace_buffer": self._trace_buffer, + } +``` + +### 2.1 smolagents: `step_callbacks` via `CallbackRegistry` + +**API:** `agent.step_callbacks.register(step_cls, callback)` — the `CallbackRegistry` (in `smolagents/memory.py`) fires callbacks in `_finalize_step()` with signature `callback(memory_step, agent=self)`. + +**Key detail:** Callbacks do NOT propagate to managed agents. We must register on `agent.managed_agents[name]` for each managed agent. 
+ +**Changes to `maseval/interface/agents/smolagents.py`:** + +Add `_install_hooks()` called from `__init__`: + +```python +def __init__(self, agent_instance, name, callbacks=None): + self.agent = agent_instance + self.name = name + self.callbacks = callbacks or [] + self.messages = None + self._accumulated_logs = [] + self._invocation_traces = [] + self._trace_buffer = [] + self._install_hooks() + +def _install_hooks(self): + """Register step_callbacks on agent and all managed agents.""" + from smolagents.memory import ActionStep, PlanningStep + if hasattr(self.agent, 'step_callbacks'): + self.agent.step_callbacks.register(ActionStep, self._on_step) + self.agent.step_callbacks.register(PlanningStep, self._on_step) + # Also register on managed agents + if hasattr(self.agent, 'managed_agents') and self.agent.managed_agents: + for managed_agent in self.agent.managed_agents.values(): + if hasattr(managed_agent, 'step_callbacks'): + managed_agent.step_callbacks.register(ActionStep, self._on_step) + managed_agent.step_callbacks.register(PlanningStep, self._on_step) + +def _on_step(self, memory_step, agent=None): + """Callback fired by smolagents after each step finalization.""" + from smolagents.memory import ActionStep, PlanningStep + entry = { + "source": "smolagents_step_callback", + "step_type": type(memory_step).__name__, + "agent_name": getattr(agent, 'name', None), + } + if isinstance(memory_step, ActionStep): + entry["step_number"] = memory_step.step_number + entry["has_error"] = memory_step.error is not None + if memory_step.tool_calls: + entry["tool_calls"] = [tc.name for tc in memory_step.tool_calls] + elif isinstance(memory_step, PlanningStep): + entry["plan_length"] = len(memory_step.plan) if memory_step.plan else 0 + self._trace_buffer.append(entry) +``` + +### 2.2 LangGraph: `BaseCallbackHandler` in config + +**API:** `langchain_core.callbacks.base.BaseCallbackHandler` — mix of `LLMManagerMixin`, `ChainManagerMixin`, `ToolManagerMixin`, 
`CallbackManagerMixin`, `RunManagerMixin`. Passed via `config={"callbacks": [handler]}` to `graph.invoke()`. LangGraph propagates callbacks to all subgraphs automatically. Each callback receives `run_id` and `parent_run_id`. + +**Changes to `maseval/interface/agents/langgraph.py`:** + +Add a private handler class and install it: + +```python +class _MASEvalLangChainHandler: + """Read-only callback handler that captures execution events.""" + + def __init__(self, trace_buffer: list): + self._trace_buffer = trace_buffer + # BaseCallbackHandler attributes + self.raise_error = False + self.run_inline = True + self.ignore_llm = False + self.ignore_chain = False + self.ignore_agent = False + self.ignore_retriever = True + self.ignore_retry = True + + def on_chain_start(self, serialized, inputs, *, run_id, parent_run_id=None, tags=None, metadata=None, **kwargs): + self._trace_buffer.append({ + "source": "langgraph_callback", + "event": "chain_start", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + "chain_type": serialized.get("id", [])[-1] if serialized.get("id") else None, + }) + + def on_chain_end(self, outputs, *, run_id, parent_run_id=None, **kwargs): + self._trace_buffer.append({ + "source": "langgraph_callback", + "event": "chain_end", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + }) + + def on_llm_end(self, response, *, run_id, parent_run_id=None, **kwargs): + self._trace_buffer.append({ + "source": "langgraph_callback", + "event": "llm_end", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + }) + + def on_tool_start(self, serialized, input_str, *, run_id, parent_run_id=None, tags=None, metadata=None, inputs=None, **kwargs): + self._trace_buffer.append({ + "source": "langgraph_callback", + "event": "tool_start", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + "tool_name": 
serialized.get("name"), + }) + + def on_tool_end(self, output, *, run_id, parent_run_id=None, **kwargs): + self._trace_buffer.append({ + "source": "langgraph_callback", + "event": "tool_end", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + }) + + # No-op stubs for remaining required methods + def on_chat_model_start(self, serialized, messages, *, run_id, parent_run_id=None, **kwargs): pass + def on_llm_start(self, serialized, prompts, *, run_id, parent_run_id=None, **kwargs): pass + def on_chain_error(self, error, *, run_id, parent_run_id=None, **kwargs): pass + def on_tool_error(self, error, *, run_id, parent_run_id=None, **kwargs): pass + def on_llm_error(self, error, *, run_id, parent_run_id=None, **kwargs): pass + def on_llm_new_token(self, token, *, run_id, parent_run_id=None, **kwargs): pass +``` + +**Note:** We use duck-typing instead of inheriting from `BaseCallbackHandler` to avoid importing `langchain_core` at module level. LangChain's callback manager checks for method presence, not class hierarchy. + +In `LangGraphAgentAdapter.__init__()`: + +```python +def __init__(self, agent_instance, name, callbacks=None, config=None): + super().__init__(agent_instance, name, callbacks) + self._langgraph_config = config + self._last_result = None + self._hook_handler = _MASEvalLangChainHandler(self._trace_buffer) +``` + +In `_run_agent()`, inject the handler into config: + +```python +# Build config with our callback handler injected +invoke_config = dict(self._langgraph_config) if self._langgraph_config else {} +existing_callbacks = invoke_config.get("callbacks", []) or [] +invoke_config["callbacks"] = existing_callbacks + [self._hook_handler] + +# Use invoke_config instead of self._langgraph_config for the invoke() call +``` + +### 2.3 LlamaIndex: Instrumentation Dispatcher + +**API:** `llama_index_instrumentation.get_dispatcher()` returns/creates dispatchers. 
`Dispatcher.add_span_handler(handler)` adds a `BaseSpanHandler`. The `SimpleSpanHandler` tracks `completed_spans` and `dropped_spans` with timing, parent hierarchy, and tags. Events propagate up the dispatcher tree. + +**Key detail:** The dispatcher is global — all LlamaIndex operations go through it. We use an `_active` flag on the handler to only record spans during our adapter's `run()`. + +**Changes to `maseval/interface/agents/llamaindex.py`:** + +Add a private span handler class: + +```python +class _MASEvalSpanHandler(BaseSpanHandler): + """Read-only span handler that records LlamaIndex execution spans.""" + + _trace_buffer: list = PrivateAttr(default_factory=list) + _active: bool = PrivateAttr(default=False) + + @classmethod + def class_name(cls) -> str: + return "_MASEvalSpanHandler" + + def new_span(self, id_, bound_args, instance=None, parent_span_id=None, tags=None, **kwargs): + if not self._active: + return None + from llama_index_instrumentation.span.simple import SimpleSpan + return SimpleSpan(id_=id_, parent_id=parent_span_id, tags=tags or {}) + + def prepare_to_exit_span(self, id_, bound_args, instance=None, result=None, **kwargs): + span = self.open_spans.get(id_) + if span is None: + return None + from datetime import datetime + span.end_time = datetime.now() + span.duration = (span.end_time - span.start_time).total_seconds() + self._trace_buffer.append({ + "source": "llamaindex_span", + "event": "span_exit", + "span_id": id_, + "parent_id": span.parent_id, + "duration": span.duration, + }) + return span + + def prepare_to_drop_span(self, id_, bound_args, instance=None, err=None, **kwargs): + span = self.open_spans.get(id_) + if span is None: + return None + self._trace_buffer.append({ + "source": "llamaindex_span", + "event": "span_drop", + "span_id": id_, + "parent_id": span.parent_id, + "error": str(err) if err else None, + }) + return span +``` + +In `LlamaIndexAgentAdapter.__init__()`: + +```python +def __init__(self, agent_instance, name, callbacks=None): 
+ super().__init__(agent_instance, name, callbacks) + self._last_result = None + self._message_cache = [] + self._span_handler = _MASEvalSpanHandler() + self._span_handler._trace_buffer = self._trace_buffer # share buffer with base + self._install_hooks() + +def _install_hooks(self): + """Add span handler to LlamaIndex's global dispatcher.""" + from llama_index_instrumentation import get_dispatcher + dispatcher = get_dispatcher() + dispatcher.add_span_handler(self._span_handler) +``` + +In `_run_agent()`, bracket execution with `_active` flag: + +```python +def _run_agent(self, query): + ... + self._span_handler._active = True + try: + result = self._run_agent_sync(query) + ... + except Exception as e: + ... + raise + finally: + self._span_handler._active = False +``` + +### 2.4 CAMEL-AI: Explicitly Skipped + +**CAMEL-AI has no agent-level hook mechanism.** Unlike the other three frameworks: + +- smolagents has `step_callbacks` (`CallbackRegistry`) fired after each step +- LangGraph has `BaseCallbackHandler` propagated through all subgraphs +- LlamaIndex has a global instrumentation `Dispatcher` with `BaseSpanHandler` + +CAMEL's `ChatAgent.step()` provides **no callback, hook, or instrumentation API**. The only callback system in CAMEL is `WorkforceCallback` which operates at the **task orchestration level** (task created/assigned/completed), not at the individual agent step level. + +**What this means:** `CamelAgentAdapter._trace_buffer` will always be empty. The adapter captures what it can via `self._responses` (stored in `_run_agent()`), but there is no way to observe internal agent execution (tool calls, reasoning steps, sub-agent delegation) without CAMEL adding agent-level hooks. + +**Future options if CAMEL adds hooks:** +1. If CAMEL adds step-level callbacks to `ChatAgent` (like smolagents' `CallbackRegistry`), register a callback in `_install_hooks()` +2. If CAMEL exposes an instrumentation/tracing API, integrate similarly to LlamaIndex's dispatcher +3. 
The `WorkforceCallback` system could be used to improve `CamelWorkforceTracer` (replace private attribute access with event-driven capture), but that's a separate enhancement + +**Changes to `maseval/interface/agents/camel.py`:** + +1. Add `self._invocation_traces = []` and `self._trace_buffer = []` to `__init__` (required since CAMEL skips `super().__init__()`) +2. Add a prominent warning to the class docstring about unreliable tracing + +```python +def __init__(self, agent_instance, name, callbacks=None): + self.agent = agent_instance + self.name = name + self.callbacks = callbacks or [] + self.messages = None + self._responses = [] + self._errors = [] + self._invocation_traces = [] + self._trace_buffer = [] # CAMEL has no agent-level hook API — buffer stays empty +``` + +**Docstring warning to add to `CamelAgentAdapter`:** + +```python +class CamelAgentAdapter(AgentAdapter): + """An AgentAdapter for CAMEL-AI ChatAgent. + + .. warning:: + **Unreliable tracing.** CAMEL-AI's ChatAgent does not expose any + callback, hook, or instrumentation API for individual agent steps. + Unlike smolagents (step_callbacks), LangGraph (BaseCallbackHandler), + and LlamaIndex (instrumentation Dispatcher), there is no way to + observe internal execution events (tool calls, reasoning steps, + sub-agent delegation) from outside the agent. + + Consequences: + - ``_trace_buffer`` is always empty (no framework hooks to tap into) + - ``logs`` only captures data from ``ChatAgentResponse.info`` returned + by ``step()`` — if the agent performs internal tool calls or + multi-step reasoning, those details may be lost + - ``get_messages()`` relies on CAMEL's memory system, which may not + reflect the full execution history + - For Workforce orchestration, use ``CamelWorkforceTracer`` which + can tap into ``WorkforceCallback`` events at the task level + + This will be improved when CAMEL-AI adds agent-level instrumentation. + Track: https://github.com/camel-ai/camel + + ... 
+ """ +``` + +--- + +## Phase 2: Testing Strategy + +### 2.5 Contract test additions (`tests/test_contract/test_agent_adapter_contract.py`) + +```python +def test_adapter_trace_buffer_exists(self, framework): + """Test that _trace_buffer is initialized.""" + adapter = create_adapter_for_framework(framework, ...) + assert hasattr(adapter, '_trace_buffer') + assert isinstance(adapter._trace_buffer, list) + +def test_adapter_trace_buffer_in_gather_traces(self, framework): + """Test that gather_traces() includes trace_buffer.""" + adapter = create_adapter_for_framework(framework, ...) + adapter.run("Test query") + traces = adapter.gather_traces() + assert "trace_buffer" in traces +``` + +### 2.6 Framework-specific hook tests + +**smolagents** (`test_smolagents_integration.py`): +- `test_smolagents_hook_captures_steps` — Run agent, verify `adapter._trace_buffer` has entries with `source == "smolagents_step_callback"` +- `test_smolagents_hook_captures_managed_agent_steps` — Create agent with managed agent, run, verify buffer captures steps from both agents + +**LangGraph** (`test_langgraph_integration.py`): +- `test_langgraph_hook_captures_chain_events` — Run graph, verify `adapter._trace_buffer` has `chain_start`/`chain_end` events +- `test_langgraph_hook_has_run_ids` — Verify each event in buffer has `run_id` and `parent_run_id` + +**LlamaIndex** (`test_llamaindex_integration.py`): +- `test_llamaindex_hook_captures_spans` — Run agent, verify `adapter._trace_buffer` has `span_exit` entries +- `test_llamaindex_hook_only_active_during_run` — Verify buffer is empty before `run()` and only populated during execution + +--- + +## Updated Files to Modify + +| File | Change | +|------|--------| +| `maseval/core/agent.py` | Add `_invocation_traces`, `_trace_buffer`, modify `run()`, update `gather_traces()`, add `datetime` import | +| `maseval/interface/agents/smolagents.py` | Fix logs accumulation, add `_install_hooks()`, `_on_step()`, init `_invocation_traces` + 
`_trace_buffer` | +| `maseval/interface/agents/langgraph.py` | Add `_MASEvalLangChainHandler` class, create handler in `__init__`, inject in `_run_agent()` | +| `maseval/interface/agents/llamaindex.py` | Add `_MASEvalSpanHandler` class, install in `__init__`, bracket `_run_agent()` with `_active` flag | +| `maseval/interface/agents/camel.py` | Add `_invocation_traces` + `_trace_buffer` to `__init__`, add unreliable-tracing warning to docstring | +| `tests/test_contract/test_agent_adapter_contract.py` | Tighten logs test, add invocation trace tests, add trace buffer tests | +| `tests/test_interface/test_agent_integration/test_langgraph_integration.py` | Add invocation trace + hook tests | +| `tests/test_interface/test_agent_integration/test_llamaindex_integration.py` | Add invocation trace + hook tests | +| `tests/test_interface/test_agent_integration/test_smolagents_integration.py` | Add logs accumulation + hook tests | + +## Files NOT Modified (no changes needed) + +| File | Why | +|------|-----| +| `tests/conftest.py` | DummyAgentAdapter calls `super().__init__()` — gets both buffers automatically | + +--- + +## Verification + +1. `uv run ruff format . && uv run ruff check . --fix` — formatting and linting +2. `uv run pytest -m contract -v` — contract tests (most critical) +3. `uv run pytest -m interface -v` — all framework integration tests +4. `uv run pytest -v` — full default test suite +5. 
Verify no existing tests break (especially the tightened logs accumulation test) diff --git a/maseval/core/agent.py b/maseval/core/agent.py index 97011527..c1d4c79d 100644 --- a/maseval/core/agent.py +++ b/maseval/core/agent.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod +from datetime import datetime from typing import List, Any, Optional, Dict from .callback import AgentCallback @@ -23,13 +24,47 @@ def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Agen self.callbacks = callbacks or [] self.messages: Optional[MessageHistory] = None self.logs: List[Dict[str, Any]] = [] + self._invocation_traces: List[Dict[str, Any]] = [] + self._trace_buffer: List[Dict[str, Any]] = [] def run(self, query: str) -> Any: """Executes the agent and returns the result.""" for cb in self.callbacks: cb.on_run_start(self) - result = self._run_agent(query) + invocation_start = datetime.now().isoformat() + try: + result = self._run_agent(query) + except Exception as e: + self._invocation_traces.append( + { + "invocation": len(self._invocation_traces), + "started_at": invocation_start, + "completed_at": datetime.now().isoformat(), + "query": query, + "messages": [], + "status": "error", + "error": str(e), + "error_type": type(e).__name__, + } + ) + raise + + try: + messages_snapshot = self.get_messages().to_list() + except Exception: + messages_snapshot = [] + + self._invocation_traces.append( + { + "invocation": len(self._invocation_traces), + "started_at": invocation_start, + "completed_at": datetime.now().isoformat(), + "query": query, + "messages": messages_snapshot, + "status": "success", + } + ) for cb in self.callbacks: cb.on_run_end(self, result) @@ -119,6 +154,8 @@ def gather_traces(self) -> Dict[str, Any]: - `message_count` - Number of messages in history - `messages` - Full message history as list of dicts - `callbacks` - List of callback class names attached to this agent + - `invocation_traces` - Per-invocation snapshots with query, messages, 
timestamps, status + - `trace_buffer` - Framework hook events captured passively during execution Returns: Dictionary containing agent execution traces. @@ -144,6 +181,8 @@ def gather_traces(self) -> Dict[str, Any]: "messages": history.to_list() if history else [], "callbacks": [type(cb).__name__ for cb in self.callbacks], "logs": self.logs, + "invocation_traces": self._invocation_traces, + "trace_buffer": self._trace_buffer, } def gather_config(self) -> Dict[str, Any]: diff --git a/maseval/interface/agents/camel.py b/maseval/interface/agents/camel.py index 6166d108..287e51e0 100644 --- a/maseval/interface/agents/camel.py +++ b/maseval/interface/agents/camel.py @@ -90,6 +90,28 @@ def _extract_response_content(response) -> str: class CamelAgentAdapter(AgentAdapter): """An AgentAdapter for CAMEL-AI ChatAgent. + .. warning:: + **Unreliable tracing.** CAMEL-AI's ChatAgent does not expose any + callback, hook, or instrumentation API for individual agent steps. + Unlike smolagents (step_callbacks), LangGraph (BaseCallbackHandler), + and LlamaIndex (instrumentation Dispatcher), there is no way to + observe internal execution events (tool calls, reasoning steps, + sub-agent delegation) from outside the agent. + + Consequences: + + - ``_trace_buffer`` is always empty (no framework hooks to tap into) + - ``logs`` only captures data from ``ChatAgentResponse.info`` returned + by ``step()`` — if the agent performs internal tool calls or + multi-step reasoning, those details may be lost + - ``get_messages()`` relies on CAMEL's memory system, which may not + reflect the full execution history + - For Workforce orchestration, use ``CamelWorkforceTracer`` which + can tap into ``WorkforceCallback`` events at the task level + + This will be improved when CAMEL-AI adds agent-level instrumentation. 
+ Track: https://github.com/camel-ai/camel + This adapter integrates CAMEL-AI's ChatAgent with MASEval's benchmarking framework, converting CAMEL's message format to OpenAI-compatible MessageHistory format. It leverages CAMEL's native memory system and response info as the source of truth @@ -182,6 +204,7 @@ def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any] name: Agent name for identification callbacks: Optional list of AgentCallback instances """ + _check_camel_installed() self.agent = agent_instance self.name = name self.callbacks = callbacks or [] @@ -190,6 +213,9 @@ def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any] self._responses: List[Any] = [] # Store errors that occur during execution (for comprehensive logging) self._errors: List[Dict[str, Any]] = [] + # Must initialize since we skip super().__init__() + self._invocation_traces: List[Dict[str, Any]] = [] + self._trace_buffer: List[Dict[str, Any]] = [] # CAMEL has no agent-level hook API @property def logs(self) -> List[Dict[str, Any]]: # type: ignore[override] @@ -202,8 +228,6 @@ def logs(self) -> List[Dict[str, Any]]: # type: ignore[override] Returns: List of log dictionaries with comprehensive step information """ - _check_camel_installed() - logs_list: List[Dict[str, Any]] = [] for step_num, response in enumerate(self._responses, start=1): @@ -291,22 +315,16 @@ def get_messages(self) -> MessageHistory: Returns: MessageHistory with converted messages """ - _check_camel_installed() - messages: List[Dict[str, Any]] = [] - # Try to get messages from agent's memory + # Get messages from agent's memory if hasattr(self.agent, "memory") and self.agent.memory is not None: - try: - # get_context() returns (messages_list, token_count) - context = self.agent.memory.get_context() - if isinstance(context, tuple) and len(context) >= 1: - memory_messages = context[0] - if isinstance(memory_messages, list): - messages = 
self._convert_memory_messages(memory_messages) - except Exception: - # If memory access fails, fall back to empty - pass + # get_context() returns (messages_list, token_count) + context = self.agent.memory.get_context() + if isinstance(context, tuple) and len(context) >= 1: + memory_messages = context[0] + if isinstance(memory_messages, list): + messages = self._convert_memory_messages(memory_messages) return MessageHistory(messages) @@ -397,7 +415,6 @@ def gather_traces(self) -> Dict[str, Any]: - last_terminated: Whether the last response indicated termination """ base_traces = super().gather_traces() - _check_camel_installed() # Calculate aggregated statistics from responses total_input_tokens = 0 @@ -449,7 +466,6 @@ def gather_config(self) -> Dict[str, Any]: - memory_type: Type of memory being used """ base_config = super().gather_config() - _check_camel_installed() camel_config: Dict[str, Any] = {} @@ -536,7 +552,6 @@ def _run_agent(self, query: str) -> str: Raises: Exception: If agent execution fails """ - _check_camel_installed() from camel.messages import BaseMessage # Create user message for CAMEL @@ -728,7 +743,6 @@ def respond(self, message: str) -> str: if self.is_done(): return "" - _check_camel_installed() from camel.messages import BaseMessage self._turn_count += 1 @@ -781,7 +795,6 @@ def get_tool(self) -> Any: Returns: CAMEL FunctionTool wrapping the respond method. """ - _check_camel_installed() from camel.toolkits import FunctionTool user_instance = self diff --git a/maseval/interface/agents/langgraph.py b/maseval/interface/agents/langgraph.py index 5831c81d..cd87159b 100644 --- a/maseval/interface/agents/langgraph.py +++ b/maseval/interface/agents/langgraph.py @@ -29,6 +29,96 @@ def _check_langgraph_installed(): raise ImportError("langgraph is not installed. Install it with: pip install maseval[langgraph]") from e +class _MASEvalLangChainHandler: + """Read-only callback handler that captures LangGraph execution events. 
+ + Uses duck-typing instead of inheriting from BaseCallbackHandler to avoid + importing langchain_core at module level. LangChain's callback manager + checks for method presence, not class hierarchy. + """ + + def __init__(self, trace_buffer: List[Dict[str, Any]]): + self._trace_buffer = trace_buffer + self.raise_error = False + self.run_inline = True + self.ignore_llm = False + self.ignore_chain = False + self.ignore_agent = False + self.ignore_retriever = True + self.ignore_retry = True + + def on_chain_start(self, serialized, inputs, *, run_id, parent_run_id=None, tags=None, metadata=None, **kwargs): + self._trace_buffer.append( + { + "source": "langgraph_callback", + "event": "chain_start", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + "chain_type": serialized.get("id", [])[-1] if serialized and serialized.get("id") else None, + } + ) + + def on_chain_end(self, outputs, *, run_id, parent_run_id=None, **kwargs): + self._trace_buffer.append( + { + "source": "langgraph_callback", + "event": "chain_end", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + } + ) + + def on_llm_end(self, response, *, run_id, parent_run_id=None, **kwargs): + self._trace_buffer.append( + { + "source": "langgraph_callback", + "event": "llm_end", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + } + ) + + def on_tool_start(self, serialized, input_str, *, run_id, parent_run_id=None, tags=None, metadata=None, **kwargs): + self._trace_buffer.append( + { + "source": "langgraph_callback", + "event": "tool_start", + "run_id": str(run_id), + "parent_run_id": str(parent_run_id) if parent_run_id else None, + "tool_name": serialized.get("name") if serialized else None, + } + ) + + def on_tool_end(self, output, *, run_id, parent_run_id=None, **kwargs): + self._trace_buffer.append( + { + "source": "langgraph_callback", + "event": "tool_end", + "run_id": str(run_id), + 
"parent_run_id": str(parent_run_id) if parent_run_id else None, + } + ) + + # No-op stubs for remaining required methods + def on_chat_model_start(self, serialized, messages, *, run_id, parent_run_id=None, **kwargs): + pass + + def on_llm_start(self, serialized, prompts, *, run_id, parent_run_id=None, **kwargs): + pass + + def on_chain_error(self, error, *, run_id, parent_run_id=None, **kwargs): + pass + + def on_tool_error(self, error, *, run_id, parent_run_id=None, **kwargs): + pass + + def on_llm_error(self, error, *, run_id, parent_run_id=None, **kwargs): + pass + + def on_llm_new_token(self, token, *, run_id, parent_run_id=None, **kwargs): + pass + + class LangGraphAgentAdapter(AgentAdapter): """An AgentAdapter for LangGraph CompiledGraph agents. @@ -126,9 +216,11 @@ def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any] config: Optional LangGraph config dict (for stateful graphs with checkpointer) Should include 'configurable': {'thread_id': '...'} for persistent state """ + _check_langgraph_installed() super().__init__(agent_instance, name, callbacks) self._langgraph_config = config self._last_result = None + self._hook_handler = _MASEvalLangChainHandler(self._trace_buffer) def get_messages(self) -> MessageHistory: """Get message history from LangGraph. 
@@ -139,18 +231,12 @@ def get_messages(self) -> MessageHistory: Returns: MessageHistory with converted messages """ - _check_langgraph_installed() - # If we have a config with thread_id and the graph has get_state, use it if self._langgraph_config and hasattr(self.agent, "get_state"): - try: - state = self.agent.get_state(self._langgraph_config) - messages = state.values.get("messages", []) - if messages: - return self._convert_langchain_messages(messages) - except Exception: - # If get_state fails, fall back to cached result - pass + state = self.agent.get_state(self._langgraph_config) + messages = state.values.get("messages", []) + if messages: + return self._convert_langchain_messages(messages) # Fall back to cached result from last run if self._last_result: @@ -176,7 +262,6 @@ def gather_config(self) -> dict[str, Any]: - graph_info: Information about the graph structure (if available) """ base_config = super().gather_config() - _check_langgraph_installed() # Add LangGraph-specific config langgraph_config = {} @@ -196,17 +281,14 @@ def gather_config(self) -> dict[str, Any]: safe_config["configurable"] = {"has_thread_id": "thread_id" in value if isinstance(value, dict) else False} langgraph_config["config"] = safe_config - # Try to get graph structure info + # Get graph structure info if hasattr(self.agent, "get_graph"): - try: - graph = self.agent.get_graph() - if graph: - langgraph_config["graph_info"] = { - "num_nodes": len(graph.nodes) if hasattr(graph, "nodes") else None, - "num_edges": len(graph.edges) if hasattr(graph, "edges") else None, - } - except Exception: - pass + graph = self.agent.get_graph() + if graph: + langgraph_config["graph_info"] = { + "num_nodes": len(graph.nodes), + "num_edges": len(graph.edges), + } if langgraph_config: base_config["langgraph_config"] = langgraph_config @@ -214,7 +296,6 @@ def gather_config(self) -> dict[str, Any]: return base_config def _run_agent(self, query: str) -> Any: - _check_langgraph_installed() from 
langchain_core.messages import HumanMessage start_time = time.time() @@ -224,11 +305,13 @@ def _run_agent(self, query: str) -> Any: # Initialize the state with the user query initial_state = {"messages": [HumanMessage(content=query)]} - # Invoke the graph (with config if provided) - if self._langgraph_config: - result = self.agent.invoke(initial_state, config=self._langgraph_config) - else: - result = self.agent.invoke(initial_state) + # Build config with our callback handler injected + invoke_config = dict(self._langgraph_config) if self._langgraph_config else {} + existing_callbacks = invoke_config.get("callbacks", []) or [] + invoke_config["callbacks"] = existing_callbacks + [self._hook_handler] + + # Invoke the graph with config (always includes our handler) + result = self.agent.invoke(initial_state, config=invoke_config) # Cache the result for stateless graphs self._last_result = result @@ -270,18 +353,14 @@ def _run_agent(self, query: str) -> Any: # For stateful graphs with checkpointer, get state snapshot metadata if self._langgraph_config and hasattr(self.agent, "get_state"): - try: - state_snapshot = self.agent.get_state(self._langgraph_config) - if state_snapshot.metadata: - log_entry["checkpoint_metadata"] = { - "source": state_snapshot.metadata.get("source"), - "step": state_snapshot.metadata.get("step"), - } - if state_snapshot.created_at: - log_entry["checkpoint_created_at"] = state_snapshot.created_at - except Exception: - # If get_state fails, just skip metadata - pass + state_snapshot = self.agent.get_state(self._langgraph_config) + if state_snapshot.metadata: + log_entry["checkpoint_metadata"] = { + "source": state_snapshot.metadata.get("source"), + "step": state_snapshot.metadata.get("step"), + } + if state_snapshot.created_at: + log_entry["checkpoint_created_at"] = state_snapshot.created_at self.logs.append(log_entry) diff --git a/maseval/interface/agents/llamaindex.py b/maseval/interface/agents/llamaindex.py index d8add62f..c545b3ed 100644 --- 
a/maseval/interface/agents/llamaindex.py +++ b/maseval/interface/agents/llamaindex.py @@ -33,6 +33,101 @@ def _check_llamaindex_installed(): raise ImportError("llama-index-core is not installed. Install it with: pip install maseval[llamaindex]") from e +class _MASEvalSpanHandler: + """Read-only span handler that records LlamaIndex execution spans. + + Uses duck-typing compatible with llama_index_instrumentation's BaseSpanHandler. + Only records spans when ``_active`` is True (set during adapter's ``run()``). + """ + + def __init__(self): + self.open_spans: Dict[str, Any] = {} + self.completed_spans: List[Any] = [] + self.dropped_spans: List[Any] = [] + self.current_span_ids: Dict[Any, Optional[str]] = {} + self._trace_buffer: List[Dict[str, Any]] = [] + self._active: bool = False + self._lock = None + + @property + def lock(self): + import threading + + if self._lock is None: + self._lock = threading.Lock() + return self._lock + + def span_enter(self, id_, bound_args, instance=None, parent_id=None, tags=None, **kwargs): + """Logic for entering a span.""" + if not self._active: + return + if id_ not in self.open_spans: + span = self.new_span(id_=id_, bound_args=bound_args, instance=instance, parent_span_id=parent_id, tags=tags) + if span: + with self.lock: + self.open_spans[id_] = span + + def span_exit(self, id_, bound_args, instance=None, result=None, **kwargs): + """Logic for exiting a span.""" + span = self.prepare_to_exit_span(id_=id_, bound_args=bound_args, instance=instance, result=result) + if span: + with self.lock: + self.open_spans.pop(id_, None) + + def span_drop(self, id_, bound_args, instance=None, err=None, **kwargs): + """Logic for dropping a span (early exit / error).""" + span = self.prepare_to_drop_span(id_=id_, bound_args=bound_args, instance=instance, err=err) + if span: + with self.lock: + self.open_spans.pop(id_, None) + + def new_span(self, id_, bound_args, instance=None, parent_span_id=None, tags=None, **kwargs): + if not self._active: + 
return None + try: + from llama_index_instrumentation.span.simple import SimpleSpan + + return SimpleSpan(id_=id_, parent_id=parent_span_id, tags=tags or {}) + except ImportError: + return None + + def prepare_to_exit_span(self, id_, bound_args, instance=None, result=None, **kwargs): + span = self.open_spans.get(id_) + if span is None: + return None + span.end_time = datetime.now() + span.duration = (span.end_time - span.start_time).total_seconds() + self._trace_buffer.append( + { + "source": "llamaindex_span", + "event": "span_exit", + "span_id": id_, + "parent_id": span.parent_id, + "duration": span.duration, + } + ) + with self.lock: + self.completed_spans.append(span) + return span + + def prepare_to_drop_span(self, id_, bound_args, instance=None, err=None, **kwargs): + span = self.open_spans.get(id_) + if span is None: + return None + self._trace_buffer.append( + { + "source": "llamaindex_span", + "event": "span_drop", + "span_id": id_, + "parent_id": span.parent_id, + "error": str(err) if err else None, + } + ) + with self.lock: + self.dropped_spans.append(span) + return span + + class LlamaIndexAgentAdapter(AgentAdapter): """An AgentAdapter for LlamaIndex workflow-based agents. @@ -119,9 +214,13 @@ def __init__(self, agent_instance: Any, name: str, callbacks: Optional[List[Any] name: Agent name callbacks: Optional list of callbacks """ + _check_llamaindex_installed() super().__init__(agent_instance, name, callbacks) self._last_result = None self._message_cache: List[Dict[str, Any]] = [] + self._span_handler = _MASEvalSpanHandler() + self._span_handler._trace_buffer = self._trace_buffer # share buffer with base + self._install_hooks() def get_messages(self) -> MessageHistory: """Get message history from LlamaIndex. 
@@ -132,17 +231,11 @@ def get_messages(self) -> MessageHistory: Returns: MessageHistory with converted messages """ - _check_llamaindex_installed() - - # Try to extract from agent memory if available + # Extract from agent memory if available if hasattr(self.agent, "memory") and hasattr(self.agent.memory, "get_all"): - try: - messages = self.agent.memory.get_all() - if messages: - return self._convert_llamaindex_messages(messages) - except Exception: - # If memory access fails, fall back to cache - pass + messages = self.agent.memory.get_all() + if messages: + return self._convert_llamaindex_messages(messages) # Fall back to cached messages return MessageHistory(self._message_cache) @@ -161,7 +254,6 @@ def gather_config(self) -> Dict[str, Any]: - llamaindex_config: LlamaIndex-specific configuration (if available) """ base_config = super().gather_config() - _check_llamaindex_installed() # Add LlamaIndex-specific config llamaindex_config: Dict[str, Any] = {} @@ -190,18 +282,25 @@ def gather_config(self) -> Dict[str, Any]: # Check if it's a workflow if hasattr(self.agent, "get_config"): - try: - workflow_config = self.agent.get_config() - if workflow_config: - llamaindex_config["workflow_config"] = workflow_config - except Exception: - pass + workflow_config = self.agent.get_config() + if workflow_config: + llamaindex_config["workflow_config"] = workflow_config if llamaindex_config: base_config["llamaindex_config"] = llamaindex_config return base_config + def _install_hooks(self): + """Add span handler to LlamaIndex's global dispatcher.""" + try: + from llama_index_instrumentation import get_dispatcher + except ImportError: + return + + dispatcher = get_dispatcher() + dispatcher.add_span_handler(self._span_handler) # type: ignore[invalid-argument-type] # duck-typed handler + def _run_agent(self, query: str) -> Any: """Run the LlamaIndex agent and cache execution state. 
@@ -214,11 +313,10 @@ def _run_agent(self, query: str) -> Any: Raises: Exception: If agent execution fails """ - _check_llamaindex_installed() - start_time = time.time() timestamp = datetime.now().isoformat() + self._span_handler._active = True try: # Run the agent (handles async automatically) result = self._run_agent_sync(query) @@ -275,6 +373,8 @@ def _run_agent(self, query: str) -> Any: ) raise + finally: + self._span_handler._active = False def _run_agent_sync(self, query: str) -> Any: """Run agent in sync context. diff --git a/maseval/interface/agents/smolagents.py b/maseval/interface/agents/smolagents.py index efcd6b7f..2e8004ea 100644 --- a/maseval/interface/agents/smolagents.py +++ b/maseval/interface/agents/smolagents.py @@ -102,16 +102,20 @@ def __init__(self, agent_instance, name: str, callbacks=None): """Initialize the Smolagent adapter. Note: We don't call super().__init__() to avoid initializing self.logs as a list, - since we override it as a property that dynamically fetches from agent.memory. + since we override it as a property that returns accumulated logs. """ + _check_smolagents_installed() self.agent = agent_instance self.name = name self.callbacks = callbacks or [] self.messages = None + self._accumulated_logs: List[Dict[str, Any]] = [] + self._invocation_traces: List[Dict[str, Any]] = [] + self._trace_buffer: List[Dict[str, Any]] = [] + self._install_hooks() - @property - def logs(self) -> List[Dict[str, Any]]: # type: ignore[override] - """Dynamically generate logs from smolagents' internal memory. + def _extract_current_logs(self) -> List[Dict[str, Any]]: + """Extract logs from the agent's current memory state. Converts smolagents' ActionStep and PlanningStep objects into log entries compatible with the AgentAdapter contract, including all available properties. 
@@ -119,7 +123,6 @@ def logs(self) -> List[Dict[str, Any]]: # type: ignore[override] Returns: List of log dictionaries with comprehensive step information """ - _check_smolagents_installed() from smolagents.memory import ActionStep, PlanningStep, TaskStep logs_list: List[Dict[str, Any]] = [] @@ -251,6 +254,15 @@ def logs(self) -> List[Dict[str, Any]]: # type: ignore[override] return logs_list + @property + def logs(self) -> List[Dict[str, Any]]: # type: ignore[override] + """Return accumulated logs from all invocations. + + Returns: + List of log dictionaries accumulated across all run() calls + """ + return self._accumulated_logs + def gather_traces(self) -> dict: """Gather traces including message history and monitoring data. @@ -263,7 +275,6 @@ def gather_traces(self) -> dict: Dict containing messages and monitoring statistics """ base_logs = super().gather_traces() - _check_smolagents_installed() # Extract monitoring data from agent's memory steps if hasattr(self.agent, "memory") and hasattr(self.agent.memory, "steps"): @@ -351,7 +362,6 @@ def gather_config(self) -> dict[str, Any]: - managed_agents: List of managed agent configs (if any) """ base_config = super().gather_config() - _check_smolagents_installed() # Get comprehensive config from smolagents' native to_dict() method smolagents_config = {} @@ -391,8 +401,6 @@ def get_messages(self) -> MessageHistory: Returns: MessageHistory with converted messages from smolagents """ - _check_smolagents_installed() - # Get messages from smolagents memory smol_messages = self.agent.write_memory_to_messages() @@ -400,15 +408,48 @@ def get_messages(self) -> MessageHistory: return self._convert_smolagents_messages(smol_messages) def _run_agent(self, query: str) -> str: - _check_smolagents_installed() - # Run the agent (this updates the agent's internal memory and returns the final answer) # All execution details are tracked in agent.memory.steps automatically final_answer = self.agent.run(query) - # Return the final 
answer (traces are captured via get_messages() and gather_traces()) + # Snapshot current memory steps into accumulated logs + current_logs = self._extract_current_logs() + self._accumulated_logs.extend(current_logs) + return final_answer + def _install_hooks(self): + """Register step_callbacks on agent and all managed agents.""" + from smolagents.memory import ActionStep, PlanningStep + + self.agent.step_callbacks.register(ActionStep, self._on_step) + self.agent.step_callbacks.register(PlanningStep, self._on_step) + + # Also register on managed agents (callbacks do NOT propagate automatically) + managed_agents = getattr(self.agent, "managed_agents", None) + if isinstance(managed_agents, dict): + for managed_agent in managed_agents.values(): + managed_agent.step_callbacks.register(ActionStep, self._on_step) + managed_agent.step_callbacks.register(PlanningStep, self._on_step) + + def _on_step(self, memory_step, agent=None): + """Callback fired by smolagents after each step finalization.""" + from smolagents.memory import ActionStep, PlanningStep + + entry: Dict[str, Any] = { + "source": "smolagents_step_callback", + "step_type": type(memory_step).__name__, + "agent_name": getattr(agent, "name", None), + } + if isinstance(memory_step, ActionStep): + entry["step_number"] = memory_step.step_number + entry["has_error"] = memory_step.error is not None + if memory_step.tool_calls: + entry["tool_calls"] = [tc.name for tc in memory_step.tool_calls] + elif isinstance(memory_step, PlanningStep): + entry["plan_length"] = len(memory_step.plan) if memory_step.plan else 0 + self._trace_buffer.append(entry) + def _convert_smolagents_messages(self, smol_messages: list) -> MessageHistory: """Convert smolagents message format to MASEval MessageHistory. 
diff --git a/tests/test_contract/test_agent_adapter_contract.py b/tests/test_contract/test_agent_adapter_contract.py index 7614c0a6..2cf86f48 100644 --- a/tests/test_contract/test_agent_adapter_contract.py +++ b/tests/test_contract/test_agent_adapter_contract.py @@ -313,8 +313,9 @@ def test_adapter_callbacks_triggered_uniformly(self, framework): def test_adapter_traces_same_structure(self, framework): """Test gather_traces returns consistent structure across frameworks. - Contract: All adapters must provide message history in traces, enabling - uniform access to execution data regardless of underlying framework. + Contract: All adapters must provide these keys in traces: + name, agent_type, message_count, messages, callbacks, logs, + invocation_traces, trace_buffer. """ mock_llm = MockLLM(responses=["Response"]) agent = create_agent_for_framework(framework, mock_llm) @@ -323,20 +324,21 @@ def test_adapter_traces_same_structure(self, framework): adapter.run("Test query") traces = adapter.gather_traces() - # All should include message history; different adapters name this key - if "message_history" in traces: - messages = traces["message_history"] - else: - messages = traces.get("messages", []) + # Required keys from base AgentAdapter.gather_traces() + for key in ["name", "agent_type", "message_count", "messages", "callbacks", "logs", "invocation_traces", "trace_buffer"]: + assert key in traces, f"gather_traces() missing required key: {key}" - assert isinstance(messages, list) - assert len(messages) > 0 + assert isinstance(traces["messages"], list) + assert len(traces["messages"]) > 0 + assert isinstance(traces["logs"], list) + assert isinstance(traces["invocation_traces"], list) + assert isinstance(traces["trace_buffer"], list) def test_adapter_config_same_structure(self, framework): """Test gather_config returns consistent structure across frameworks. - Contract: All adapters must provide agent name in config, enabling - identification and reproducibility tracking. 
+ Contract: All adapters must provide these keys in config: + name, agent_type, adapter_type, callbacks. """ mock_llm = MockLLM(responses=["Response"]) agent = create_agent_for_framework(framework, mock_llm) @@ -344,10 +346,12 @@ def test_adapter_config_same_structure(self, framework): config = adapter.gather_config() - # All should include agent name - assert "agent_name" in config or "name" in config - # All should include some identifying information - assert len(config) > 0 + # Required keys from base AgentAdapter.gather_config() + for key in ["name", "agent_type", "adapter_type", "callbacks"]: + assert key in config, f"gather_config() missing required key: {key}" + + assert config["name"] == "test_agent" + assert isinstance(config["callbacks"], list) def test_adapter_get_messages_after_multiple_runs(self, framework): """Test message history accumulation across multiple agent runs. @@ -431,21 +435,18 @@ def on_run_end(self, agent, result): def test_adapter_callback_lifecycle_order(self, framework): """Test callbacks fire in correct lifecycle order with proper state. - Contract: on_run_start fires before execution with initial state, - on_run_end fires after execution with final state and result. - - Note: smolagents always has a system message in memory at start. + Contract: on_run_start fires before execution, on_run_end fires after + execution with the result. After run, message count must be greater + than before run. 
""" lifecycle_events = [] class LifecycleTracker(AgentCallback): def on_run_start(self, agent): - # Capture state at start msg_count_at_start = len(agent.get_messages()) lifecycle_events.append(("start", msg_count_at_start)) def on_run_end(self, agent, result): - # Capture state at end msg_count_at_end = len(agent.get_messages()) lifecycle_events.append(("end", msg_count_at_end, result)) @@ -460,13 +461,8 @@ def on_run_end(self, agent, result): assert lifecycle_events[0][0] == "start" assert lifecycle_events[1][0] == "end" - # Verify on_run_start fires before user messages are added - # smolagents has 1 message (system), others have 0 - expected_start_count = 1 if framework == "smolagents" else 0 - assert lifecycle_events[0][1] == expected_start_count - - # Verify on_run_end fires after message history is populated - assert lifecycle_events[1][1] > expected_start_count # Has more messages at end + # Verify on_run_end has more messages than on_run_start (execution happened) + assert lifecycle_events[1][1] > lifecycle_events[0][1] # Verify result is passed to on_run_end assert lifecycle_events[1][2] == result @@ -581,11 +577,10 @@ def test_adapter_logs_structure_has_basic_info(self, framework): assert len(log_entry) > 0 def test_adapter_logs_accumulate_across_runs(self, framework): - """Test that logs accumulate or reset consistently across multiple run - calls to the agent. + """Test that logs accumulate across multiple run calls. - Contract: Adapter logs should maintain a consistent lifecycle behavior - across runs. + Contract: Logs from run 1 must still be present after run 2. + All adapters must accumulate logs, not reset them. 
""" mock_llm = MockLLM(responses=["First response", "Second response"]) agent = create_agent_for_framework(framework, mock_llm) @@ -600,6 +595,52 @@ def test_adapter_logs_accumulate_across_runs(self, framework): adapter.run("Second query") logs_count_after_second = len(adapter.logs) - # Logs should either accumulate or stay consistent - # (we accept both behaviors as long as logs are populated) - assert logs_count_after_second > 0 + # Logs MUST accumulate (not reset) + assert logs_count_after_second > logs_count_after_first + + def test_adapter_invocation_traces_structure(self, framework): + """Test that gather_traces() invocation_traces has correct structure. + + Contract: Each invocation trace must contain invocation number, query, + status, timestamps, and message snapshot — all accessed through + the public gather_traces() API. + """ + mock_llm = MockLLM(responses=["Test response to query"]) + agent = create_agent_for_framework(framework, mock_llm) + adapter = create_adapter_for_framework(framework, agent) + + adapter.run("Test query") + traces = adapter.gather_traces() + + invocation_traces = traces["invocation_traces"] + assert len(invocation_traces) == 1 + + trace = invocation_traces[0] + assert trace["invocation"] == 0 + assert trace["query"] == "Test query" + assert trace["status"] == "success" + assert "started_at" in trace + assert "completed_at" in trace + assert isinstance(trace["messages"], list) + assert len(trace["messages"]) > 0 # Should have at least user + assistant + + def test_adapter_invocation_traces_accumulate(self, framework): + """Test that invocation traces accumulate across multiple runs. + + Contract: Each run() call appends a new invocation trace with + sequential invocation numbers, visible through gather_traces(). 
+ """ + mock_llm = MockLLM(responses=["First response", "Second response"]) + agent = create_agent_for_framework(framework, mock_llm) + adapter = create_adapter_for_framework(framework, agent) + + adapter.run("First query") + adapter.run("Second query") + traces = adapter.gather_traces() + + invocation_traces = traces["invocation_traces"] + assert len(invocation_traces) == 2 + assert invocation_traces[0]["query"] == "First query" + assert invocation_traces[1]["query"] == "Second query" + assert invocation_traces[0]["invocation"] == 0 + assert invocation_traces[1]["invocation"] == 1 diff --git a/tests/test_interface/test_agent_integration/test_camel_integration.py b/tests/test_interface/test_agent_integration/test_camel_integration.py index 46c55620..8f9cdcaa 100644 --- a/tests/test_interface/test_agent_integration/test_camel_integration.py +++ b/tests/test_interface/test_agent_integration/test_camel_integration.py @@ -816,7 +816,7 @@ def test_camel_adapter_gather_traces_with_non_serializable_info(): def test_camel_adapter_get_messages_memory_access_failure(): - """Test get_messages handles memory access failure gracefully.""" + """Test get_messages propagates memory access errors.""" from maseval.interface.agents.camel import CamelAgentAdapter mock_memory = MockCamelMemory() @@ -827,8 +827,8 @@ def test_camel_adapter_get_messages_memory_access_failure(): adapter = CamelAgentAdapter(agent_instance=mock_agent, name="test_agent") - messages = adapter.get_messages() - assert len(messages) == 0 + with pytest.raises(Exception, match="Memory access failed"): + adapter.get_messages() def test_camel_adapter_gather_config_with_model(): @@ -1079,3 +1079,108 @@ def test_workforce_tracer_truncates_long_content(): assert len(traces["completed_tasks"][0]["content"]) == 200 assert len(traces["completed_tasks"][0]["result"]) == 200 + + +# ============================================================================= +# logs Property: ToolCallingRecord, Context Tokens, External Tool 
Calls +# ============================================================================= + + +def test_camel_logs_with_tool_calling_records(): + """Test logs property extracts tool call details from ToolCallingRecord objects.""" + from maseval.interface.agents.camel import CamelAgentAdapter + + mock_agent = create_mock_camel_agent() + adapter = CamelAgentAdapter(agent_instance=mock_agent, name="tool_log_agent") + + # Create ToolCallingRecord mocks + tc1 = Mock() + tc1.tool_name = "search" + tc1.args = {"query": "test"} + tc1.result = "Found 3 results" + tc1.tool_call_id = "tc_001" + tc1.images = None # No images + + tc2 = Mock() + tc2.tool_name = "image_gen" + tc2.args = {"prompt": "cat"} + tc2.result = "Generated image" + tc2.tool_call_id = "tc_002" + tc2.images = [Mock(), Mock()] # 2 images + + mock_response = MockCamelResponse( + content="Here are results", + terminated=False, + info={"tool_calls": [tc1, tc2]}, + ) + adapter._responses.append(mock_response) + + logs = adapter.logs + assert len(logs) == 1 + + assert "tool_calls" in logs[0] + assert len(logs[0]["tool_calls"]) == 2 + + # First tool call - no images + assert logs[0]["tool_calls"][0]["name"] == "search" + assert logs[0]["tool_calls"][0]["arguments"] == {"query": "test"} + assert logs[0]["tool_calls"][0]["result"] == "Found 3 results" + assert logs[0]["tool_calls"][0]["id"] == "tc_001" + assert "images_count" not in logs[0]["tool_calls"][0] + + # Second tool call - with images + assert logs[0]["tool_calls"][1]["name"] == "image_gen" + assert logs[0]["tool_calls"][1]["id"] == "tc_002" + assert logs[0]["tool_calls"][1]["images_count"] == 2 + + +def test_camel_logs_with_context_termination_and_response_id(): + """Test logs property extracts context_tokens, termination_reasons, and response_id.""" + from maseval.interface.agents.camel import CamelAgentAdapter + + mock_agent = create_mock_camel_agent() + adapter = CamelAgentAdapter(agent_instance=mock_agent, name="ctx_agent") + + mock_response = 
MockCamelResponse( + content="Response", + terminated=False, + info={ + "num_tokens": 4096, + "termination_reasons": ["max_tokens"], + "id": "resp_abc", + }, + ) + adapter._responses.append(mock_response) + + logs = adapter.logs + assert len(logs) == 1 + assert logs[0]["context_tokens"] == 4096 + assert logs[0]["termination_reasons"] == ["max_tokens"] + assert logs[0]["response_id"] == "resp_abc" + + +def test_camel_logs_with_external_tool_call_requests(): + """Test logs property extracts external_tool_call_requests as stringified list.""" + from maseval.interface.agents.camel import CamelAgentAdapter + + mock_agent = create_mock_camel_agent() + adapter = CamelAgentAdapter(agent_instance=mock_agent, name="ext_tool_agent") + + mock_req1 = Mock() + mock_req1.__str__ = Mock(return_value="ExtToolCall(name='api_call', args={})") + mock_req2 = Mock() + mock_req2.__str__ = Mock(return_value="ExtToolCall(name='db_query', args={})") + + mock_response = MockCamelResponse( + content="Response", + terminated=False, + info={"external_tool_call_requests": [mock_req1, mock_req2]}, + ) + adapter._responses.append(mock_response) + + logs = adapter.logs + assert len(logs) == 1 + assert "external_tool_call_requests" in logs[0] + assert len(logs[0]["external_tool_call_requests"]) == 2 + assert "api_call" in logs[0]["external_tool_call_requests"][0] + assert "db_query" in logs[0]["external_tool_call_requests"][1] diff --git a/tests/test_interface/test_agent_integration/test_langgraph_integration.py b/tests/test_interface/test_agent_integration/test_langgraph_integration.py index c972a464..96536f71 100644 --- a/tests/test_interface/test_agent_integration/test_langgraph_integration.py +++ b/tests/test_interface/test_agent_integration/test_langgraph_integration.py @@ -240,3 +240,301 @@ def agent_node(state: State) -> State: assert log_entry.get("input_tokens") in [None, 0] assert log_entry.get("output_tokens") in [None, 0] assert log_entry.get("total_tokens") in [None, 0] + + +# 
============================================================================= +# Stateful Graph Fixture and Tests +# ============================================================================= + + +@pytest.fixture +def langgraph_stateful_setup(): + """Create a stateful LangGraph graph with MemorySaver checkpointer.""" + from langgraph.graph import StateGraph, END + from langgraph.checkpoint.memory import MemorySaver + from typing_extensions import TypedDict + from langchain_core.messages import AIMessage + + class State(TypedDict): + messages: list + + def agent_node(state: State) -> State: + messages = state["messages"] + response = AIMessage(content="Test response") + return {"messages": messages + [response]} + + graph = StateGraph(State) # type: ignore[arg-type] + graph.add_node("agent", agent_node) + graph.set_entry_point("agent") + graph.add_edge("agent", END) + + memory = MemorySaver() + compiled = graph.compile(checkpointer=memory) + config = {"configurable": {"thread_id": "test_thread_1"}} + + return compiled, config + + +def test_langgraph_gather_config_with_checkpointer(langgraph_stateful_setup): + """Test gather_config() captures checkpointer, sanitized config, and graph structure.""" + from maseval.interface.agents.langgraph import LangGraphAgentAdapter + + compiled, config = langgraph_stateful_setup + adapter = LangGraphAgentAdapter(agent_instance=compiled, name="stateful_agent", config=config) + + result = adapter.gather_config() + + assert result["name"] == "stateful_agent" + assert result["adapter_type"] == "LangGraphAgentAdapter" + assert "langgraph_config" in result + + lg_config = result["langgraph_config"] + + # Checkpointer detected + assert lg_config["has_checkpointer"] is True + + # Config sanitized: thread_id replaced with has_thread_id flag + assert lg_config["config"]["configurable"]["has_thread_id"] is True + assert "thread_id" not in lg_config["config"]["configurable"] + + # Graph structure info + assert "graph_info" in lg_config + 
assert lg_config["graph_info"]["num_nodes"] >= 2 # at least agent + __end__ + assert lg_config["graph_info"]["num_edges"] >= 1 + + +def test_langgraph_gather_config_no_checkpointer(): + """Test gather_config() for a stateless graph without checkpointer.""" + from maseval.interface.agents.langgraph import LangGraphAgentAdapter + from langgraph.graph import StateGraph, END + from typing_extensions import TypedDict + from langchain_core.messages import AIMessage + + class State(TypedDict): + messages: list + + def agent_node(state: State) -> State: + return {"messages": state["messages"] + [AIMessage(content="Response")]} + + graph = StateGraph(State) # type: ignore[arg-type] + graph.add_node("agent", agent_node) + graph.set_entry_point("agent") + graph.add_edge("agent", END) + compiled = graph.compile() + + adapter = LangGraphAgentAdapter(agent_instance=compiled, name="stateless_agent") + result = adapter.gather_config() + + assert "langgraph_config" in result + lg_config = result["langgraph_config"] + assert lg_config["has_checkpointer"] is False + # No config provided, so no config key in the output + assert "config" not in lg_config + + +def test_langgraph_stateful_get_messages(langgraph_stateful_setup): + """Test get_messages() fetches from persistent state for stateful graphs.""" + from maseval.interface.agents.langgraph import LangGraphAgentAdapter + + compiled, config = langgraph_stateful_setup + adapter = LangGraphAgentAdapter(agent_instance=compiled, name="stateful_agent", config=config) + + adapter.run("Hello") + messages = adapter.get_messages() + + assert len(messages) == 2 + assert messages[0]["role"] == "user" + assert messages[0]["content"] == "Hello" + assert messages[1]["role"] == "assistant" + assert messages[1]["content"] == "Test response" + + +def test_langgraph_run_checkpoint_metadata(langgraph_stateful_setup): + """Test that logs include checkpoint metadata for stateful graphs.""" + from maseval.interface.agents.langgraph import 
LangGraphAgentAdapter + + compiled, config = langgraph_stateful_setup + adapter = LangGraphAgentAdapter(agent_instance=compiled, name="stateful_agent", config=config) + + adapter.run("Hello") + + assert len(adapter.logs) == 1 + log_entry = adapter.logs[0] + assert "checkpoint_metadata" in log_entry + assert "source" in log_entry["checkpoint_metadata"] + assert "step" in log_entry["checkpoint_metadata"] + assert "checkpoint_created_at" in log_entry + + +# ============================================================================= +# Message Conversion Tests +# ============================================================================= + + +def test_langgraph_convert_tool_and_system_messages(): + """Test _convert_langchain_messages handles SystemMessage, ToolMessage, and AI with tool_calls.""" + from maseval.interface.agents.langgraph import LangGraphAgentAdapter + from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage + from langgraph.graph import StateGraph, END + from typing_extensions import TypedDict + + class State(TypedDict): + messages: list + + graph = StateGraph(State) # type: ignore[arg-type] + graph.add_node("agent", lambda s: s) # type: ignore[invalid-argument-type] + graph.set_entry_point("agent") + graph.add_edge("agent", END) + compiled = graph.compile() + + adapter = LangGraphAgentAdapter(agent_instance=compiled, name="msg_agent") + + lc_messages = [ + SystemMessage(content="You are helpful."), + HumanMessage(content="What is 2+2?"), + AIMessage( + content="", + tool_calls=[{"id": "call_1", "name": "calculator", "args": {"expr": "2+2"}}], + ), + ToolMessage(content="4", tool_call_id="call_1", name="calculator"), + AIMessage(content="The answer is 4."), + ] + + history = adapter._convert_langchain_messages(lc_messages) + + assert len(history) == 5 + + assert history[0]["role"] == "system" + assert history[0]["content"] == "You are helpful." 
+ + assert history[1]["role"] == "user" + assert history[1]["content"] == "What is 2+2?" + + assert history[2]["role"] == "assistant" + assert history[2]["content"] == "" + assert history[2]["tool_calls"][0]["name"] == "calculator" + + assert history[3]["role"] == "tool" + assert history[3]["content"] == "4" + assert history[3]["tool_call_id"] == "call_1" + assert history[3]["name"] == "calculator" + + assert history[4]["role"] == "assistant" + assert history[4]["content"] == "The answer is 4." + assert "tool_calls" not in history[4] + + +# ============================================================================= +# LLMUser.get_tool() Test +# ============================================================================= + + +def test_langgraph_llm_user_get_tool(): + """Test LangGraphLLMUser.get_tool() returns a LangChain tool named ask_user.""" + from maseval.interface.agents.langgraph import LangGraphLLMUser + from langchain_core.tools import BaseTool + from unittest.mock import Mock + + mock_model = Mock() + user = LangGraphLLMUser( + name="test_user", + model=mock_model, + user_profile={"role": "tester"}, + scenario="test scenario", + initial_query="hello", + ) + + tool = user.get_tool() + + assert isinstance(tool, BaseTool) + assert tool.name == "ask_user" + + +# ============================================================================= +# Phase 2 Hook: _MASEvalLangChainHandler Tests +# ============================================================================= + + +def test_langgraph_callback_handler_chain_and_tool_events(): + """Test _MASEvalLangChainHandler captures chain, tool, and llm events.""" + from maseval.interface.agents.langgraph import _MASEvalLangChainHandler + from uuid import uuid4 + + trace_buffer = [] + handler = _MASEvalLangChainHandler(trace_buffer) + + run_id_chain = uuid4() + parent_run_id = uuid4() + run_id_tool = uuid4() + run_id_llm = uuid4() + + # chain_start + handler.on_chain_start( + serialized={"id": ["langchain", "chains", 
"MyChain"]}, + inputs={"query": "test"}, + run_id=run_id_chain, + parent_run_id=parent_run_id, + ) + + # tool_start + handler.on_tool_start( + serialized={"name": "calculator"}, + input_str="2+2", + run_id=run_id_tool, + ) + + # tool_end + handler.on_tool_end(output="4", run_id=run_id_tool) + + # llm_end + from unittest.mock import Mock + + handler.on_llm_end(response=Mock(), run_id=run_id_llm) + + # chain_end + handler.on_chain_end(outputs={"result": "4"}, run_id=run_id_chain) + + assert len(trace_buffer) == 5 + + # chain_start + assert trace_buffer[0]["source"] == "langgraph_callback" + assert trace_buffer[0]["event"] == "chain_start" + assert trace_buffer[0]["chain_type"] == "MyChain" + assert trace_buffer[0]["run_id"] == str(run_id_chain) + assert trace_buffer[0]["parent_run_id"] == str(parent_run_id) + + # tool_start + assert trace_buffer[1]["event"] == "tool_start" + assert trace_buffer[1]["tool_name"] == "calculator" + assert trace_buffer[1]["run_id"] == str(run_id_tool) + + # tool_end + assert trace_buffer[2]["event"] == "tool_end" + assert trace_buffer[2]["run_id"] == str(run_id_tool) + + # llm_end + assert trace_buffer[3]["event"] == "llm_end" + assert trace_buffer[3]["run_id"] == str(run_id_llm) + + # chain_end + assert trace_buffer[4]["event"] == "chain_end" + assert trace_buffer[4]["run_id"] == str(run_id_chain) + + +def test_langgraph_callback_handler_no_op_methods(): + """Test that no-op handler methods don't append to trace buffer.""" + from maseval.interface.agents.langgraph import _MASEvalLangChainHandler + from uuid import uuid4 + + trace_buffer = [] + handler = _MASEvalLangChainHandler(trace_buffer) + run_id = uuid4() + + handler.on_chat_model_start(serialized={}, messages=[[]], run_id=run_id) + handler.on_llm_start(serialized={}, prompts=["test"], run_id=run_id) + handler.on_chain_error(error=RuntimeError("test"), run_id=run_id) + handler.on_tool_error(error=RuntimeError("test"), run_id=run_id) + handler.on_llm_error(error=RuntimeError("test"), 
run_id=run_id) + handler.on_llm_new_token(token="hello", run_id=run_id) + + assert len(trace_buffer) == 0 diff --git a/tests/test_interface/test_agent_integration/test_llamaindex_integration.py b/tests/test_interface/test_agent_integration/test_llamaindex_integration.py index 52779f65..e9895d01 100644 --- a/tests/test_interface/test_agent_integration/test_llamaindex_integration.py +++ b/tests/test_interface/test_agent_integration/test_llamaindex_integration.py @@ -393,3 +393,196 @@ def test_llamaindex_adapter_error_logging(): assert log_entry["error"] == "Test error" assert log_entry["error_type"] == "ValueError" assert "duration_seconds" in log_entry + + +# ============================================================================= +# get_messages() from Agent Memory +# ============================================================================= + + +def test_llamaindex_get_messages_from_agent_memory(): + """Test get_messages() fetches from agent.memory.get_all() when available.""" + from maseval.interface.agents.llamaindex import LlamaIndexAgentAdapter + from llama_index.core.base.llms.types import ChatMessage, MessageRole + from unittest.mock import Mock + + mock_agent = Mock() + mock_memory = Mock() + mock_memory.get_all.return_value = [ + ChatMessage(role=MessageRole.USER, content="What is AI?"), + ChatMessage(role=MessageRole.ASSISTANT, content="AI is artificial intelligence."), + ] + mock_agent.memory = mock_memory + + adapter = LlamaIndexAgentAdapter(mock_agent, "memory_agent") + messages = adapter.get_messages() + + assert len(messages) == 2 + assert messages[0]["role"] == "user" + assert messages[0]["content"] == "What is AI?" + assert messages[1]["role"] == "assistant" + assert messages[1]["content"] == "AI is artificial intelligence." 
+ mock_memory.get_all.assert_called_once() + + +# ============================================================================= +# Token Usage Extraction +# ============================================================================= + + +def test_llamaindex_run_token_usage_extraction(): + """Test _run_agent() extracts token usage from result.raw.usage.""" + from maseval.interface.agents.llamaindex import LlamaIndexAgentAdapter + from llama_index.core.base.llms.types import ChatMessage, MessageRole + from unittest.mock import Mock + + mock_agent = Mock() + mock_result = Mock() + mock_result.response = ChatMessage(role=MessageRole.ASSISTANT, content="Answer") + mock_result.raw = Mock() + mock_result.raw.usage = Mock() + mock_result.raw.usage.total_tokens = 150 + mock_result.raw.usage.prompt_tokens = 100 + mock_result.raw.usage.completion_tokens = 50 + mock_agent.run_sync = Mock(return_value=mock_result) + + adapter = LlamaIndexAgentAdapter(mock_agent, "token_agent") + adapter.run("test query") + + assert len(adapter.logs) == 1 + log_entry = adapter.logs[0] + assert log_entry["total_tokens"] == 150 + assert log_entry["input_tokens"] == 100 + assert log_entry["output_tokens"] == 50 + + +# ============================================================================= +# gather_config() with workflow_config +# ============================================================================= + + +def test_llamaindex_gather_config_workflow_config(): + """Test gather_config() includes workflow_config from agent.get_config().""" + from maseval.interface.agents.llamaindex import LlamaIndexAgentAdapter + from unittest.mock import Mock + + mock_agent = Mock(spec=["name", "get_config"]) + mock_agent.name = "workflow_agent" + mock_agent.get_config = Mock(return_value={"timeout": 30, "max_retries": 3}) + + adapter = LlamaIndexAgentAdapter(mock_agent, "config_agent") + config = adapter.gather_config() + + assert "llamaindex_config" in config + assert 
config["llamaindex_config"]["workflow_config"] == {"timeout": 30, "max_retries": 3} + + +# ============================================================================= +# _convert_single_message() Tool Message Fields +# ============================================================================= + + +def test_llamaindex_convert_tool_message_with_tool_call_id_and_name(): + """Test _convert_single_message() extracts tool_call_id and name from additional_kwargs.""" + from maseval.interface.agents.llamaindex import LlamaIndexAgentAdapter + from llama_index.core.base.llms.types import ChatMessage, MessageRole + from unittest.mock import Mock + + mock_agent = Mock() + adapter = LlamaIndexAgentAdapter(mock_agent, "tool_msg_agent") + + msg = ChatMessage( + role=MessageRole.TOOL, + content="42", + additional_kwargs={"tool_call_id": "call_abc", "name": "calculator"}, + ) + + converted = adapter._convert_single_message(msg) + + assert converted["role"] == "tool" + assert converted["content"] == "42" + assert converted["tool_call_id"] == "call_abc" + assert converted["name"] == "calculator" + + +# ============================================================================= +# Phase 2 Hook: _MASEvalSpanHandler Tests +# ============================================================================= + + +def test_llamaindex_span_handler_enter_exit_lifecycle(): + """Test span handler enter/exit lifecycle records to trace buffer and completed_spans.""" + from maseval.interface.agents.llamaindex import _MASEvalSpanHandler + + handler = _MASEvalSpanHandler() + handler._active = True + + # Enter a span + handler.span_enter(id_="span_1", bound_args={}, parent_id=None) + + assert "span_1" in handler.open_spans + + # Exit the span + handler.span_exit(id_="span_1", bound_args={}) + + assert "span_1" not in handler.open_spans + assert len(handler.completed_spans) == 1 + assert len(handler._trace_buffer) == 1 + + entry = handler._trace_buffer[0] + assert entry["source"] == 
"llamaindex_span" + assert entry["event"] == "span_exit" + assert entry["span_id"] == "span_1" + assert isinstance(entry["duration"], float) + + +def test_llamaindex_span_handler_drop_lifecycle(): + """Test span handler drop records error info and moves to dropped_spans.""" + from maseval.interface.agents.llamaindex import _MASEvalSpanHandler + + handler = _MASEvalSpanHandler() + handler._active = True + + # Open then drop a span + handler.span_enter(id_="span_2", bound_args={}) + handler.span_drop(id_="span_2", bound_args={}, err=ValueError("timeout")) + + assert "span_2" not in handler.open_spans + assert len(handler.dropped_spans) == 1 + assert len(handler._trace_buffer) == 1 + + entry = handler._trace_buffer[0] + assert entry["event"] == "span_drop" + assert entry["span_id"] == "span_2" + assert entry["error"] == "timeout" + + +def test_llamaindex_span_handler_inactive_skips(): + """Test span handler skips span creation when _active is False.""" + from maseval.interface.agents.llamaindex import _MASEvalSpanHandler + + handler = _MASEvalSpanHandler() + # Default: _active is False + + handler.span_enter(id_="span_3", bound_args={}) + + assert handler.open_spans == {} + assert handler._trace_buffer == [] + + +def test_llamaindex_span_handler_exit_unknown_span(): + """Test span handler gracefully handles exit/drop for unknown span ids.""" + from maseval.interface.agents.llamaindex import _MASEvalSpanHandler + + handler = _MASEvalSpanHandler() + handler._active = True + + # Exit an unknown span - should not crash + handler.span_exit(id_="nonexistent", bound_args={}) + + # Drop an unknown span - should not crash + handler.span_drop(id_="nonexistent", bound_args={}, err=RuntimeError("x")) + + assert handler.completed_spans == [] + assert handler.dropped_spans == [] + assert handler._trace_buffer == [] diff --git a/tests/test_interface/test_agent_integration/test_smolagents_integration.py b/tests/test_interface/test_agent_integration/test_smolagents_integration.py 
index cfa1627a..7d3a2ac3 100644 --- a/tests/test_interface/test_agent_integration/test_smolagents_integration.py +++ b/tests/test_interface/test_agent_integration/test_smolagents_integration.py @@ -39,10 +39,12 @@ def test_check_smolagents_installed_function(): def test_smolagents_adapter_creation(): """Test that SmolAgentAdapter can be created.""" + from unittest.mock import Mock + from maseval.interface.agents.smolagents import SmolAgentAdapter - # Create adapter with mock agent - agent_adapter = SmolAgentAdapter(agent_instance=object(), name="test_agent") + # Create adapter with mock agent (Mock auto-creates step_callbacks.register) + agent_adapter = SmolAgentAdapter(agent_instance=Mock(), name="test_agent") assert agent_adapter.name == "test_agent" assert agent_adapter.agent is not None @@ -245,10 +247,10 @@ def test_smolagents_adapter_gather_traces_with_planning_step(): assert "observations" not in step_detail -def test_smolagents_adapter_logs_property(): - """Test that SmolAgentAdapter.logs property returns converted memory steps. +def test_smolagents_adapter_extract_current_logs(): + """Test that SmolAgentAdapter._extract_current_logs() returns converted memory steps. 
- This test validates that the logs property correctly extracts all relevant + This test validates that the log extraction correctly extracts all relevant information from smolagents' internal memory system, including: - Step types (ActionStep, PlanningStep) - Timing information (start_time, end_time, duration) @@ -301,8 +303,9 @@ def test_smolagents_adapter_logs_property(): # Create adapter adapter = SmolAgentAdapter(agent_instance=mock_agent, name="test_agent") - # Access logs property - logs = adapter.logs + # Use _extract_current_logs() to test the conversion logic + # (logs property returns _accumulated_logs, populated only via _run_agent()) + logs = adapter._extract_current_logs() # Verify logs structure assert isinstance(logs, list) @@ -351,7 +354,7 @@ def test_smolagents_adapter_logs_property(): def test_smolagents_adapter_logs_with_errors(): - """Test that adapter.logs captures error information from failed steps.""" + """Test that _extract_current_logs() captures error information from failed steps.""" from maseval.interface.agents.smolagents import SmolAgentAdapter from smolagents import AgentError from smolagents.memory import ActionStep, AgentMemory @@ -381,8 +384,8 @@ def test_smolagents_adapter_logs_with_errors(): # Create adapter adapter = SmolAgentAdapter(agent_instance=mock_agent, name="test_agent") - # Access logs property - logs = adapter.logs + # Use _extract_current_logs() to test the conversion logic + logs = adapter._extract_current_logs() # Verify error is captured assert len(logs) == 1 @@ -390,8 +393,8 @@ def test_smolagents_adapter_logs_with_errors(): assert logs[0]["error"] == "Tool execution failed: Connection timeout" -def test_smolagents_adapter_logs_empty_when_no_steps(): - """Test that adapter.logs returns empty list when no execution has occurred.""" +def test_smolagents_adapter_extract_current_logs_empty_when_no_steps(): + """Test that _extract_current_logs() returns empty list when no execution has occurred.""" from 
maseval.interface.agents.smolagents import SmolAgentAdapter from smolagents.memory import AgentMemory from unittest.mock import Mock @@ -404,9 +407,391 @@ def test_smolagents_adapter_logs_empty_when_no_steps(): # Create adapter adapter = SmolAgentAdapter(agent_instance=mock_agent, name="test_agent") - # Access logs property - logs = adapter.logs + # Use _extract_current_logs() to test the conversion logic + logs = adapter._extract_current_logs() # Should be empty assert isinstance(logs, list) assert len(logs) == 0 + + +# ============================================================================= +# gather_config() Tests +# ============================================================================= + + +def test_smolagents_gather_config_with_to_dict(): + """Test gather_config() uses agent.to_dict() when available.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from unittest.mock import Mock + + mock_agent = Mock() + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + to_dict_data = {"max_steps": 10, "model": {"class": "FakeModel"}, "tools": []} + mock_agent.to_dict = Mock(return_value=to_dict_data) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="config_agent") + config = adapter.gather_config() + + # Base keys + assert config["name"] == "config_agent" + assert config["adapter_type"] == "SmolAgentAdapter" + assert "type" in config + assert "gathered_at" in config + assert "agent_type" in config + + # smolagents_config from to_dict() + assert config["smolagents_config"] == to_dict_data + + +def test_smolagents_gather_config_fallback_without_to_dict(): + """Test gather_config() falls back to manual attribute collection when to_dict is absent.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from unittest.mock import Mock + + mock_agent = Mock( + spec=[ + "memory", + "write_memory_to_messages", + "step_callbacks", + "max_steps", + 
"planning_interval", + "name", + "description", + "additional_authorized_imports", + "executor_type", + ] + ) + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + mock_agent.max_steps = 5 + mock_agent.planning_interval = 3 + mock_agent.name = "my_agent" + mock_agent.description = "A test agent" + mock_agent.additional_authorized_imports = ["os"] + mock_agent.executor_type = "local" + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="fallback_agent") + config = adapter.gather_config() + + assert "smolagents_config" in config + smolagents_config = config["smolagents_config"] + assert smolagents_config["max_steps"] == 5 + assert smolagents_config["planning_interval"] == 3 + assert smolagents_config["name"] == "my_agent" + assert smolagents_config["description"] == "A test agent" + assert smolagents_config["additional_authorized_imports"] == ["os"] + assert smolagents_config["executor_type"] == "local" + + +def test_smolagents_gather_config_to_dict_raises_falls_back(): + """Test gather_config() falls back to attributes when to_dict() raises.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from unittest.mock import Mock + + mock_agent = Mock() + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + mock_agent.to_dict = Mock(side_effect=RuntimeError("serialization failed")) + mock_agent.max_steps = 7 + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="error_agent") + config = adapter.gather_config() + + assert "smolagents_config" in config + assert config["smolagents_config"]["max_steps"] == 7 + + +# ============================================================================= +# _run_agent() and logs Tests +# ============================================================================= + + +def test_smolagents_run_populates_accumulated_logs(): + """Test that run() populates accumulated 
logs from agent memory.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from smolagents.memory import ActionStep, AgentMemory + from smolagents.monitoring import Timing + from unittest.mock import Mock + import time + + mock_agent = Mock() + mock_agent.memory = AgentMemory(system_prompt="Test") + + start_time = time.time() + step = ActionStep( + step_number=1, + timing=Timing(start_time=start_time, end_time=start_time + 0.1), + observations_images=[], + ) + mock_agent.memory.steps.append(step) + mock_agent.run = Mock(return_value="final answer") + mock_agent.write_memory_to_messages = Mock(return_value=[]) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="run_agent") + result = adapter.run("test query") + + assert result == "final answer" + assert len(adapter.logs) == 1 + assert adapter.logs[0]["step_type"] == "ActionStep" + assert adapter.logs[0]["step_number"] == 1 + + +# ============================================================================= +# _extract_current_logs() TaskStep Branch +# ============================================================================= + + +def test_smolagents_extract_current_logs_task_step(): + """Test _extract_current_logs() handles TaskStep with and without images.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from smolagents.memory import TaskStep, AgentMemory + from unittest.mock import Mock + + mock_agent = Mock() + mock_agent.memory = AgentMemory(system_prompt="Test") + mock_agent.write_memory_to_messages = Mock(return_value=[]) + + # TaskStep without images + task_step = TaskStep(task="Solve the puzzle") + mock_agent.memory.steps.append(task_step) + + # TaskStep with images + task_step_with_images = TaskStep(task="Analyze the image") + task_step_with_images.task_images = [Mock(), Mock()] + mock_agent.memory.steps.append(task_step_with_images) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="task_agent") + logs = adapter._extract_current_logs() 
+ + assert len(logs) == 2 + + assert logs[0]["step_type"] == "TaskStep" + assert logs[0]["task"] == "Solve the puzzle" + assert "task_images_count" not in logs[0] + + assert logs[1]["step_type"] == "TaskStep" + assert logs[1]["task"] == "Analyze the image" + assert logs[1]["task_images_count"] == 2 + + +# ============================================================================= +# SmolAgentLLMUser.get_tool() Test +# ============================================================================= + + +def test_smolagents_llm_user_get_tool(): + """Test SmolAgentLLMUser.get_tool() returns a SmolAgentUserSimulationInputTool.""" + from maseval.interface.agents.smolagents import SmolAgentLLMUser + from maseval.interface.agents.smolagents_optional import SmolAgentUserSimulationInputTool + from unittest.mock import Mock + + mock_model = Mock() + user = SmolAgentLLMUser( + name="tool_user", + model=mock_model, + user_profile={"role": "tester"}, + scenario="test scenario", + initial_query="hello", + ) + + tool = user.get_tool() + + assert isinstance(tool, SmolAgentUserSimulationInputTool) + assert hasattr(tool, "forward") + + +# ============================================================================= +# Message Conversion with Tool Calls +# ============================================================================= + + +def test_smolagents_message_conversion_tool_call_attributes(): + """Test _convert_smolagents_messages preserves tool_calls from ChatMessage objects.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from smolagents.models import ChatMessage, ChatMessageToolCall, ChatMessageToolCallFunction, MessageRole + from unittest.mock import Mock + + mock_agent = Mock() + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="msg_agent") + + # Create ChatMessage with tool_calls attribute using proper types + 
tool_call = ChatMessageToolCall( + id="call_1", + type="function", + function=ChatMessageToolCallFunction(name="search", arguments='{"q": "test"}'), + ) + msg = ChatMessage(role=MessageRole.ASSISTANT, content="Using tool", tool_calls=[tool_call]) + + history = adapter._convert_smolagents_messages([msg]) + + assert len(history) == 1 + assert history[0]["role"] == "assistant" + assert history[0]["content"] == "Using tool" + assert "tool_calls" in history[0] + assert len(history[0]["tool_calls"]) == 1 + assert history[0]["tool_calls"][0].id == "call_1" + assert history[0]["tool_calls"][0].function.name == "search" + + +# ============================================================================= +# Phase 2 Hook: _on_step() Tests +# ============================================================================= + + +def test_smolagents_on_step_action_step(): + """Test _on_step() callback handles ActionStep with tool calls.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from smolagents.memory import ActionStep, ToolCall + from smolagents.monitoring import Timing + from unittest.mock import Mock + import time + + mock_agent = Mock() + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="hook_agent") + + # Create an ActionStep + t = time.time() + action_step = ActionStep(step_number=3, timing=Timing(start_time=t, end_time=t + 0.1), observations_images=[]) + action_step.error = None + action_step.tool_calls = [ToolCall(name="search", arguments={"q": "test"}, id="tc_1")] + + # Create a mock agent with a name attribute + mock_calling_agent = Mock() + mock_calling_agent.name = "sub_agent" + + # Call _on_step directly + adapter._on_step(action_step, agent=mock_calling_agent) + + assert len(adapter._trace_buffer) == 1 + entry = adapter._trace_buffer[0] + assert entry["source"] == "smolagents_step_callback" + assert 
entry["step_type"] == "ActionStep" + assert entry["agent_name"] == "sub_agent" + assert entry["step_number"] == 3 + assert entry["has_error"] is False + assert entry["tool_calls"] == ["search"] + + +def test_smolagents_on_step_planning_step(): + """Test _on_step() callback handles PlanningStep.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from smolagents.memory import PlanningStep + from smolagents.monitoring import Timing + from smolagents.models import ChatMessage, MessageRole + from unittest.mock import Mock + import time + + mock_agent = Mock() + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="hook_agent") + + t = time.time() + planning_step = PlanningStep( + timing=Timing(start_time=t, end_time=t + 0.1), + model_input_messages=[], + model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="plan"), + plan="Step 1\nStep 2\nStep 3", + ) + + adapter._on_step(planning_step, agent=None) + + assert len(adapter._trace_buffer) == 1 + entry = adapter._trace_buffer[0] + assert entry["source"] == "smolagents_step_callback" + assert entry["step_type"] == "PlanningStep" + assert entry["agent_name"] is None + assert entry["plan_length"] == len("Step 1\nStep 2\nStep 3") # 20 + + +def test_smolagents_message_conversion_dict_format_with_tool_fields(): + """Test _convert_smolagents_messages handles dict-format messages with tool fields.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from unittest.mock import Mock + + mock_agent = Mock() + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="dict_msg_agent") + + # Dict-format messages with tool_calls, tool_call_id, name, metadata + dict_messages = [ + { + "role": "assistant", + "content": "", + 
"tool_calls": [{"id": "call_1", "function": {"name": "calc", "arguments": "{}"}}], + "metadata": {"source": "test"}, + }, + { + "role": "tool", + "content": "42", + "tool_call_id": "call_1", + "name": "calc", + }, + { + "role": "user", + "content": "Thanks", + }, + ] + + history = adapter._convert_smolagents_messages(dict_messages) + + assert len(history) == 3 + + # Assistant with tool_calls and metadata + assert history[0]["role"] == "assistant" + assert history[0]["tool_calls"][0]["id"] == "call_1" + assert history[0]["metadata"]["source"] == "test" + + # Tool message with tool_call_id and name + assert history[1]["role"] == "tool" + assert history[1]["content"] == "42" + assert history[1]["tool_call_id"] == "call_1" + assert history[1]["name"] == "calc" + + # Regular user message + assert history[2]["role"] == "user" + assert history[2]["content"] == "Thanks" + + +def test_smolagents_message_conversion_non_string_role(): + """Test _convert_smolagents_messages handles roles that are neither enums nor strings.""" + from maseval.interface.agents.smolagents import SmolAgentAdapter + from unittest.mock import Mock + + mock_agent = Mock() + mock_agent.memory = Mock() + mock_agent.memory.steps = [] + mock_agent.write_memory_to_messages = Mock(return_value=[]) + + adapter = SmolAgentAdapter(agent_instance=mock_agent, name="role_agent") + + # Create a ChatMessage-like object with a role that's not an enum and not a string + msg = Mock() + msg.role = 42 # integer role (edge case) + msg.content = "test" + msg.tool_calls = None + msg.tool_call_id = None + + history = adapter._convert_smolagents_messages([msg]) + + assert len(history) == 1 + assert history[0]["role"] == "42" # converted via str().lower() + assert history[0]["content"] == "test"