table_chat_agent

langroid/agent/special/table_chat_agent.py

Agent that supports asking queries about a tabular dataset, internally represented as a Pandas dataframe. The TableChatAgent is configured with a dataset, which can be a Pandas df, file or URL. The delimiter/separator is auto-detected. In response to a user query, the Agent's LLM generates a Pandas expression (involving a dataframe df) to answer the query. The expression is passed via the pandas_eval tool/function-call, which is handled by the Agent's pandas_eval method. This method evaluates the expression and returns the result as a string.

WARNING: This Agent should be used only with trusted input, as it can execute system commands.

The full_eval flag is False by default, which means that the input is sanitized against the most common code-injection attack vectors. Setting full_eval to True disables sanitization entirely. Both modes should be used with caution.

`PandasEvalTool` ¶

Bases: ToolMessage

Tool/function to evaluate a pandas expression involving a dataframe df

`TableChatAgent(config)` ¶

Bases: ChatAgent

Agent for chatting with a tabular dataset (internally a Pandas dataframe).

Source code in langroid/agent/special/table_chat_agent.py

def __init__(self, config: TableChatAgentConfig):
    """
    Set up the agent around the tabular data referenced by `config`.

    Args:
        config (TableChatAgentConfig): Agent configuration. `config.data` is
            used directly when it is already a DataFrame; otherwise it is
            loaded via `read_tabular_data` using `config.separator`.
            `config.system_message` is overwritten with its `summary`
            placeholder filled in.
    """
    # An in-memory DataFrame is taken as-is; anything else is read in.
    table = (
        config.data
        if isinstance(config.data, pd.DataFrame)
        else read_tabular_data(config.data, config.separator)
    )

    # Trim column names and collapse runs of spaces into one underscore.
    trimmed_names = table.columns.str.strip()
    table.columns = trimmed_names.str.replace(" +", "_", regex=True)
    self.df = table

    # Inject the dataframe summary into the system prompt before the
    # base-class constructor receives the config.
    config.system_message = config.system_message.format(
        summary=dataframe_summary(table)
    )

    super().__init__(config)
    self.config: TableChatAgentConfig = config

    logger.info(
        f"""TableChatAgent initialized with dataframe of shape {self.df.shape}
        and columns: 
        {self.df.columns}
        """
    )
    # Let the agent both emit and handle PandasEvalTool messages.
    self.enable_message(PandasEvalTool)

`pandas_eval(msg)` ¶

Handle a PandasEvalTool message by evaluating the `expression` field and returning the result.

Args: msg (PandasEvalTool): The tool-message to handle.

Returns:

Name	Type	Description
`str`	`str`	The result of running the code along with any print output.

Source code in langroid/agent/special/table_chat_agent.py

def pandas_eval(self, msg: PandasEvalTool) -> str:
    """
    Handle a PandasEvalTool message by evaluating the `expression` field
        and returning the result.

    The expression is evaluated with the name `df` bound to `self.df`.
    Anything the expression prints is captured and placed before the value
    of the expression in the returned string. Exceptions raised while
    sanitizing, compiling or evaluating are not propagated; they are
    reported in the returned string as `ERROR: <type>: <message>`.

    Args:
        msg (PandasEvalTool): The tool-message to handle.

    Returns:
        str: The result of running the code along with any print output,
            or "No result" when both are empty.
    """
    self.sent_expression = True
    exprn = msg.expression
    eval_globals = {"df": self.df}
    # String-based stream that collects anything the expression prints
    code_out = io.StringIO()

    # Remember the stream that is installed *right now*: it may already be a
    # redirect (notebook, test capture, caller's own redirect), so restoring
    # `sys.__stdout__` afterwards would silently drop that redirect.
    saved_stdout = sys.stdout
    sys.stdout = code_out

    # SECURITY MITIGATION: Eval input is sanitized by default to prevent most
    # common code injection attack vectors.
    # NOTE: with `full_eval` enabled the raw expression reaches `eval`;
    # only use that mode with trusted input.
    try:
        if not self.config.full_eval:
            exprn = sanitize_command(exprn)
        code = compile(exprn, "<calc>", "eval")
        eval_result = eval(code, eval_globals, {})
    except Exception as e:
        eval_result = f"ERROR: {type(e)}: {e}"
    finally:
        # Runs even for BaseException (e.g. SystemExit), so stdout is never
        # left pointing at the temporary buffer.
        sys.stdout = saved_stdout

    if eval_result is None:
        eval_result = ""

    # Re-read `df` from the evaluation namespace so that self.df refers to
    # whatever object that namespace holds after the evaluation.
    self.df = eval_globals["df"]

    # Combine the captured print output and the expression value
    print_result = code_out.getvalue() or ""
    sep = "\n" if print_result else ""
    result = f"{print_result}{sep}{eval_result}"
    if result == "":
        result = "No result"
    return result

`handle_message_fallback(msg)` ¶

Handle various LLM deviations

Source code in langroid/agent/special/table_chat_agent.py

def handle_message_fallback(
    self, msg: str | ChatDocument
) -> str | ChatDocument | None:
    """
    Steer the LLM back on course when its message deviates from the protocol.

    Only `ChatDocument` messages whose sender is the LLM are handled; any
    other message yields `None`.

    Args:
        msg (str | ChatDocument): The message that was not otherwise handled.

    Returns:
        str | ChatDocument | None: A corrective prompt, or `DONE + " " + PASS`
            when an expression had been sent and the LLM did not reply with
            a bare DONE, or `None` when the message is not from the LLM.
    """
    is_llm_doc = (
        isinstance(msg, ChatDocument) and msg.metadata.sender == lr.Entity.LLM
    )
    if not is_llm_doc:
        return None

    said_only_done = msg.content.strip() == DONE

    if not self.sent_expression:
        # No expression has been sent: the LLM skipped the `pandas_eval` tool
        return """
                You forgot to use the `pandas_eval` tool/function 
                to find the answer.
                Try again using the `pandas_eval` tool/function.
                """

    if said_only_done:
        # The tool was used, but after seeing its results the LLM replied
        # with a bare DONE instead of narrating the answer.
        return """
                You forgot to PRESENT the answer to the user's query
                based on the results from `pandas_eval` tool.
            """

    # The tool was used but the LLM omitted DONE: clear the flag and
    # supply DONE (plus PASS) on its behalf.
    self.sent_expression = False
    return DONE + " " + PASS

`dataframe_summary(df)` ¶

Generate a structured summary for a pandas DataFrame containing numerical and categorical values.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	The input DataFrame to summarize.	required

Returns:

Name	Type	Description
`str`	`str`	A nicely structured and formatted summary string.

Source code in langroid/agent/special/table_chat_agent.py

@no_type_check
def dataframe_summary(df: pd.DataFrame) -> str:
    """
    Generate a structured summary for a pandas DataFrame containing numerical
    and categorical values.

    The summary has four sections, in this order: column names, the output of
    `df.describe()` with cells formatted to two decimals, the distinct values
    (or their count, when there are 10 or more) of each object-dtype column,
    and the number of missing values per column.

    Args:
        df (pd.DataFrame): The input DataFrame to summarize.

    Returns:
        str: A nicely structured and formatted summary string.
    """

    def _format_cell(value) -> str:
        # When a frame has no numeric columns, `describe()` reports
        # count/unique/top/freq, and `top` can be an arbitrary object such as
        # a string; those cells cannot take a float format spec, so fall back
        # to their plain string form instead of raising.
        try:
            return "{:.2f}".format(value)
        except (ValueError, TypeError):
            return str(value)

    # Column names display
    col_names_str = (
        "COLUMN NAMES:\n" + " ".join([f"'{col}'" for col in df.columns]) + "\n\n"
    )

    # Numerical data summary
    if len(df.columns) == 0:
        # `describe()` raises on a frame without columns
        num_table = "No columns to summarize"
    else:
        described = df.describe()
        # Element-wise `DataFrame.map` only exists on newer pandas; older
        # releases provide the same operation as `applymap`.
        elementwise = (
            described.map if hasattr(pd.DataFrame, "map") else described.applymap
        )
        num_table = elementwise(_format_cell).to_string()
    num_str = "Numerical Column Summary:\n" + num_table + "\n\n"

    # Categorical data summary
    cat_columns = df.select_dtypes(include=[np.object_]).columns
    cat_summary_list = []

    for col in cat_columns:
        unique_values = df[col].unique()
        if len(unique_values) < 10:
            # Few enough distinct values to list them all
            cat_summary_list.append(f"'{col}': {', '.join(map(str, unique_values))}")
        else:
            cat_summary_list.append(f"'{col}': {df[col].nunique()} unique values")

    cat_str = "Categorical Column Summary:\n" + "\n".join(cat_summary_list) + "\n\n"

    # Missing values summary
    nan_summary = df.isnull().sum().rename("missing_values").to_frame()
    nan_str = "Missing Values Column Summary:\n" + nan_summary.to_string() + "\n"

    # Combine the summaries into one structured string
    return col_names_str + num_str + cat_str + nan_str

table_chat_agent

PandasEvalTool ¶

TableChatAgent(config) ¶

pandas_eval(msg) ¶

handle_message_fallback(msg) ¶

dataframe_summary(df) ¶

`PandasEvalTool` ¶

`TableChatAgent(config)` ¶

`pandas_eval(msg)` ¶

`handle_message_fallback(msg)` ¶

`dataframe_summary(df)` ¶