Agent that supports asking queries about a tabular dataset, internally
represented as a Pandas dataframe. The TableChatAgent is configured with a
dataset, which can be a Pandas df, file or URL. The delimiter/separator
is auto-detected. In response to a user query, the Agent's LLM generates a Pandas
expression (involving a dataframe df) to answer the query.
The expression is passed via the pandas_eval tool/function-call,
which is handled by the Agent's pandas_eval method. This method evaluates
the expression and returns the result as a string.
WARNING: This Agent should be used only with trusted input, as it can execute system
commands. 
The full_eval flag is False by default, which means that the input is sanitized
against the most common code injection attack vectors. full_eval may be set to True to
disable sanitization entirely. Both settings should be used with caution.
  
    
            
              Bases: ToolMessage
        Tool/function to evaluate a pandas expression involving a dataframe df
  
  
     
 
              TableChatAgent(config)
    
            
              Bases: ChatAgent
        Agent for chatting with a tabular dataset, held internally as a Pandas dataframe.
                  
                    Source code in langroid/agent/special/table_chat_agent.py
                    |  | def __init__(self, config: TableChatAgentConfig):
    if isinstance(config.data, pd.DataFrame):
        df = config.data
    else:
        df = read_tabular_data(config.data, config.separator)
    df.columns = df.columns.str.strip().str.replace(" +", "_", regex=True)
    self.df = df
    summary = dataframe_summary(df)
    config.system_message = config.system_message.format(summary=summary)
    super().__init__(config)
    self.config: TableChatAgentConfig = config
    logger.info(
        f"""TableChatAgent initialized with dataframe of shape {self.df.shape}
        and columns: 
        {self.df.columns}
        """
    )
    # enable the agent to use and handle the PandasEvalTool
    self.enable_message(PandasEvalTool)
 | 
 
  
            pandas_eval(msg)
    
        Handle a PandasEvalTool message by evaluating the expression field
    and returning the result.
Args:
    msg (PandasEvalTool): The tool-message to handle.
    Returns:
    
      
        
| Name | Type | Description | 
      
      
          
| str | str | 
                The result of running the code along with any print output. | 
      
    
            
              Source code in langroid/agent/special/table_chat_agent.py
              |  | def pandas_eval(self, msg: PandasEvalTool) -> str:
    """
    Handle a PandasEvalTool message by evaluating the `expression` field
        and returning the result.
    Args:
        msg (PandasEvalTool): The tool-message to handle.
    Returns:
        str: The result of running the code along with any print output.
    """
    self.sent_expression = True
    exprn = msg.expression
    vars = {"df": self.df}
    # Create a string-based I/O stream
    code_out = io.StringIO()
    # Temporarily redirect standard output to our string-based I/O stream
    sys.stdout = code_out
    # Evaluate the last line and get the result;
    # SECURITY MITIGATION: Eval input is sanitized by default to prevent most
    # common code injection attack vectors.
    try:
        if not self.config.full_eval:
            exprn = sanitize_command(exprn)
        code = compile(exprn, "<calc>", "eval")
        eval_result = eval(code, vars, {})
    except Exception as e:
        eval_result = f"ERROR: {type(e)}: {e}"
    if eval_result is None:
        eval_result = ""
    # Always restore the original standard output
    sys.stdout = sys.__stdout__
    # If df has been modified in-place, save the changes back to self.df
    self.df = vars["df"]
    # Get the resulting string from the I/O stream
    print_result = code_out.getvalue() or ""
    sep = "\n" if print_result else ""
    # Combine the print and eval results
    result = f"{print_result}{sep}{eval_result}"
    if result == "":
        result = "No result"
    # Return the result
    return result
 | 
 
     
 
            handle_message_fallback(msg)
    
        Handle various LLM deviations
            
              Source code in langroid/agent/special/table_chat_agent.py
              |  | def handle_message_fallback(
    self, msg: str | ChatDocument
) -> str | ChatDocument | None:
    """Handle various LLM deviations"""
    if isinstance(msg, ChatDocument) and msg.metadata.sender == lr.Entity.LLM:
        if msg.content.strip() == DONE and self.sent_expression:
            # LLM sent an expression (i.e. used the `pandas_eval` tool)
            # but upon receiving the results, simply said DONE without
            # narrating the result as instructed.
            return """
                You forgot to PRESENT the answer to the user's query
                based on the results from `pandas_eval` tool.
            """
        if self.sent_expression:
            # LLM forgot to say DONE
            self.sent_expression = False
            return DONE + " " + PASS
        else:
            # LLM forgot to use the `pandas_eval` tool
            return """
                You forgot to use the `pandas_eval` tool/function 
                to find the answer.
                Try again using the `pandas_eval` tool/function.
                """
    return None
 | 
 
     
 
   
     
 
            dataframe_summary(df)
    
        Generate a structured summary for a pandas DataFrame containing numerical
and categorical values.
Parameters:
    
      
        
          | Name | Type | Description | Default | 
      
      
          
            | df | DataFrame | 
                The input DataFrame to summarize. | required | 
      
    
    Returns:
    
      
        
| Name | Type | Description | 
      
      
          
| str | str | 
                A nicely structured and formatted summary string. | 
      
    
            
              Source code in langroid/agent/special/table_chat_agent.py
              |  | @no_type_check
def dataframe_summary(df: pd.DataFrame) -> str:
    """
    Generate a structured summary for a pandas DataFrame containing numerical
    and categorical values.
    Args:
        df (pd.DataFrame): The input DataFrame to summarize.
    Returns:
        str: A nicely structured and formatted summary string.
    """
    # Column names display
    col_names_str = (
        "COLUMN NAMES:\n" + " ".join([f"'{col}'" for col in df.columns]) + "\n\n"
    )
    # Numerical data summary
    num_summary = df.describe().map(lambda x: "{:.2f}".format(x))
    num_str = "Numerical Column Summary:\n" + num_summary.to_string() + "\n\n"
    # Categorical data summary
    cat_columns = df.select_dtypes(include=[np.object_]).columns
    cat_summary_list = []
    for col in cat_columns:
        unique_values = df[col].unique()
        if len(unique_values) < 10:
            cat_summary_list.append(f"'{col}': {', '.join(map(str, unique_values))}")
        else:
            cat_summary_list.append(f"'{col}': {df[col].nunique()} unique values")
    cat_str = "Categorical Column Summary:\n" + "\n".join(cat_summary_list) + "\n\n"
    # Missing values summary
    nan_summary = df.isnull().sum().rename("missing_values").to_frame()
    nan_str = "Missing Values Column Summary:\n" + nan_summary.to_string() + "\n"
    # Combine the summaries into one structured string
    summary_str = col_names_str + num_str + cat_str + nan_str
    return summary_str
 |