Agent that supports asking queries about a tabular dataset, internally
represented as a Pandas dataframe. The `TableChatAgent` is configured with a
dataset, which can be a Pandas df, file or URL. The delimiter/separator
is auto-detected. In response to a user query, the Agent's LLM generates a Pandas
expression (involving a dataframe `df`) to answer the query.
The expression is passed via the `pandas_eval` tool/function-call,
which is handled by the Agent's `pandas_eval` method. This method evaluates
the expression and returns the result as a string.
Bases: ToolMessage
Tool/function to evaluate a pandas expression involving a dataframe df
TableChatAgent(config)
Bases: ChatAgent
Agent for chatting with a tabular dataset (held internally as a Pandas dataframe).
Source code in langroid/agent/special/table_chat_agent.py
def __init__(self, config: TableChatAgentConfig):
    """
    Set up the agent around the dataset described by `config`.

    The dataset is used directly when `config.data` is already a DataFrame;
    otherwise it is loaded via `read_tabular_data`. Column names are
    normalized, a summary of the data is substituted into the system
    message, and the `PandasEvalTool` is enabled.

    Args:
        config (TableChatAgentConfig): Agent configuration; its
            `system_message` is overwritten with the formatted version.
    """
    table = config.data
    if not isinstance(table, pd.DataFrame):
        table = read_tabular_data(table, config.separator)

    # Strip surrounding whitespace from column names and collapse each run of
    # spaces into a single underscore.
    table.columns = table.columns.str.strip().str.replace(" +", "_", regex=True)
    self.df = table

    # Fill the `summary` placeholder of the system message before the base
    # class consumes the config.
    config.system_message = config.system_message.format(
        summary=dataframe_summary(table)
    )
    super().__init__(config)
    self.config: TableChatAgentConfig = config

    logger.info(
        f"""TableChatAgent initialized with dataframe of shape {self.df.shape}
and columns:
{self.df.columns}
"""
    )

    # Let the agent both use and handle the PandasEvalTool.
    self.enable_message(PandasEvalTool)
|
pandas_eval(msg)
Handle a PandasEvalTool message by evaluating the `expression` field
and returning the result.
Args:
msg (PandasEvalTool): The tool-message to handle.
Returns:
| Name | Type | Description |
| --- | --- | --- |
| str | str | The result of running the code along with any print output. |
Source code in langroid/agent/special/table_chat_agent.py
def pandas_eval(self, msg: PandasEvalTool) -> str:
    """
    Handle a PandasEvalTool message by evaluating the `expression` field
    and returning the result.

    Anything written to standard output during evaluation is captured and
    prepended to the evaluation result. Evaluation errors are not raised;
    they are reported in the returned string, prefixed with "ERROR:".

    Args:
        msg (PandasEvalTool): The tool-message to handle.

    Returns:
        str: The result of running the code along with any print output,
            or "No result" when both are empty.
    """
    # Function-scope import keeps this block self-contained.
    import contextlib

    # Remember that the LLM has used the tool; handle_message_fallback
    # consults this flag.
    self.sent_expression = True
    exprn = msg.expression
    local_vars = {"df": self.df}
    code_out = io.StringIO()

    # SECURITY NOTE(review): `exprn` is produced by the LLM and is evaluated
    # as-is. Treat it as untrusted input; consider validating or sandboxing
    # the expression before evaluation.
    #
    # redirect_stdout restores whatever stream was active before the call
    # (not unconditionally sys.__stdout__), and does so even if a
    # non-Exception BaseException such as KeyboardInterrupt propagates.
    with contextlib.redirect_stdout(code_out):
        try:
            eval_result = pd.eval(exprn, local_dict=local_vars)
        except Exception as e:
            eval_result = f"ERROR: {type(e)}: {e}"

    if eval_result is None:
        eval_result = ""

    # If `df` was rebound or modified during evaluation, keep the changes.
    self.df = local_vars["df"]

    # Combine the captured print output and the evaluation result.
    print_result = code_out.getvalue()
    sep = "\n" if print_result else ""
    result = f"{print_result}{sep}{eval_result}"
    return result or "No result"
|
handle_message_fallback(msg)
Handle various LLM deviations
Source code in langroid/agent/special/table_chat_agent.py
def handle_message_fallback(
    self, msg: str | ChatDocument
) -> str | ChatDocument | None:
    """
    Handle various LLM deviations.

    Only messages that are `ChatDocument`s sent by the LLM are handled;
    anything else yields None.

    Args:
        msg (str | ChatDocument): The message that was not otherwise handled.

    Returns:
        str | ChatDocument | None: A corrective instruction for the LLM,
            `DONE + " " + PASS` when only the DONE signal was missing,
            or None when the message is not an LLM ChatDocument.
    """
    is_llm_doc = isinstance(msg, ChatDocument) and msg.metadata.sender == lr.Entity.LLM
    if not is_llm_doc:
        return None

    only_said_done = msg.content.strip() == DONE

    if not self.sent_expression:
        # LLM never used the `pandas_eval` tool.
        return """
You forgot to use the `pandas_eval` tool/function
to find the answer.
Try again using the `pandas_eval` tool/function.
"""

    if only_said_done:
        # LLM used the `pandas_eval` tool, received the results, and then
        # just said DONE without narrating the result as instructed.
        return """
You forgot to PRESENT the answer to the user's query
based on the results from `pandas_eval` tool.
"""

    # LLM used the tool and responded, but forgot to say DONE:
    # reset the flag and supply the DONE signal for it.
    self.sent_expression = False
    return DONE + " " + PASS
|
dataframe_summary(df)
Generate a structured summary for a pandas DataFrame containing numerical
and categorical values.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df | DataFrame | The input DataFrame to summarize. | required |

Returns:
| Name | Type | Description |
| --- | --- | --- |
| str | str | A nicely structured and formatted summary string. |
Source code in langroid/agent/special/table_chat_agent.py
@no_type_check
def dataframe_summary(df: pd.DataFrame) -> str:
    """
    Generate a structured summary for a pandas DataFrame containing numerical
    and categorical values.

    The summary has four sections: column names, descriptive statistics of
    the numerical columns (formatted to two decimals), the values (or the
    number of distinct values) of each object-dtype column, and the count of
    missing values per column.

    Args:
        df (pd.DataFrame): The input DataFrame to summarize. May have no
            numerical columns, or no columns at all.

    Returns:
        str: A nicely structured and formatted summary string.
    """
    # Column names display
    col_names_str = (
        "COLUMN NAMES:\n" + " ".join(f"'{col}'" for col in df.columns) + "\n\n"
    )

    # Numerical data summary.
    # Restrict to numeric columns explicitly: with no numeric columns,
    # `describe()` falls back to a count/unique/top/freq summary whose string
    # entries cannot be formatted with "{:.2f}", and it raises outright on a
    # DataFrame without columns.
    numeric_df = df.select_dtypes(include=[np.number])
    if numeric_df.shape[1] == 0:
        num_body = "(no numerical columns)"
    else:
        stats = numeric_df.describe()
        two_decimals = "{:.2f}".format
        # `DataFrame.map` is not available in older pandas versions, where the
        # same element-wise operation is called `applymap`.
        if hasattr(pd.DataFrame, "map"):
            formatted = stats.map(two_decimals)
        else:
            formatted = stats.applymap(two_decimals)
        num_body = formatted.to_string()
    num_str = "Numerical Column Summary:\n" + num_body + "\n\n"

    # Categorical data summary: list the values when there are fewer than 10
    # distinct ones, otherwise only report how many there are.
    cat_summary_list = []
    for col in df.select_dtypes(include=[np.object_]).columns:
        unique_values = df[col].unique()
        if len(unique_values) < 10:
            cat_summary_list.append(f"'{col}': {', '.join(map(str, unique_values))}")
        else:
            cat_summary_list.append(f"'{col}': {df[col].nunique()} unique values")
    cat_str = "Categorical Column Summary:\n" + "\n".join(cat_summary_list) + "\n\n"

    # Missing values summary
    nan_summary = df.isnull().sum().rename("missing_values").to_frame()
    nan_str = "Missing Values Column Summary:\n" + nan_summary.to_string() + "\n"

    # Combine the summaries into one structured string
    return col_names_str + num_str + cat_str + nan_str
|