Structured Data QA#

Overview#


Example of running natural language queries on structured data
This example uses byte-genie API endpoints to process/prepare the data,
and a langchain agent to execute queries.

import libraries#

import os
import openai
import pandas as pd
from langchain.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType
from langchain.llms import OpenAI
import utils.common
from utils.logging import logger
from utils.byte_genie import ByteGenie

init byte-genie#

bg = ByteGenie(
    secrets_file='secrets.json',
    task_mode='sync',
    calc_mode='async',
    verbose=1,
)

read OpenAI api key#

secrets = utils.common.read_secrets(secrets_file='secrets.json')
openai.api_key = secrets['OPENAI_KEY']
os.environ['OPENAI_API_KEY'] = openai.api_key

sample titanic data#

data = [{'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '22.0', 'sibsp': '1', 'parch': '0', 'fare': '7.25', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '38.0', 'sibsp': '1', 'parch': '0', 'fare': '71.2833', 'embarked': 'C', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'C', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '26.0', 'sibsp': '0', 'parch': '0', 'fare': '7.925', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '35.0', 'sibsp': '1', 'parch': '0', 'fare': '53.1', 'embarked': 'S', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '35.0', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '8.4583', 'embarked': 'Q', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '54.0', 'sibsp': '0', 'parch': '0', 'fare': '51.8625', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'E', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '2.0', 'sibsp': '3', 'parch': '1', 'fare': '21.075', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '27.0', 'sibsp': '0', 'parch': '2', 'fare': '11.1333', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '14.0', 'sibsp': '1', 'parch': '0', 'fare': '30.0708', 'embarked': 'C', 'class': 'Second', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '4.0', 'sibsp': '1', 'parch': '1', 'fare': '16.7', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'G', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '58.0', 'sibsp': '0', 'parch': '0', 'fare': '26.55', 'embarked': 'S', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '20.0', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '39.0', 'sibsp': '1', 'parch': '5', 'fare': '31.275', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '14.0', 'sibsp': '0', 'parch': '0', 'fare': '7.8542', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '55.0', 'sibsp': '0', 'parch': '0', 'fare': '16.0', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '2.0', 'sibsp': '4', 'parch': '1', 'fare': '29.125', 'embarked': 'Q', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '13.0', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '31.0', 'sibsp': '1', 'parch': '0', 'fare': '18.0', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.225', 'embarked': 'C', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '35.0', 'sibsp': '0', 'parch': '0', 'fare': '26.0', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'male', 'age': '34.0', 'sibsp': '0', 'parch': '0', 'fare': '13.0', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'D', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '15.0', 'sibsp': '0', 'parch': '0', 'fare': '8.0292', 'embarked': 'Q', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'male', 'age': '28.0', 'sibsp': '0', 'parch': '0', 'fare': '35.5', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'A', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '8.0', 'sibsp': '3', 'parch': '1', 'fare': '21.075', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '38.0', 'sibsp': '1', 'parch': '5', 'fare': '31.3875', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.225', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '19.0', 'sibsp': '3', 'parch': '2', 'fare': '263.0', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.8792', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.8958', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '40.0', 'sibsp': '0', 'parch': '0', 'fare': '27.7208', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': 'nan', 'sibsp': '1', 'parch': '0', 'fare': '146.5208', 'embarked': 'C', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'B', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.75', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '66.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '28.0', 'sibsp': '1', 'parch': '0', 'fare': '82.1708', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '42.0', 'sibsp': '1', 'parch': '0', 'fare': '52.0', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.2292', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '18.0', 'sibsp': '2', 'parch': '0', 'fare': '18.0', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '14.0', 'sibsp': '1', 'parch': '0', 'fare': '11.2417', 'embarked': 'C', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '40.0', 'sibsp': '1', 'parch': '0', 'fare': '9.475', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '2', 'sex': 'female', 'age': '27.0', 'sibsp': '1', 'parch': '0', 'fare': '21.0', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.8958', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '3.0', 'sibsp': '1', 'parch': '2', 'fare': '41.5792', 'embarked': 'C', 'class': 'Second', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '19.0', 'sibsp': '0', 'parch': '0', 'fare': '7.8792', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '1', 'parch': '0', 'fare': '15.5', 'embarked': 'Q', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.75', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '2', 'parch': '0', 'fare': '21.6792', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '18.0', 'sibsp': '1', 'parch': '0', 'fare': '17.8', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '7.0', 'sibsp': '4', 'parch': '1', 'fare': '39.6875', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '7.8', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '49.0', 'sibsp': '1', 'parch': '0', 'fare': '76.7292', 'embarked': 'C', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'D', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '29.0', 'sibsp': '1', 'parch': '0', 'fare': '26.0', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '65.0', 'sibsp': '0', 'parch': '1', 'fare': '61.9792', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'B', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '1', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '35.5', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '28.5', 'sibsp': '0', 'parch': '0', 'fare': '7.2292', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '5.0', 'sibsp': '1', 'parch': '2', 'fare': '27.75', 'embarked': 'S', 'class': 'Second', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '11.0', 'sibsp': '5', 'parch': '2', 'fare': '46.9', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '22.0', 'sibsp': '0', 'parch': '0', 'fare': '7.2292', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '38.0', 'sibsp': '0', 'parch': '0', 'fare': '80.0', 'embarked': 'nan', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'B', 'embark_town': 'nan', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '45.0', 'sibsp': '1', 'parch': '0', 'fare': '83.475', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '4.0', 'sibsp': '3', 'parch': '2', 'fare': '27.9', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '27.7208', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '1', 'parch': '1', 'fare': '15.2458', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '29.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'F', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '19.0', 'sibsp': '0', 'parch': '0', 'fare': '8.1583', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '17.0', 'sibsp': '4', 'parch': '2', 'fare': '7.925', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '26.0', 'sibsp': '2', 'parch': '0', 'fare': '8.6625', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '32.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '16.0', 'sibsp': '5', 'parch': '2', 'fare': '46.9', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '73.5', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '26.0', 'sibsp': '1', 'parch': '0', 'fare': '14.4542', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'male', 'age': '32.0', 'sibsp': '0', 'parch': '0', 'fare': '56.4958', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}]
data = pd.DataFrame(data)

crate langchain agent#

agent = create_pandas_dataframe_agent(
    ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    data,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)

test some queries with langchain agent#

first query#

output = agent.run("which 'class' had the most survivors?")
"""
Output:
'The "Third" class had the most survivors.'
"""

second query#

output = agent.run("how many women were there in the class with most survivors?")
"""
Output:
'There were 19 women in the class with the most survivors.'
"""

third query#

output = agent.run("what is the difference in number of male and female survivors by class?")
"""
output:
'The difference in the number of male and female survivors by class is as follows:\n\n- First class: 111111 female survivors and 0100000100 male survivors\n- Second class: 11011111 female survivors and 101000 male survivors\n- Third class: 1110011011101011010 female survivors and 00000000010000000000010001 male survivors'
"""

fourth query#

output = agent.run("what is the difference in survival rate between men and women by class?")
"""
Output:
"To calculate the difference in survival rate between men and women by class, 
we can group the data by the 'sex' and 'class' columns and then calculate the mean of the 'survived' column for each group. 
The difference in survival rate can be obtained by subtracting the survival rate of men from the survival rate of women for each class.\n\n
Here's the code to calculate the difference in survival rate:\n\n
```python\nimport pandas as pd\n\n# Group the data by 'sex' and 'class' columns and calculate the mean of 'survived' column\ngrouped = df.groupby(['sex', 'class'])['survived'].mean()\n\n# Calculate the difference in survival rate between men and women for each class\ndifference = grouped.loc['female'] - grouped.loc['male']\n\ndifference\n```\n\nThis will give you the difference in survival rate between men and women for each class."
"""

Observations on langchain agent results#


As we can see, the langchain agent worked well for the first two, and relatively simpler, queries,
but failed to provide a similarly good answer for the last two, and relative more complex, queries.
Now, we will test if we can improve the langchain agent's ability to answer this more complex question
by first processing data using byte-genie API.
We will do that in following steps:
* generate meta-data for input data
* use generated meta-data to filter out the columns needed to answer the query
* filter data over the relevant columns
* aggregate relevant data to make it easier to answer the query
* run the query on langchain agent

Leverage Byte-Genie to improve LLM-agent performance#

set query#

query = "what is the difference in number of male and female survivors by class?"

generate meta-data for input data#

resp = bg.generate_metadata(
    data=[{'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '22.0', 'sibsp': '1', 'parch': '0', 'fare': '7.25', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '38.0', 'sibsp': '1', 'parch': '0', 'fare': '71.2833', 'embarked': 'C', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'C', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '26.0', 'sibsp': '0', 'parch': '0', 'fare': '7.925', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '35.0', 'sibsp': '1', 'parch': '0', 'fare': '53.1', 'embarked': 'S', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '35.0', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '8.4583', 'embarked': 'Q', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '54.0', 'sibsp': '0', 'parch': '0', 'fare': '51.8625', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'E', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '2.0', 'sibsp': '3', 'parch': '1', 'fare': '21.075', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '27.0', 'sibsp': '0', 'parch': '2', 'fare': '11.1333', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '14.0', 'sibsp': '1', 'parch': '0', 'fare': '30.0708', 'embarked': 'C', 'class': 'Second', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '4.0', 'sibsp': '1', 'parch': '1', 'fare': '16.7', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'G', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '58.0', 'sibsp': '0', 'parch': '0', 'fare': '26.55', 'embarked': 'S', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '20.0', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '39.0', 'sibsp': '1', 'parch': '5', 'fare': '31.275', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '14.0', 'sibsp': '0', 'parch': '0', 'fare': '7.8542', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '55.0', 'sibsp': '0', 'parch': '0', 'fare': '16.0', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '2.0', 'sibsp': '4', 'parch': '1', 'fare': '29.125', 'embarked': 'Q', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '13.0', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '31.0', 'sibsp': '1', 'parch': '0', 'fare': '18.0', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.225', 'embarked': 'C', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '35.0', 'sibsp': '0', 'parch': '0', 'fare': '26.0', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'male', 'age': '34.0', 'sibsp': '0', 'parch': '0', 'fare': '13.0', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'D', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '15.0', 'sibsp': '0', 'parch': '0', 'fare': '8.0292', 'embarked': 'Q', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'male', 'age': '28.0', 'sibsp': '0', 'parch': '0', 'fare': '35.5', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'A', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '8.0', 'sibsp': '3', 'parch': '1', 'fare': '21.075', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '38.0', 'sibsp': '1', 'parch': '5', 'fare': '31.3875', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.225', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '19.0', 'sibsp': '3', 'parch': '2', 'fare': '263.0', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.8792', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.8958', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '40.0', 'sibsp': '0', 'parch': '0', 'fare': '27.7208', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': 'nan', 'sibsp': '1', 'parch': '0', 'fare': '146.5208', 'embarked': 'C', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'B', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.75', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '66.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '28.0', 'sibsp': '1', 'parch': '0', 'fare': '82.1708', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '42.0', 'sibsp': '1', 'parch': '0', 'fare': '52.0', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.2292', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '18.0', 'sibsp': '2', 'parch': '0', 'fare': '18.0', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '14.0', 'sibsp': '1', 'parch': '0', 'fare': '11.2417', 'embarked': 'C', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '40.0', 'sibsp': '1', 'parch': '0', 'fare': '9.475', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '2', 'sex': 'female', 'age': '27.0', 'sibsp': '1', 'parch': '0', 'fare': '21.0', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.8958', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '3.0', 'sibsp': '1', 'parch': '2', 'fare': '41.5792', 'embarked': 'C', 'class': 'Second', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '19.0', 'sibsp': '0', 'parch': '0', 'fare': '7.8792', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '8.05', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '1', 'parch': '0', 'fare': '15.5', 'embarked': 'Q', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '7.75', 'embarked': 'Q', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Queenstown', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '2', 'parch': '0', 'fare': '21.6792', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '18.0', 'sibsp': '1', 'parch': '0', 'fare': '17.8', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '7.0', 'sibsp': '4', 'parch': '1', 'fare': '39.6875', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '7.8', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '49.0', 'sibsp': '1', 'parch': '0', 'fare': '76.7292', 'embarked': 'C', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'D', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '29.0', 'sibsp': '1', 'parch': '0', 'fare': '26.0', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '65.0', 'sibsp': '0', 'parch': '1', 'fare': '61.9792', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'B', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '1', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '35.5', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '28.5', 'sibsp': '0', 'parch': '0', 'fare': '7.2292', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '5.0', 'sibsp': '1', 'parch': '2', 'fare': '27.75', 'embarked': 'S', 'class': 'Second', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '11.0', 'sibsp': '5', 'parch': '2', 'fare': '46.9', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '22.0', 'sibsp': '0', 'parch': '0', 'fare': '7.2292', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '1', 'sex': 'female', 'age': '38.0', 'sibsp': '0', 'parch': '0', 'fare': '80.0', 'embarked': 'nan', 'class': 'First', 'who': 'woman', 'adult_male': 'False', 'deck': 'B', 'embark_town': 'nan', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': '45.0', 'sibsp': '1', 'parch': '0', 'fare': '83.475', 'embarked': 'S', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'C', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '4.0', 'sibsp': '3', 'parch': '2', 'fare': '27.9', 'embarked': 'S', 'class': 'Third', 'who': 'child', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '1', 'sex': 'male', 'age': 'nan', 'sibsp': '0', 'parch': '0', 'fare': '27.7208', 'embarked': 'C', 'class': 'First', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '3', 'sex': 'male', 'age': 'nan', 'sibsp': '1', 'parch': '1', 'fare': '15.2458', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'yes', 'alone': 'False'}, {'survived': '1', 'pclass': '2', 'sex': 'female', 'age': '29.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'woman', 'adult_male': 'False', 'deck': 'F', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '19.0', 'sibsp': '0', 'parch': '0', 'fare': '8.1583', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '1', 'pclass': '3', 'sex': 'female', 'age': '17.0', 'sibsp': '4', 'parch': '2', 'fare': '7.925', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'False'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '26.0', 'sibsp': '2', 'parch': '0', 'fare': '8.6625', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '32.0', 'sibsp': '0', 'parch': '0', 'fare': '10.5', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'female', 'age': '16.0', 'sibsp': '5', 'parch': '2', 'fare': '46.9', 'embarked': 'S', 'class': 'Third', 'who': 'woman', 'adult_male': 'False', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'False'}, {'survived': '0', 'pclass': '2', 'sex': 'male', 'age': '21.0', 'sibsp': '0', 'parch': '0', 'fare': '73.5', 'embarked': 'S', 'class': 'Second', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'no', 'alone': 'True'}, {'survived': '0', 'pclass': '3', 'sex': 'male', 'age': '26.0', 'sibsp': '1', 'parch': '0', 'fare': '14.4542', 'embarked': 'C', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Cherbourg', 'alive': 'no', 'alone': 'False'}, {'survived': '1', 'pclass': '3', 'sex': 'male', 'age': '32.0', 'sibsp': '0', 'parch': '0', 'fare': '56.4958', 'embarked': 'S', 'class': 'Third', 'who': 'man', 'adult_male': 'True', 'deck': 'nan', 'embark_town': 'Southampton', 'alive': 'yes', 'alone': 'True'}],
    data_context='this a dataset about titanic passengers and its survivors',
)
metadata = resp.get_data()
"""
meta_data
[{'column_description': "The 'survived' column indicates whether a data entry represents a survival outcome, with unique values '0' for not survived and '1' for survived.", 'column_name': 'survived'}, {'column_description': "The 'pclass' column represents the passenger class, with possible values of 1, 2, or 3.", 'column_name': 'pclass'}, {'column_description': "The 'sex' column in the data has unique values of 'male' and 'female', providing information about the gender of individuals.", 'column_name': 'sex'}, {'column_description': "Column 'age' contains numerical values representing the ages of individuals, with some missing values ('nan') included.", 'column_name': 'age'}, {'column_description': "The 'sibsp' column contains the number of siblings/spouses aboard the Titanic, with unique values ranging from 0 to 5.", 'column_name': 'sibsp'}, {'column_description': "The 'parch' column represents the number of parents/children aboard the Titanic. It contains values 0, 1, 2, or 5.", 'column_name': 'parch'}, {'column_description': "The 'fare' column in the data table contains unique values representing the fares paid for a particular service or product.", 'column_name': 'fare'}, {'column_description': "This column represents the port of embarkation for passengers, with possible values being 'S' (Southampton), 'C' (Cherbourg), 'Q' (Queenstown), or 'nan' (missing data).", 'column_name': 'embarked'}, {'column_description': "This column indicates the class of the data, with unique values: 'Third', 'First', and 'Second'.", 'column_name': 'class'}, {'column_description': "A column containing values denoting the person's demographic, with unique values for 'man', 'woman', and 'child'.", 'column_name': 'who'}, {'column_description': "This column indicates whether individuals are adult males, with unique values of 'True' and 'False'.", 'column_name': 'adult_male'}, {'column_description': "The 'deck' column in the data table contains unique values representing deck classifications (nan, A, B, C, D, E, F, G).", 'column_name': 'deck'}, {'column_description': "Column 'embark_town' indicates the town where passengers boarded the ship, with unique values including 'Southampton', 'Cherbourg', 'Queenstown', and 'nan'.", 'column_name': 'embark_town'}, {'column_description': "Column 'alive' indicates whether the data entry refers to an entity that is currently alive or not. Possible values are 'yes' for alive and 'no' for not alive.", 'column_name': 'alive'}, {'column_description': "The 'alone' column indicates if an individual was alone ('True') or not ('False').", 'column_name': 'alone'}]
"""

filter columns relevant to the input query#

resp = bg.filter_columns(
    metadata=metadata,
    query=query,
)
relevant_cols = resp.get_data()
"""
relevant_cols
['survived', 'sex', 'class']
"""

aggreate data to make it easier to answer query#

resp = bg.aggregate_data(
    data=data[relevant_cols].to_dict('records'),
    query=query,
)
agg_data = resp.get_data()
"""
agg_data
[{'class': 'First', 'difference': '-76', 'female_survivors': '91', 'male_survivors': '15'}, {'class': 'Second', 'difference': '-58', 'female_survivors': '70', 'male_survivors': '12'}, {'class': 'Third', 'difference': '-25', 'female_survivors': '72', 'male_survivors': '47'}]
"""

create an LLM agent on agg_data#

agent = create_pandas_dataframe_agent(
    ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    pd.DataFrame(agg_data),
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)

run query on agg_data#

output = agent.run(query)
"""
Output:
'The difference in the number of male and female survivors by class is as follows:\n\n- For the "First" class, there were 91 female survivors and 15 male survivors, resulting in a difference of -76.\n- For the "Second" class, there were 70 female survivors and 12 male survivors, resulting in a difference of -58.\n- For the "Third" class, there were 72 female survivors and 47 male survivors, resulting in a difference of -25.'
"""
"""
Observation:
We can see that after a few processing steps that filter the data over relevant columns, 
and aggregate it in a way suitable for the query, the same langchain agent is able to find answer the query,
for which it previously provided an incorrect answer, when used directly on the raw data.
The data processing steps taken in this example are quite generic, and could be used with any structured data, 
to improve question-answering accuracy. 
"""

A more robust approach#

A more robust approach to improve LLM agent performance would be to generate multiple variants of the input query,
#

and follow the same process to filter and aggregate data for each query, and generate a final answer using the LLM agent,
#

and finally present the user either with multiple answers, or the best possible answer amongst them#

Generate multiple variants of input query#

resp = bg.create_query_variants(
    metadata=data[relevant_cols].to_dict('records'),
    query=query,
)
query_variants = resp.get_data()

Here we can execute the same intermediate data processing steps using ByteGenie, and running the query via an LLM agent, as before#

rank answers#


After running multiple query variants, let's say we have the following answers:
['73', 'difference (female - male) in first class: 46; difference (female - male) in second class: 53; difference (female - male) in third class: 25', 'difference in male and female survivors is 46']
to our original query:
'what is the difference in number of male and female survivors by class?'
based on our filtered data
[{'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '1', 'sex': 'male', 'class': 'Second'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'male', 'class': 'Third'}]
We can rank these answers as
query = 'what is the difference in number of male and female survivors by class?'
answers = ['73', 'difference (female - male) in first class: 46; difference (female - male) in second class: 53; difference (female - male) in third class: 25', 'difference in male and female survivors is 46']
data = [{'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '1', 'sex': 'male', 'class': 'Second'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'First'}, {'survived': '1', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'female', 'class': 'Third'}, {'survived': '0', 'sex': 'male', 'class': 'Second'}, {'survived': '0', 'sex': 'male', 'class': 'Third'}, {'survived': '1', 'sex': 'male', 'class': 'Third'}]
resp = bg.rank_answers_to_query(
    data=data,
    query=query,
    answers=answers
)
ranked_answers = resp.get_data()
"""
ranked_answers
[{'answer': 'difference (female-male) in first class: 46; difference (female-male) in second class: 53; difference (female-male) in third class: 25', 'rank': 1, 'rank_explanation': 'This answer provides the specific differences in the number of male and female survivors by class.'}, 
{'answer': 'difference in male and female survivors is 46', 'rank': 2, 'rank_explanation': 'This answer provides a general difference in the number of male and female survivors, but does not specify the breakdown by class.'}, 
{'answer': '73', 'rank': 3, 'rank_explanation': 'This answer does not provide any additional information or breakdown by class.'}]
"""