1. 개요
- LLM으로 Pandas 코딩 할 수 있음
- 데이터 분석 코딩 몰라도 대화형으로 구현 가능할 날이 곧 올듯
2. LangChain
https://python.langchain.com/docs/integrations/toolkits/pandas/
3. LlamaIndex
https://docs.llamaindex.ai/en/stable/examples/query_engine/pandas_query_engine/
4. codebook
1) LangChain
# LangChain: natural-language querying of a pandas DataFrame via an agent.
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
import pandas as pd
import os

# NOTE(review): embedding the API key in source (even empty) is unsafe —
# prefer exporting OPENAI_API_KEY in the environment before running.
os.environ["OPENAI_API_KEY"] = ''

df = pd.read_csv("toydata.csv")
df.info()

# Agent that turns natural-language questions into pandas code run on `df`.
agent = create_pandas_dataframe_agent(
    ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    df,
    verbose=True,  # print intermediate reasoning and generated pandas code
    agent_type=AgentType.OPENAI_FUNCTIONS,  # OpenAI function-calling tool format
)
agent.invoke("how many rows are there?")
2) LlamaIndex
!pip install llama-index llama-index-experimental
# LlamaIndex: PandasQueryEngine demo on a tiny in-memory DataFrame.
import logging
import sys

from IPython.display import Markdown, display
import pandas as pd
# PandasQueryEngine now lives in the llama-index-experimental package
# (installed above); `llama_index.query_engine.pandas` is the legacy path.
from llama_index.experimental.query_engine import PandasQueryEngine

# Log engine activity (including the generated pandas code) to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Test on some sample data
df = pd.DataFrame(
    {
        "city": ["Toronto", "Tokyo", "Berlin"],
        "population": [2930000, 13960000, 3645000],
    }
)

query_engine = PandasQueryEngine(df=df, verbose=True)
response = query_engine.query(
    "What is the city with the highest population?",
)
display(Markdown(f"{response}"))
# get pandas python instructions
print(response.metadata["pandas_instruction_str"])
# Same workflow on a real CSV: ask the engine for the correlation between
# two survey columns after coercing them to numeric.
df = pd.read_csv('toydata.csv')
# Korean-named source columns → numeric copies (unparsable values become NaN).
for target, source in (('var1', '건조감'), ('var2', '눈곱')):
    df[target] = pd.to_numeric(df[source], errors='coerce')
df.info()

query_engine = PandasQueryEngine(df=df, verbose=True)
response = query_engine.query(
    "What is the correlation between 'var1' and 'var2'?",
)
display(Markdown(f"{response}"))
# Show the pandas code the engine generated for this query.
print(response.metadata["pandas_instruction_str"])