AskPandas is an open-source Python library that lets you query and transform CSV data using natural language, powered by free, local open-source LLMs via Ollama. No API keys, no cloud, no cost.
pip install askpandas
# macOS/Linux
curl -fsSL https://ollama.com/install.sh | sh
# Windows: Download from https://ollama.com/download
ollama pull phi3:mini # Very small, very fast
ollama serve
import askpandas as ap
# Set up AI
llm = ap.OllamaLLM(model_name="phi3:mini")
ap.set_llm(llm)
# Load your data
df = ap.DataFrame("your_data.csv")
# Ask questions in plain English!
result = df.chat("What is the total revenue?")
print(result)
import askpandas as ap
import pandas as pd
# Create sample data
data = {
'product': ['Apple', 'Banana', 'Orange'],
'price': [2.50, 1.00, 1.50],
'quantity': [100, 200, 150]
}
df = pd.DataFrame(data)
df['revenue'] = df['price'] * df['quantity']
# Create AskPandas DataFrame
sales_df = ap.DataFrame(df)
# AI-powered analysis
result = sales_df.chat("What is the total revenue?")
# Output: Total Revenue: $675.00
# More complex queries
result = sales_df.chat("Show me the top 3 products by revenue")
# Output: Product analysis with rankings
result = sales_df.chat("Calculate average price by product")
# Output: Average Price: $1.67
# Basic questions
df.chat("What is the average price?")
# Output: Average Price: $1.67
df.chat("Show me the top 5 customers by revenue")
# Output: Customer rankings with revenue amounts
df.chat("How many sales were made in each region?")
# Output: Regional sales breakdown
# Complex analysis
df.chat("""
Analyze our sales performance:
1. Calculate total revenue by month
2. Show the trend over time
3. Identify the best performing products
4. Create a visualization
""")
# Output: Comprehensive analysis with insights
# Charts are created automatically
df.chat("Create a bar chart of sales by region")
df.chat("Plot revenue trends over time")
df.chat("Show correlation between price and quantity")
df.chat("Display distribution of customer ages")
# Automatic data assessment
df.chat("Check for missing values and duplicates")
df.chat("Identify outliers in numeric columns")
df.chat("Clean column names and standardize formats")
df.chat("Validate data types and suggest improvements")
# Work with multiple files
customers = ap.DataFrame("customers.csv")
orders = ap.DataFrame("orders.csv")
products = ap.DataFrame("products.csv")
# Cross-dataset insights
ap.chat("""
Customer analysis:
1. Join customers with their orders
2. Calculate lifetime value by segment
3. Show purchase patterns
4. Identify high-value customers
""", customers, orders, products)
import askpandas as ap
# Load sales data
sales = ap.DataFrame("sales_data.csv")
# Comprehensive sales report
sales.chat("What is our total revenue?")
# Output: Total Revenue: $78,586.11
sales.chat("Show me the top 3 products by revenue")
# Output: Product rankings with revenue amounts
sales.chat("Calculate average order value by region")
# Output: Regional performance metrics
sales.chat("How many sales were made in each region?")
# Output: Regional sales breakdown
# Customer behavior analysis
customers = ap.DataFrame("customers.csv")
transactions = ap.DataFrame("transactions.csv")
ap.chat("""
Customer behavior insights:
1. Customer lifetime value analysis
2. Purchase frequency patterns
3. Churn prediction factors
4. Customer satisfaction metrics
5. Personalized marketing recommendations
""", customers, transactions)
# Financial data processing
financial = ap.DataFrame("financial_data.csv")
financial.chat("""
Financial performance review:
1. Profit and loss analysis
2. Cash flow trends
3. Expense categorization
4. Budget vs actual comparison
5. Financial ratios and KPIs
6. Risk assessment and recommendations
""")
# Research data analysis
research = ap.DataFrame("research_data.csv")
research.chat("""
Statistical analysis:
1. Descriptive statistics for all variables
2. Correlation analysis between key factors
3. Hypothesis testing results
4. Outlier detection and treatment
5. Data distribution visualizations
6. Statistical significance testing
""")
import askpandas as ap
# Set your preferences
ap.set_config(
verbose=True, # See what's happening
plot_style="seaborn", # Beautiful charts
output_dir="my_analysis", # Save results here
max_execution_time=120, # Allow longer analysis
enable_history=True # Track all queries
)
# Create custom charts
from askpandas.visualization.charts import create_bar_chart, save_plot
# Custom bar chart
fig = create_bar_chart(
df.df,
x_col="category",
y_col="value",
title="My Custom Chart",
figsize=(12, 8)
)
# Save with high quality
save_plot(fig, "custom_chart.png", dpi=300)
# Get help with your queries
query = "Show me sales trends"
analysis = ap.analyze_query(query)
print(f"Query type: {analysis['primary_category']}")
# Get suggestions
suggestions = ap.get_query_examples('visualization')
print("Try these:", suggestions[:3])
# Validate your query
validation = ap.validate_query(query, df.columns)
if validation['is_valid']:
    print("✅ Query is valid!")
# ✅ Good - Specific and clear
df.chat("Calculate total revenue by month for 2024, excluding returns")
# ❌ Avoid - Too vague
df.chat("Analyze this data")
# ✅ Good - Step-by-step analysis
df.chat("""
1. Filter data for Q4 2024
2. Group by product category
3. Calculate sum of revenue
4. Sort by revenue descending
5. Show top 10 results
""")
# ✅ Good - Include context
df.chat("Show customer retention rate, considering customers who made purchases in both 2023 and 2024")
# For speed and basic analysis
llm = ap.OllamaLLM(model_name="phi3:mini") # Fastest
# For better quality and complex queries
llm = ap.OllamaLLM(model_name="mistral:7b") # Balanced
# For best results (slower)
llm = ap.OllamaLLM(model_name="llama3.2:13b") # Highest quality
# Clean your data first
df = ap.DataFrame("messy_data.csv")
# Ask AskPandas to help clean it
df.chat("""
Help me clean this data:
1. Identify and handle missing values
2. Remove duplicates
3. Fix data type issues
4. Standardize column names
5. Show me what was cleaned
""")
# Then analyze the clean data
df.chat("Now analyze the cleaned data for insights")
pip install askpandas
pip install "askpandas[full]"
git clone https://github.com/irfanalidv/AskPandas
cd AskPandas
pip install -e ".[dev]"
- ✅ macOS - Native support with Apple Silicon optimization
- ✅ Linux - Full compatibility with all distributions
- ✅ Windows - Complete support with WSL2 recommended
- ✅ Cloud - Works on Google Colab, AWS, Azure, etc.
"No LLM configured" error?
# Make sure Ollama is running
ollama serve
# Check if model is downloaded
ollama list
Slow responses?
# Try a smaller model
ollama pull phi3:mini
# Close other applications to free memory
Installation issues?
# Update pip
pip install --upgrade pip
# Install with specific Python version
python3.9 -m pip install askpandas
# Run the interactive demo
python simple_demo.py
# Try the configuration setup
python simple_config.py
- fake_sample.csv - Small sample for testing
- comprehensive_sample.csv - Larger dataset for practice
- Create your own CSV files and start analyzing!
"AskPandas reduced my data exploration time from hours to minutes. I can now focus on insights instead of coding."
"I can analyze complex datasets without learning Python syntax. Natural language queries are a game-changer!"
"Perfect for exploratory data analysis. I can quickly test hypotheses and generate visualizations for papers."
"Learning data analysis has never been easier. AskPandas makes complex concepts accessible."
- Jupyter notebook integration
- More visualization options (Plotly, Bokeh)
- SQL query generation
- Data pipeline automation
- Enterprise features
- Advanced ML integration
- Real-time data streaming
- Community plugins
- 📖 Documentation: GitHub Wiki
- 🐛 Bug Reports: GitHub Issues
- 💬 Discussions: GitHub Discussions
- ⭐ Star: GitHub Repository
MIT License - Use freely for personal and commercial projects!
- Ollama Team - Making local AI accessible
- HuggingFace - Open-source AI models
- Pandas Community - Amazing data tools
- Open Source Contributors - Building the future together
Want to see everything in action? Run our comprehensive demo:
# Clone the repository
git clone https://github.com/irfanalidv/AskPandas.git
cd AskPandas
# Run the complete demonstration
python final_working_demo.py
This demo showcases:
- ✅ DataFrame Creation & Analysis - Real data processing
- ✅ Data Quality & Cleaning - Automatic column standardization
- ✅ AI-Powered Queries - Natural language analysis
- ✅ Multi-Dataset Analysis - Joining and complex queries
- ✅ Configuration Management - Customizable settings
- ✅ Query Intelligence - Automatic query categorization
import askpandas as ap
import pandas as pd
# Create test data
data = {'name': ['Alice', 'Bob'], 'age': [25, 30], 'salary': [50000, 60000]}
df = pd.DataFrame(data)
ap_df = ap.DataFrame(df)
# Test basic methods
print(f"Shape: {ap_df.shape()}") # Output: Shape: (2, 3)
print(f"Columns: {ap_df.columns()}") # Output: Columns: ['name', 'age', 'salary']
# Get comprehensive info
print(ap_df.info()) # Output: Detailed DataFrame information
# Statistical description
print(ap_df.describe()) # Output: Statistical summary
🚀 Ready to transform your data analysis? Install AskPandas today!
pip install askpandas
Made with ❤️ by Md Irfan Ali