Microsoft Foundry Local brings Azure AI Foundry capabilities directly to your Windows 11 development environment, enabling privacy-preserving, low-latency AI development with enterprise-grade tools. This session covers complete installation, configuration, and hands-on deployment of popular models including phi, qwen, deepseek, and GPT-OSS-20B.
By the end of this session, you will:
- Install and configure Foundry Local on Windows 11
- Master CLI commands and configuration options
- Understand model caching strategies for optimal performance
- Successfully run phi, qwen, deepseek, and GPT-OSS-20B models
- Create your first AI application using Foundry Local
- Windows 11: Version 22H2 or later
- RAM: 16GB minimum, 32GB recommended
- Storage: 50GB free space for models and cache
- Hardware: NPU- or GPU-enabled device preferred (Copilot+ PC or NVIDIA GPU)
- Network: High-speed internet for model downloads
# Verify Windows version
winver
# Check available memory
Get-ComputerInfo | Select-Object CsTotalPhysicalMemory
# Verify PowerShell version (5.1+ required)
$PSVersionTable.PSVersion
# Set up Python environment (recommended)
py -m venv .venv
.venv\Scripts\activate
# Install required dependencies
pip install openai foundry-local-sdk

Install Foundry Local using Winget or download the installer from GitHub:
# Winget (Windows)
winget install --id Microsoft.FoundryLocal --source winget
# Alternatively: download installer from the official repo
# https://aka.ms/foundry-local-installer

# Check Foundry Local version
foundry --version
# Verify CLI accessibility and categories
foundry --help
foundry model --help
foundry cache --help
foundry service --help

# General command structure
foundry [category] [command] [options]
# Main categories
foundry model # manage and run models
foundry service # manage the local service
foundry cache # manage local model cache
# Common commands
foundry model list # list available models
foundry model run phi-4-mini # run a model (downloads as needed)
foundry cache ls # list cached models

Foundry Local implements intelligent model caching to optimize performance and storage:
# Show cache contents
foundry cache ls
# Optional: change cache directory (advanced)
foundry cache cd "C:\\FoundryLocal\\Cache"
foundry cache ls

# List catalog and run Phi (auto-downloads best variant for your hardware)
foundry model list
foundry model run phi-4-mini

# Run Qwen2.5 models (downloads on first run)
foundry model run qwen2.5-7b
foundry model run qwen2.5-14b

# Run DeepSeek model
foundry model run deepseek-r1-7b

# Run the latest OpenAI open-source model (requires recent Foundry Local and sufficient GPU VRAM)
foundry model run gpt-oss-20b
# Check version if you encounter errors (requires 0.6.87+ per docs)
foundry --version

Create a production-ready chat application using the OpenAI SDK with Foundry Local integration, following the patterns from our Sample 01.
# chat_quickstart.py (Sample 01 pattern)
import os
import sys
from openai import OpenAI
# The Foundry Local SDK is optional: record whether it could be imported so
# the rest of the script can choose between SDK-managed and manual setup.
try:
    from foundry_local import FoundryLocalManager
except ImportError:
    FOUNDRY_SDK_AVAILABLE = False
    print("⚠️ Install foundry-local-sdk: pip install foundry-local-sdk")
else:
    FOUNDRY_SDK_AVAILABLE = True
def create_client():
    """Create an OpenAI-compatible client and choose the model to talk to.

    Configuration is resolved in this order:

    1. Azure OpenAI, when both ``AZURE_OPENAI_ENDPOINT`` and
       ``AZURE_OPENAI_API_KEY`` are set. ``MODEL`` names the deployment and
       ``AZURE_OPENAI_API_VERSION`` selects the API version
       (default ``2024-08-01-preview``).
    2. Foundry Local through ``FoundryLocalManager``, when the
       foundry-local-sdk package could be imported. ``MODEL`` is the alias
       (default ``phi-4-mini``).
    3. Manual configuration from ``BASE_URL`` / ``API_KEY`` / ``MODEL``, used
       when the SDK is missing or fails.

    Returns:
        tuple: ``(client, model)`` where ``client`` is an ``OpenAI`` (or
        ``AzureOpenAI``) instance and ``model`` is the model id or deployment
        name to pass to ``client.chat.completions.create``.
    """
    # Check for Azure OpenAI configuration
    azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
    azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")

    if azure_endpoint and azure_api_key:
        # Imported here so the Foundry-only paths do not depend on it.
        from openai import AzureOpenAI

        # Azure OpenAI path: MODEL is the *deployment* name.
        model = os.environ.get("MODEL", "your-deployment-name")
        api_version = os.environ.get(
            "AZURE_OPENAI_API_VERSION", "2024-08-01-preview"
        )
        # AzureOpenAI builds the /openai/deployments/<name>/... route and
        # sends the key in the `api-key` header, which a plain OpenAI client
        # pointed at "<endpoint>/openai" does not do.
        client = AzureOpenAI(
            azure_endpoint=azure_endpoint.rstrip("/"),
            api_key=azure_api_key,
            api_version=api_version,
        )
        print(f"🌐 Using Azure OpenAI with model: {model}")
        return client, model

    # Foundry Local path with SDK management
    alias = os.environ.get("MODEL", "phi-4-mini")

    if FOUNDRY_SDK_AVAILABLE:
        try:
            # Use FoundryLocalManager for proper service management
            manager = FoundryLocalManager(alias)
            model_info = manager.get_model_info(alias)
            client = OpenAI(
                base_url=manager.endpoint,
                api_key=manager.api_key,
            )
            model = model_info.id
            print(f"🏠 Using Foundry Local SDK with model: {model}")
            return client, model
        except Exception as e:
            # Deliberate best-effort: any SDK failure falls through to the
            # manual configuration below instead of aborting the script.
            print(f"⚠️ Foundry SDK failed ({e}), using manual configuration")

    # Fallback to manual configuration
    # Strip a trailing slash so the joined URL never contains "//v1".
    base_url = os.environ.get("BASE_URL", "http://localhost:8000").rstrip("/")
    # An empty key would produce a bare "Authorization: Bearer " header, which
    # the HTTP client can reject as malformed; a placeholder avoids that for
    # local servers that do not check the key.
    api_key = os.environ.get("API_KEY") or "not-needed"
    model = alias
    client = OpenAI(
        base_url=f"{base_url}/v1",
        api_key=api_key,
    )
    print(f"🔧 Manual configuration with model: {model}")
    return client, model
def _complete(client, model, messages):
    """Send ``messages`` to ``model`` and return the assistant's reply text."""
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=500,
        temperature=0.7,
    )
    return response.choices[0].message.content


def main():
    """Run the chat client.

    If a prompt is passed on the command line
    (``python chat_quickstart.py "Explain what Foundry Local is"``), it is
    sent once and the reply is printed. With no arguments an interactive
    loop starts; it keeps the conversation history and ends when the user
    types ``quit`` or closes the input (Ctrl+C, or end-of-file: Ctrl+Z then
    Enter on Windows).
    """
    client, model = create_client()

    # One-shot mode: treat all command-line arguments as a single prompt.
    prompt = " ".join(sys.argv[1:]).strip()
    if prompt:
        try:
            reply = _complete(client, model, [{"role": "user", "content": prompt}])
        except Exception as e:
            print(f"Error: {e}\n")
        else:
            print(f"Assistant: {reply}\n")
        return

    print("Foundry Local Chat Interface (type 'quit' to exit)\n")
    conversation_history = []

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input or Ctrl+C: leave the loop without a traceback.
            print()
            break

        if user_input.lower() == "quit":
            break
        if not user_input:
            # Nothing typed; do not send an empty message to the model.
            continue

        # Add user message to history
        conversation_history.append({"role": "user", "content": user_input})
        try:
            assistant_message = _complete(client, model, conversation_history)
        except Exception as e:
            # Drop the unanswered turn so the history stays a valid
            # user/assistant alternation for the next request.
            conversation_history.pop()
            print(f"Error: {e}\n")
            continue

        conversation_history.append(
            {"role": "assistant", "content": assistant_message}
        )
        print(f"Assistant: {assistant_message}\n")
if __name__ == "__main__":
    main()

# Ensure the model is running in another terminal
foundry model run phi-4-mini
# Option 1: Using FoundryLocalManager (recommended)
python chat_quickstart.py "Explain what Foundry Local is"
# Option 2: Manual configuration with environment variables
# (the `set VAR=value` lines below are cmd.exe syntax; in PowerShell use $env:VAR = "value")
set BASE_URL=http://localhost:8000
set MODEL=phi-4-mini
set API_KEY=
python chat_quickstart.py "Write a welcome message"
# Option 3: Azure OpenAI configuration
set AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
set AZURE_OPENAI_API_KEY=your-api-key
set AZURE_OPENAI_API_VERSION=2024-08-01-preview
set MODEL=your-deployment-name
python chat_quickstart.py "Hello from Azure OpenAI"

# Issue: "Could not use Foundry SDK" warning
pip install foundry-local-sdk
# Or set environment variables for manual configuration
# Issue: Connection refused
foundry service status
foundry service ps # Check loaded models
# Issue: Model not found
foundry model list
foundry model run phi-4-mini
# Issue: Cache problems or low disk space
foundry cache ls
foundry cache remove <model-name> # remove a cached model you no longer need
# Issue: GPT-OSS-20B not supported on your version
foundry --version
winget upgrade --id Microsoft.FoundryLocal
# Test API endpoint
curl http://localhost:8000/v1/models

# Quick CPU and process view
Get-Process | Sort-Object -Property CPU -Descending | Select-Object -First 10
Get-Counter '\Processor(_Total)\% Processor Time' -SampleInterval 1 -MaxSamples 10

| Variable | Description | Default | Required |
|---|---|---|---|
| MODEL | Model alias or name | phi-4-mini | No |
| BASE_URL | Foundry Local base URL | http://localhost:8000 | No |
| API_KEY | API key (usually not needed for local) | "" | No |
| AZURE_OPENAI_ENDPOINT | Azure OpenAI endpoint | - | For Azure |
| AZURE_OPENAI_API_KEY | Azure OpenAI API key | - | For Azure |
| AZURE_OPENAI_API_VERSION | Azure API version | 2024-08-01-preview | No |

- Use OpenAI SDK: Prefer OpenAI SDK over raw HTTP requests for better maintainability
- FoundryLocalManager: Use the official SDK for service management when available
- Error Handling: Implement proper fallback strategies for production applications
- Upgrade Regularly: Keep Foundry Local updated to access new models and fixes
- Start Small: Begin with smaller models (Phi mini, Qwen 7B) and scale up
- Monitor Resources: Track CPU/GPU/memory while tuning prompts and settings
# deploy-models.ps1
# Models to start, in order; `foundry model run` downloads a model as needed.
$models = "phi-4-mini", "qwen2.5-7b"

foreach ($model in $models) {
    Write-Host "Running $model..."
    & foundry model run $model --verbose
}

# sdk_integration_test.py (matching Sample 01 pattern)
import os
from openai import OpenAI
from foundry_local import FoundryLocalManager
def test_model_integration(model_alias):
    """Send one short prompt to ``model_alias`` through the OpenAI SDK.

    A ``FoundryLocalManager`` is created for the alias, an ``OpenAI`` client
    is pointed at the manager's endpoint, and a single chat completion is
    requested. The reply (or the error) is printed.

    Returns:
        bool: True when the completion succeeded, False on any exception.
    """
    succeeded = False
    try:
        # Use FoundryLocalManager for proper setup
        foundry = FoundryLocalManager(model_alias)
        info = foundry.get_model_info(model_alias)
        sdk_client = OpenAI(base_url=foundry.endpoint, api_key=foundry.api_key)

        # Test basic completion
        prompt = [{"role": "user", "content": "Say hello and state your model name."}]
        completion = sdk_client.chat.completions.create(
            model=info.id,
            messages=prompt,
            max_tokens=50,
        )
        print(f"✅ {model_alias}: {completion.choices[0].message.content}")
        succeeded = True
    except Exception as e:
        print(f"❌ {model_alias}: {e}")
    return succeeded
# Test multiple models
models_to_test = ["phi-4-mini", "qwen2.5-7b"]
for model in models_to_test:
    test_model_integration(model)

# health_check.py
from openai import OpenAI
from foundry_local import FoundryLocalManager
def comprehensive_health_check():
    """Check the Foundry Local service and probe every model it lists.

    The check first creates a ``FoundryLocalManager`` for ``phi-4-mini`` and
    lists the models exposed by the service, then sends a tiny chat
    completion to each listed model. One status line is printed per step.

    Returns:
        bool: True only when the service answered and every listed model
        completed its probe; False if the service check or any model probe
        raised an exception.
    """
    try:
        # Initialize with a common model
        manager = FoundryLocalManager("phi-4-mini")
        client = OpenAI(
            base_url=manager.endpoint,
            api_key=manager.api_key,
        )

        # 1. Check service connectivity
        available_models = [model.id for model in client.models.list().data]
        print(f"✅ Service healthy - {len(available_models)} models available")
    except Exception as e:
        print(f"❌ Service check failed: {e}")
        return False

    # 2. Test each available model; keep going after a failure so every
    #    model is reported, but remember that the overall check failed.
    all_models_ok = True
    for model_id in available_models:
        try:
            client.chat.completions.create(
                model=model_id,
                messages=[{"role": "user", "content": "Test"}],
                max_tokens=10,
            )
        except Exception as e:
            all_models_ok = False
            print(f"❌ {model_id}: {e}")
        else:
            print(f"✅ {model_id}: Working")

    return all_models_ok
comprehensive_health_check()

- Get started with Foundry Local: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-local/get-started
- CLI reference and commands overview: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-local/reference/reference-cli
- OpenAI SDK integration: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-local/how-to/how-to-integrate-with-inference-sdks
- Compile Hugging Face models: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-local/how-to/how-to-compile-hugging-face-models
- Microsoft Foundry Local GitHub: https://github.com/microsoft/Foundry-Local
- OpenAI Python SDK: https://github.com/openai/openai-python
- Sample 01: Quick Chat via OpenAI SDK: samples/01/README.md
- Sample 02: Advanced SDK Integration: samples/02/README.md